部分列更新--ByteHouse云数仓版-火山引擎

文档中心

立即注册

ByteHouse云数仓版

DML

部分列更新

ByteHouse云数仓版的唯一键表支持部分列更新操作，适用于：数据更新操作来自多个数据源，每个数据源更新数据行的一部分列的数据；存量数据做 ETL 操作，更新存量数据的一部分列的数据。本文为您介绍部分列更新的工作原理和使用示例。

功能概述

部分列更新使用功能列_update_columns_来指定希望更新哪些列。

_update_columns_中的内容是需要更新的列，以逗号分隔各列名，引擎在解析时不会处理列名前后的特殊字符，如空格、Tab、换行符等，且不支持正则表达式。
_update_columns_支持主动指定、自动解析两种模式。当主动指定_update_columns_为''时表示更新所有列。

_update_columns_列的更新方式遵循如下规则：

非Map类型：列属于_update_columns_时直接更新，允许更新为默认值。
Map类型：partial_update_enable_merge_map = true时对有旧值的key进行更新，新key直接写入，未更新的旧key保留原值；为false时直接替换value。
Nullable类型：使用 REPLACE_IF_NOT_NULL 语义时，写入 null 时保留旧值。

工作原理

部分列更新包含如下步骤：

根据增量数据以及唯一键索引定位到存量数据所在位置。
读取存量数据的相关列（下方示例图中红色部分）。
将存量数据对应行的 delete bitmap 列设置为 1，进行标记删除。
增量数据生成。

以下为一个示例，示例中表为分区级唯一，PK 为 partition key，UK 为 unique key，delete bitmap 为标记删除列。

Delete bitmap 列为 1 的数据被标记删除，在增量数据写入后，经过部分列更新后，最新数据为：

2020-10-29，1，m，1001
2020-10-29，2，n，1002
2020-10-30，1，z，2001
2020-10-30，2，k，0

使用示例

partial_update_enable_merge_map = false

-- 引擎默认保证 unique key 在分区内的唯一性
CREATE TABLE t8 (
  k Int32,
  c1 Int32,
  c2 Nullable(Float64),
  c3 Nullable(String),
  c4 Nullable(Int64),
  m1 Map(String, Int32),
  a1 Array(String))
ENGINE = CnchMergeTree
UNIQUE KEY k ORDER BY k SETTINGS enable_unique_partial_update = 1, 
partial_update_enable_merge_map = 0;

-- 可以不显式的设置，session 级别 enable_unique_partial_update 默认值为 1
SET enable_unique_partial_update = 1; 

-- 不指定 _update_columns_，自动解析填充 _update_columns_ 为'k,c1,c2,m1,a1'
-- 等价于INSERT INTO t8 (k, c1, c2, m1, a1, _update_columns_) VALUES (1, 10, 3.14, {'k1':1}, ['hello'], 'k,c1,c2,m1,a1');
INSERT INTO t8 (k, c1, c2, m1, a1) VALUES (1, 10, 3.14, {'k1':1}, ['hello']);
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1───────┬─a1────────┐
│ 1 │ 10 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k1':1} │ ['hello'] │
└───┴────┴──────┴──────┴──────┴──────────┴───────────┘

-- 主动指定 _update_columns_, 标粗字体代表已更新
INSERT INTO t8 (k, c1, m1, a1, _update_columns_) VALUES (1, 20, {'k2':2}, ['world'], 'k,c1,m1,a1');
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1───────┬─a1────────┐
│ 1 │ 20 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k2':2} │ ['world'] │
└───┴────┴──────┴──────┴──────┴──────────┴───────────┘

-- 主动指定 _update_columns_，不在_update_columns_中的列，就算有数据也不会更新
INSERT INTO t8 (k, c1, m1, a1, _update_columns_) VALUES (1, 200, {'k2':20}, ['world20'], 'k');
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1───────┬─a1────────┐
│ 1 │ 20 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k2':2} │ ['world'] │
└───┴────┴──────┴──────┴──────┴──────────┴───────────┘

-- 不指定 _update_columns_，自动解析填充 _update_columns_ 为'k,c1,c3,m1'
-- 等价于INSERT INTO t8 (k, c1, c3, m1, _update_columns_) VALUES (1, 0, 'foo', {'k3':3}, 'k,c1,c3,m1');
INSERT INTO t8 (k, c1, c3, m1) VALUES (1, 0, 'foo', {'k3':3});
┌─k─┬─c1─┬───c2─┬─c3──┬───c4─┬─m1───────┬─a1────────┐
│ 1 │  0 │ 3.14 │ foo │ ᴺᵁᴸᴸ │ {'k3':3} │ ['world'] │
└───┴────┴──────┴─────┴──────┴──────────┴───────────┘

-- 当主动指定 _update_columns_ 为''时表示更新所有列
INSERT INTO t8 (k, c1, c2, c3, c4, m1, a1, _update_columns_) VALUES (1, 10, 31.4, 'goo', 15, {'k4': 4}, ['hello', 'world'], '');
┌─k─┬─c1─┬───c2─┬─c3──┬─c4─┬─m1───────┬─a1────────────────┐
│ 1 │ 10 │ 31.4 │ goo │ 15 │ {'k4':4} │ ['hello','world'] │
└───┴────┴──────┴─────┴────┴──────────┴───────────────────┘

-- 更灵活的用法：行级别指定 _update_columns_
INSERT INTO t8 (k, c1, c2, c3, c4, m1, a1, _update_columns_) FORMAT JSONEachRow {"k":"1", "c1": "100", "_update_columns_":"k,c1"} {"k":"1", "c2": "314.0", "_update_columns_":"k,c2"};
┌─k─┬──c1─┬──c2─┬─c3──┬─c4─┬─m1───────┬─a1────────────────┐
│ 1 │ 100 │ 314 │ goo │ 15 │ {'k4':4} │ ['hello','world'] │
└───┴─────┴─────┴─────┴────┴──────────┴───────────────────┘

partial_update_enable_merge_map = true

-- 引擎默认保证 unique key 在分区内的唯一性
CREATE TABLE t8m (
  k Int32,
  c1 Int32,
  c2 Nullable(Float64),
  c3 Nullable(String),
  c4 Nullable(Int64),
  m1 Map(String, Int32),
  a1 Array(String))
ENGINE = CnchMergeTree
UNIQUE KEY k ORDER BY k SETTINGS enable_unique_partial_update = 1, 
partial_update_enable_merge_map = 1;

-- 可以不显式的设置，session 级别 enable_unique_partial_update 默认值为 1
SET enable_unique_partial_update = 1; 

-- 不指定 _update_columns_，自动解析填充 _update_columns_ 为'k,c1,c2,m1,a1'
-- 等价于INSERT INTO t8m (k, c1, c2, m1, a1, _update_columns_) VALUES (1, 10, 3.14, {'k1':1}, ['hello'], 'k,c1,c2,m1,a1');
INSERT INTO t8m (k, c1, c2, m1, a1) VALUES (1, 10, 3.14, {'k1':1}, ['hello']);
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1───────┬─a1────────┐
│ 1 │ 10 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k1':1} │ ['hello'] │
└───┴────┴──────┴──────┴──────┴──────────┴───────────┘

-- 主动指定 _update_columns_, 标粗字体代表已更新
INSERT INTO t8m (k, c1, m1, a1, _update_columns_) VALUES (1, 20, {'k2':2}, ['world'], 'k,c1,m1,a1');
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1──────────────┬─a1────────┐
│ 1 │ 20 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k1':1,'k2':2} │ ['world'] │
└───┴────┴──────┴──────┴──────┴─────────────────┴───────────┘

-- 主动指定 _update_columns_，不在_update_columns_中的列，就算有数据也不会更新
INSERT INTO t8m (k, c1, m1, a1, _update_columns_) VALUES (1, 200, {'k2':20}, ['world20'], 'k');
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1──────────────┬─a1────────┐
│ 1 │ 20 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k1':1,'k2':2} │ ['world'] │
└───┴────┴──────┴──────┴──────┴─────────────────┴───────────┘

-- 不指定 _update_columns_，自动解析填充 _update_columns_ 为'k,c1,c3,m1'
-- 等价于INSERT INTO t8m (k, c1, c3, m1, _update_columns_) VALUES (1, 0, 'foo', {'k3':3}, 'k,c1,c3,m1');
INSERT INTO t8m (k, c1, c3, m1) VALUES (1, 0, 'foo', {'k3':3});
┌─k─┬─c1─┬───c2─┬─c3──┬───c4─┬─m1──────────────────────┬─a1────────┐
│ 1 │  0 │ 3.14 │ foo │ ᴺᵁᴸᴸ │ {'k1':1,'k2':2,'k3':3} │ ['world'] │
└───┴────┴──────┴─────┴──────┴─────────────────────────┴───────────┘

-- 当主动指定 _update_columns_ 为''时表示更新所有列
INSERT INTO t8m (k, c1, c2, c3, c4, m1, a1, _update_columns_) VALUES (1, 10, 31.4, 'goo', 15, {'k4': 4}, ['hello', 'world'], '');
┌─k─┬─c1─┬───c2─┬─c3──┬─c4─┬─m1─────────────────────────────┬─a1────────────────┐
│ 1 │ 10 │ 31.4 │ goo │ 15 │ {'k1':1,'k2':2,'k3':3,'k4':4} │ ['hello','world'] │
└───┴────┴──────┴─────┴────┴────────────────────────────────┴───────────────────┘

-- 更灵活的用法：行级别指定 _update_columns_
INSERT INTO t8m (k, c1, c2, c3, c4, m1, a1, _update_columns_) FORMAT JSONEachRow {"k":"1", "c1": "100", "_update_columns_":"k,c1"} {"k":"1", "c2": "314.0", "_update_columns_":"k,c2"};
┌─k─┬──c1─┬──c2─┬─c3──┬─c4─┬─m1─────────────────────────────┬─a1────────────────┐
│ 1 │ 100 │ 314 │ goo │ 15 │ {'k1':1,'k2':2,'k3':3,'k4':4} │ ['hello','world'] │
└───┴─────┴─────┴─────┴────┴────────────────────────────────┴───────────────────┘

REPLACE_IF_NOT_NULL 语义

-- 引擎默认保证 unique key 在分区内的唯一性
CREATE TABLE t8n (
  k Int32,
  c1 Int32,
  c2 Nullable(Float64) REPLACE_IF_NOT_NULL,
  c3 Nullable(String),
  c4 Nullable(Int64),
  m1 Map(String, Int32),
  a1 Array(String))
ENGINE = CnchMergeTree
UNIQUE KEY k ORDER BY k SETTINGS enable_unique_partial_update = 1, 
partial_update_enable_merge_map = 1;

-- 可以不显式的设置，session 级别 enable_unique_partial_update 默认值为 1
SET enable_unique_partial_update = 1; 

-- 不指定 _update_columns_，自动解析填充 _update_columns_ 为'k,c1,c2,c3,m1,a1'
-- 等价于INSERT INTO t8n (k, c1, c2, c3, m1, a1, _update_columns_) VALUES (1, 10, 3.14, 'foo', {'k1':1}, ['hello'], 'k,c1,c2,c3,m1,a1');
INSERT INTO t8n (k, c1, c2, c3, m1, a1) VALUES (1, 10, 3.14, 'foo', {'k1':1}, ['hello']);
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1───────┬─a1────────┐
│ 1 │ 10 │ 3.14 │ foo │ ᴺᵁᴸᴸ │ {'k1':1} │ ['hello'] │
└───┴────┴──────┴──────┴──────┴──────────┴───────────┘

-- 不指定 _update_columns_，自动解析填充 _update_columns_ 为'k,c2,c3,m1,a1'
-- 对 c2 写入 null, 基于 REPLACE_IF_NOT_NULL 语义, 维持旧值 
-- 对 c3 写入 null, 数据列更新为 null, 标粗字体代表已更新
INSERT INTO t8n (k, c2, c3, m1, a1) VALUES (1, null, null, {'k2':2}, ['world']);
┌─k─┬─c1─┬───c2─┬─c3───┬───c4─┬─m1──────────────┬─a1────────┐
│ 1 │ 10 │ 3.14 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ {'k1':1,'k2':2} │ ['world'] │
└───┴────┴──────┴──────┴──────┴─────────────────┴───────────┘

Insert on duplicate key 语法

-- Attention：
-- 1. 需要开启部分列更新 enable_unique_partial_update = 1
-- 2. on duplicate action 所做的转换应该保证
    -- 转换前后具有相同的数据类型 or 转换前后的列都是数值类型
:) CREATE TABLE mysql_test_on_duplicate_action (
    `a` Int32,
    `b` Int32
)
ENGINE = CnchMergeTree
ORDER BY a
UNIQUE KEY a
SETTINGS enable_unique_partial_update = 1;

:) insert into mysql_test_on_duplicate_action values (1, 1);
:) select * from mysql_test_on_duplicate_action;
┌─a─┬─b─┐
│ 1 │ 1 │
└───┴───┘

:) insert into mysql_test_on_duplicate_action on duplicate key update b=4 values (1, 2), (2, 3);
:) select * from mysql_test_on_duplicate_action;
┌─a─┬─b─┐
│ 1 │ 4 │
│ 2 │ 3 │
└───┴───┘

最近更新时间：2025.12.11 10:05:53

这个页面对您有帮助吗？

有用

无用

ByteHouse云数仓版

功能概述 #

工作原理 #

使用示例 #

partial_update_enable_merge_map = false #

partial_update_enable_merge_map = true #

REPLACE_IF_NOT_NULL 语义 #