当前开源ClickHouse的物化视图是同步视图,对于使用场景有如下限制:
为了更好的支持多表关联的场景,减少对于数据导入影响,一个比较好的解决方法是使用异步视图,其用途主要包括:
视图类型 | 单表聚合 | 多表关联 | 查询改写 | 刷新策略 | 数据导入影响 |
---|---|---|---|---|---|
异步物化视图 | 是 | 是 | 是 | 异步刷新 | 无 |
同步物化视图 | 是 | 否 | 是 | 导入同步刷新 | 有 |
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [database.]<mv_name> [REFRESH [ASYNC [START (start_time)] [EVERY INTERVAL refresh_interval] | SYNC | MANUAL]] AS <query_statement>; SYSTEM [START VIEW | STOP VIEW] [database.]<mv_name>; ALTER TABLE [database.]<mv_name> MODIFY REFRESH EVERY INTERVAL ... ; REFRESH MATERIALIZED VIEW [database.]<mv_name>;
以下是一个使用异步物化视图的使用示例。
CREATE DATABASE business; -- 源数据表1 CREATE TABLE business.goods( create_date DateTime, series_id UInt64, goods_id Int64, item_name String, price Float64 ) ENGINE = CnchMergeTree PARTITION BY (toDate(create_date), series_id) ORDER BY (goods_id, item_name) SETTINGS index_granularity = 8192; -- 源数据表2 CREATE TABLE business.order_list( order_id Int64, client_id Int64, goods_id Int64, shop_id UInt64, order_date DateTime ) ENGINE = CnchMergeTree PARTITION BY (toDate(order_date), shop_id) ORDER BY (order_id,goods_id, client_id) SETTINGS index_granularity = 8192; -- 目标聚合表 CREATE TABLE business.order_statistics (date Date, encoded_shop_id UInt64, encoded_goods_series_id UInt64, item_id Int64, total_price SimpleAggregateFunction(sum, Float64), max_price SimpleAggregateFunction(max, Float64)) ENGINE = CnchAggregatingMergeTree() PARTITION BY (date, toUInt64(encoded_shop_id + 10)) ORDER BY (date, encoded_shop_id, encoded_goods_series_id, item_id) SETTINGS index_granularity = 8192; -- 异步物化视图 CREATE MATERIALIZED VIEW business.order_report_mv TO business.order_statistics REFRESH ASYNC START('2023-12-05 10:00:00') EVERY(INTERVAL 1 MINUTE) AS SELECT date_trunc('month', toDate(order.order_date)) as date, multiIf(order.shop_id >= 2702, toUInt64(30000), shop_id <= 2701, toUInt64(40000), toUInt64(50000)) as encoded_shop_id, multiIf(good.series_id = 1, toUInt64(100), good.series_id = 2, toUInt64(101), good.series_id = 3, toUInt64(103), toUInt64(10)) as encoded_goods_series_id, order.order_id as item_id, sum(good.price) as total_price, max(good.price) as max_price FROM business.order_list as order INNER JOIN business.goods as good ON good.goods_id = order.goods_id WHERE date > '2023-04-01' and date <= '2023-12-01' and order.shop_id >= 2702 GROUP BY date, encoded_shop_id, encoded_goods_series_id, item_id;