概述
拉链表:维护历史状态以及最新状态数据的表
作用场景
1. 数据量比较大。
2. 表中的部分字段会被更新,比如用户的地址,银行利率,订单的状态等。
3. 需要查看某一个时间点或者时间段的历史快照信息,比如,查看利率在历史某一个时间点的状态。
4. 变化的比例和频率不是很大,比如,总共有1000万的会员,每天新增和发生变化的有10万左右。
5. 如果对这张表每天都保留一份全量,那么每次全量中会保存很多不变的信息,对存储是极大的浪费;拉链历史表既能反映数据的历史状态,又可以最大程度地节省存储。
拉链表的更新
假设以天为粒度,以每天的最后一个状态为当天的最终状态。以一张订单表为例,如下是原始数据,即 2023/03/03 的订单状态明细(各列依次为:orderid、createdate、modifiedtime、status)
1 2023-03-03 2023-03-03 创建
2 2023-03-03 2023-03-03 创建
3 2023-03-03 2023-03-03 创建
建表并全量更新
-- Raw order source: an external table over tab-separated text files.
-- Columns are the order id, the creation date, the last modification date
-- and the order status. Both dates are kept as yyyy-MM-dd strings.
create external table if not exists orders (
    orderid int,
    createdate string,
    modifiedtime string,
    status string
)
row format delimited
    fields terminated by '\t'
location '/tmp/lalian/orders';
-- Daily increment table: each load writes the rows selected from orders
-- into the partition named after the load day (yyyy-MM-dd string).
-- "if not exists" keeps the script re-runnable, matching the orders DDL.
create table if not exists ods_orders_inc
(
    orderid      int,
    createdate   string,
    modifiedtime string,
    status       string
) partitioned by (day string)
    row format delimited fields terminated by '\t';
-- Initial full load: copy every row currently in orders into the
-- 2023-03-03 partition, replacing whatever that partition held before.
insert overwrite table ods_orders_inc partition (day = '2023-03-03')
select
    src.orderid,
    src.createdate,
    src.modifiedtime,
    src.status
from orders src;
-- Zipper (history) table: one row per version of an order.
-- start_time is the date the version became effective. end_time is the date
-- it was superseded, or 9999-12-31 while the version is still the current one.
-- "if not exists" keeps the script re-runnable, matching the orders DDL.
create table if not exists dws_orders_his
(
    orderid      int,
    createdate   string,
    modifiedtime string,
    status       string,
    start_time   string,
    end_time     string
)
    row format delimited fields terminated by '\t';
-- Seed the zipper table from the 2023-03-03 partition. Every row becomes an
-- open version: start_time is its modification date and end_time is the
-- 9999-12-31 sentinel that the merge statements treat as still current.
insert overwrite table dws_orders_his
select inc.orderid,
       inc.createdate,
       inc.modifiedtime,
       inc.status,
       inc.modifiedtime as start_time,
       '9999-12-31'     as end_time
from ods_orders_inc inc
where inc.day = '2023-03-03';
如下结果

2023/03/04 的订单状态明细
1 2023-03-03 2023-03-03 创建
2 2023-03-03 2023-03-03 创建
3 2023-03-03 2023-03-03 创建
1 2023-03-03 2023-03-04 支付
2 2023-03-03 2023-03-04 支付
4 2023-03-04 2023-03-04 创建
增量更新
-- Incremental load for 2023-03-04: keep only the rows of orders whose
-- modification date is that day. In the sample data a newly created order
-- carries the same creation and modification date, so it is picked up too.
insert overwrite table ods_orders_inc partition (day = '2023-03-04')
select
    src.orderid,
    src.createdate,
    src.modifiedtime,
    src.status
from orders src
where src.modifiedtime = '2023-03-04';
-- Zipper merge for 2023-03-04: rebuild dws_orders_his from two sources.
--   1. Every row of the day increment becomes a new open version
--      (start_time = modification date, end_time = 9999-12-31).
--   2. Every existing history row is kept, and a row that is still open is
--      closed with end_time = 2023-03-04 when its order appears in the increment.
-- Validity intervals are therefore half-open: start_time <= d < end_time.
-- Assumes days are merged in chronological order.
insert overwrite table dws_orders_his
select merged.orderid,
       merged.createdate,
       merged.modifiedtime,
       merged.status,
       merged.start_time,
       merged.end_time
from (
         select inc.orderid,
                inc.createdate,
                inc.modifiedtime,
                inc.status,
                inc.modifiedtime as start_time,
                '9999-12-31'     as end_time
         from ods_orders_inc inc
         where inc.day = '2023-03-04'
         union all
         select his.orderid,
                his.createdate,
                his.modifiedtime,
                his.status,
                his.start_time,
                case
                    when chg.orderid is not null and his.end_time > '2023-03-04' then '2023-03-04'
                    else his.end_time
                end as end_time
         from dws_orders_his his
                  -- One row per changed order, so the join cannot multiply history rows.
                  left join (select distinct orderid
                             from ods_orders_inc
                             where day = '2023-03-04') chg
                            on his.orderid = chg.orderid
         -- Versions already written by an earlier run of this same merge are
         -- dropped here and re-created by the first branch, which makes the
         -- statement safe to re-run. Rows without a start date are kept as-is.
         where his.start_time is null
            or his.start_time < '2023-03-04'
     ) merged
-- NOTE(review): a global order by funnels all rows through a single reducer in
-- Hive. It is kept so the stored rows match the listing shown in the article.
order by merged.orderid, merged.modifiedtime;
结果如下

2023/03/05 的订单状态明细
1 2023-03-03 2023-03-03 创建
2 2023-03-03 2023-03-03 创建
3 2023-03-03 2023-03-03 创建
1 2023-03-03 2023-03-04 支付
2 2023-03-03 2023-03-04 支付
4 2023-03-04 2023-03-04 创建
1 2023-03-03 2023-03-05 完成
3 2023-03-03 2023-03-05 支付
4 2023-03-04 2023-03-05 支付
5 2023-03-05 2023-03-05 创建
增量更新
-- Incremental load for 2023-03-05: keep only the rows of orders whose
-- modification date is that day. Orders created that day need no separate
-- condition, because the removed clause also required
-- modifiedtime = 2023-03-05 and was therefore already implied by this filter.
insert overwrite table ods_orders_inc partition (day = '2023-03-05')
select orderid,
       createdate,
       modifiedtime,
       status
from orders
where modifiedtime = '2023-03-05';
-- Zipper merge for 2023-03-05: rebuild dws_orders_his from two sources.
--   1. Every row of the day increment becomes a new open version
--      (start_time = modification date, end_time = 9999-12-31).
--   2. Every existing history row is kept, and a row that is still open is
--      closed with end_time = 2023-03-05 when its order appears in the increment.
-- Validity intervals are therefore half-open: start_time <= d < end_time.
-- Assumes days are merged in chronological order.
insert overwrite table dws_orders_his
select merged.orderid,
       merged.createdate,
       merged.modifiedtime,
       merged.status,
       merged.start_time,
       merged.end_time
from (
         select inc.orderid,
                inc.createdate,
                inc.modifiedtime,
                inc.status,
                inc.modifiedtime as start_time,
                '9999-12-31'     as end_time
         from ods_orders_inc inc
         where inc.day = '2023-03-05'
         union all
         select his.orderid,
                his.createdate,
                his.modifiedtime,
                his.status,
                his.start_time,
                case
                    when chg.orderid is not null and his.end_time > '2023-03-05' then '2023-03-05'
                    else his.end_time
                end as end_time
         from dws_orders_his his
                  -- One row per changed order, so the join cannot multiply history rows.
                  left join (select distinct orderid
                             from ods_orders_inc
                             where day = '2023-03-05') chg
                            on his.orderid = chg.orderid
         -- Versions already written by an earlier run of this same merge are
         -- dropped here and re-created by the first branch, which makes the
         -- statement safe to re-run. Rows without a start date are kept as-is.
         where his.start_time is null
            or his.start_time < '2023-03-05'
     ) merged
-- NOTE(review): a global order by funnels all rows through a single reducer in
-- Hive. It is kept so the stored rows match the listing shown in the article.
order by merged.orderid, merged.modifiedtime;
结果如下



















![[golang]Go语言从入门到实践-反射](https://img-blog.csdnimg.cn/76c88bb3476c461caec52f9c4ec5cc46.png)
