如何插入或覆写数据_云原生大数据计算服务 MaxCompute-阿里云帮助中心

MaxCompute支持通过 insert into 或 insert overwrite 操作向目标表或静态分区中插入、更新数据。

本文中的命令您可以在如下工具平台执行：

前提条件

执行 insert into 和 insert overwrite 操作前需要具备目标表的修改权限（Alter）及源表的元信息读取权限（Describe）。授权操作请参见 MaxCompute权限。

功能介绍

在使用MaxCompute SQL处理数据时， insert into 或 insert overwrite 操作可以将 select 查询的结果保存至目标表中。二者的区别是：

insert into ：直接向表或静态分区中插入数据。您可以在 insert 语句中直接指定分区值，将数据插入指定的分区。如果您需要插入少量测试数据，可以配合 VALUES 使用。
insert overwrite ：先清空表中的原有数据，再向表或静态分区中插入数据。

insert {into|overwrite} table <table_name> [partition (<pt_spec>)] [(<col_name> [,<col_name> ...])]
<select_statement>
from <from_statement>
[zorder by <zcol_name> [, <zcol_name> ...]];

-- Create a non-partitioned table named websites.
create table if not exists websites
(
    id   int,
    name string,
    url  string
);
-- Create a non-partitioned table named apps.
create table if not exists apps
(
    id       int,
    app_name string,
    url      string
);
-- Append one row to apps. "insert into table table_name" may be shortened to "insert into table_name".
insert into apps (id, app_name, url) values
(1, 'Aliyun', 'https://www.aliyun.com');
-- Append the rows of apps to websites, using an explicit target column list.
insert into websites (id, name, url)
select id, app_name, url
from apps;
-- Query websites to check the copied data.
select * from websites;
-- Result:
+------------+------------+------------+
| id         | name       | url        |
+------------+------------+------------+
| 1          | Aliyun     | https://www.aliyun.com |
+------------+------------+------------+

-- Create a partitioned table named sale_detail (partition columns: sale_date, region).
create table if not exists sale_detail
(
    shop_name   string,
    customer_id string,
    total_price double
)
partitioned by (sale_date string, region string);
-- Add a partition to the source table.
alter table sale_detail add partition (sale_date='2013', region='china');
-- Append rows to the source table. "insert into table table_name" may be shortened to
-- "insert into table_name", but the table keyword cannot be omitted from
-- "insert overwrite table table_name".
insert into sale_detail partition (sale_date='2013', region='china')
values ('s1','c1',100.1),('s2','c2',100.2),('s3','c3',100.3);
-- Enable full table scans (valid for the current session only), then query sale_detail.
set odps.sql.allow.fullscan=true;
select * from sale_detail;
-- Result:
+------------+-------------+-------------+------------+------------+
| shop_name  | customer_id | total_price | sale_date  | region     |
+------------+-------------+-------------+------------+------------+
| s1         | c1          | 100.1       | 2013       | china      |
| s2         | c2          | 100.2       | 2013       | china      |
| s3         | c3          | 100.3       | 2013       | china      |
+------------+-------------+-------------+------------+------------+

-- Create the target table sale_detail_insert with the same structure as sale_detail.
create table sale_detail_insert like sale_detail;
-- Add the partition that will receive the data.
alter table sale_detail_insert
    add partition (sale_date='2013', region='china');
-- Copy rows from the source table sale_detail into sale_detail_insert.
-- The target columns do not need to be declared, and reordering them is not supported.
-- For a static-partition target the partition values are already given in partition(),
-- so the select list contains only the regular columns, in the target table's column
-- order; they are mapped to the target by position. For a dynamic-partition target the
-- select list must include the partition columns; see "Insert or overwrite data into
-- dynamic partitions (DYNAMIC PARTITION)".
insert overwrite table sale_detail_insert
    partition (sale_date='2013', region='china')
select shop_name, customer_id, total_price
from sale_detail
zorder by customer_id, total_price;
-- Enable full table scans (valid for the current session only), then query sale_detail_insert.
set odps.sql.allow.fullscan=true;
select * from sale_detail_insert;
-- Result:
+------------+-------------+-------------+------------+------------+
| shop_name  | customer_id | total_price | sale_date  | region     |
+------------+-------------+-------------+------------+------------+
| s1         | c1          | 100.1       | 2013       | china      |
| s2         | c2          | 100.2       | 2013       | china      |
| s3         | c3          | 100.3       | 2013       | china      |
+------------+-------------+-------------+------------+------------+

-- Demonstration: the select list deliberately returns customer_id before shop_name.
-- Values are mapped to the target columns by position, not by name, so the two
-- columns come out swapped in the result below.
insert overwrite table sale_detail_insert
    partition (sale_date='2013', region='china')
select customer_id, shop_name, total_price
from sale_detail;
select * from sale_detail_insert;
-- Result:
+------------+-------------+-------------+------------+------------+
| shop_name  | customer_id | total_price | sale_date  | region     |
+------------+-------------+-------------+------------+------------+
| c1         | s1          | 100.1       | 2013       | china      |
| c2         | s2          | 100.2       | 2013       | china      |
| c3         | s3          | 100.3       | 2013       | china      |
+------------+-------------+-------------+------------+------------+

```
-- NOTE(review): this appears to be a counter-example. The select list also returns
-- sale_date and region, although for a static partition the partition values come from
-- partition() and the select list should contain only the regular columns. Confirm the
-- intent before running.
insert overwrite table sale_detail_insert
    partition (sale_date='2013', region='china')
select shop_name, customer_id, total_price, sale_date, region
from sale_detail;
```

-- NOTE(review): the sale_date partition value is an expression (datepart(...)) rather
-- than a literal. This looks like a counter-example; MaxCompute presumably requires
-- static partition values to be constants. Confirm before running.
insert overwrite table sale_detail_insert
    partition (sale_date=datepart('2016-09-18 01:10:00', 'yyyy'), region='china')
select shop_name, customer_id, total_price
from sale_detail;

-- Create the table mf_src.
create table mf_src (
    key   string,
    value string
);
-- Load three rows, in unsorted order, from an inline values table.
insert overwrite table mf_src
select a, b
from values ('1', '1'), ('3', '3'), ('2', '2') as t(a, b);
select * from mf_src;
-- Result:
+-----+-------+
| key | value |
+-----+-------+
| 1   | 1     |
| 3   | 3     |
| 2   | 2     |
+-----+-------+
-- Create the target table mf_zorder_src with the same structure as mf_src.
create table mf_zorder_src like mf_src;
-- Sort using the global zorder mode.
set odps.sql.default.zorder.type=global;
insert overwrite table mf_zorder_src
select key, value
from mf_src
zorder by key, value;
select * from mf_zorder_src;
-- Result:
+-----+-------+
| key | value |
+-----+-------+
| 1   | 1     |
| 2   | 2     |
| 3   | 3     |
+-----+-------+

-- target is an existing table: overwrite it with its own rows, sorted in global zorder mode.
set odps.sql.default.zorder.type=global;
insert overwrite table target
select key, value
from target
zorder by key, value;

-- Create a Transaction Table 2.0 table: pk is the primary key, partitioned by dd and hh.
create table mf_tt6 (
    pk  bigint not null primary key,
    val bigint not null
)
partitioned by (dd string, hh string)
tblproperties ("transactional"="true");

-- Write data into the Transaction Table 2.0 table: overwrite partition dd='01', hh='01'.
insert overwrite table mf_tt6 partition (dd='01', hh='01')
values (1, 1), (2, 2), (3, 3);
select * from mf_tt6 where dd='01' and hh='01';
-- Result (the query filters on hh='01', so every row shows hh = 01):
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 1          | 01 | 01 |
| 3          | 3          | 01 | 01 |
| 2          | 2          | 01 | 01 |
+------------+------------+----+----+
-- Insert into the same partition. As the result shows, the row with pk = 3 is replaced
-- (val becomes 30) rather than duplicated, and pk 4 and 5 are added.
insert into table mf_tt6 partition (dd='01', hh='01')
values (3, 30), (4, 4), (5, 5);
select * from mf_tt6 where dd='01' and hh='01';
-- Result:
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 1          | 01 | 01 |
| 3          | 30         | 01 | 01 |
| 4          | 4          | 01 | 01 |
| 5          | 5          | 01 | 01 |
| 2          | 2          | 01 | 01 |
+------------+------------+----+----+
-- Overwrite partition dd='01', hh='02' with three rows.
insert overwrite table mf_tt6
    partition (dd='01', hh='02')
values (1, 1), (2, 2), (3, 3);
select * from mf_tt6 where dd='01' and hh='02';
-- Result:
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 1          | 01 | 02 |
| 3          | 3          | 01 | 02 |
| 2          | 2          | 01 | 02 |
+------------+------------+----+----+
-- Insert into the same partition. As the result shows, pk = 3 now has val = 30 and
-- pk 4 and 5 are added.
insert into table mf_tt6
    partition (dd='01', hh='02')
values (3, 30), (4, 4), (5, 5);
select * from mf_tt6 where dd='01' and hh='02';
-- Result:
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 1          | 01 | 02 |
| 3          | 30         | 01 | 02 |
| 4          | 4          | 01 | 02 |
| 5          | 5          | 01 | 02 |
| 2          | 2          | 01 | 02 |
+------------+------------+----+----+

-- Transaction Table 2.0 update / delete.
-- Update val in partition dd='01', hh='01' from an inline delta set joined on pk.
update mf_tt6
set val = delta.val
from (
    select pk, val
    from values (1, 10), (2, 20) t (pk, val)
) delta
where delta.pk = mf_tt6.pk
  and mf_tt6.dd='01'
  and mf_tt6.hh='01';
select * from mf_tt6 where dd='01' and hh='01';
-- Result:
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 10         | 01 | 01 |
| 3          | 30         | 01 | 01 |
| 4          | 4          | 01 | 01 |
| 5          | 5          | 01 | 01 |
| 2          | 20         | 01 | 01 |
+------------+------------+----+----+
-- Update a single row with a constant value.
update mf_tt6
set val = 40
where pk = 4
  and dd='01'
  and hh='01';
select * from mf_tt6 where dd='01' and hh='01';
-- Result:
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 10         | 01 | 01 |
| 3          | 30         | 01 | 01 |
| 4          | 40         | 01 | 01 |
| 5          | 5          | 01 | 01 |
| 2          | 20         | 01 | 01 |
+------------+------------+----+----+

-- Delete the rows with val = 5 from partition dd='01', hh='01'.
delete from mf_tt6
where val = 5
  and dd='01'
  and hh='01';
select * from mf_tt6 where dd='01' and hh='01';
-- Result:
+------------+------------+----+----+
| pk         | val        | dd | hh |
+------------+------------+----+----+
| 1          | 10         | 01 | 01 |
| 3          | 30         | 01 | 01 |
| 4          | 40         | 01 | 01 |
| 2          | 20         | 01 | 01 |
+------------+------------+----+----+

-- Transaction Table 2.0 merge into.
-- Create the source table mf_delta from an inline values table (plain CTAS; no
-- transactional table properties are declared here).
create table mf_delta as
select pk, val
from values (1, 10), (2, 20), (6, 60) t (pk, val);
-- Merge mf_delta into partition dd='01', hh='01' of mf_tt6:
--   matched and pk > 1  -> update val from mf_delta
--   any other match     -> delete the target row
--   not matched         -> insert the mf_delta row into partition ('01', '01')
merge into mf_tt6 using mf_delta
    on mf_tt6.pk = mf_delta.pk and mf_tt6.dd='01' and mf_tt6.hh='01'
when matched and (mf_tt6.pk > 1) then update set mf_tt6.val = mf_delta.val
when matched then delete
when not matched then insert values (mf_delta.pk, mf_delta.val, '01', '01');
-- Show the source rows that drove the merge.
select * from mf_delta;
-- Result:
+------+------+
| pk   | val  |
+------+------+
| 1    | 10   |
| 2    | 20   |
| 6    | 60   |
+------+------+

前提条件

功能介绍

使用限制

命令格式

使用示例：普通表

使用示例：Transaction Table2.0类型表

最佳实践

优先考虑Clustered Index而不是 Z-Order的场景

Z-Order使用建议