Powered By Blogger

Saturday, November 23, 2019

Dynamic Partitioning in Hive

============================================================
Dynamic partitioing
============================================================
By default

check existing values
set hive.exec.dynamic.partition
set hive.exec.dyanamic.partition.mode

set hive.exec.dynamic.partition=true
set hive.exec.dyanamic.partition.mode=nonstrict


hive> set hive.exec.dynamic.partition
    > ;
hive.exec.dynamic.partition=true
hive> set hive.exec.dyanamic.partition.mode
    > ;
hive.exec.dyanamic.partition.mode is undefined
hive>

Dump data from normal table to partitioned table

//set the hive settings for dynamic partitioning
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

//create table without any partitions and load data, observer state is part of the column
create table orders_no_partition
(
  id string,
  customer_id string,
  product_id string,
  quantity int,
  amount double,
  zipcode char(5),
  state char(2)
)
COMMENT 'Table with no partition applied'
row format delimited fields terminated by ',';



orders_CA.csv
o1, c1, p1, 1, 1.11, 90111,CA
o2, c2, p2, 1, 2.22, 90222,CA
o3, c3, p3, 1, 3.33, 90333,CA
o4, c4, p4, 1, 4.44, 90444,CA

orders_CT.csv
o10, c10, p10, 10, 10.11, 900111,CT
o20, c20, p20, 10, 20.22, 900222,CT
o30, c30, p30, 10, 30.33, 900333,CT
o40, c40, p40, 10, 40.44, 900444,CT


load data local inpath '/home/cloudera/Desktop/orders_CA.csv'
into table orders_no_partition


load data local inpath '/home/cloudera/Desktop/orders_CT.csv'
into table orders_no_partition

//create table with  partition, see its written at partitioned by
create table orders_partition
(
  id string,
  customer_id string,
  product_id string,
  quantity int,
  amount double,
  zipcode char(5)
)partitioned by (state char(2));

//load the data into partitioned table from non partitioned table
insert into table orders_partition partition(state) select * from orders_no_partition;

//observe contents are organised as per partitioning stratergy
[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_partition
Found 2 items
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 23:20 /user/hive/warehouse/basan.db/orders_partition/state=CA
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 23:20 /user/hive/warehouse/basan.db/orders_partition/state=CT
[root@quickstart Desktop]#

No comments:

Post a Comment