Bucketing in Hive
A table is bucketed with the CLUSTERED BY (column) clause.
The bucketing column must be one of the columns declared in the CREATE TABLE statement.
hive> set hive.enforce.bucketing
> ;
hive.enforce.bucketing=false
To enable bucketing:
set hive.enforce.bucketing=true;
-- Enable bucketing enforcement for inserts into bucketed tables
-- (the setting is false by default).
set hive.enforce.bucketing=true;
-- Create a plain (non-bucketed) table. It is stored as comma-delimited text
-- so the CSV file can be loaded into it directly.
create table products_no_buckets (
    id       int,
    name     string,
    cost     double,
    category string
)
row format delimited
fields terminated by ',';
-- Load the CSV file into the non-bucketed table. The "local" keyword makes
-- Hive read the path from the local filesystem rather than from HDFS.
load data local inpath '/home/cloudera/Desktop/newproducts.csv'
into table products_no_buckets;
Contents of newproducts.csv:
1,iPhone,379.99,mobiles
2,doll,8.99,toys
3,Galaxy X,100,mobile
5,Nokia Y,39.99,mobile
6,truck,7.99,toys
7,makeup,100,fashion
8,earings,69,fashion
9,chair,129,furniture
10,table,269,furniture
11,waterpistol,9,toys
-- Create the bucketed table: same columns as products_no_buckets, with rows
-- distributed across 4 buckets according to the id column.
create table products_w_buckets (
    id       int,
    name     string,
    cost     double,
    category string
)
clustered by (id) into 4 buckets;
-- Populate the bucketed table from the non-bucketed one.
-- Bucketing enforcement must be on in the session that runs this insert;
-- otherwise the rows are written without being split into the 4 buckets
-- (a directory listing that shows only a single file, e.g. 000000_0,
-- indicates the insert ran without enforcement).
set hive.enforce.bucketing=true;
insert into table products_w_buckets
select
    id,
    name,
    cost,
    category
from products_no_buckets;
//check the files created for each bucket
hdfs dfs -ls /user/hive/warehouse/basan.db/products_w_buckets
[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/products_w_buckets
Found 1 items
-rwxrwxrwx 1 cloudera supergroup 225 2019-11-23 23:42 /user/hive/warehouse/basan.db/products_w_buckets/000000_0
[root@quickstart Desktop]#
No comments:
Post a Comment