Powered By Blogger

Saturday, November 23, 2019

Bucketing Hive


bucketing

Bucketing is declared with the CLUSTERED BY (column) clause.

The bucketing column must be one of the regular columns listed in the CREATE TABLE statement.

hive> set hive.enforce.bucketing
    > ;
hive.enforce.bucketing=false


To enable bucketing (it is off by default, as the output above shows):
set hive.enforce.bucketing=true


-- Enable bucketing enforcement for the current session so that inserts into a
-- bucketed table write one file per bucket.
-- Needed on Hive 1.x, where the property defaults to false; Hive 2.0+ removed
-- the property and always enforces bucketing.
set hive.enforce.bucketing=true;

-- Staging table without buckets: a plain comma-delimited text table that
-- receives the raw CSV rows before they are copied into the bucketed table.
create table products_no_buckets
(
  id int,
  name string,
  cost double,
  category string
)
row format delimited fields terminated by ',';

-- Load the CSV into the non-bucketed table.
-- `local` reads the file from the client's local filesystem rather than HDFS;
-- `into` (without `overwrite`) appends to any rows already in the table.
load data local inpath '/home/cloudera/Desktop/newproducts.csv'
into table products_no_buckets;



newproducts.csv

1,iPhone,379.99,mobiles
2,doll,8.99,toys
3,Galaxy X,100,mobile
5,Nokia Y,39.99,mobile
6,truck,7.99,toys
7,makeup,100,fashion
8,earings,69,fashion
9,chair,129,furniture
10,table,269,furniture
11,waterpistol,9,toys


-- Bucketed table: same columns as products_no_buckets, with rows distributed
-- across 4 buckets according to the hash of id.
create table products_w_buckets
(
  id int,
  name string,
  cost double,
  category string
)
clustered by (id) into 4 buckets;

-- Copy the rows from the non-bucketed table into the bucketed table.
-- On Hive 1.x, hive.enforce.bucketing=true must already be set in this session;
-- otherwise the insert does not split the rows into per-bucket files.
insert into table products_w_buckets
select
  id,
  name,
  cost,
  category
from products_no_buckets;

# List the files written for the bucketed table (one file per bucket is expected)
hdfs dfs -ls /user/hive/warehouse/basan.db/products_w_buckets

[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/products_w_buckets
Found 1 items
-rwxrwxrwx   1 cloudera supergroup        225 2019-11-23 23:42 /user/hive/warehouse/basan.db/products_w_buckets/000000_0
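[root@quickstart Desktop]# 

Note: the listing shows a single file (000000_0) even though the table is declared with 4 buckets. This most likely means hive.enforce.bucketing was not in effect in the session that ran the insert; with it enabled, the directory should contain one file per bucket (000000_0 through 000003_0).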

No comments:

Post a Comment