Powered By Blogger

Saturday, November 23, 2019

Partitioning and folder creation in Hive


Column on which we want to partition that need not be there in
the create table definition just need to put it in partition by tag

but when we do select, the column will be shown as virtual column

static partitioning and dyanamic partitioning


http://www.decloedtjr.com.br/hive-cheat-sheet/

http://www.decloedtjr.com.br/hive-cheat-sheet/
http://rkeagle.com/public/blog.php?category=hadoop&page=9

create table orders_no_partition
(
  id string,
  customer_id string,
  product_id string,
  quantity int,
  amount double,
  zipcode char(5),
  state char(2)
)
COMMENT 'Table with no partition applied'
row format delimited fields terminated by ',';



Logging initialized using configuration in file:/etc/hive/conf.dist/hive-log4j.properties
WARNING: Hive CLI is deprecated and migration to Beeline is recommended.
hive> use basan
    > ;
OK
Time taken: 0.31 seconds
hive> create table orders_no_partition
    > (
    >   id string,
    >   customer_id string,
    >   product_id string,
    >   quantity int,
    >   amount double,
    >   zipcode char(5),
    >   state char(2)
    > )
    > COMMENT 'Table with no partition applied'
    > row format delimited fields terminated by ',';
OK
Time taken: 0.256 seconds
hive> describe formatted orders_no_partition;
OK
# col_name            data_type            comment           

id                  string                                 
customer_id          string                                 
product_id          string                                 
quantity            int                                     
amount              double                                 
zipcode              char(5)                                 
state                char(2)                                 

# Detailed Table Information
Database:            basan               
Owner:              cloudera           
CreateTime:          Sat Nov 23 22:35:16 PST 2019
LastAccessTime:      UNKNOWN             
Protect Mode:        None               
Retention:          0                   
Location:            hdfs://quickstart.cloudera:8020/user/hive/warehouse/basan.db/orders_no_partition
Table Type:          MANAGED_TABLE       
Table Parameters:
comment              Table with no partition applied
transient_lastDdlTime 1574577316         

# Storage Information
SerDe Library:      org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:        org.apache.hadoop.mapred.TextInputFormat
OutputFormat:        org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:          No                 
Num Buckets:        -1                 
Bucket Columns:      []                 
Sort Columns:        []                 
Storage Desc Params:
field.delim          ,                 
serialization.format ,                 
Time taken: 0.153 seconds, Fetched: 34 row(s)
hive>



ime taken: 0.153 seconds, Fetched: 34 row(s)
hive> create table orders_w_partition
    > (
    >   id string,
    >   customer_id string,
    >   product_id string,
    >   quantity int,
    >   amount double,
    >   zipcode char(5)
    > )
    > COMMENT 'Table with partition applied by state'
    > partitioned by (state char(2));
OK
Time taken: 0.124 seconds
hive>
    > show tables like 'orders_w*';
OK
orders_w_partition
Time taken: 0.024 seconds, Fetched: 1 row(s)
hive>


show tables like 'orders_w*';
OK
+---------------------+--+
|      tab_name       |
+---------------------+--+
| orders_w_partition  |
+---------------------+--+

show partitions orders_w_partition;
OK
+------------+--+
| partition  |
+------------+--+
+------------+--+

insert into orders_w_partition
partition(state="CA")
values
("o1", "c1", "p1", 1, 1.11, "90111"),
("o2", "c2", "p2", 1, 2.22, "90222"),
("o3", "c3", "p3", 1, 3.33, "90333"),
("o4", "c4", "p4", 1, 4.44, "90444");

insert into orders_w_partition
partition(state="WA")
values
("o10", "c10", "p10", 2, 10.11, "91111"),
("o20", "c20", "p20", 2, 20.22, "91222"),
("o30", "c30", "p30", 2, 30.33, "91333"),
("o40", "c40", "p40", 2, 40.44, "91444");

insert into orders_w_partition
partition(state="CA")
values ("o5", "c5", "p5", 1, 5.55, "90555");

insert into orders_w_partition
partition(state="NJ")
values
("o100", "c100", "p100", 3, 100.11, "92111"),
("o200", "c200", "p200", 3, 200.22, "92222");

insert into orders_w_partition
partition(state="NY")
values
("o201", "c201", "p201", 4, 201.22, "92122");





hive> show partitions orders_w_partition;
OK
state=CA
state=NJ
state=NY
state=WA
Time taken: 0.069 seconds, Fetched: 4 row(s)


[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition
Found 4 items
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=CA
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 22:41 /user/hive/warehouse/basan.db/orders_w_partition/state=NJ
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 22:41 /user/hive/warehouse/basan.db/orders_w_partition/state=NY
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=WA
[root@quickstart Desktop]#

Observer the folders created for the partition

[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition/state=CA
Found 2 items
-rwxrwxrwx   1 cloudera supergroup         88 2019-11-23 22:39 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0
-rwxrwxrwx   1 cloudera supergroup         22 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0_copy_1
[root@quickstart Desktop]#

[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition/state=CA
Found 2 items
-rwxrwxrwx   1 cloudera supergroup         88 2019-11-23 22:39 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0
-rwxrwxrwx   1 cloudera supergroup         22 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0_copy_1
[root@quickstart Desktop]# hdfs dfs -cat /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0
o1 c1 p1 1 1.11 90111
o2 c2 p2 1 2.22 90222
o3 c3 p3 1 3.33 90333
o4 c4 p4 1 4.44 90444
[root@quickstart Desktop]# pwd

==========================================================

Loading from Files into a Partitioned Table

drop table orders_w_partition;

create table orders_w_partition
(
  id string,
  customer_id string,
  product_id string,
  quantity int,
  amount double,
  zipcode char(5)
)
COMMENT 'Table with partition applied by state, load data from fs'
partitioned by (state char(2))
row format delimited fields terminated by ',';




load data local inpath '/home/cloudera/Desktop/orders_CA.csv'
into table orders_w_partition
partition (state="CA");

load data local inpath '/home/cloudera/Desktop/orders_CT.csv'
into table orders_w_partition
partition (state="CT");

show partitions orders_w_partition;


[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition
Found 2 items
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 22:53 /user/hive/warehouse/basan.db/orders_w_partition/state=CA
drwxrwxrwx   - cloudera supergroup          0 2019-11-23 22:53 /user/hive/warehouse/basan.db/orders_w_partition/state=CT
[root@quickstart Desktop]#

No comments:

Post a Comment