Column on which we want to partition that need not be there in
the create table definition just need to put it in partition by tag
but when we do select, the column will be shown as virtual column
static partitioning and dyanamic partitioning
http://www.decloedtjr.com.br/hive-cheat-sheet/
http://www.decloedtjr.com.br/hive-cheat-sheet/
http://rkeagle.com/public/blog.php?category=hadoop&page=9
create table orders_no_partition
(
id string,
customer_id string,
product_id string,
quantity int,
amount double,
zipcode char(5),
state char(2)
)
COMMENT 'Table with no partition applied'
row format delimited fields terminated by ',';
Logging initialized using configuration in file:/etc/hive/conf.dist/hive-log4j.properties
WARNING: Hive CLI is deprecated and migration to Beeline is recommended.
hive> use basan
> ;
OK
Time taken: 0.31 seconds
hive> create table orders_no_partition
> (
> id string,
> customer_id string,
> product_id string,
> quantity int,
> amount double,
> zipcode char(5),
> state char(2)
> )
> COMMENT 'Table with no partition applied'
> row format delimited fields terminated by ',';
OK
Time taken: 0.256 seconds
hive> describe formatted orders_no_partition;
OK
# col_name data_type comment
id string
customer_id string
product_id string
quantity int
amount double
zipcode char(5)
state char(2)
# Detailed Table Information
Database: basan
Owner: cloudera
CreateTime: Sat Nov 23 22:35:16 PST 2019
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://quickstart.cloudera:8020/user/hive/warehouse/basan.db/orders_no_partition
Table Type: MANAGED_TABLE
Table Parameters:
comment Table with no partition applied
transient_lastDdlTime 1574577316
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim ,
serialization.format ,
Time taken: 0.153 seconds, Fetched: 34 row(s)
hive>
ime taken: 0.153 seconds, Fetched: 34 row(s)
hive> create table orders_w_partition
> (
> id string,
> customer_id string,
> product_id string,
> quantity int,
> amount double,
> zipcode char(5)
> )
> COMMENT 'Table with partition applied by state'
> partitioned by (state char(2));
OK
Time taken: 0.124 seconds
hive>
> show tables like 'orders_w*';
OK
orders_w_partition
Time taken: 0.024 seconds, Fetched: 1 row(s)
hive>
show tables like 'orders_w*';
OK
+---------------------+--+
| tab_name |
+---------------------+--+
| orders_w_partition |
+---------------------+--+
show partitions orders_w_partition;
OK
+------------+--+
| partition |
+------------+--+
+------------+--+
insert into orders_w_partition
partition(state="CA")
values
("o1", "c1", "p1", 1, 1.11, "90111"),
("o2", "c2", "p2", 1, 2.22, "90222"),
("o3", "c3", "p3", 1, 3.33, "90333"),
("o4", "c4", "p4", 1, 4.44, "90444");
insert into orders_w_partition
partition(state="WA")
values
("o10", "c10", "p10", 2, 10.11, "91111"),
("o20", "c20", "p20", 2, 20.22, "91222"),
("o30", "c30", "p30", 2, 30.33, "91333"),
("o40", "c40", "p40", 2, 40.44, "91444");
insert into orders_w_partition
partition(state="CA")
values ("o5", "c5", "p5", 1, 5.55, "90555");
insert into orders_w_partition
partition(state="NJ")
values
("o100", "c100", "p100", 3, 100.11, "92111"),
("o200", "c200", "p200", 3, 200.22, "92222");
insert into orders_w_partition
partition(state="NY")
values
("o201", "c201", "p201", 4, 201.22, "92122");
hive> show partitions orders_w_partition;
OK
state=CA
state=NJ
state=NY
state=WA
Time taken: 0.069 seconds, Fetched: 4 row(s)
[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition
Found 4 items
drwxrwxrwx - cloudera supergroup 0 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=CA
drwxrwxrwx - cloudera supergroup 0 2019-11-23 22:41 /user/hive/warehouse/basan.db/orders_w_partition/state=NJ
drwxrwxrwx - cloudera supergroup 0 2019-11-23 22:41 /user/hive/warehouse/basan.db/orders_w_partition/state=NY
drwxrwxrwx - cloudera supergroup 0 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=WA
[root@quickstart Desktop]#
Observer the folders created for the partition
[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition/state=CA
Found 2 items
-rwxrwxrwx 1 cloudera supergroup 88 2019-11-23 22:39 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0
-rwxrwxrwx 1 cloudera supergroup 22 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0_copy_1
[root@quickstart Desktop]#
[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition/state=CA
Found 2 items
-rwxrwxrwx 1 cloudera supergroup 88 2019-11-23 22:39 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0
-rwxrwxrwx 1 cloudera supergroup 22 2019-11-23 22:40 /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0_copy_1
[root@quickstart Desktop]# hdfs dfs -cat /user/hive/warehouse/basan.db/orders_w_partition/state=CA/000000_0
o1 c1 p1 1 1.11 90111
o2 c2 p2 1 2.22 90222
o3 c3 p3 1 3.33 90333
o4 c4 p4 1 4.44 90444
[root@quickstart Desktop]# pwd
==========================================================
Loading from Files into a Partitioned Table
drop table orders_w_partition;
create table orders_w_partition
(
id string,
customer_id string,
product_id string,
quantity int,
amount double,
zipcode char(5)
)
COMMENT 'Table with partition applied by state, load data from fs'
partitioned by (state char(2))
row format delimited fields terminated by ',';
load data local inpath '/home/cloudera/Desktop/orders_CA.csv'
into table orders_w_partition
partition (state="CA");
load data local inpath '/home/cloudera/Desktop/orders_CT.csv'
into table orders_w_partition
partition (state="CT");
show partitions orders_w_partition;
[root@quickstart Desktop]# hdfs dfs -ls /user/hive/warehouse/basan.db/orders_w_partition
Found 2 items
drwxrwxrwx - cloudera supergroup 0 2019-11-23 22:53 /user/hive/warehouse/basan.db/orders_w_partition/state=CA
drwxrwxrwx - cloudera supergroup 0 2019-11-23 22:53 /user/hive/warehouse/basan.db/orders_w_partition/state=CT
[root@quickstart Desktop]#
No comments:
Post a Comment