Powered By Blogger

Sunday, November 17, 2019

Managed table - data load


---------- without the location external table-----

create table if not exists products_managed(
id string,
title string,
cost float
)
row format delimited fields terminated by ','
stored as textfile;


We are not mentioning the path of the content.


[cloudera@quickstart Desktop]$ pwd
/home/cloudera/Desktop
[cloudera@quickstart Desktop]$ ls
beelineScriptExec.hql   Eclipse.desktop     Express.desktop   mapreduce_input   Parcels.desktop  wordCountNew.jar
beelineScriptExec.hql~  Enterprise.desktop  Kerberos.desktop  mapreduce_output  products.csv
[cloudera@quickstart Desktop]$


load data local inpath '/home/cloudera/Desktop/products.csv' into table products_managed;


hive> load data local inpath '/home/cloudera/Desktop/products.csv' into table products_managed;
Loading data to table basan.products_managed
Table basan.products_managed stats: [numFiles=1, totalSize=120]
OK
Time taken: 0.495 seconds
hive> select * from products_managed;
OK
iphone7 iPhone 7 950.0
camera_canon Canon 570x 1000.0
washingmachine_samsung Samsung Swift 400.0
tv_vu Vu 56 Inch 600.0
Time taken: 0.103 seconds, Fetched: 4 row(s)
hive>


describe formatted products_managed


hive> select * from products_managed;
OK
iphone7 iPhone 7 950.0
camera_canon Canon 570x 1000.0
washingmachine_samsung Samsung Swift 400.0
tv_vu Vu 56 Inch 600.0
Time taken: 0.103 seconds, Fetched: 4 row(s)
hive> describe formatted products_managed
    > ;
OK
# col_name            data_type            comment           

id                  string                                 
title                string                                 
cost                float                                   

# Detailed Table Information
Database:            basan               
Owner:              cloudera           
CreateTime:          Sat Nov 16 23:29:48 PST 2019
LastAccessTime:      UNKNOWN             
Protect Mode:        None               
Retention:          0                   
Location:            hdfs://quickstart.cloudera:8020/user/hive/warehouse/basan.db/products_managed
Table Type:          MANAGED_TABLE       
Table Parameters:
COLUMN_STATS_ACCURATE true               
numFiles            1                 
totalSize            120               
transient_lastDdlTime 1573976013         

# Storage Information
SerDe Library:      org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:        org.apache.hadoop.mapred.TextInputFormat
OutputFormat:        org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:          No                 
Num Buckets:        -1                 
Bucket Columns:      []                 
Sort Columns:        []                 
Storage Desc Params:
field.delim          ,                 
serialization.format ,                 
Time taken: 0.133 seconds, Fetched: 32 row(s)
hive>

Observe we are loading the content from local machine to table.


------------------

hadoop fs -mkdir /data1
hadoop fs -copyFromLocal /home/cloudera/Desktop/products.csv /data1/




hive> create table if not exists products_managed(
    > id string,
    > title string,
    > cost float
    > )
    > row format delimited fields terminated by ','
    > stored as textfile;
OK
Time taken: 0.114 seconds

select * from products_managed

//loading data from hdfs , this is cut paste operation
load data inpath '/data1/products.csv' into table products_managed



hive> load data inpath '/data1/products.csv' into table products_managed
    > ;
Loading data to table basan.products_managed
Table basan.products_managed stats: [numFiles=1, totalSize=120]
OK
Time taken: 0.585 seconds
hive> select * from products_managed;
OK
iphone7 iPhone 7 950.0
camera_canon Canon 570x 1000.0
washingmachine_samsung Samsung Swift 400.0
tv_vu Vu 56 Inch 600.0
Time taken: 0.083 seconds, Fetched: 4 row(s)
hive> dfs -ls /data1/
    > ;
hive> describe formatted products_managed;
OK
# col_name            data_type            comment           

id                  string                                 
title                string                                 
cost                float                                   

# Detailed Table Information
Database:            basan               
Owner:              cloudera           
CreateTime:          Sat Nov 16 23:41:06 PST 2019
LastAccessTime:      UNKNOWN             
Protect Mode:        None               
Retention:          0                   
Location:            hdfs://quickstart.cloudera:8020/user/hive/warehouse/basan.db/products_managed
Table Type:          MANAGED_TABLE       
Table Parameters:
COLUMN_STATS_ACCURATE true               
numFiles            1                 
totalSize            120               
transient_lastDdlTime 1573976608         

# Storage Information
SerDe Library:      org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:        org.apache.hadoop.mapred.TextInputFormat
OutputFormat:        org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:          No                 
Num Buckets:        -1                 
Bucket Columns:      []                 
Sort Columns:        []                 
Storage Desc Params:
field.delim          ,                 
serialization.format ,                 
Time taken: 0.082 seconds, Fetched: 32 row(s)
hive>
observer hive> dfs -ls /data1/; is not listing the products.csv as it is cut paste operation.

No comments:

Post a Comment