Wednesday, October 4, 2017

Hive Table Creation - Avro,Parquet

                         import table orders to HDFS  as Avro datafile with deflate 
                       compression
sqoop import \
--connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
--username root \
--password cloudera \
--table orders \
--target-dir practice/sqoop/orders_avro_deflate \
--as-avrodatafile \
--m 1 \
--compress \
--compression-codec org.apache.hadoop.io.compress.DeflateCodec

create external table using avro files in Hive

create external table avro_deflate
    > stored as AVRO
    > location 'hdfs://quickstart.cloudera:8020/user/cloudera/practice/sqoop/orders_avro_deflate'
    > TBLPROPERTIES ('avro.schema.url'='hdfs://quickstart.cloudera:8020/user/cloudera/practice/sqoop/orders_avro_deflate_avsc/orders.avsc');

Note: put avsc and avro files in different folders
                Give absolute path

                Hdfs://quickstart.cloudera:8020/path

                                             import table orders_items to HDFS as parquet with snappy compression

                               note : use fully qualified path and also specify column names
                                  sqoop import \
                                   --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
                                  --username root \
                                  --password cloudera \
                                  --table order_items \
                                 --target-dir practice/sqoop/oi_parquet_snappy \
                                 --as-parquetfile \
                                 --m 1 \
                                --compress \
                                --compression-codec org.apache.hadoop.io.compress.SnappyCodec

                         create external parquet table

create external table parquet_snappy(order_item_id int,order_item_order_id int,order_item_product_id int ,order_item_quantity int,order_item_subtotal float,order_item_product_price float)
ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
STORED AS PARQUET
location 'hdfs://quickstart.cloudera:8020/user/cloudera/practice/sqoop/oi_parquet_snappy';

No comments:

Post a Comment