Import table orders to HDFS as an Avro datafile with deflate compression
sqoop import \
  --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
  --username root \
  --password cloudera \
  --table orders \
  --target-dir practice/sqoop/orders_avro_deflate \
  --as-avrodatafile \
  -m 1 \
  --compress \
  --compression-codec org.apache.hadoop.io.compress.DeflateCodec
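To verify the import, list the target directory and inspect the Avro file metadata. A minimal check, assuming avro-tools is on the path (it ships with the quickstart VM) and that the single mapper produced a file named part-m-00000.avro:

hadoop fs -ls practice/sqoop/orders_avro_deflate
hadoop fs -get practice/sqoop/orders_avro_deflate/part-m-00000.avro
avro-tools getmeta part-m-00000.avro

getmeta should report avro.codec as deflate, confirming the compression took effect.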
Create an external table over the Avro files in Hive
create external table avro_deflate
stored as AVRO
location 'hdfs://quickstart.cloudera:8020/user/cloudera/practice/sqoop/orders_avro_deflate'
TBLPROPERTIES ('avro.schema.url'='hdfs://quickstart.cloudera:8020/user/cloudera/practice/sqoop/orders_avro_deflate_avsc/orders.avsc');
Note: put the .avsc schema file and the .avro data files in different folders, and give the absolute path in avro.schema.url, e.g. hdfs://quickstart.cloudera:8020/path
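The orders.avsc referenced in avro.schema.url is not produced by the import; it has to be extracted and uploaded separately. A sketch, assuming the part file fetched above is still in the local working directory (filenames are the ones used in this example):

avro-tools getschema part-m-00000.avro > orders.avsc
hadoop fs -mkdir -p practice/sqoop/orders_avro_deflate_avsc
hadoop fs -put orders.avsc practice/sqoop/orders_avro_deflate_avsc/

Once the schema is in place, select count(*) from avro_deflate; in Hive is a quick smoke test.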
Import table order_items to HDFS as parquet with snappy compression
Note: use the fully qualified path and also specify the column names in the Hive DDL.
sqoop import \
  --connect jdbc:mysql://quickstart.cloudera:3306/retail_db \
  --username root \
  --password cloudera \
  --table order_items \
  --target-dir practice/sqoop/oi_parquet_snappy \
  --as-parquetfile \
  -m 1 \
  --compress \
  --compression-codec org.apache.hadoop.io.compress.SnappyCodec
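To confirm the codec, list the output and inspect a data file with parquet-tools (also available on the quickstart VM). Sqoop's parquet import generates UUID-style filenames, so substitute an actual name from the -ls listing for the <generated-file> placeholder:

hadoop fs -ls practice/sqoop/oi_parquet_snappy
hadoop fs -get practice/sqoop/oi_parquet_snappy/<generated-file>.parquet
parquet-tools meta <generated-file>.parquet

The meta output should show SNAPPY as the compression for each column.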
create external table parquet_snappy (
  order_item_id int,
  order_item_order_id int,
  order_item_product_id int,
  order_item_quantity int,
  order_item_subtotal float,
  order_item_product_price float)
ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
STORED AS PARQUET
location 'hdfs://quickstart.cloudera:8020/user/cloudera/practice/sqoop/oi_parquet_snappy';
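A quick sanity query against the new table (a smoke test, not part of the original steps):

select count(*) from parquet_snappy;
select * from parquet_snappy limit 5;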