Friday 29 November 2013

Create HBase Table

Create HBase Table

package ddl;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HBaseAdmin;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: Hdoop Share
 * Date: 29/11/13
 * Time: 5:54 PM
 * To change this template use File | Settings | File Templates.
 */
public class Table {
    private static Configuration configuration = HBaseConfiguration.create();
    static {
            configuration.set("hbase.zookeeper.quorum","localhost");
            configuration.set("hbase.zookeeper.property.port","2181");
    }

    public void createTable(String tableName, List<String> columnFamilyList){
        HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
        for(String columnFamily:columnFamilyList){
            HColumnDescriptor.isLegalFamilyName(columnFamily.getBytes());
            tableDescriptor.addFamily(new HColumnDescriptor(columnFamily));
        }
        try {
            HBaseAdmin hBaseAdmin = new HBaseAdmin(configuration);
            hBaseAdmin.createTable(tableDescriptor);
        } catch (MasterNotRunningException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }


    public static void main(String[] args) {
           Table table = new Table();

           List<String> columnFamilies = new ArrayList<String>();
           columnFamilies.add("FAMILY_A");
           columnFamilies.add("FAMILY_B");
           columnFamilies.add("FAMILY_C");

           table.createTable("TABLE_NAME",columnFamilies);
    }


}




You can verify that the table was created by using the HBase shell:


hbase(main):002:0> describe 'TABLE_NAME'
DESCRIPTION                                                                                  ENABLED                                           
 {NAME => 'TABLE_NAME', FAMILIES => [{NAME => 'FAMILY_A', DATA_BLOCK_ENCODING => 'NONE', BLO true                                              
 OMFILTER => 'NONE', REPLICATION_SCOPE => '0', VERSIONS => '3', COMPRESSION => 'NONE', MIN_V                                                   
 ERSIONS => '0', TTL => '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', I                                                   
 N_MEMORY => 'false', ENCODE_ON_DISK => 'true', BLOCKCACHE => 'true'}, {NAME => 'FAMILY_B',                                                    
 DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'NONE', REPLICATION_SCOPE => '0', VERSIONS =>                                                   
  '3', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => '2147483647', KEEP_DELETED_CELLS =                                                   
 > 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', ENCODE_ON_DISK => 'true', BLOCKCACHE                                                   
  => 'true'}, {NAME => 'FAMILY_C', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'NONE', REP                                                   
 LICATION_SCOPE => '0', VERSIONS => '3', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL =>                                                    
 '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', EN                                                   
 CODE_ON_DISK => 'true', BLOCKCACHE => 'true'}]}                                                                                               
1 row(s) in 0.3070 seconds

Copy Cell From Existing To New Column Family

Copy Cell From Existing To New Column Family


package copyrecord;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;

/**
 * Created with IntelliJ IDEA.
 * User: Hadoop Share
 * Date: 29/11/13
 * Time: 1:52 PM
 * To change this template use File | Settings | File Templates.
 */
public class CopyCellFromExistingToNewFamily {
    private static Configuration configuration = HBaseConfiguration.create();
    static {
        configuration.set("hbase.zookeeper.quorum","localhost");
        configuration.set("hbase.zookeeper.property.port","2181");
    }

    private void copyCellFromOneToAnotherFamily(String tableName, String currentFamily, String toBeFamily) throws IOException {
        HTableInterface table = new HTable(configuration,tableName);
        Scan scan = new Scan();
        scan.addFamily(currentFamily.getBytes());

        Filter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20131022:"));    // fetch all records for a certain date
        Filter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("REQUIRED_QUALIFIER:")); // fetch required qualifier
        FilterList filterList = new FilterList();
        filterList.addFilter(rowFilter);
        filterList.addFilter(qualifierFilter);
        scan.setFilter(filterList);

        ResultScanner resultScanner = table.getScanner(scan);
        List<Put> putList = new ArrayList<Put>();
        for(Result result:resultScanner){
            NavigableMap<byte[],byte[]> map = result.getFamilyMap(currentFamily.getBytes());
            String qualifier = null;
            String value = null;
            for(byte[]key:map.keySet()){
                qualifier = Bytes.toString(key);
                value = Bytes.toString(map.get(key));
                System.out.println(Bytes.toString(result.getRow()) + " :: "  + " QUALIFIER: " + qualifier  +" VALUE: " + value) ;

                Put put = new Put(result.getRow());
                put.add(toBeFamily.getBytes(),qualifier.getBytes(),value.getBytes()) ;
                putList.add(put);
            }
        }
        table.put(putList);

    }

    public static void main(String[] args) {
        CopyCellFromExistingToNewFamily copyCellFromExistingToNewFamily = new CopyCellFromExistingToNewFamily();
        try {
            copyCellFromExistingToNewFamily.copyCellFromOneToAnotherFamily("TABLE_NAME","EXISTING_FAMILY","NEW_FAMILY");
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }
}

Wednesday 27 November 2013

Read records whose row keys begin with a given value, using RowFilter, from a given HBase table

ReadRecordWithRowFilter

package filter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: Hadoop Share
 * Date: 27/11/13
 * Time: 6:07 PM
 * To change this template use File | Settings | File Templates.
 */
public class ReadRecordWithRowFilter {
    private static Configuration configuration = HBaseConfiguration.create();

    static{
        configuration.set("hbase.zookeeper.quorum","localhost");
        configuration.set("hbase.zookeeper.property.clientPort","2181");
    }

    List<String> getRowKeysBeginningWith(String tableName,String rowKeyBeginsWith) throws IOException {
        List<String> rowKeyList = new ArrayList<String>();
        HTableInterface table = new HTable(configuration,tableName);
        Scan scan = new Scan();
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(rowKeyBeginsWith));
        scan.setFilter(rowFilter);
        ResultScanner resultScanner =  table.getScanner(scan);
        for(Result result:resultScanner){
            rowKeyList.add(Bytes.toString(result.getRow()));
        }
        return rowKeyList;
    }

    public static void main(String[] args) {
        ReadRecordWithRowFilter filterReader = new ReadRecordWithRowFilter();
        try {
            List<String> rowKeyList = filterReader.getRowKeysBeginningWith("TABLE_NAME","20131022:");
            System.out.println("TotalNoOfRows:" + rowKeyList.size());
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }
}

Tuesday 26 November 2013

Communicate with a single HBase table

Sample code to read records from an HBase table

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: hadoopshare
 * Date: 26/11/13
 * Time: 6:31 PM
 * To change this template use File | Settings | File Templates.
 */
public class ReadRecordFromTable {
    private static Configuration configuration = HBaseConfiguration.create();

    static{
        configuration.set("hbase.zookeeper.quorum", "localhost");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
    }

    private List<String> getlistOfRowKeys(String tableName) throws IOException {
        HTableInterface table = new HTable(configuration,tableName);
        Scan scan = new Scan();
        ResultScanner resultScanner = table.getScanner(scan);
        List<String> rowKeyList = new ArrayList<String>();
        for(Result result: resultScanner) {
             rowKeyList.add(Bytes.toString(result.getRow()));
        }
        return rowKeyList;
    }

    public static void main(String[] args) {
        ReadRecordFromTable tableReader = new ReadRecordFromTable();
        try {
            List<String> rowKeyList = tableReader.getlistOfRowKeys("TABLE_NAME");
            System.out.println("No. Of records In Table: " + rowKeyList.size());
            for(String rowKey:rowKeyList){
                System.out.println(rowKey);
            }
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }
}