Tuesday 3 December 2013

Copy Specific Records Between HBase Tables

Copy Specific Records Between HBase Tables

package copyrecord;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.StringTokenizer;

/**
 * Created with IntelliJ IDEA.
 * User: Hadoop Share
 * Date: 2/12/13
 * Time: 11:58 AM
 * To change this template use File | Settings | File Templates.
 */
public class CopySpecificRecordsBetweenTables {
    private static Configuration configuration = HBaseConfiguration.create();
    static{
        configuration.set("hbase.zookeeper.quorum","localhost");
        configuration.set("hbase.zookeeper.property.port","2181");
    }



    private void copyRecords(String sourceTable, String sourceDate, String targetTable, String targetDate) throws IOException {
        System.out.println("Begin Copy");
        HTableInterface source = new HTable(configuration,sourceTable);
        HTableInterface target = new HTable(configuration,targetTable);
        RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(sourceDate));
        Scan scan = new Scan();
        scan.setFilter(filter);

        ResultScanner resultScanner = source.getScanner(scan);
        List<Put> putList = new ArrayList<Put>();
        for(Result result:resultScanner){
            System.out.println("  Row Key To Copy: " + Bytes.toString(result.getRow()));
            StringTokenizer tokenizer = new StringTokenizer(Bytes.toString(result.getRow()),":");
            tokenizer.nextToken();//skip date
            String newKey = targetDate + tokenizer.nextToken();
            Put put = new Put(newKey.getBytes());
            System.out.println("Row Key to Insert: " + Bytes.toString(put.getRow()));
            NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyCellMap = result.getNoVersionMap();
            for(byte[] family:familyCellMap.keySet()){
                NavigableMap<byte[], byte[]> cell = familyCellMap.get(family);
                for (byte[] qualifier: cell.keySet()){
                   // System.out.println("Family: " + Bytes.toString(family) + " Qualifier: " + Bytes.toString(qualifier) + " Value: " + Bytes.toString(cell.get(qualifier)));
                    put.add(family,qualifier,cell.get(qualifier));
                }
            }
            putList.add(put);
        }
        target.put(putList);
        System.out.println("End of Copy");
    }

    public static void main(String[] args) throws IOException {
        /*
        * Row key format assumed to be date:key*/
        CopySpecificRecordsBetweenTables copyRecordFromDateToDate = new CopySpecificRecordsBetweenTables();
        copyRecordFromDateToDate.copyRecords("FROM_TABLE", "20131011:", "TO_TABLE", "20131012:");
    }


}

Monday 2 December 2013

Copy Record From Current Date To New Date

Copy Record From Current Date To New Date



package copyrecord;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.StringTokenizer;

/**
 * Created with IntelliJ IDEA.
 * User: Hadoop Share
 * Date: 2/12/13
 * Time: 11:58 AM
 * To change this template use File | Settings | File Templates.
 */
public class CopyRecordFromCurrentDateToNewDate {
    private static Configuration configuration = HBaseConfiguration.create();
    static{
        configuration.set("hbase.zookeeper.quorum","localhost");
        configuration.set("hbase.zookeeper.property.port","2181");
    }

    private void copyRecords(String tableName, String fromDate, String toDate) throws IOException {
        System.out.println("Begin Copy");
        HTableInterface table = new HTable(configuration,tableName);
        RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(fromDate));
        Scan scan = new Scan();
        scan.setFilter(filter);

        ResultScanner resultScanner = table.getScanner(scan);
        List<Put> putList = new ArrayList<Put>();
        for(Result result:resultScanner){
            System.out.println("  Row Key To Copy: " + Bytes.toString(result.getRow()));
            StringTokenizer tokenizer = new StringTokenizer(Bytes.toString(result.getRow()),":");
            tokenizer.nextToken();//skip date
            String newKey = toDate + tokenizer.nextToken();
            if(tokenizer.hasMoreTokens()){
                newKey = newKey  + ":" + tokenizer.nextToken();
            }
            Put put = new Put(newKey.getBytes());
            System.out.println("Row Key to Insert: " + Bytes.toString(put.getRow()));
            NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyCellMap = result.getNoVersionMap();
            for(byte[] family:familyCellMap.keySet()){
                NavigableMap<byte[], byte[]> cell = familyCellMap.get(family);
                for (byte[] qualifier: cell.keySet()){
                   // System.out.println("Family: " + Bytes.toString(family) + " Qualifier: " + Bytes.toString(qualifier) + " Value: " + Bytes.toString(cell.get(qualifier)));
                    put.add(family,qualifier,cell.get(qualifier));
                }
            }
            putList.add(put);
        }

       // table.put(putList);
        System.out.println("End of Copy");

        //scan 'TRUNK_DETAIL', {FILTER => org.apache.hadoop.hbase.filter.RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'),SubstringComparator.new("20131101:"))}
    }

    public static void main(String[] args) throws IOException {
        CopyRecordFromCurrentDateToNewDate copyRecordFromDateToDate = new CopyRecordFromCurrentDateToNewDate();
        copyRecordFromDateToDate.copyRecords("TABLE_NAME" , "20131101:" , "20131102:");
    }

}

Friday 29 November 2013

Create HBase Table

Create HBase Table

package ddl;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HBaseAdmin;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: Hdoop Share
 * Date: 29/11/13
 * Time: 5:54 PM
 * To change this template use File | Settings | File Templates.
 */
public class Table {
    private static Configuration configuration = HBaseConfiguration.create();
    static {
            configuration.set("hbase.zookeeper.quorum","localhost");
            configuration.set("hbase.zookeeper.property.port","2181");
    }

    public void createTable(String tableName, List<String> columnFamilyList){
        HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
        for(String columnFamily:columnFamilyList){
            HColumnDescriptor.isLegalFamilyName(columnFamily.getBytes());
            tableDescriptor.addFamily(new HColumnDescriptor(columnFamily));
        }
        try {
            HBaseAdmin hBaseAdmin = new HBaseAdmin(configuration);
            hBaseAdmin.createTable(tableDescriptor);
        } catch (MasterNotRunningException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }


    public static void main(String[] args) {
           Table table = new Table();

           List<String> columnFamilies = new ArrayList<String>();
           columnFamilies.add("FAMILY_A");
           columnFamilies.add("FAMILY_B");
           columnFamilies.add("FAMILY_C");

           table.createTable("TABLE_NAME",columnFamilies);
    }


}




You can verify that the table was created using the HBase shell:


hbase(main):002:0> describe 'TABLE_NAME'
DESCRIPTION                                                                                  ENABLED                                           
 {NAME => 'TABLE_NAME', FAMILIES => [{NAME => 'FAMILY_A', DATA_BLOCK_ENCODING => 'NONE', BLO true                                              
 OMFILTER => 'NONE', REPLICATION_SCOPE => '0', VERSIONS => '3', COMPRESSION => 'NONE', MIN_V                                                   
 ERSIONS => '0', TTL => '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', I                                                   
 N_MEMORY => 'false', ENCODE_ON_DISK => 'true', BLOCKCACHE => 'true'}, {NAME => 'FAMILY_B',                                                    
 DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'NONE', REPLICATION_SCOPE => '0', VERSIONS =>                                                   
  '3', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => '2147483647', KEEP_DELETED_CELLS =                                                   
 > 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', ENCODE_ON_DISK => 'true', BLOCKCACHE                                                   
  => 'true'}, {NAME => 'FAMILY_C', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'NONE', REP                                                   
 LICATION_SCOPE => '0', VERSIONS => '3', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL =>                                                    
 '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', EN                                                   
 CODE_ON_DISK => 'true', BLOCKCACHE => 'true'}]}                                                                                               
1 row(s) in 0.3070 seconds

Copy Cell From Existing To New Column Family

Copy Cell From Existing To New Column Family


package copyrecord;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;

/**
 * Created with IntelliJ IDEA.
 * User: Hadoop Share
 * Date: 29/11/13
 * Time: 1:52 PM
 * To change this template use File | Settings | File Templates.
 */
public class CopyCellFromExistingToNewFamily {
    private static Configuration configuration = HBaseConfiguration.create();
    static {
        configuration.set("hbase.zookeeper.quorum","localhost");
        configuration.set("hbase.zookeeper.property.port","2181");
    }

    private void copyCellFromOneToAnotherFamily(String tableName, String currentFamily, String toBeFamily) throws IOException {
        HTableInterface table = new HTable(configuration,tableName);
        Scan scan = new Scan();
        scan.addFamily(currentFamily.getBytes());

        Filter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("20131022:"));    // fetch all records for a certain date
        Filter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("REQUIRED_QUALIFIER:")); // fetch required qualifier
        FilterList filterList = new FilterList();
        filterList.addFilter(rowFilter);
        filterList.addFilter(qualifierFilter);
        scan.setFilter(filterList);

        ResultScanner resultScanner = table.getScanner(scan);
        List<Put> putList = new ArrayList<Put>();
        for(Result result:resultScanner){
            NavigableMap<byte[],byte[]> map = result.getFamilyMap(currentFamily.getBytes());
            String qualifier = null;
            String value = null;
            for(byte[]key:map.keySet()){
                qualifier = Bytes.toString(key);
                value = Bytes.toString(map.get(key));
                System.out.println(Bytes.toString(result.getRow()) + " :: "  + " QUALIFIER: " + qualifier  +" VALUE: " + value) ;

                Put put = new Put(result.getRow());
                put.add(toBeFamily.getBytes(),qualifier.getBytes(),value.getBytes()) ;
                putList.add(put);
            }
        }
        table.put(putList);

    }

    public static void main(String[] args) {
        CopyCellFromExistingToNewFamily copyCellFromExistingToNewFamily = new CopyCellFromExistingToNewFamily();
        try {
            copyCellFromExistingToNewFamily.copyCellFromOneToAnotherFamily("TABLE_NAME","EXISTING_FAMILY","NEW_FAMILY");
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }
}

Wednesday 27 November 2013

Read records (whose row keys begin with a given value) from an HBase table using RowFilter

ReadRecordWithRowFilter

package filter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: Hadoop Share
 * Date: 27/11/13
 * Time: 6:07 PM
 * To change this template use File | Settings | File Templates.
 */
public class ReadRecordWithRowFilter {
    private static Configuration configuration = HBaseConfiguration.create();

    static{
        configuration.set("hbase.zookeeper.quorum","localhost");
        configuration.set("hbase.zookeeper.property.clientPort","2181");
    }

    List<String> getRowKeysBeginningWith(String tableName,String rowKeyBeginsWith) throws IOException {
        List<String> rowKeyList = new ArrayList<String>();
        HTableInterface table = new HTable(configuration,tableName);
        Scan scan = new Scan();
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(rowKeyBeginsWith));
        scan.setFilter(rowFilter);
        ResultScanner resultScanner =  table.getScanner(scan);
        for(Result result:resultScanner){
            rowKeyList.add(Bytes.toString(result.getRow()));
        }
        return rowKeyList;
    }

    public static void main(String[] args) {
        ReadRecordWithRowFilter filterReader = new ReadRecordWithRowFilter();
        try {
            List<String> rowKeyList = filterReader.getRowKeysBeginningWith("TABLE_NAME","20131022:");
            System.out.println("TotalNoOfRows:" + rowKeyList.size());
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }
}

Tuesday 26 November 2013

Communicate with a single HBase table

Sample code to read records from an HBase table

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: hadoopshare
 * Date: 26/11/13
 * Time: 6:31 PM
 * To change this template use File | Settings | File Templates.
 */
public class ReadRecordFromTable {
    private static Configuration configuration = HBaseConfiguration.create();

    static{
        configuration.set("hbase.zookeeper.quorum", "localhost");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
    }

    private List<String> getlistOfRowKeys(String tableName) throws IOException {
        HTableInterface table = new HTable(configuration,tableName);
        Scan scan = new Scan();
        ResultScanner resultScanner = table.getScanner(scan);
        List<String> rowKeyList = new ArrayList<String>();
        for(Result result: resultScanner) {
             rowKeyList.add(Bytes.toString(result.getRow()));
        }
        return rowKeyList;
    }

    public static void main(String[] args) {
        ReadRecordFromTable tableReader = new ReadRecordFromTable();
        try {
            List<String> rowKeyList = tableReader.getlistOfRowKeys("TABLE_NAME");
            System.out.println("No. Of records In Table: " + rowKeyList.size());
            for(String rowKey:rowKeyList){
                System.out.println(rowKey);
            }
        } catch (IOException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
    }
}