Get Hbase Columns with Column Families

Get Hbase Columns with Column Families

Get Hbase Columns with Column Families
HBase doesn’t provide a client API to list all column qualifiers of a table. If your table has millions of rows and you need every qualifier, collecting them with a standalone program takes a very long time. Here is a MapReduce job that will help you get all columns together with their column families.
Prepare the Job Driver:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
	public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
 
		String tableName =args[1];
 
		conf.set("habse.table.name", args[1]);
 
		System.out.println("table: " + tableName);
		deletePath(conf, args[2]);
		Job job = new Job(conf, conf.get(JOB_NAME_CONF_KEY, JOB_NAME_CONF_KEY + "_" + tableName));
 
		// Need to add Hbase jars to job class path
 
		HBaseConfiguration.addHbaseResources(job.getConfiguration());
 
		TableMapReduceUtil.addDependencyJars(job);
 
		TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.client.Put.class);
 
		job.setJarByClass(ColumnsCounter.class);
 
		Scan scan = new Scan();
 
		scan.setCacheBlocks(false);
 
		// scan.addFamily(Bytes.toBytes(family));
 
		TableMapReduceUtil.initTableMapperJob(
				tableName, scan, ColumnsMapper.class,
				Text.class,
				Text.class, job);
		job.setReducerClass(ColumnsReducer.class);
 
		//job.setNumReduceTasks(1);
 
		FileOutputFormat.setOutputPath(job, new Path(args[2]));
 
		return job;
 
	}

Mapper that emits the column family as the key and the qualifier as the value:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
	static class ColumnsMapper extends TableMapper<Text, NullWritable> {
 
		@Override
		public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException,
				InterruptedException {
 
			// emit every combination of column family and qualifiers
			for (Entry<byte[], NavigableMap<byte[], byte[]>> columnFamilyMap : values.getNoVersionMap().entrySet()) {
				for (Entry<byte[], byte[]> entry : columnFamilyMap.getValue().entrySet()) {
 
					String cQualifier = Bytes.toString(entry.getKey());
 
					context.write(new Text(cQualifier), NullWritable.get());
 
				}
 
			}
 
		}
	}

Reducer that emits each column family together with all of its qualifiers:

1
2
3
4
5
6
7
8
9
10
11
12
13
static class ColumnsReducer extends Reducer<Text, Text, Text, Text> {
 
		public void reduce(Text key, Iterable<Text> values, Context context) throws IOException,
				InterruptedException {
		Iterator<Text> columns=	values.iterator();
		List<String> columnsList=new ArrayList<String>();
		while(columns.hasNext())
			columnsList.add(columns.next().toString());	
			context.write(key, new Text(columnsList.toString()));
 
		}
 
	}

Full program at git(techsquids)
Related post: launch hbase mapreduce job runjob

Leave a Reply

Your email address will not be published. Required fields are marked *

Time limit is exhausted. Please reload CAPTCHA.