- Hbase过滤器
- 作用
- 比较过滤器
- 比较运算符
- 常见的六大比较过滤器
- BinaryComparator(二进制比较器)
- BinaryPrefixComparator(二进制前缀比较器)
- NullComparator(空比较器)一般不用
- BitComparator(位比较器)一般不用
- RegexStringComparator(正则比较器)
- SubstringComparator
- 示例代码
- rowKey过滤器:RowFilter
- 列簇过滤器:FamilyFilter
- 列过滤器:QualifierFilter
- 列值过滤器:ValueFilter
- 专用过滤器
- 单列值过滤器:SingleColumnValueFilter
- 列值排除过滤器:SingleColumnValueExcludeFilter
- rowkey前缀过滤器:PrefixFilter
- 分页过滤器PageFilter
- 多过滤器综合查询
- 过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端
- 过滤器的类型很多,但是可以分为两大类:
- 比较过滤器:可应用于rowkey、列簇、列、列值过滤器
- 专用过滤器:只能适用于特定的过滤器
- LESS <
- LESS_OR_EQUAL <=
- EQUAL =
- NOT_EQUAL <>
- GREATER_OR_EQUAL >=
- GREATER >
- NO_OP 排除所有
BinaryComparator(二进制比较器):按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
BinaryPrefixComparator(二进制前缀比较器):同BinaryComparator,只是只比较左端前缀的数据是否相同
NullComparator(空比较器,一般不用):判断给定的值是否为空
BitComparator(位比较器,一般不用):按位比较
RegexStringComparator(正则比较器):提供一个正则的比较器,仅支持 EQUAL 和 NOT_EQUAL
SubstringComparator(子串比较器):判断提供的子串是否出现在被比较的值中
示例代码
rowKey过滤器:RowFilter
通过RowFilter与BinaryComparator过滤比rowKey 1500100010小的所有值出来
@Test // 通过RowFilter过滤比rowKey 1500100010 小的所有值出来 public void BinaryComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes(1500100010)); RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS, binaryComparator); Scan scan = new Scan(); scan.setFilter(rowFilter); ResultScanner scanner = students.getScanner(scan); Result rs = scanner.next(); while (rs != null) { String id = Bytes.toString(rs.getRow()); String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes())); int age = Bytes.toInt(rs.getValue("info".getBytes(), "age".getBytes())); String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes())); String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id + "t" + name + "t" + age + "t" + gender + "t" + clazz + "t"); rs = scanner.next(); } }列簇过滤器:FamilyFilter
通过FamilyFilter与SubstringComparator查询列簇名包含in的所有列簇下面的数据
@Test // 通过FamilyFilter查询列簇名包含in的所有列簇下面的数据 public void SubstringComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); SubstringComparator substringComparator = new SubstringComparator("in"); FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, substringComparator); Scan scan = new Scan(); scan.setFilter(familyFilter); ResultScanner scanner = students.getScanner(scan); Result rs = scanner.next(); while (rs != null) { String id = Bytes.toString(rs.getRow()); String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes())); int age = Bytes.toInt(rs.getValue("info".getBytes(), "age".getBytes())); String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes())); String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id + "t" + name + "t" + age + "t" + gender + "t" + clazz + "t"); rs = scanner.next(); } }
通过FamilyFilter与 BinaryPrefixComparator 过滤出列簇以info开头的列簇下的所有数据
// 通过FamilyFilter与 BinaryPrefixComparator 过滤出列簇以info开头的所有列簇下的所有数据 @Test public void BinaryPrefixComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); // 二进制前缀比较器 BinaryPrefixComparator binaryPrefixComparator = new BinaryPrefixComparator("info".getBytes()); // FamilyFilter 作用于列簇的过滤器 FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, binaryPrefixComparator); Scan scan = new Scan(); scan.withStartRow("1500100001".getBytes()); scan.withStopRow("1500100011".getBytes()); // 通过setFilter方法设置过滤器 scan.setFilter(familyFilter); ResultScanner scanner = students.getScanner(scan); printRS(scanner); }列过滤器:QualifierFilter
通过QualifierFilter与SubstringComparator查询列名包含in的列的值
public void printRS(ResultScanner scanner) throws IOException { for (Result rs : scanner) { String rowkey = Bytes.toString(rs.getRow()); System.out.println("当前行的rowkey为:" + rowkey); for (Cell cell : rs.listCells()) { String family = Bytes.toString(CellUtil.cloneFamily(cell)); String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell)); byte[] bytes = CellUtil.clonevalue(cell); if ("age".equals(qualifier)) { int value = Bytes.toInt(bytes); System.out.println(family + ":" + qualifier + "的值为" + value); } else { String value = Bytes.toString(bytes); System.out.println(family + ":" + qualifier + "的值为" + value); } } } } @Test // 通过FamilyFilter查询列簇名包含in的所有列簇下面的数据 public void SubstringComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); SubstringComparator substringComparator = new SubstringComparator("in"); FamilyFilter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, substringComparator); Scan scan = new Scan(); scan.setFilter(familyFilter); ResultScanner scanner = students.getScanner(scan); Result rs = scanner.next(); while (rs != null) { String id = Bytes.toString(rs.getRow()); String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes())); int age = Bytes.toInt(rs.getValue("info".getBytes(), "age".getBytes())); String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes())); String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id + "t" + name + "t" + age + "t" + gender + "t" + clazz + "t"); rs = scanner.next(); } }
过滤出 列的名字 中 包含 “am” 所有的列 及列的值
// 过滤出 列的名字 中 包含 "am" 所有的列 及列的值 @Test public void SubstringComparatorQualifierFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); SubstringComparator substringComparator = new SubstringComparator("am"); // 作用在列名上的过滤器 QualifierFilter qualifierFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, substringComparator); Scan scan = new Scan(); scan.withStartRow("1500100001".getBytes()); scan.withStopRow("1500100011".getBytes()); // 通过setFilter方法设置过滤器 scan.setFilter(qualifierFilter); ResultScanner scanner = students.getScanner(scan); printRS(scanner); }列值过滤器:ValueFilter
通过ValueFilter与BinaryPrefixComparator过滤出所有的cell中值以 “张” 开头的学生
@Test // 通过ValueFilter与BinaryPrefixComparator过滤出所有的cell中值以 "张" 开头的学生 public void BinaryPrefixComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); BinaryPrefixComparator binaryPrefixComparator = new BinaryPrefixComparator("张".getBytes()); ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, binaryPrefixComparator); Scan scan = new Scan(); scan.setFilter(valueFilter); ResultScanner scanner = students.getScanner(scan); printRS(scanner); }
过滤出文科的学生,只会返回clazz列,其他列的数据不符合条件,不会返回
// 过滤出文科的学生 // 只会返回clazz列,其他列的数据不符合条件,不会返回 @Test public void RegexStringComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); // 使用正则表达式比较器 RegexStringComparator regexStringComparator = new RegexStringComparator("^文科.*"); // ValueFilter 会返回符合条件的cell,并不会返回整条数据 ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, regexStringComparator); Scan scan = new Scan(); scan.withStartRow("1500100001".getBytes()); scan.withStopRow("1500100011".getBytes()); // 通过setFilter方法设置过滤器 scan.setFilter(valueFilter); ResultScanner scanner = students.getScanner(scan); printRS(scanner); }专用过滤器 单列值过滤器:SingleColumnValueFilter
SingleColumnValueFilter会返回满足条件的cell所在行的所有cell的值(即会返回一行数据)
通过SingleColumnValueFilter与RegexStringComparator查询文科班所有学生信息
@Test // 通过SingleColumnValueFilter与查询文科班所有学生信息 public void RegexStringComparatorFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter( "info".getBytes(), "clazz".getBytes(), CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^文科.*") ); Scan scan = new Scan(); scan.setFilter(singleColumnValueFilter); ResultScanner scanner = students.getScanner(scan); Result rs = scanner.next(); while (rs != null) { String id = Bytes.toString(rs.getRow()); String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes())); int age = Bytes.toInt(rs.getValue("info".getBytes(), "age".getBytes())); String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes())); String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id + "t" + name + "t" + age + "t" + gender + "t" + clazz + "t"); rs = scanner.next(); } }列值排除过滤器:SingleColumnValueExcludeFilter
与SingleColumnValueFilter相反,会排除掉指定的列,其他的列全部返回
通过SingleColumnValueExcludeFilter与BinaryComparator查询文科一班所有学生信息,最终不返回clazz列
@Test // 通过SingleColumnValueExcludeFilter与BinaryComparator查询文科一班所有学生信息,最终不返回clazz列 public void RegexStringComparatorExcludeFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter( "info".getBytes(), "clazz".getBytes(), CompareFilter.CompareOp.EQUAL, new BinaryComparator("文科一班".getBytes()) ); Scan scan = new Scan(); scan.setFilter(singleColumnValueExcludeFilter); ResultScanner scanner = students.getScanner(scan); Result rs = scanner.next(); while (rs != null) { String id = Bytes.toString(rs.getRow()); String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes())); int age = Bytes.toInt(rs.getValue("info".getBytes(), "age".getBytes())); String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes())); // clazz列为空 String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id + "t" + name + "t" + age + "t" + gender + "t" + clazz + "t"); rs = scanner.next(); } }rowkey前缀过滤器:PrefixFilter
通过PrefixFilter查询以150010008开头的所有前缀的rowkey
@Test // 通过PrefixFilter查询以150010008开头的所有前缀的rowkey public void PrefixFilterFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes()); Scan scan = new Scan(); scan.setFilter(prefixFilter); ResultScanner scanner = students.getScanner(scan); Result rs = scanner.next(); while (rs != null) { String id = Bytes.toString(rs.getRow()); String name = Bytes.toString(rs.getValue("info".getBytes(), "name".getBytes())); int age = Bytes.toInt(rs.getValue("info".getBytes(), "age".getBytes())); String gender = Bytes.toString(rs.getValue("info".getBytes(), "gender".getBytes())); // clazz列为空 String clazz = Bytes.toString(rs.getValue("info".getBytes(), "clazz".getBytes())); System.out.println(id + "t" + name + "t" + age + "t" + gender + "t" + clazz + "t"); rs = scanner.next(); } }分页过滤器PageFilter
通过PageFilter查询第三页的数据,每页10条
使用PageFilter分页效率比较低,每次都需要扫描前面的数据,直到扫描到所需要查的数据
可设计一个合理的rowkey来实现分页需求
@Test // 通过PageFilter查询第三页的数据,每页10条 public void PageFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); int PageNum = 3; int PageSize = 10; Scan scan = new Scan(); if (PageNum == 1) { scan.withStartRow("".getBytes()); //使用分页过滤器,实现数据的分页 PageFilter pageFilter = new PageFilter(PageSize); scan.setFilter(pageFilter); ResultScanner scanner = students.getScanner(scan); printRS(scanner); } else { String current_page_start_rows = ""; int scanDatas = (PageNum - 1) * PageSize + 1; PageFilter pageFilter = new PageFilter(scanDatas); scan.setFilter(pageFilter); ResultScanner scanner = students.getScanner(scan); for (Result rs : scanner) { current_page_start_rows = Bytes.toString(rs.getRow()); } scan.withStartRow(current_page_start_rows.getBytes()); PageFilter pageFilter1 = new PageFilter(PageSize); scan.setFilter(pageFilter1); ResultScanner scanner1 = students.getScanner(scan); printRS(scanner1); } }
通过合理的设置rowkey来实现分页功能
@Test // 通过合理的设置rowkey来实现分页功能,提高效率 public void PageFilterTest2() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); int PageSize = 10; int PageNum = 3; int baseId = 1500100000; int start_row = baseId + (PageNum - 1) * PageSize + 1; int end_row = start_row + PageSize; Scan scan = new Scan(); scan.withStartRow(String.valueOf(start_row).getBytes()); scan.withStopRow(String.valueOf(end_row).getBytes()); ResultScanner scanner = students.getScanner(scan); printRS(scanner); }多过滤器综合查询
查询文科班中的学生中学号以150010008开头并且年龄小于23的学生信息
@Test // 查询文科班中的学生中学号以150010008开头并且年龄小于23的学生信息 public void FilterListFilter() throws IOException { Table students = conn.getTable(TableName.valueOf("students")); Scan scan = new Scan(); SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter( "info".getBytes() , "clazz".getBytes() , CompareFilter.CompareOp.EQUAL , new RegexStringComparator("^文科.*")); PrefixFilter prefixFilter = new PrefixFilter("150010008".getBytes()); SingleColumnValueFilter singleColumnValueFilter1 = new SingleColumnValueFilter( "info".getBytes() , "age".getBytes() , CompareFilter.CompareOp.LESS , new BinaryComparator(Bytes.toBytes(23))); FilterList filterList = new FilterList(); filterList.addFilter(singleColumnValueFilter); filterList.addFilter(prefixFilter); filterList.addFilter(singleColumnValueFilter1); scan.setFilter(filterList); ResultScanner scanner = students.getScanner(scan); printRS(scanner); }
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)