花生壳域名直接做网站,网站频繁改版,企业专业建站,网站备案密码
一、实验目的
1. 理解分布式数据存储系统HBase的架构和工作原理。
2. 掌握HBase表的设计原则，能够根据实际业务需求设计合理的表结构。
3. 学习使用HBase Java API进行数据的插入、查询和管理。
4. 实践分布式数据存储系统在大数据环境下的应用，…
一、实验目的
1. 理解分布式数据存储系统HBase的架构和工作原理。
2. 掌握HBase表的设计原则，能够根据实际业务需求设计合理的表结构。
3. 学习使用HBase Java API进行数据的插入、查询和管理。
4. 实践分布式数据存储系统在大数据环境下的应用，提升实际操作能力和问题解决能力。
二、安装配置HBase集群
1、安装并配置一个HBase集群，确保集群中的各个组件正常运行。
2、确保集群中的主节点（Master）和多个从节点（RegionServers）都可以正常通信。
三、设计HBase表结构
根据钞票交易数据的业务需求，设计合适的HBase表结构，考虑如何存储和检索钞票交易数据。
HBase表设计：
表名：currency_transactions
行键：冠字号#交易时间（确保唯一性，支持按时间查询）
列族：
info：存储钞票基本信息，如面额、交易金额。
transaction：存储交易相关信息，如交易时间、交易地点、类型。
meta：其他信息。
四、插入部分钞票交易数据并探索数据特征
1、将一部分钞票交易数据插入到设计的HBase表中。
（1）创建表：
create 'currency_transactions', 'info', 'transaction', 'meta'
（2）插入数据：
put 'currency_transactions', '123456ABC#20241201', 'info:denomination', '100'
put 'currency_transactions', '123456ABC#20241201', 'info:amount', '100'
put 'currency_transactions', '123456ABC#20241201', 'transaction:time', '2024-12-01 10:00'
put 'currency_transactions', '123456ABC#20241201', 'transaction:location', 'Beijing'
put 'currency_transactions', '123456ABC#20241201', 'transaction:type', 'Deposit'
put 'currency_transactions', '123456ABC#20241201', 'meta:notes', 'First deposit'
2、使用基础的HBase查询语句探索钞票交易数据的结构和特征，确保数据可以正确存储和访问。
（1）查看表中所有数据：
scan 'currency_transactions'
（2）查询特定行键的数据：
get 'currency_transactions', '123456ABC#20241201'
（3）查询特定列的数据：
get 'currency_transactions', '123456ABC#20241201', 'transaction:location'
五、使用HBase Java API进行操作
1、创建Maven项目并添加相关依赖
（1）在IDEA上面新建Maven项目，下载与虚拟机中HBase一致的JDK版本（1.8）。
（2）新建项目。
（3）编辑pom.xml文件，内容如下：
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.example</groupId>
    <artifactId>javaapi</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- HBase Client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.5</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
    </dependencies>
</project>
log4j.properties:
# Set root logger level and appender
log4j.rootLogger=INFO, console
# Console appender
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
2、HBase Java API的基本操作之表的创建
（1）代码部分
package com.example;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;import java.io.IOException;public class HBaseTableCreator {public static void main(String[] args) throws IOException {Configuration config HBaseConfiguration.create();config.addResource(new Path(hbase-site.xml));config.set(hbase.zookeeper.quorum, 192.168.125.101);config.set(hbase.zookeeper.property.clientPort, 2181);try (Connection connection ConnectionFactory.createConnection(config);Admin admin connection.getAdmin()) {TableName tableName TableName.valueOf(currency_transactions);// 使用 HBase 1.x API 定义列族HTableDescriptor tableDescriptor new HTableDescriptor(tableName);tableDescriptor.addFamily(new HColumnDescriptor(info));tableDescriptor.addFamily(new HColumnDescriptor(transaction));tableDescriptor.addFamily(new HColumnDescriptor(meta));// 检查表是否存在if (admin.tableExists(tableName)) {System.out.println(Table already exists. Deleting and recreating...);admin.disableTable(tableName);admin.deleteTable(tableName);}// 创建表admin.createTable(tableDescriptor);System.out.println(Table created successfully.);}}
}2运行结果 3、HBase Java API的基本操作——数据的插入
（1）代码部分
package com.example;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;import java.io.IOException;public class HBaseDataInserter {public static void main(String[] args) throws IOException {// 配置 HBase 连接Configuration config HBaseConfiguration.create();config.addResource(new Path(hbase-site.xml));config.set(hbase.zookeeper.quorum, 192.168.125.101);config.set(hbase.zookeeper.property.clientPort, 2181);// 定义表名String tableName currency_transactions;try (Connection connection ConnectionFactory.createConnection(config)) {// 插入多行数据addRows(connection, tableName);}}public static void addRows(Connection connection, String tableName) throws IOException {try (Table table connection.getTable(TableName.valueOf(tableName))) {// 创建 Put 对象插入行1 (Alice)Put put1 new Put(row1.getBytes());put1.addColumn(info.getBytes(), name.getBytes(), Alice.getBytes());put1.addColumn(info.getBytes(), age.getBytes(), 30.getBytes());put1.addColumn(transaction.getBytes(), amount.getBytes(), 1000.getBytes());put1.addColumn(transaction.getBytes(), currency.getBytes(), USD.getBytes());// 创建 Put 对象插入行2 (Bob)Put put2 new Put(row2.getBytes());put2.addColumn(info.getBytes(), name.getBytes(), Bob.getBytes());put2.addColumn(info.getBytes(), age.getBytes(), 40.getBytes());put2.addColumn(transaction.getBytes(), amount.getBytes(), 500.getBytes());put2.addColumn(transaction.getBytes(), currency.getBytes(), EUR.getBytes());// 批量插入table.put(put1);table.put(put2);System.out.println(Rows added successfully.);}}
}2运行结果 4、根据冠字号行键查询单行数据
（1）实现代码
package com.example;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;import java.io.IOException;public class HBaseDataRetriever {public static void main(String[] args) throws IOException {// 配置 HBase 连接Configuration config HBaseConfiguration.create();config.addResource(new Path(hbase-site.xml));config.set(hbase.zookeeper.quorum, 192.168.125.101);config.set(hbase.zookeeper.property.clientPort, 2181);// 定义表名和行键冠字号String tableName currency_transactions;String rowKey row1; // 冠字号对应的行键try (Connection connection ConnectionFactory.createConnection(config)) {// 查询单行数据retrieveRowByKey(connection, tableName, rowKey);}}public static void retrieveRowByKey(Connection connection, String tableName, String rowKey) throws IOException {try (Table table connection.getTable(TableName.valueOf(tableName))) {// 创建 Get 对象Get get new Get(rowKey.getBytes());// 指定需要的列族和列可选get.addFamily(info.getBytes()); // 获取 info 列族的所有列get.addColumn(transaction.getBytes(), amount.getBytes()); // 获取特定列// 获取结果Result result table.get(get);// 遍历结果System.out.println(Row Key: rowKey);result.listCells().forEach(cell - {String family new String(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());String qualifier new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());String value new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());System.out.println(family : qualifier value);});}}
}2运行结果 5、批量检索所有数据扫描
（1）实现代码
package com.example;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;import java.io.IOException;public class HBaseDataScanner {public static void main(String[] args) throws IOException {// 配置 HBase 连接Configuration config HBaseConfiguration.create();config.addResource(new Path(hbase-site.xml));config.set(hbase.zookeeper.quorum, 192.168.125.101);config.set(hbase.zookeeper.property.clientPort, 2181);// 定义表名String tableName currency_transactions;try (Connection connection ConnectionFactory.createConnection(config)) {// 扫描表数据scanTableData(connection, tableName);}}public static void scanTableData(Connection connection, String tableName) throws IOException {try (Table table connection.getTable(TableName.valueOf(tableName))) {// 创建 Scan 对象Scan scan new Scan();// 指定列族或列可选scan.addFamily(info.getBytes()); // 扫描 info 列族scan.addColumn(transaction.getBytes(), currency.getBytes()); // 扫描特定列// 获取结果ResultScanner scanner table.getScanner(scan);// 遍历结果for (Result result : scanner) {String rowKey new String(result.getRow());System.out.println(Row Key: rowKey);result.listCells().forEach(cell - {String family new String(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());String qualifier new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());String value new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());System.out.println(family : qualifier value);});System.out.println(------------);}}}
}2运行结果 6、根据列值如冠字号、金额等过滤数据
（1）实现代码
package com.example;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.filter.*;import java.io.IOException;public class HBaseFilteredScan {public static void main(String[] args) throws IOException {// 配置 HBase 连接Configuration config HBaseConfiguration.create();config.addResource(new Path(hbase-site.xml));config.set(hbase.zookeeper.quorum, 192.168.125.101);config.set(hbase.zookeeper.property.clientPort, 2181);// 定义表名String tableName currency_transactions;try (Connection connection ConnectionFactory.createConnection(config)) {// 使用过滤器检索数据scanTableWithFilter(connection, tableName, transaction, currency, USD);}}public static void scanTableWithFilter(Connection connection, String tableName, String columnFamily, String columnQualifier, String valueToFilter) throws IOException {try (Table table connection.getTable(TableName.valueOf(tableName))) {// 创建 Scan 对象Scan scan new Scan();// 添加列值过滤器SingleColumnValueFilter filter new SingleColumnValueFilter(columnFamily.getBytes(), // 列族columnQualifier.getBytes(), // 列名CompareFilter.CompareOp.EQUAL, // 比较操作valueToFilter.getBytes()); // 目标值// 设置过滤器scan.setFilter(filter);// 获取结果ResultScanner scanner table.getScanner(scan);// 遍历结果for (Result result : scanner) {String rowKey new String(result.getRow());System.out.println(Row Key: rowKey);result.listCells().forEach(cell - {String family new String(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());String qualifier new String(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());String value new String(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());System.out.println(family : qualifier value);});System.out.println(------------);}}}
}2运行结果