一站式网站建设业务,the7 wordpress主题,百度一下百度搜索百度一下,深圳十大建筑设计公司
前几天写了一篇关于哈希算法的文章，起源就是在构思AB实验平台的时候，用到了哈希，所以对其做了深入的了解。AB实验平台是一般互联网做策略、样式实验会用到的一个系统，一般开启某个实验之后，需要对线上流量进行分流…
前几天写了一篇关于哈希算法的文章，起源就是在构思AB实验平台的时候用到了哈希，所以对其做了深入的了解。
AB实验平台是一般互联网做策略、样式实验会用到的一个系统。一般开启某个实验之后，需要对线上流量进行分流：客户端 -> 实验平台 -> 策略平台 -> 应用服务，大概是这个链路。
场景
线上流量策略分流：需要对不同人群做策略实验，最终效果好的一组需要推全到100%；在未推全之前，100%的流量会被分到原有流量 + 几个实验组。
比如我有一个实验（样式迭代），产品希望60%的流量走线上逻辑，剩下的40%均分到4个实验组，这样可以细致地比对样式的效果。当然，如果一个用户命中了实验组A，那么他后续的行为在实验组不调整的情况下需要一直保持（有的人说希望调整了也保持，但是真的了解AB分流逻辑之后，真挺难的）。
其实我们有自己的AB平台，但是我在想，如果让我去实现，我会怎么做？便有了这一篇文章。
考虑的问题
1.如何保证一个用户对实验命中唯一性
在实验组不调整的情况下，PM希望用户分流的结果始终保持一致。【策略】hash算法：通过对imei进行哈希运算，得到int的哈希值即可；可以加一个时间戳（实验修改时间）来控制用户的imei命中变化。
2.如何按比例分流
【策略】取模运算：构建一个长度为100的bucket，然后通过对imei的哈希值运算结果对100取模，即可得到0-99的数，从而去命中数据区间，挑选实验组即可。由于哈希算法的特性，只要imei不变、时间戳(因子)不变，计算得到的哈希值是永远不会变的，所以取模结果、命中实验的结果也是不变的。
比如我的case是线上流量60%其他实验组10%,10%,10%,10%则可以构建一个数据区间【0,60,70,80,90,100】
实战
代码
Data
public class TrafficDistributor {private String id;// 实验idprivate String name;private double isolation;// 线上流量private ListExperiment experiments;// 实验组private TableString, String, SetString whiteTable HashBasedTable.create();private static Date updateTime new Date(1719844946L);//2024/7/1 22:42:26public TrafficDistributor(String id, String name, double isolation, ListExperiment experiments, TableString, String, SetString whiteTable) {this.id id;this.name name;this.isolation isolation;this.experiments experiments;this.whiteTable whiteTable;}public static void main(String[] args) {// 实验idString id new_style;// 1.初始化白名单》正式环境从db中拿出来初始化TableString, String, SetString white HashBasedTable.create();white.put(id, 00, Sets.newHashSetWithExpectedSize(0));white.put(id, 01, Set.of(whitelist_imei_1, whitelist_imei_2));white.put(id, 02, Set.of(whitelist_imei_3, whitelist_imei_4));white.put(id, 11, Set.of(whitelist_imei_5, whitelist_imei_6));white.put(id, 12, Set.of(whitelist_imei_7, whitelist_imei_8));// 2.初始化实验组Experiment experiment1 new Experiment(01, 对照组01, 10);Experiment experiment2 new Experiment(02, 对照组02, 10);Experiment experiment3 new Experiment(11, 实验组01, 10);Experiment experiment4 new Experiment(12, 实验组02, 10);ListExperiment experiments List.of(experiment1, experiment2, experiment3, experiment4);// 3.初始化分流器TrafficDistributor distributor new TrafficDistributor(id, 新样式实验, 60, experiments, white);// 4.模拟分流for (int times 0; times 100; times) {int index 1, size 100;MapString, Integer result Maps.newHashMap();ListLong cost Lists.newArrayListWithCapacity(size);while (index size) {String imei RandomStringUtils.randomAlphanumeric(64);long start System.currentTimeMillis();// 核心分流Experiment experiment distributor.allocate(id, imei);if (experiment ! null) {// 实验组result.compute(experiment.getId(), (eid, count) - (count null) ? 
1 : count 1);} else {// 线上int value result.getOrDefault(00, 0);result.put(00, value 1);}cost.add(System.currentTimeMillis() - start);index;}System.out.println(耗时: cost.stream().mapToLong(Long::longValue).sum() 结果: JSON.toJSONString(result));}}/*** 流量分配: 白名单 - 实验组 - 线上流量*/public Experiment allocate(String id, String imei) {Experiment whiteListShot whiteListShot(id, imei);if (whiteListShot ! null) {return whiteListShot;}ListDouble weights experiments.stream().map(Experiment::getWeight).collect(Collectors.toList());int groupId distributeTraffic(weights, imei);if (groupId 0) {// 线上流量return null;}// 实验组流量return experiments.get(groupId);}/*** 根据每个实验组的权重配比判断最终流量应该分配到哪个实验组。** param weights 每个实验组的权重值数组。* param imei imei* return 分配流量的实验组索引*/public int distributeTraffic(ListDouble weights, String imei) {if (CollectionUtils.isEmpty(weights)) {return -1;}double totalWeight 100;// 总权重100%double testWeight weights.stream().mapToDouble(Double::doubleValue).sum();// 实验组总权重if (totalWeight ! (isolation testWeight)) {throw new IllegalArgumentException(实验组和对照组流量分配存在问题);}// imei时间戳(因子)生成hashint hash HashUtils.hashcode(imei updateTime.getTime());return bucketShot(weights, hash % totalWeight);}/*** 哈希值进行取模定位后命中的实验*/public int bucketShot(ListDouble weights, double bucketIndex) {ListDouble range Lists.newArrayListWithCapacity(weights.size() 1);range.add(isolation);double pre range.get(0);for (double weight : weights) {range.add(pre weight);pre pre weight;}for (int i 0; i range.size(); i) {if (bucketIndex range.get(i)) {return i - 1;}}throw new IllegalArgumentException(实验组和对照组流量分配存在问题);}/*** 白名单命中** param id 实验id* param imei imei* return 实验组*/public Experiment whiteListShot(String id, String imei) {assert whiteTable ! 
null !whiteTable.isEmpty();if (!whiteTable.containsRow(id)) {throw new IllegalArgumentException(实验id id 不存在);}MapString, SetString experimentData whiteTable.row(id);for (Map.EntryString, SetString entry : experimentData.entrySet()) {SetString values entry.getValue();if (!CollectionUtils.isEmpty(values) values.contains(imei)) {String key entry.getKey();return experiments.stream().filter(test - test.getId().equals(key)).findFirst().orElse(null);}}return null;}// 其他方法保持不变DataAllArgsConstructorstatic class Experiment {private String id;private String name;private double weight;}public class HashUtils {/*** hashCode方法*/public static int hashcode(Object obj) {final int p 16777619;int hash (int) 2166136261L;String str obj.toString();for (int i 0; i str.length(); i)hash (hash ^ str.charAt(i)) * p;hash hash 13;hash ^ hash 7;hash hash 3;hash ^ hash 17;hash hash 5;if (hash 0)hash Math.abs(hash);return hash;}
}分析
耗时:34结果:{11:11,00:61,01:7,12:9,02:12}
耗时:4结果:{11:8,00:61,12:4,01:9,02:18}
耗时:2结果:{11:9,00:60,01:10,12:7,02:14}
耗时:2结果:{11:11,00:54,01:10,12:14,02:11}
耗时:1结果:{11:9,00:65,12:12,01:8,02:6}
耗时:3结果:{11:9,00:61,01:6,12:12,02:12}
耗时:2结果:{11:9,00:56,12:8,01:17,02:10}
耗时:2结果:{11:7,00:61,12:7,01:10,02:15}
耗时:1结果:{11:8,00:58,01:13,12:4,02:17}
耗时:0结果:{11:5,00:72,01:9,12:5,02:9}
耗时:0结果:{11:2,00:70,12:14,01:8,02:6}
耗时:2结果:{11:7,00:63,12:10,01:10,02:10}
耗时:0结果:{11:8,00:60,12:11,01:10,02:11}
耗时:1结果:{11:9,00:60,12:11,01:13,02:7}
耗时:0结果:{11:12,00:71,12:5,01:9,02:3}
耗时:1结果:{11:12,00:57,01:11,12:11,02:9}
耗时:0结果:{11:8,00:62,01:14,12:7,02:9}
耗时:1结果:{11:10,00:64,12:6,01:9,02:11}
耗时:2结果:{11:5,00:73,12:7,01:9,02:6}
耗时:0结果:{11:9,00:68,01:6,12:9,02:8}
耗时:1结果:{11:12,00:63,01:10,12:4,02:11}
耗时:1结果:{11:15,00:59,01:8,12:9,02:9}
耗时:0结果:{11:10,00:66,01:8,12:6,02:10}
耗时:1结果:{11:8,00:64,01:10,12:6,02:12}
耗时:2结果:{11:11,00:63,01:8,12:8,02:10}
耗时:1结果:{11:5,00:66,12:9,01:12,02:8}
耗时:3结果:{11:8,00:67,12:10,01:9,02:6}
耗时:1结果:{11:9,00:54,12:16,01:7,02:14}
耗时:0结果:{11:11,00:63,12:5,01:11,02:10}
耗时:1结果:{11:10,00:59,01:12,12:8,02:11}
耗时:1结果:{11:12,00:62,12:11,01:8,02:7}
耗时:0结果:{11:8,00:59,12:8,01:13,02:12}
耗时:1结果:{11:12,00:51,12:11,01:15,02:11}
耗时:1结果:{11:1,00:72,01:10,12:8,02:9}
耗时:1结果:{11:8,00:55,01:18,12:9,02:10}
耗时:0结果:{11:6,00:54,01:22,12:5,02:13}
耗时:1结果:{11:9,00:58,12:11,01:11,02:11}
耗时:1结果:{11:15,00:57,12:12,01:3,02:13}
耗时:1结果:{11:6,00:66,12:10,01:11,02:7}
耗时:0结果:{11:13,00:61,12:12,01:8,02:6}
耗时:0结果:{11:8,00:61,12:11,01:10,02:10}
耗时:0结果:{11:10,00:57,01:10,12:12,02:11}
耗时:1结果:{11:6,00:62,12:12,01:11,02:9}
耗时:5结果:{11:9,00:64,12:8,01:10,02:9}
耗时:0结果:{11:15,00:54,12:8,01:9,02:14}
耗时:0结果:{11:12,00:62,01:8,12:10,02:8}
耗时:0结果:{11:9,00:63,12:6,01:12,02:10}
耗时:0结果:{11:9,00:59,01:10,12:9,02:13}
耗时:1结果:{11:10,00:58,01:7,12:14,02:11}
耗时:1结果:{11:9,00:73,01:8,12:2,02:8}
耗时:0结果:{11:14,00:62,12:7,01:10,02:7}
耗时:1结果:{11:12,00:55,12:10,01:12,02:11}
耗时:0结果:{11:5,00:59,01:7,12:17,02:12}
耗时:0结果:{11:10,00:59,12:7,01:10,02:14}
耗时:1结果:{11:11,00:54,01:17,12:8,02:10}
耗时:0结果:{11:8,00:65,12:8,01:9,02:10}
耗时:1结果:{11:13,00:61,12:8,01:9,02:9}
耗时:1结果:{11:6,00:67,01:10,12:11,02:6}
耗时:1结果:{11:7,00:61,12:8,01:10,02:14}
耗时:0结果:{11:6,00:63,12:8,01:10,02:13}
耗时:0结果:{11:9,00:62,12:9,01:9,02:11}
耗时:1结果:{11:5,00:65,01:8,12:11,02:11}
耗时:0结果:{11:11,00:52,12:9,01:15,02:13}
耗时:0结果:{11:14,00:66,01:4,12:7,02:9}
耗时:0结果:{11:12,00:54,12:8,01:8,02:18}
耗时:1结果:{11:9,00:64,12:8,01:10,02:9}
耗时:1结果:{11:9,00:65,01:3,12:11,02:12}
耗时:0结果:{11:5,00:67,01:7,12:12,02:9}
耗时:0结果:{11:13,00:50,01:12,12:11,02:14}
耗时:1结果:{11:18,00:55,12:7,01:10,02:10}
耗时:0结果:{11:5,00:64,01:12,12:5,02:14}
耗时:0结果:{11:10,00:68,12:6,01:7,02:9}
耗时:1结果:{11:9,00:71,12:4,01:6,02:10}
耗时:0结果:{11:8,00:62,12:9,01:9,02:12}
耗时:0结果:{11:10,00:64,12:8,01:9,02:9}
耗时:0结果:{11:9,00:57,12:10,01:9,02:15}
耗时:0结果:{11:10,00:60,12:13,01:9,02:8}
耗时:0结果:{11:12,00:66,01:5,12:9,02:8}
耗时:0结果:{11:6,00:58,01:11,12:13,02:12}
耗时:1结果:{11:10,00:62,01:12,12:9,02:7}
耗时:1结果:{11:7,00:66,12:11,01:7,02:9}
耗时:0结果:{11:10,00:63,12:9,01:11,02:7}
耗时:1结果:{11:8,00:61,12:10,01:12,02:9}
耗时:0结果:{11:8,00:62,12:6,01:10,02:14}
耗时:0结果:{11:7,00:68,12:8,01:11,02:6}
耗时:0结果:{11:11,00:54,01:11,12:16,02:8}
耗时:0结果:{11:7,00:68,01:10,12:6,02:9}
耗时:0结果:{11:7,00:65,12:7,01:8,02:13}
耗时:0结果:{11:8,00:69,01:8,12:5,02:10}
耗时:0结果:{11:15,00:60,01:6,12:11,02:8}
耗时:0结果:{11:9,00:70,01:6,12:7,02:8}
耗时:0结果:{11:14,00:62,12:7,01:10,02:7}
耗时:0结果:{11:11,00:64,12:7,01:7,02:11}
耗时:1结果:{11:6,00:56,12:14,01:10,02:14}
耗时:0结果:{11:7,00:64,12:8,01:11,02:10}
耗时:1结果:{11:14,00:65,12:4,01:10,02:7}
耗时:0结果:{11:13,00:59,12:7,01:13,02:8}
耗时:1结果:{11:5,00:65,12:8,01:14,02:8}
耗时:1结果:{11:12,00:54,01:11,12:10,02:13}
耗时:1结果:{11:10,00:56,12:11,01:11,02:12}
Process finished with exit code 0
我手写的hashCode方法，包括随机字符串生成的imei，仍然存在实验组分类偏向的情况。中间尝试过使用一致性哈希解决偏移，但是又解决不了加权按比例分配的问题，不知道各位同仁有没有什么好的建议；我怀疑是随机字符串的偏向性问题，后面看看有没有办法解决。
取模算法是个宝藏，既可以精准定位，也可以利用随机性来做区间命中。
updateTime它是个变化因子：如果实验有新增实验组或者流量调整，可以利用它来控制imei实验组变化。
参考资料
https://zhuanlan.zhihu.com/p/404232432
https://blog.csdn.net/SmartCodeTech/article/details/113698568?spm=1001.2101.3001.6650.2&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7ECtr-2-113698568-blog-131205090.235%5Ev43%5Epc_blog_bottom_relevance_base8&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7ECtr-2-113698568-blog-131205090.235%5Ev43%5Epc_blog_bottom_relevance_base8&utm_relevant_index=5