当前位置：首页 > news >正文

一个网站怎样做两个后台记事本做网站插图片

news 2025/11/21 8:01:40

一个网站怎样做两个后台,记事本做网站插图片,商城分销怎么做,唯品会网站建设的目标在ray.rllib中定义和使用自己的模型， 分为以下三个步骤： 1. 定义自己的模型。 2. 向ray注册自定义的模型。 3. 在config中配置使用自定义的模型。环境配置： torch==2.5.1 ray==2.10.0 ray[rllib]==2.10.0 ray[tune]==2.10.0 ray[serve]==2.10.0 numpy==1.23.… 在ray.rllib中定义和使用自己的模型，分为以下三个步骤： 1. 定义自己的模型。 2. 向ray注册自定义的模型。 3. 在config中配置使用自定义的模型。环境配置： torch==2.5.1 ray==2.10.0 ray[rllib]==2.10.0 ray[tune]==2.10.0 ray[serve]==2.10.0 numpy==1.23.0 python==3.9.18 一、定义自己的模型：需要继承自 TFModelV2 或 TorchModelV2，并重写需要自定义的方法，其代码框架如下： import torch.nn as nn from ray.rllib.models.torch.torch_modelv2 import TorchModelV2class My_Model(TorchModelV2, nn.Module): ## 重构以下函数函数接口不能变。def __init__(self, obs_space, action_space, num_outputs, model_config, name, *, custom_arg1, custom_arg2): ...def forward(self, input_dict, state, seq_lens): ...def value_function(self): ... 示例如下： ## 1. 定义自己的模型 import numpy as np import torch.nn as nn from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 import gymnasium as gym from gymnasium import spaces from ray.rllib.utils.typing import Dict, TensorType, List, ModelConfigDictclass My_Model(TorchModelV2, nn.Module):def __init__(self, obs_space:gym.spaces.Space, action_space:gym.spaces.Space, num_outputs:int, model_config:ModelConfigDict, ## PPOConfig.training(model ModelConfigDict), 调用的是config.model中的参数name:str,*, custom_arg1, custom_arg2):TorchModelV2.__init__(self, obs_space, action_space, num_outputs,model_config,name)nn.Module.__init__(self)## 测试 custom_arg1 custom_arg2 传递进来的是什么数值print(f custom_arg1 {custom_arg1}, custom_arg2 {custom_arg2})## 定义网络层obs_dim int(np.product(obs_space.shape))action_dim int(np.product(action_space.shape))## shareNetself.shared_fc nn.Linear(obs_dim,128)## actorNetself.actorNet nn.Linear(128, action_dim)## criticNetself.criticNet nn.Linear(128,1)self._feature None def forward(self, input_dict, state, seq_lens):obs input_dict[obs].float()self._feature self.shared_fc.forward(obs)action_logits self.actorNet.forward(self._feature)return action_logits, state def 
value_function(self):value self.criticNet.forward(self._feature).squeeze(1)return value 在rllib中，每个算法的所有网络都被汇集到同一个 ModelV2 类下，供算法调用。actor 网络和critic网络可以在外面定义，也可以在model内部直接定义。 model的forward用于返回actor网络的输出， value_function函数用于返回critic网络的输出。网络结构和网络层共享可以自定义设置。输入输出接口需要与上面保持严格一致。二、向ray注册自定义模型： ray.rllib.models.ModelCatalog 类用于向ray注册自定义的model, 还可以用于获取env的 preprocessors 和 action distributions。 import ray from ray.rllib.models import ModelCatalog # ModelCatalog 类用于注册 models, 获取env的 preprocessors 和 action distributions。 ModelCatalog.register_custom_model(model_namemy_torch_model, model_class My_Model) 三、在算法中配置并使用自定义的模型：主要是在 config.training() 模块中的 model 子模块中传入两个配置信息： 1）custom_model:my_torch_model , 2）custom_model_config: {custom_arg1: 1, custom_arg2: 2,}}) 两个关键字固定不变，填入自己注册的模型名和对应的模型参数即可。可以有以下三种配置代码的编写方式。配置方法1： ## 3. 在训练中使用自定义模型 from ray.rllib.algorithms.ppo import PPOConfig from ray.tune.logger import pretty_print config PPOConfig() config config.environment(CartPole-v1) config config.rollouts(num_rollout_workers2) config config.framework(frameworktorch) ## 配置使用自定义的模型 config config.training(model {custom_model:my_torch_model , custom_model_config: {custom_arg1: 1, custom_arg2: 2,}}) ## 主要在上面两行配置使用自己的模型 ## 配置 model 的 custom_model 项用于指定rllib算法所使用的模型 ## 配置 model 的 custom_model_config 项用于传入自定义的网络参数供自定义的model使用。 ## 这两个关键词不可更改。algo config.build() ## 4. 
执行训练 result algo.train() print(pretty_print(result)) 与以上配置内容一样，还可以用以下两种配置写法。配置方法2： config PPOConfig() config config.environment(CartPole-v1) config config.rollouts(num_rollout_workers2) config config.framework(frameworktorch) ## 配置自定义模型 model_config_dict {} model_config_dict[custom_model] my_torch_model model_config_dict[custom_model_config] {custom_arg1: 1, custom_arg2: 2,} config config.training(model model_config_dict) algo config.build() 配置方法3（推荐）： config PPOConfig() config config.environment(CartPole-v1) config config.rollouts(num_rollout_workers2) config config.framework(frameworktorch) ## 配置自定义模型 config.model[custom_model] my_torch_model config.model[custom_model_config] {custom_arg1: 1, custom_arg2: 2,}algo config.build() 代码汇总：在ray.rllib中定义和使用自己的模型，分为以下三个步骤： 1. 定义自己的模型。需要继承自 TFModelV2 或 TorchModelV2, 并重写需要自定义的方法： import torch.nn as nnfrom ray.rllib.models.torch.torch_modelv2 import TorchModelV2class CustomTorchModel(TorchModelV2, nn.Module): ## 重构以下函数函数接口不能变。 def __init__(self, obs_space, action_space, num_outputs, model_config, name, *, custom_arg1, custom_arg2): ...def forward(self, input_dict, state, seq_lens): ...def value_function(self): ... 2. 向ray注册自定义的模型： from ray.rllib.models import ModelCatalogModelCatalog.register_custom_model(wzg_torch_model, CustomTorchModel) 3. 在config中配置使用自定义的模型： model_config_dict {custom_model:wzg_torch_model,custom_model_config:{custom_arg1: 1,custom_arg2: 2}}config PPOConfig()# config config.training(model model_config_dict)config.model[custom_model] wzg_torch_modelconfig.model[custom_model_config] {custom_arg1: 1,custom_arg2: 2} ## 1. 
定义自己的模型 import numpy as np import torch.nn as nn from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 import gymnasium as gym from gymnasium import spaces from ray.rllib.utils.typing import Dict, TensorType, List, ModelConfigDictclass My_Model(TorchModelV2, nn.Module):def __init__(self, obs_space:gym.spaces.Space, action_space:gym.spaces.Space, num_outputs:int, model_config:ModelConfigDict, ## PPOConfig.training(model ModelConfigDict), 调用的是config.model中的参数name:str,*, custom_arg1, custom_arg2):TorchModelV2.__init__(self, obs_space, action_space, num_outputs,model_config,name)nn.Module.__init__(self)## 测试 custom_arg1 custom_arg2 传递进来的是什么数值print(f custom_arg1 {custom_arg1}, custom_arg2 {custom_arg2})## 定义网络层obs_dim int(np.product(obs_space.shape))action_dim int(np.product(action_space.shape))## shareNetself.shared_fc nn.Linear(obs_dim,128)## actorNetself.actorNet nn.Linear(128, action_dim)## criticNetself.criticNet nn.Linear(128,1)self._feature None def forward(self, input_dict, state, seq_lens):obs input_dict[obs].float()self._feature self.shared_fc.forward(obs)action_logits self.actorNet.forward(self._feature)return action_logits, state def value_function(self):value self.criticNet.forward(self._feature).squeeze(1)return value ## 2. 向ray注册自定义模型 import ray from ray.rllib.models import ModelCatalog # ModelCatalog 类用于注册 models, 获取env的 preprocessors 和 action distributions。 ModelCatalog.register_custom_model(model_namemy_torch_model, model_class My_Model) ray.init()## 3. 
在训练中使用自定义模型 from ray.rllib.algorithms.ppo import PPOConfig from ray.tune.logger import pretty_print config PPOConfig() config config.environment(CartPole-v1) config config.rollouts(num_rollout_workers2) config config.framework(frameworktorch) # ## 配置自定义模型方法 1 # config config.training(model {custom_model:my_torch_model , # custom_model_config: {custom_arg1: 1, custom_arg2: 2,}}) # ## 配置自定义模型方法 2 # model_config_dict {} # model_config_dict[custom_model] my_torch_model # model_config_dict[custom_model_config] {custom_arg1: 1, custom_arg2: 2,} # config config.training(model model_config_dict) ## 配置自定义模型: 方法 3 个人更喜欢因为嵌套层次少 config.model[custom_model] my_torch_model config.model[custom_model_config] {custom_arg1: 1, custom_arg2: 2,}## 错误方法 # model_config_dict {} # model_config_dict[custom_model] my_torch_model # model_config_dict[custom_model_config] {custom_arg1: 1, custom_arg2: 2,} # config.model model_config_dict # 会清空 model 里面的其他默认配置导致报错algo config.build()## 4. 执行训练 result algo.train() print(pretty_print(result))

查看全文

http://www.zqtcl.cn/news/825073/