diff --git a/src/cql_sac/agent.py b/src/cql_sac/agent.py
index bfb7b1211892fc8b22b3f7738bde9b62fe576d29..85abbdca76bdf71552c718fe603f60bd645c5662 100644
--- a/src/cql_sac/agent.py
+++ b/src/cql_sac/agent.py
@@ -115,7 +115,7 @@ class CQLSAC(nn.Module):
             critic_target(state, action) -> Q-value
         Params
         ======
-            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples
+            experiences (Tuple[torch.Tensor]): tuple of (s, a, r, c, s', done) tuples
             gamma (float): discount factor
         """
         states, actions, rewards, costs, next_states, dones = experiences