gnunet

Main GNUnet Logic
Log | Files | Refs | Submodules | README | LICENSE

commit 5e11dc84e665f1de925336dec1eaf9d4071352dd
parent d6f94672c2e5ce0da942aaff9fab277176027656
Author: Fabian Oehlmann <oehlmann@in.tum.de>
Date:   Wed, 11 Sep 2013 17:23:14 +0000

ats_ril: solver further continued

Diffstat:
Msrc/ats/gnunet-service-ats-solver_ril.c | 350++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Msrc/ats/gnunet-service-ats-solver_ril.h | 2+-
2 files changed, 266 insertions(+), 86 deletions(-)

diff --git a/src/ats/gnunet-service-ats-solver_ril.c b/src/ats/gnunet-service-ats-solver_ril.c @@ -50,12 +50,31 @@ enum RIL_Action }; //TODO add the rest of the actions +enum RIL_Algorithm +{ + RIL_ALGO_SARSA, + RIL_ALGO_Q +}; + +enum RIL_E_Modification +{ + RIL_E_SET, + RIL_E_ZERO, + RIL_E_ACCUMULATE, + RIL_E_REPLACE +}; + /** * Global learning parameters */ struct RIL_Learning_Parameters { /** + * The TD-algorithm to use + */ + enum RIL_Algorithm algorithm; + + /** * Learning discount factor in the TD-update */ float gamma; @@ -129,9 +148,9 @@ struct RIL_Peer_Agent int a_old; /** - * Last eligibility trace vector + * Eligibility trace vector */ - double * e_t; + double * e; /** * Address in use @@ -303,23 +322,40 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, return result; } +/** + * Decide whether to do exploration (i.e. taking a new action) or exploitation (i.e. taking the + * currently estimated best action) in the current step + * @param agent agent performing the step + * @return yes, if exploring + */ +int +agent_decide_exploration (struct RIL_Peer_Agent *agent) +{ + double r = (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX; + + if (r < RIL_EXPLORE_RATIO) + { + return GNUNET_YES; + } + return GNUNET_NO; +} + +/** + * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the + * most reward in the future) + * @param agent agent performing the calculation + * @param state the state from which to take the action + * @return the action promising most future reward + */ int -agent_choose_action (struct RIL_Peer_Agent *agent, +agent_get_action_best (struct RIL_Peer_Agent *agent, double *state) { int i; int max_i = -1; - double r; double cur_q; double max_q = DBL_MIN; - r = ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX); - - if (r < RIL_EXPLORE_RATIO) - { - return GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, agent->n); - } - for (i = 0; i < agent->m; i++) { cur_q = agent_estimate_q (agent, state, i); @@ -335,11 +371,91 @@ agent_choose_action (struct RIL_Peer_Agent *agent, return max_i; } +/** + * Gets any action, to explore the action space from that state + * @param agent agent performing the calculation + * @param state the state from which to take the action + * @return any action + */ +int +agent_get_action_explore (struct RIL_Peer_Agent *agent, + double *state) +{ + return GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, agent->n); +} + +/** + * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a + * @param agent the agent performing the update + * @param reward the reward received for the last action + * @param s_next the new state, the last step got the agent into + * @param a_prime the new + */ +void +agent_update_weights (struct RIL_Peer_Agent *agent, + double reward, + double *s_next, + int a_prime) +{ + int i; + double delta; + double *theta = (agent->W)[agent->a_old]; + + delta = reward + agent_estimate_q (agent, s_next, a_prime) - + agent_estimate_q (agent, agent->s_old, agent->a_old); + for (i = 0; i < agent->m; i++) + { + theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i]; + } +} + +/** + * Changes the eligibility trace vector e in various manners: + * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces + * RIL_E_REPLACE - resets each component to 1 as in replacing traces + * RIL_E_SET - multiplies e with gamma and lambda as in the update rule + * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing + * @param agent + * @param mod + */ +void +agent_modify_eligibility (struct RIL_Peer_Agent *agent, + enum RIL_E_Modification mod) +{ + int i; + double *e = agent->e; + double gamma = agent->envi->parameters.gamma; + double lambda = agent->envi->parameters.lambda; + + for (i = 0; i < agent->m; i++) + { + switch (mod) + { + case RIL_E_ACCUMULATE: + e[i] += 1; + break; + case RIL_E_REPLACE: + e[i] = 1; + break; + case RIL_E_SET: + e[i] = gamma * lambda; + break; + case RIL_E_ZERO: + e[i] = 0; + break; + } + } +} + +/** + * Allocates a state vector and fills it with the features present + * @param solver the solver handle + * @return pointer to the state vector + */ double * -envi_get_state (void *s) +envi_get_state (struct GAS_RIL_Handle *solver) { int i; - struct GAS_RIL_Handle *solver = s; struct RIL_Network *net; double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4); @@ -355,36 +471,84 @@ envi_get_state (void *s) return state; } +/** + * Gets the reward of the last performed step + * @param solver solver handle + * @return the reward + */ double -envi_get_reward () +envi_get_reward (struct GAS_RIL_Handle *solver) { //TODO implement return (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX; } +/** + * Puts the action into effect + * @param solver solver handle + * @param action action to perform by the solver + */ +void +envi_do_action (struct GAS_RIL_Handle *solver, + int action) +{ + +} + +/** + * Performs one step of the Markov Decision Process. Other than in the literature the step starts + * after having done the last action a_old. It observes the new state s_next and the reward + * received. Then the coefficient update is done according to the SARSA or Q-learning method. The + * next action is put into effect. + * @param agent the agent performing the step + */ void agent_step (struct RIL_Peer_Agent *agent) { - int a_next; + int a_next = -1; double *s_next; double reward; - double delta; - double q_next; - s_next = envi_get_state(agent->envi); - reward = envi_get_reward(); - - a_next = agent_choose_action (agent, s_next); - q_next = agent_estimate_q(agent, s_next, a_next); + reward = envi_get_reward(agent->envi); - if (NULL != agent->s_old) + switch (agent->envi->parameters.algorithm) { - delta = reward + - (agent->envi->parameters.gamma * q_next) - - agent_estimate_q(agent, agent->s_old, agent->a_old); + case RIL_ALGO_SARSA: + agent_modify_eligibility (agent, RIL_E_SET); + if (agent_decide_exploration (agent)) + { + a_next = agent_get_action_explore (agent, s_next); + } + else + { + a_next = agent_get_action_best (agent, s_next); + } + agent_update_weights (agent, reward, s_next, a_next); //update weights with next action + break; + + case RIL_ALGO_Q: + a_next = agent_get_action_best (agent, s_next); //update weights with best action + agent_update_weights (agent, reward, s_next, a_next); + if (agent_decide_exploration (agent)) + { + a_next = agent_get_action_explore (agent, s_next); + agent_modify_eligibility(agent, RIL_E_ZERO); + } + else + { + a_next = agent_get_action_best (agent, s_next); + agent_modify_eligibility(agent, RIL_E_SET); + } + break; } + GNUNET_assert (-1 != a_next); + + agent_modify_eligibility (agent, RIL_E_ACCUMULATE); + + envi_do_action(agent->envi, a_next); + GNUNET_free(agent->s_old); agent->s_old = s_next; agent->a_old = a_next; @@ -392,14 +556,16 @@ agent_step (struct RIL_Peer_Agent *agent) agent->step_count += 1; } +/** + * Cycles through all agents and lets the active ones do a step. Schedules the next step. + * @param solver the solver handle + * @param tc task context for the scheduler + */ void -ril_periodic_step (void *s, +ril_periodic_step (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) { - /* - * iterate over active agents and do a time step - */ - struct GAS_RIL_Handle *solver = s; + struct GAS_RIL_Handle *solver = cls; struct RIL_Peer_Agent *cur; GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count); @@ -427,14 +593,14 @@ ril_periodic_step (void *s, */ struct RIL_Peer_Agent * agent_init (void *s, - struct GNUNET_PeerIdentity peer) + const struct GNUNET_PeerIdentity *peer) { int i; struct GAS_RIL_Handle * solver = s; struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent)); agent->envi = solver; - agent->peer = peer; + agent->peer = *peer; agent->step_count = 0; agent->active = GNUNET_NO; agent->s_old = NULL; @@ -446,7 +612,8 @@ agent_init (void *s, (agent->W)[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m); } agent->a_old = -1; - agent->e_t = NULL; + agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m); + agent_modify_eligibility (agent, RIL_E_ZERO); GNUNET_CONTAINER_DLL_insert (solver->agents_head, solver->agents_tail, agent); @@ -459,10 +626,10 @@ agent_init (void *s, * @param agent the agent to retire */ void -agent_die (void *s, - struct RIL_Peer_Agent * agent) +agent_die (struct GAS_RIL_Handle *solver, + struct RIL_Peer_Agent *agent) { - + //TODO implement } /** @@ -472,15 +639,14 @@ agent_die (void *s, * @return agent */ struct RIL_Peer_Agent * -ril_get_agent (struct GAS_RIL_Handle * s, - struct GNUNET_PeerIdentity peer) +ril_get_agent (struct GAS_RIL_Handle *solver, + const struct GNUNET_PeerIdentity *peer) { - struct GAS_RIL_Handle * solver = s; - struct RIL_Peer_Agent * cur; + struct RIL_Peer_Agent *cur; - for (cur = s->agents_head; NULL != cur; cur = cur->next) + for (cur = solver->agents_head; NULL != cur; cur = cur->next) { - if (0 == GNUNET_CRYPTO_hash_cmp (&peer.hashPubKey, &cur->peer.hashPubKey)) + if (0 == GNUNET_CRYPTO_hash_cmp (&peer->hashPubKey, &cur->peer.hashPubKey)) { return cur; } @@ -506,7 +672,7 @@ init_agents_it (void *cls, struct ATS_Address *address = value; struct RIL_Peer_Agent *agent; - agent = ril_get_agent (solver, address->peer); + agent = ril_get_agent (solver, &address->peer); GNUNET_assert (agent != NULL); @@ -534,10 +700,10 @@ init_agents_it (void *cls, * @param pref_rel the normalized preference value for this kind over all clients */ void -GAS_ril_address_change_preference (void *solver, - const struct GNUNET_PeerIdentity *peer, - enum GNUNET_ATS_PreferenceKind kind, - double pref_rel) +GAS_ril_address_change_preference (void *s, + const struct GNUNET_PeerIdentity *peer, + enum GNUNET_ATS_PreferenceKind kind, + double pref_rel) { GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Preference `%s' for peer `%s' changed to %.2f \n", @@ -581,18 +747,18 @@ GAS_ril_address_change_preference (void *solver, */ void * GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg, - const struct GNUNET_STATISTICS_Handle *stats, - const struct GNUNET_CONTAINER_MultiHashMap *addresses, - int *network, - unsigned long long *out_quota, - unsigned long long *in_quota, - int dest_length, - GAS_bandwidth_changed_cb bw_changed_cb, - void *bw_changed_cb_cls, - GAS_get_preferences get_preference, - void *get_preference_cls, - GAS_get_properties get_properties, - void *get_properties_cls) + const struct GNUNET_STATISTICS_Handle *stats, + const struct GNUNET_CONTAINER_MultiHashMap *addresses, + int *network, + unsigned long long *out_quota, + unsigned long long *in_quota, + int dest_length, + GAS_bandwidth_changed_cb bw_changed_cb, + void *bw_changed_cb_cls, + GAS_get_preferences get_preference, + void *get_preference_cls, + GAS_get_properties get_properties, + void *get_properties_cls) { //TODO implement int c; @@ -683,6 +849,9 @@ void GAS_ril_done (void * solver) { //TODO implement + /* + * dealloc: agents, learning parameters, callbacks + */ struct GAS_RIL_Handle *s = solver; GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_done() has been called\n"); @@ -702,8 +871,8 @@ GAS_ril_done (void * solver) */ void GAS_ril_address_add (void *solver, - struct ATS_Address *address, - uint32_t network) + struct ATS_Address *address, + uint32_t network) { //TODO implement /* @@ -726,8 +895,8 @@ GAS_ril_address_add (void *solver, */ void GAS_ril_address_delete (void *solver, - struct ATS_Address *address, - int session_only) + struct ATS_Address *address, + int session_only) { //TODO implement /* @@ -753,10 +922,10 @@ GAS_ril_address_delete (void *solver, */ void GAS_ril_address_property_changed (void *solver, - struct ATS_Address *address, - uint32_t type, - uint32_t abs_value, - double rel_value) + struct ATS_Address *address, + uint32_t type, + uint32_t abs_value, + double rel_value) { GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Property `%s' for peer `%s' address %p changed to %.2f \n", @@ -781,9 +950,9 @@ GAS_ril_address_property_changed (void *solver, */ void GAS_ril_address_session_changed (void *solver, - struct ATS_Address *address, - uint32_t cur_session, - uint32_t new_session) + struct ATS_Address *address, + uint32_t cur_session, + uint32_t new_session) { //TODO implement /* @@ -804,8 +973,8 @@ GAS_ril_address_session_changed (void *solver, */ void GAS_ril_address_inuse_changed (void *solver, - struct ATS_Address *address, - int in_use) + struct ATS_Address *address, + int in_use) { //TODO implement /** @@ -826,9 +995,9 @@ GAS_ril_address_inuse_changed (void *solver, */ void GAS_ril_address_change_network (void *solver, - struct ATS_Address *address, - uint32_t current_network, - uint32_t new_network) + struct ATS_Address *address, + uint32_t current_network, + uint32_t new_network) { //TODO implement /* @@ -849,11 +1018,11 @@ GAS_ril_address_change_network (void *solver, */ void GAS_ril_address_preference_feedback (void *solver, - void *application, - const struct GNUNET_PeerIdentity *peer, - const struct GNUNET_TIME_Relative scope, - enum GNUNET_ATS_PreferenceKind kind, - double score) + void *application, + const struct GNUNET_PeerIdentity *peer, + const struct GNUNET_TIME_Relative scope, + enum GNUNET_ATS_PreferenceKind kind, + double score) { //TODO implement GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_preference_feedback() has been called\n"); @@ -898,16 +1067,21 @@ GAS_ril_bulk_stop (void *solver) */ const struct ATS_Address * GAS_ril_get_preferred_address (void *solver, - const struct GNUNET_PeerIdentity *peer) + const struct GNUNET_PeerIdentity *peer) { - //TODO implement + //TODO implement, gets only the first address for now + /* * connect-only for requested peers, move agent to active list */ struct GAS_RIL_Handle *s = solver; + struct RIL_Peer_Agent *agent; GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_get_preferred_address() has been called\n"); + agent = ril_get_agent(s, peer); + agent->active = GNUNET_YES; + if (0 == GNUNET_CONTAINER_multihashmap_contains(s->addresses, &peer->hashPubKey)) { return GNUNET_CONTAINER_multihashmap_get(s->addresses, &peer->hashPubKey); @@ -925,13 +1099,19 @@ GAS_ril_get_preferred_address (void *solver, */ void GAS_ril_stop_get_preferred_address (void *solver, - const struct GNUNET_PeerIdentity *peer) + const struct GNUNET_PeerIdentity *peer) { //TODO implement /* * connect-only for requested peers, move agent to paused list */ + struct GAS_RIL_Handle *s = solver; + struct RIL_Peer_Agent *agent; + GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_stop_get_preferred_address() has been called\n"); + + agent = ril_get_agent(s, peer); + agent->active = GNUNET_NO; } -/* end of gnunet-service-ats-solver_reinf.c */ +/* end of gnunet-service-ats-solver_ril.c */ diff --git a/src/ats/gnunet-service-ats-solver_ril.h b/src/ats/gnunet-service-ats-solver_ril.h @@ -245,4 +245,4 @@ const struct ATS_Address * GAS_ril_get_preferred_address (void *solver, const struct GNUNET_PeerIdentity *peer); -/* end of gnunet-service-ats-solver_reinf.h */ +/* end of gnunet-service-ats-solver_ril.h */