AI & ML interests
None defined yet.
teamcore/DPO_Q0.5B_RMAB_TG_beta0.1loss_typegeneralized_sigmoidreward_model
Updated
teamcore/SFT_Q0.5B_RMAB_PG
Updated
teamcore/SFT_Q0.5B_RMAB_TG
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_adv0.25
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_flip0.1
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_flip0.5
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_adv0.5
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7b
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_flip0.3
Updated
teamcore/SFT_L8B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_adv0.75
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoid_dro_dynamic_smooth_labelEurus_RM_7bbt_noise_flip0.4
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_noise_flip0.4
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoid_dro_dynamic_smooth_labelEurus_RM_7bbt_noise_flip0.1
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_noise_flip0.1
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_noise_flip0.2
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoid_dro_dynamic_smooth_labelEurus_RM_7bbt_noise_flip0.2
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_noise_adv0.25
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_noise_adv0.5
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoid_dro_dynamic_smooth_labelEurus_RM_7bbt_noise_adv0.25
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoid_dro_dynamic_smooth_labelEurus_RM_7bbt_noise_adv0.5
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_noise_adv0.75
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoid_dro_dynamic_smooth_labelEurus_RM_7bbt_noise_adv0.75
Updated
teamcore/SFT_Q0.5B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_flip0.1
Updated
teamcore/SFT_Q0.5B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_flip0.4
Updated
teamcore/SFT_Q0.5B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_flip0.2
Updated
teamcore/SFT_Q0.5B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_adv0.5
Updated
teamcore/SFT_Q0.5B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_adv0.25
Updated
teamcore/SFT_Q0.5B_U0_reward_modelEurus_RM_7bnoise_typebt_noise_adv0.75
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7blabel_switching0.4
Updated
teamcore/DPO_Q0.5B_U0_beta0.1generalized_sigmoidEurus_RM_7bbt_prob_noise0.5
Updated