Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions experiments/box2d_arm_example/hyperparams.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,9 @@

state_cost = {
'type': CostState,
'data_types' : {
JOINT_ANGLES: {
'wp': np.array([1, 1]),
'target_state': agent["target_state"],
},
},
'data_type': JOINT_ANGLES,
'wp': np.array([1, 1]),
'target_state': agent["target_state"],
}

algorithm['cost'] = {
Expand Down
9 changes: 3 additions & 6 deletions experiments/box2d_badmm_example/hyperparams.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,9 @@

state_cost = {
'type': CostState,
'data_types' : {
JOINT_ANGLES: {
'wp': np.array([1, 1]),
'target_state': agent["target_state"],
},
},
'data_type': JOINT_ANGLES,
'wp': np.array([1, 1]),
'target_state': agent["target_state"],
}

algorithm['cost'] = {
Expand Down
9 changes: 3 additions & 6 deletions experiments/box2d_pointmass_example/hyperparams.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,9 @@

state_cost = {
'type': CostState,
'data_types' : {
END_EFFECTOR_POINTS: {
'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
'target_state': agent["target_state"],
},
},
'data_type': END_EFFECTOR_POINTS,
'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
'target_state': agent["target_state"],
}

algorithm['cost'] = {
Expand Down
75 changes: 75 additions & 0 deletions experiments/icra_peg_2D/badmm/hyperparams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
import numpy as np
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_badmm import AlgorithmBADMM

BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

TEST_CONDITIONS = default.TEST_CONDITIONS
TRAIN_CONDITIONS = 4
TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS

# Create train positions.
xs = np.linspace(default.lower, default.upper, 2)
ys = np.linspace(default.lower, default.upper, 2)
pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]

# Add test positions.
pos_body_offset += default.test_pos_body_offset

# Update the defaults
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
'conditions': TOTAL_CONDITIONS,
'train_conditions': range(TRAIN_CONDITIONS),
'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS),
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

agent = default.agent.copy()
agent.update({
'conditions': common['conditions'],
'train_conditions': common['train_conditions'],
'test_conditions': common['test_conditions'],
'pos_body_idx': np.array([1]),
'pos_body_offset': pos_body_offset,
})

algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmBADMM,
'inner_iterations': 4,
'sample_on_policy': False,
'conditions': common['conditions'],
'train_conditions': common['train_conditions'],
'test_conditions': common['test_conditions'],
'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]),
'policy_dual_rate': 0.1,
'init_pol_wt': 0.002,
'fixed_lg_step': 3,
'kl_step': 0.5,
'max_step_mult': 2.0,
'min_step_mult': 0.1,
})
algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'agent': agent,
'num_samples': 5,
})

common['info'] = generate_experiment_info(config)
150 changes: 150 additions & 0 deletions experiments/icra_peg_2D/hyperparams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
from __future__ import division

from datetime import datetime
import os.path

import numpy as np

from gps import __file__ as gps_filepath
from gps.agent.mjc.agent_mjc import AgentMuJoCo
from gps.algorithm.cost.cost_fk import CostFK
from gps.algorithm.cost.cost_action import CostAction
from gps.algorithm.cost.cost_sum import CostSum
from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from gps.algorithm.policy.lin_gauss_init import init_lqr
from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
from gps.algorithm.policy.policy_prior import PolicyPrior
from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
from gps.gui.config import generate_experiment_info


SENSOR_DIMS = {
JOINT_ANGLES: 7,
JOINT_VELOCITIES: 7,
END_EFFECTOR_POINTS: 6,
END_EFFECTOR_POINT_VELOCITIES: 6,
ACTION: 7,
}

PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])

common = {
'experiment_name': 'my_experiment' + '_' + \
datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
}

# Set up grid of test positions.
lower = -0.25
upper = 0.15

TEST_CONDITIONS = 49
xs = np.linspace(lower + 0.01, upper - 0.01, 7)
ys = np.linspace(lower + 0.01, upper - 0.01, 7)
test_pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]

agent = {
'type': AgentMuJoCo,
'filename': './mjc_models/pr2_arm3d.xml',
'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
np.zeros(7)]),
'dt': 0.05,
'substeps': 5,
'T': 100,
'sensor_dims': SENSOR_DIMS,
'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
END_EFFECTOR_POINT_VELOCITIES],
'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
END_EFFECTOR_POINT_VELOCITIES],
'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
}

algorithm = {
'iterations': 12,
'policy_sample_mode': 'replace',
}

algorithm['init_traj_distr'] = {
'type': init_lqr,
'init_gains': 1.0 / PR2_GAINS,
'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
'init_var': 1.0,
'stiffness': 1.0,
'stiffness_vel': 0.5,
'final_weight': 50.0,
'dt': agent['dt'],
'T': agent['T'],
}

torque_cost = {
'type': CostAction,
'wu': 1e-3 / PR2_GAINS,
}

fk_cost = {
'type': CostFK,
'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
'wp': np.array([2, 2, 1, 2, 2, 1]),
'l1': 0.1,
'l2': 10.0,
'alpha': 1e-5,
}

# Create second cost function for last step only.
final_cost = {
'type': CostFK,
'ramp_option': RAMP_FINAL_ONLY,
'target_end_effector': fk_cost['target_end_effector'],
'wp': fk_cost['wp'],
'l1': 1.0,
'l2': 0.0,
'alpha': 1e-5,
'wp_final_multiplier': 10.0,
}

algorithm['cost'] = {
'type': CostSum,
'costs': [torque_cost, fk_cost, final_cost],
'weights': [1.0, 1.0, 1.0],
}

algorithm['dynamics'] = {
'type': DynamicsLRPrior,
'regularization': 1e-6,
'prior': {
'type': DynamicsPriorGMM,
'max_clusters': 50,
'min_samples_per_cluster': 40,
'max_samples': 20,
},
}

algorithm['traj_opt'] = {
'type': TrajOptLQRPython,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'iterations': 5000,
}

algorithm['policy_prior'] = {
'type': PolicyPriorGMM,
'max_clusters': 50,
'min_samples_per_cluster': 40,
}

config = {
'iterations': algorithm['iterations'],
'verbose_trials': 0,
'verbose_policy_trials': 1,
'agent': agent,
'gui_on': True,
'random_seed': list(range(5)),
}
74 changes: 74 additions & 0 deletions experiments/icra_peg_2D/mdgps_cluster/hyperparams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
""" Hyperparameters for MJC peg insertion policy optimization. """
import imp
import os.path
import numpy as np
from gps.gui.config import generate_experiment_info
from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS

BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')

EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'

TEST_CONDITIONS = default.TEST_CONDITIONS
TRAIN_CONDITIONS = 20
TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS

# Create dummy train positions (will be overwritten randomly).
pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]

# Add test positions.
pos_body_offset += default.test_pos_body_offset

# Update the default config.
common = default.common.copy()
common.update({
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
'conditions': TOTAL_CONDITIONS,
'train_conditions': range(TRAIN_CONDITIONS),
'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS),
})

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

agent = default.agent.copy()
agent.update({
'conditions': common['conditions'],
'train_conditions': common['train_conditions'],
'test_conditions': common['test_conditions'],
'pos_body_idx': np.array([1]),
'pos_body_offset': pos_body_offset,
'randomly_sample_bodypos': True,
'sampling_range_bodypos': [np.array([default.lower, default.lower, 0.0]),
np.array([default.upper, default.upper, 0.0])],
'prohibited_ranges_bodypos': [],
})

algorithm = default.algorithm.copy()
algorithm.update({
'type': AlgorithmMDGPS,
'sample_on_policy': True,
'conditions': common['conditions'],
'train_conditions': common['train_conditions'],
'test_conditions': common['test_conditions'],
'num_clusters': 4,
'cluster_method': 'traj_em',
'kl_step': 2.0,
'max_step_mult': 2.0,
'min_step_mult': 1.0,
})
algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'

config = default.config.copy()
config.update({
'common': common,
'algorithm': algorithm,
'agent': agent,
'num_samples': 1,
})

common['info'] = generate_experiment_info(config)
Loading