cbfinn · wmontgomery4 · Oct 26, 2016 · Feb 24, 2017
diff --git a/experiments/box2d_arm_example/hyperparams.py b/experiments/box2d_arm_example/hyperparams.py
@@ -82,12 +82,9 @@
 
 state_cost = {
     'type': CostState,
-    'data_types' : {
-        JOINT_ANGLES: {
-            'wp': np.array([1, 1]),
-            'target_state': agent["target_state"],
-        },
-    },
+    'data_type': JOINT_ANGLES,
+    'wp': np.array([1, 1]),
+    'target_state': agent["target_state"],
 }
 
 algorithm['cost'] = {

diff --git a/experiments/box2d_badmm_example/hyperparams.py b/experiments/box2d_badmm_example/hyperparams.py
@@ -97,12 +97,9 @@
 
 state_cost = {
     'type': CostState,
-    'data_types' : {
-        JOINT_ANGLES: {
-            'wp': np.array([1, 1]),
-            'target_state': agent["target_state"],
-        },
-    },
+    'data_type': JOINT_ANGLES,
+    'wp': np.array([1, 1]),
+    'target_state': agent["target_state"],
 }
 
 algorithm['cost'] = {

diff --git a/experiments/box2d_pointmass_example/hyperparams.py b/experiments/box2d_pointmass_example/hyperparams.py
@@ -80,12 +80,9 @@
 
 state_cost = {
     'type': CostState,
-    'data_types' : {
-        END_EFFECTOR_POINTS: {
-            'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
-            'target_state': agent["target_state"],
-        },
-    },
+    'data_type': END_EFFECTOR_POINTS,
+    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
+    'target_state': agent["target_state"],
 }
 
 algorithm['cost'] = {

diff --git a/experiments/icra_peg_2D/badmm/hyperparams.py b/experiments/icra_peg_2D/badmm/hyperparams.py
@@ -0,0 +1,75 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_badmm import AlgorithmBADMM
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 4
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create train positions.
+xs = np.linspace(default.lower, default.upper, 2)
+ys = np.linspace(default.lower, default.upper, 2)
+pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the defaults
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmBADMM,
+    'inner_iterations': 4,
+    'sample_on_policy': False,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'lg_step_schedule': np.array([1e-4, 1e-4, 1e-3, 1e-3]),
+    'policy_dual_rate': 0.1,
+    'init_pol_wt': 0.002,
+    'fixed_lg_step': 3,
+    'kl_step': 0.5,
+    'max_step_mult': 2.0,
+    'min_step_mult': 0.1,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 5,
+})
+
+common['info'] = generate_experiment_info(config)
diff --git a/experiments/icra_peg_2D/hyperparams.py b/experiments/icra_peg_2D/hyperparams.py
@@ -0,0 +1,150 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+from __future__ import division
+
+from datetime import datetime
+import os.path
+
+import numpy as np
+
+from gps import __file__ as gps_filepath
+from gps.agent.mjc.agent_mjc import AgentMuJoCo
+from gps.algorithm.cost.cost_fk import CostFK
+from gps.algorithm.cost.cost_action import CostAction
+from gps.algorithm.cost.cost_sum import CostSum
+from gps.algorithm.cost.cost_utils import RAMP_FINAL_ONLY
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
+from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
+from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
+from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
+from gps.algorithm.policy.lin_gauss_init import init_lqr
+from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
+from gps.algorithm.policy.policy_prior import PolicyPrior
+from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
+        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+from gps.gui.config import generate_experiment_info
+
+
+SENSOR_DIMS = {
+    JOINT_ANGLES: 7,
+    JOINT_VELOCITIES: 7,
+    END_EFFECTOR_POINTS: 6,
+    END_EFFECTOR_POINT_VELOCITIES: 6,
+    ACTION: 7,
+}
+
+PR2_GAINS = np.array([3.09, 1.08, 0.393, 0.674, 0.111, 0.152, 0.098])
+
+common = {
+    'experiment_name': 'my_experiment' + '_' + \
+            datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
+}
+
+# Set up grid of test positions.
+lower = -0.25
+upper = 0.15
+
+TEST_CONDITIONS = 49
+xs = np.linspace(lower + 0.01, upper - 0.01, 7)
+ys = np.linspace(lower + 0.01, upper - 0.01, 7)
+test_pos_body_offset = [np.array([x,y,0]) for x in xs for y in ys]
+
+agent = {
+    'type': AgentMuJoCo,
+    'filename': './mjc_models/pr2_arm3d.xml',
+    'x0': np.concatenate([np.array([0.1, 0.1, -1.54, -1.7, 1.54, -0.2, 0]),
+                          np.zeros(7)]),
+    'dt': 0.05,
+    'substeps': 5,
+    'T': 100,
+    'sensor_dims': SENSOR_DIMS,
+    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                      END_EFFECTOR_POINT_VELOCITIES],
+    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
+                    END_EFFECTOR_POINT_VELOCITIES],
+    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+}
+
+algorithm = {
+    'iterations': 12,
+    'policy_sample_mode': 'replace',
+}
+
+algorithm['init_traj_distr'] = {
+    'type': init_lqr,
+    'init_gains':  1.0 / PR2_GAINS,
+    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
+    'init_var': 1.0,
+    'stiffness': 1.0,
+    'stiffness_vel': 0.5,
+    'final_weight': 50.0,
+    'dt': agent['dt'],
+    'T': agent['T'],
+}
+
+torque_cost = {
+    'type': CostAction,
+    'wu': 1e-3 / PR2_GAINS,
+}
+
+fk_cost = {
+    'type': CostFK,
+    'target_end_effector': np.array([0.0, 0.3, -0.5, 0.0, 0.3, -0.2]),
+    'wp': np.array([2, 2, 1, 2, 2, 1]),
+    'l1': 0.1,
+    'l2': 10.0,
+    'alpha': 1e-5,
+}
+
+# Create second cost function for last step only.
+final_cost = {
+    'type': CostFK,
+    'ramp_option': RAMP_FINAL_ONLY,
+    'target_end_effector': fk_cost['target_end_effector'],
+    'wp': fk_cost['wp'],
+    'l1': 1.0,
+    'l2': 0.0,
+    'alpha': 1e-5,
+    'wp_final_multiplier': 10.0,
+}
+
+algorithm['cost'] = {
+    'type': CostSum,
+    'costs': [torque_cost, fk_cost, final_cost],
+    'weights': [1.0, 1.0, 1.0],
+}
+
+algorithm['dynamics'] = {
+    'type': DynamicsLRPrior,
+    'regularization': 1e-6,
+    'prior': {
+        'type': DynamicsPriorGMM,
+        'max_clusters': 50,
+        'min_samples_per_cluster': 40,
+        'max_samples': 20,
+    },
+}
+
+algorithm['traj_opt'] = {
+    'type': TrajOptLQRPython,
+}
+
+algorithm['policy_opt'] = {
+    'type': PolicyOptCaffe,
+    'iterations': 5000,
+}
+
+algorithm['policy_prior'] = {
+    'type': PolicyPriorGMM,
+    'max_clusters': 50,
+    'min_samples_per_cluster': 40,
+}
+
+config = {
+    'iterations': algorithm['iterations'],
+    'verbose_trials': 0,
+    'verbose_policy_trials': 1,
+    'agent': agent,
+    'gui_on': True,
+    'random_seed': list(range(5)),
+}
diff --git a/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py b/experiments/icra_peg_2D/mdgps_cluster/hyperparams.py
@@ -0,0 +1,74 @@
+""" Hyperparameters for MJC peg insertion policy optimization. """
+import imp
+import os.path
+import numpy as np
+from gps.gui.config import generate_experiment_info
+from gps.algorithm.algorithm_mdgps import AlgorithmMDGPS
+
+BASE_DIR = '/'.join(str.split(__file__, '/')[:-2])
+default = imp.load_source('default_hyperparams', BASE_DIR+'/hyperparams.py')
+
+EXP_DIR = '/'.join(str.split(__file__, '/')[:-1]) + '/'
+
+TEST_CONDITIONS = default.TEST_CONDITIONS
+TRAIN_CONDITIONS = 20
+TOTAL_CONDITIONS = TEST_CONDITIONS + TRAIN_CONDITIONS
+
+# Create dummy train positions (will be overwritten randomly).
+pos_body_offset = [np.zeros(3) for _ in range(TRAIN_CONDITIONS)]
+
+# Add test positions.
+pos_body_offset += default.test_pos_body_offset
+
+# Update the default config.
+common = default.common.copy()
+common.update({
+    'experiment_dir': EXP_DIR,
+    'data_files_dir': EXP_DIR + 'data_files/',
+    'target_filename': EXP_DIR + 'target.npz',
+    'log_filename': EXP_DIR + 'log.txt',
+    'conditions': TOTAL_CONDITIONS,
+    'train_conditions': range(TRAIN_CONDITIONS), 
+    'test_conditions': range(TRAIN_CONDITIONS, TOTAL_CONDITIONS), 
+})
+
+if not os.path.exists(common['data_files_dir']):
+    os.makedirs(common['data_files_dir'])
+
+agent = default.agent.copy()
+agent.update({
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'pos_body_idx': np.array([1]),
+    'pos_body_offset': pos_body_offset,
+    'randomly_sample_bodypos': True,
+    'sampling_range_bodypos': [np.array([default.lower, default.lower, 0.0]),
+                                np.array([default.upper, default.upper, 0.0])],
+    'prohibited_ranges_bodypos': [],
+})
+
+algorithm = default.algorithm.copy()
+algorithm.update({
+    'type': AlgorithmMDGPS,
+    'sample_on_policy': True,
+    'conditions': common['conditions'],
+    'train_conditions': common['train_conditions'],
+    'test_conditions': common['test_conditions'],
+    'num_clusters': 4,
+    'cluster_method': 'traj_em',
+    'kl_step': 2.0,
+    'max_step_mult': 2.0,
+    'min_step_mult': 1.0,
+})
+algorithm['policy_opt']['weights_file_prefix'] = EXP_DIR + 'policy'
+
+config = default.config.copy()
+config.update({
+    'common': common,
+    'algorithm': algorithm,
+    'agent': agent,
+    'num_samples': 1,
+})
+
+common['info'] = generate_experiment_info(config)