Number of threads used for the generation of conformations
"))
- self.label_10.setText(_translate("ToolSmilesTo3d", "CPU threads"))
- self.checkBox_mpi.setText(_translate("ToolSmilesTo3d", "Run with MPI"))
-
-
-if __name__ == "__main__":
- import sys
- app = QtWidgets.QApplication(sys.argv)
- ToolSmilesTo3d = QtWidgets.QWidget()
- ui = Ui_ToolSmilesTo3d()
- ui.setupUi(ToolSmilesTo3d)
- ToolSmilesTo3d.show()
- sys.exit(app.exec_())
-
diff --git a/ChemFlow/src/GUI/qt_creator/mainwindow.ui b/ChemFlow/src/GUI/qt_creator/mainwindow.ui
index 0d03207..83def21 100644
--- a/ChemFlow/src/GUI/qt_creator/mainwindow.ui
+++ b/ChemFlow/src/GUI/qt_creator/mainwindow.ui
@@ -66,6 +66,495 @@
0
+
+
+ SmilesTo3D
+
+
+
+
+ 11
+ 95
+ 67
+ 17
+
+
+
+ Delimiter
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+
+
+
+ 90
+ 228
+ 85
+ 27
+
+
+
+ <html><head/><body><p>Method used to generate 3D conformations: </p><ul style="margin-top: 0px; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; -qt-list-indent: 1;"><li style=" margin-top:12px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">UFF</span> : distance geometry + force field minimization with UFF</li><li style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">MMFF</span> : distance geometry + force field minimization with MMFF</li><li style=" margin-top:0px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">ETKDG </span>: distance geometry with experimental torsion angles knowledge from the CSD</li></ul></body></html>
+
+
+
+ ETKDG
+
+
+
+
+ MMFF
+
+
+
+
+ UFF
+
+
+
+
+
+
+ 349
+ 194
+ 151
+ 22
+
+
+
+ <html><head/><body><p>Print additional information to the terminal</p></body></html>
+
+
+ Verbose output
+
+
+
+
+
+ 158
+ 57
+ 48
+ 27
+
+
+
+ <html><head/><body><p>Index of the column containing the SMILES</p></body></html>
+
+
+ 1
+
+
+ 1
+
+
+
+
+
+ 90
+ 89
+ 85
+ 27
+
+
+
+
+ Tab
+
+
+
+
+ Space
+
+
+
+
+ ,
+
+
+
+
+ ;
+
+
+
+
+ :
+
+
+
+
+
+
+ 94
+ 288
+ 91
+ 20
+
+
+
+ CPU threads
+
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
+
+
+
+
+
+ 349
+ 289
+ 131
+ 22
+
+
+
+ Run with MPI
+
+
+
+
+
+ 92
+ 30
+ 441
+ 21
+
+
+
+
+ 9
+
+
+
+ Path to input SMILES file
+
+
+
+
+
+ 90
+ 120
+ 171
+ 22
+
+
+
+ Fist line is a header
+
+
+
+
+
+ 90
+ 194
+ 221
+ 22
+
+
+
+ <html><head/><body><p>Remove hydrogen atoms from the output</p></body></html>
+
+
+ Remove hydrogen atoms
+
+
+
+
+
+ 300
+ 57
+ 48
+ 27
+
+
+
+ <html><head/><body><p>Index of the column containing the names of your molecules. Leave to 2 if there are no names.</p></body></html>
+
+
+ 1
+
+
+ 2
+
+
+
+
+
+ 13
+ 10
+ 51
+ 17
+
+
+
+
+ 75
+ true
+
+
+
+ Input
+
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
+
+
+
+
+
+ 11
+ 32
+ 67
+ 17
+
+
+
+ File
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+
+
+
+ 13
+ 261
+ 81
+ 17
+
+
+
+
+ 75
+ true
+
+
+
+ Execution
+
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
+
+
+
+
+
+ 190
+ 284
+ 48
+ 27
+
+
+
+ <html><head/><body><p>Number of threads used for the generation of conformations</p></body></html>
+
+
+ 1
+
+
+ 1
+
+
+
+
+
+ 246
+ 60
+ 51
+ 20
+
+
+
+
+ true
+
+
+
+ Name
+
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
+
+
+
+
+
+ 538
+ 29
+ 91
+ 23
+
+
+
+ <html><head/><body><p>Text file containing one molecule in SMILES format per line.</p></body></html>
+
+
+ Browse
+
+
+
+
+
+ 13
+ 143
+ 67
+ 17
+
+
+
+
+ 75
+ true
+
+
+
+ Output
+
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
+
+
+
+
+
+ 13
+ 234
+ 71
+ 17
+
+
+
+
+ 75
+ true
+
+
+
+ Method
+
+
+
+
+
+ 92
+ 165
+ 391
+ 21
+
+
+
+
+ 9
+
+
+
+ Path to output SDF file
+
+
+
+
+
+ 10
+ 169
+ 67
+ 17
+
+
+
+ File
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+
+
+
+ -33
+ 60
+ 111
+ 20
+
+
+
+ Index
+
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
+
+
+
+
+ 0
+ 320
+ 640
+ 3
+
+
+
+ QFrame::Sunken
+
+
+ Qt::Horizontal
+
+
+
+
+
+ 230
+ 330
+ 181
+ 51
+
+
+
+
+ 14
+
+
+
+ <html><head/><body><p>Generates 3D structures in SDF format from SMILES, using RDKIT</p></body></html>
+
+
+ Generate 3D
+
+
+
+ ../img/run.png../img/run.png
+
+
+
+ 36
+ 36
+
+
+
+
+
+
+ 94
+ 62
+ 61
+ 17
+
+
+
+
+ true
+
+
+
+ SMILES
+
+
+ Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
+
+
+ LigFlow
@@ -1414,11 +1903,6 @@
Bounding shape
-
-
- SMILES to 3D
-
-
@@ -1618,6 +2102,9 @@
25
+
+ false
+ Project
@@ -1631,9 +2118,10 @@
Help
+
+
-
@@ -1714,10 +2202,28 @@
Dark theme
+
+
+ Documentation
+
+ tabWidget
+ lineEdit_smiles_input
+ pushButton_smiles_input
+ spinBox_smiles_col
+ spinBox_names_col
+ comboBox_delimiter
+ checkBox_header
+ lineEdit_smiles_output
+ checkBox_hydrogen
+ checkBox_verbose
+ comboBox_method
+ spinBox_nthreads
+ checkBox_mpi
+ commandLinkButton_smiles_runlineEdit_ligflow_ligpushButton_ligflow_ligcomboBox_charges
diff --git a/ChemFlow/src/GUI/qt_creator/tool_smiles_to_3d.ui b/ChemFlow/src/GUI/qt_creator/tool_smiles_to_3d.ui
deleted file mode 100644
index 22fa804..0000000
--- a/ChemFlow/src/GUI/qt_creator/tool_smiles_to_3d.ui
+++ /dev/null
@@ -1,459 +0,0 @@
-
-
- ToolSmilesTo3d
-
-
-
- 0
- 0
- 480
- 499
-
-
-
- SMILES to 3D
-
-
-
-
- 20
- 10
- 111
- 17
-
-
-
-
- 75
- true
-
-
-
- SMILES to 3D
-
-
-
-
-
- 152
- 57
- 212
- 20
-
-
-
- Path to input SMILES file
-
-
-
-
-
- 368
- 55
- 86
- 23
-
-
-
- <html><head/><body><p>Text file containing one molecule in SMILES format per line.</p></body></html>
-
-
- Browse
-
-
-
-
-
- 40
- 36
- 61
- 20
-
-
-
-
- 75
- true
-
-
-
- Input
-
-
- Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
-
-
-
-
-
- 131
- 461
- 86
- 29
-
-
-
- Ok
-
-
-
-
-
- 261
- 461
- 86
- 29
-
-
-
- Cancel
-
-
-
-
-
- 60
- 59
- 67
- 17
-
-
-
- File
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
-
-
-
- 150
- 83
- 48
- 27
-
-
-
- <html><head/><body><p>Index of the column containing the SMILES</p></body></html>
-
-
- 1
-
-
- 1
-
-
-
-
-
- 16
- 86
- 111
- 20
-
-
-
- SMILES column
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
-
-
-
- 16
- 116
- 111
- 20
-
-
-
- Name column
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
-
-
-
- 150
- 113
- 48
- 27
-
-
-
- <html><head/><body><p>Index of the column containing the names of your molecules</p></body></html>
-
-
- 1
-
-
- 2
-
-
-
-
-
- 60
- 149
- 67
- 17
-
-
-
- Delimiter
-
-
-
-
-
- 150
- 143
- 85
- 27
-
-
-
-
- Tab
-
-
-
-
- Space
-
-
-
-
- ,
-
-
-
-
- ;
-
-
-
-
- :
-
-
-
-
-
-
- 150
- 174
- 171
- 22
-
-
-
- Fist line is a header
-
-
-
-
-
- 40
- 214
- 67
- 17
-
-
-
-
- 75
- true
-
-
-
- Output
-
-
- Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
-
-
-
-
-
- 152
- 236
- 301
- 20
-
-
-
- Path to output SDF file
-
-
-
-
-
- 59
- 240
- 67
- 17
-
-
-
- File
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
-
-
-
- 150
- 265
- 221
- 22
-
-
-
- Write all Hydrogen atoms
-
-
-
-
-
- 150
- 291
- 221
- 22
-
-
-
- <html><head/><body><p>Print additional information to the terminal</p></body></html>
-
-
- Increase output verbosity
-
-
-
-
-
- 40
- 367
- 91
- 17
-
-
-
-
- 75
- true
-
-
-
- Execution
-
-
- Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter
-
-
-
-
-
- 150
- 324
- 85
- 27
-
-
-
- <html><head/><body><p>Method used to generate 3D conformations: </p><ul style="margin-top: 0px; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; -qt-list-indent: 1;"><li style=" margin-top:12px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">UFF</span> : distance geometry + force field minimization with UFF</li><li style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">MMFF</span> : distance geometry + force field minimization with MMFF</li><li style=" margin-top:0px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">ETKDG </span>: distance geometry with experimental torsion angles knowledge from the CSD</li></ul></body></html>
-
-
-
- ETKDG
-
-
-
-
- MMFF
-
-
-
-
- UFF
-
-
-
-
-
-
- 40
- 330
- 81
- 17
-
-
-
-
- 75
- true
-
-
-
- Method
-
-
-
-
-
- 150
- 390
- 48
- 27
-
-
-
- <html><head/><body><p>Number of threads used for the generation of conformations</p></body></html>
-
-
- 1
-
-
- 1
-
-
-
-
-
- 25
- 394
- 101
- 20
-
-
-
- CPU threads
-
-
- Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
-
-
-
-
-
- 150
- 425
- 131
- 22
-
-
-
- Run with MPI
-
-
-
-
-
-
diff --git a/ChemFlow/src/LigFlow_functions.bash b/ChemFlow/src/LigFlow_functions.bash
index fb4908a..2732402 100644
--- a/ChemFlow/src/LigFlow_functions.bash
+++ b/ChemFlow/src/LigFlow_functions.bash
@@ -14,9 +14,9 @@ LigFlow_write_origin_ligands() {
# ${DOCK_PROGRAM}
# ${WORKDIR}
#
-# Author: Dona de Francquen
+# Author: Dona de Francquen, Diego Enry Barreto Gomes.
#
-# UPDATE: fri. july 6 14:49:50 CEST 2018
+# UPDATE: Ter Nov 5 10:08:11 -03 2019
#
#===============================================================================
OLDIFS=$IFS
@@ -33,16 +33,17 @@ done < ${LIGAND_FILE}
IFS=${OLDIFS}
+# Removed Ter Nov 5 10:50:45 -03 2019
#
# QUICK AND DIRTY FIX BY DIEGO - PLEASE FIX THIS FOR THE LOVE OF GOD
#
-cd ${RUNDIR}/original/
-for LIGAND in ${LIGAND_LIST[@]} ; do
- antechamber -i ${LIGAND}.mol2 -o tmp.mol2 -fi mol2 -fo mol2 -at sybyl -dr no &>/dev/null
- if [ -f tmp.mol2 ]; then mv tmp.mol2 ${LIGAND}.mol2; fi
-done
-rm -f ANTECHAMBER_*
-rm ATOMTYPE.INF
+#cd ${RUNDIR}/original/
+#for LIGAND in ${LIGAND_LIST[@]} ; do
+# antechamber -i ${LIGAND}.mol2 -o tmp.mol2 -fi mol2 -fo mol2 -at sybyl -pf y -dr no &>/dev/null
+# if [ -f tmp.mol2 ]; then mv tmp.mol2 ${LIGAND}.mol2; fi
+#done
+#rm -f ANTECHAMBER_*
+#rm ATOMTYPE.INF
#
#
#
@@ -76,31 +77,52 @@ fi
}
+
LigFlow_filter_ligand_list() {
+# Rebuild LIGAND_LIST with only the ligands lacking ${CHARGE} charges in ChemBase and in ${RUNDIR}/${CHARGE}/.
NEW_LIGAND_LIST=""
-for LIGAND in ${LIGAND_LIST[@]} ; do
- DONE_CHARGE="false"
+# Step 1 - Check if ChemBase and ChemBase.lst exist
+if [ -s ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.lst ] && [ -s ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.mol2 ] ; then
- if [ "${DONE_CHARGE}" == "false" ] && [ -s ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.lst ] && [ -s ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.mol2 ] ; then
- if [ "$(grep ${LIGAND} ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.lst)" == ${LIGAND} ] ; then
- DONE_CHARGE="true"
- fi
- fi
- if [ "${DONE_CHARGE}" == "false" ] && [ -f ${RUNDIR}/${CHARGE}/${LIGAND}.mol2 ] ; then
- DONE_CHARGE="true"
- fi
- if [ "${DONE_CHARGE}" == "false" ] ; then
- if [ ! -n "`echo ${NEW_LIGAND_LIST} | xargs -n1 echo | grep -e \"^${LIGAND}$\"`" ] ; then
- NEW_LIGAND_LIST="${NEW_LIGAND_LIST} $LIGAND"
- fi
- fi
-done
+ # Step 2 - Populate CHEMBASE_LIST
+ CHEMBASE_LIST=$(cat ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.lst)
+ CHEMBASE_LIST=($CHEMBASE_LIST)
+else
+ CHEMBASE_LIST=''
+fi
+
+ # Step 3 - Populate COMPUTED_LIST of charges (names of the *.mol2 files, extension stripped)
+if [ -d ${RUNDIR}/${CHARGE}/ ] ; then
+ COMPUTED_LIST=$(ls -U ${RUNDIR}/${CHARGE}/ | grep '\.mol2$' | sed 's/\.mol2$//' )
+ COMPUTED_LIST=($COMPUTED_LIST)
+else
+ COMPUTED_LIST=''
+fi
+
+ # Step 4 - Check if LIGAND already exists on CHEMBASE
+ echo "Checking for precomputed charges. Please wait ..."
+
+ counter=0
+ for LIGAND in ${LIGAND_LIST[@]} ; do
+
+ # If found at LigFlow, proceed to next LIGAND (space-padded so only whole names match).
+ case " ${COMPUTED_LIST[@]} " in *" ${LIGAND} "*) continue ;; esac
+
+ # If found at ChemBase, proceed to next LIGAND (space-padded so only whole names match).
+ case " ${CHEMBASE_LIST[@]} " in *" ${LIGAND} "*) continue ;; esac
+
+ # Add list of ligands to compute
+ NEW_LIGAND_LIST[$counter]=${LIGAND}
+ let counter++
+
+ done
unset LIGAND_LIST
LIGAND_LIST=(${NEW_LIGAND_LIST[@]})
-}
+unset NEW_LIGAND_LIST
+}
LigFlow_write_HPC_header() {
#=== FUNCTION ================================================================
@@ -146,8 +168,10 @@ fi
LigFlow_prepare_ligands_charges() {
-# Actualize the ligand list
+
+# UPDATE the ligand list
LigFlow_filter_ligand_list
+
NCHARGE=${#LIGAND_LIST[@]}
if [ ${NCHARGE} == 0 ] ; then
@@ -156,12 +180,14 @@ else
echo "There are ${NLIGANDS} compounds and ${NCHARGE} remaining to prepare"
fi
+
cd ${RUNDIR}
if [ ! -d ${RUNDIR}/gas ] ; then
mkdir -p ${RUNDIR}/gas
fi
+
if [ ! -d ${RUNDIR}/${CHARGE} ] ; then
mkdir -p ${RUNDIR}/${CHARGE}
fi
@@ -171,28 +197,33 @@ case ${JOB_SCHEDULLER} in
if [ -f LigFlow.run ] ; then
rm -rf LigFlow.run
fi
-
- for LIGAND in ${LIGAND_LIST[@]} ; do
- if [ ! -f ${RUNDIR}/gas/${LIGAND}.mol2 ] ; then
- echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/gas/${LIGAND}.mol2 -fo mol2 -c gas -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/" >> ${RUNDIR}/LigFlow.xargs
- fi
- done
-
+
if [ -f ${RUNDIR}/LigFlow.xargs ] ; then
- cat ${RUNDIR}/LigFlow.xargs | xargs -P${NCORES} -I '{}' bash -c '{}'
+ rm ${RUNDIR}/LigFlow.xargs
fi
- # Clean up
- rm -rf ${RUNDIR}/LigFlow.xargs
for LIGAND in ${LIGAND_LIST[@]} ; do
case ${CHARGE} in
"bcc")
+ if [ "${CHARGE_FILE}" == '' ] ; then
# Compute am1-bcc charges
- echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/gas/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/bcc/${LIGAND}.mol2 -fo mol2 -c bcc -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/">> ${RUNDIR}/LigFlow.xargs
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${LIGAND}_gas.mol2 -fo mol2 -c gas -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; antechamber -i ${LIGAND}_gas.mol2 -fi mol2 -o ${RUNDIR}/bcc/${LIGAND}.mol2 -fo mol2 -c bcc -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/">> ${RUNDIR}/LigFlow.xargs
+ else
+ net_charge=$(awk -v i=${LIGAND} '$0 ~ i {print $2}' ${CHARGE_FILE})
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/bcc/${LIGAND}.mol2 -fo mol2 -c bcc -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 -nc ${net_charge} &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/">> ${RUNDIR}/LigFlow.xargs
+
+ fi
;;
"resp")
# Prepare Gaussian
- antechamber -i ${RUNDIR}/gas/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/resp/${LIGAND}.gau -fo gcrt -gv 1 -ge ${RUNDIR}/resp/${LIGAND}.gesp -ch ${RUNDIR}/resp/${LIGAND} -gm %mem=16Gb -gn %nproc=${NCORES} -s 2 -eq 1 -rn MOL -pf y -dr no &> antechamber.log
+ if [ "${CHARGE_FILE}" == '' ] ; then
+ antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o /tmp/${LIGAND}.mol2 -fo mol2 -s 2 -pf y -dr no -c gas &> antechamber.log
+ antechamber -i /tmp/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/resp/${LIGAND}.gau -fo gcrt -gv 1 -ge ${RUNDIR}/resp/${LIGAND}.gesp -ch ${RUNDIR}/resp/${LIGAND} -gm %mem=8Gb -gn %nproc=${NCORES} -s 2 -eq 1 -rn MOL -pf y -dr no &> antechamber.log
+
+ else
+ net_charge=$(awk -v i=${LIGAND} '$0 ~ i {print $2}' ${CHARGE_FILE})
+ antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/resp/${LIGAND}.gau -fo gcrt -gv 1 -ge ${RUNDIR}/resp/${LIGAND}.gesp -ch ${RUNDIR}/resp/${LIGAND} -gm %mem=16Gb -gn %nproc=${NCORES} -s 2 -eq 1 -rn MOL -pf y -dr no -nc ${net_charge} &> antechamber.log
+ fi
# Run Gaussian to optimize structure and generate electrostatic potential grid
g09 <${RUNDIR}/resp/${LIGAND}.gau>${RUNDIR}/resp/${LIGAND}.gout
@@ -205,9 +236,13 @@ case ${JOB_SCHEDULLER} in
done
# Actually compute AM1-BCC charges
- if [ -f ${RUNDIR}/LigFlow.xargs ] ; then
- cat ${RUNDIR}/LigFlow.xargs | xargs -P${NCORES} -I '{}' bash -c '{}'
- fi
+ case ${CHARGE} in
+ "bcc")
+ if [ -f ${RUNDIR}/LigFlow.xargs ] ; then
+ cat ${RUNDIR}/LigFlow.xargs | xargs -P${NCORES} -I '{}' bash -c '{}'
+ fi
+ ;;
+ esac
;;
"SLURM"|"PBS")
@@ -223,43 +258,43 @@ case ${JOB_SCHEDULLER} in
LigFlow_write_HPC_header
- for LIGAND in ${LIGAND_LIST[@]:$first:$nlig} ; do
- if [ ! -f ${RUNDIR}/gas/${LIGAND}.mol2 ] ; then
- echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/\${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/gas/${LIGAND}.mol2 -fo mol2 -c gas -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/" >> LigFlow_gas.${first}.xargs
- fi
- done
-
- # Actually compute Gasteiger charges
- if [ -f ${RUNDIR}/LigFlow_gas.${first}.xargs ] ; then
- echo "cat ${RUNDIR}/LigFlow_gas.${first}.xargs | xargs -P${NCORES} -I '{}' bash -c '{}' " >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
- echo "rm -rf ${RUNDIR}/LigFlow_gas.${first}.xargs" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
- fi
-
-
for LIGAND in ${LIGAND_LIST[@]:$first:$nlig} ; do
case ${CHARGE} in
"bcc")
# Compute am1-bcc charges
- echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/gas/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/bcc/${LIGAND}.mol2 -fo mol2 -c bcc -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/">> LigFlow_bcc.${first}.xargs
+ if [ "${CHARGE_FILE}" == '' ] ; then
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/bcc/${LIGAND}.mol2 -fo mol2 -c bcc -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/">> LigFlow_bcc.${first}.xargs
+ else
+ net_charge=$(awk -v i=${LIGAND} '$0 ~ i {print $2}' ${CHARGE_FILE})
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/bcc/${LIGAND}.mol2 -fo mol2 -c bcc -s 2 -eq 1 -rn MOL -pf y -dr no -at gaff2 -nc ${net_charge} &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/">> LigFlow_bcc.${first}.xargs
+ fi
;;
"resp")
# Prepare Gaussian
- echo "antechamber -i ${RUNDIR}/gas/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/resp/${LIGAND}.gau -fo gcrt -gv 1 -ge ${RUNDIR}/resp/${LIGAND}.gesp -ch ${RUNDIR}/resp/${LIGAND} -gm %mem=16Gb -gn %nproc=${NCORES} -s 2 -eq 1 -rn MOL -pf y -dr no &> antechamber.log" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
-
+ if [ "${CHARGE_FILE}" == '' ] ; then
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/resp/${LIGAND}.gau -fo gcrt -gv 1 -ge ${RUNDIR}/resp/${LIGAND}.gesp -ch ${RUNDIR}/resp/${LIGAND} -gm %mem=16Gb -gn %nproc=${NCORES} -s 2 -eq 1 -rn MOL -pf y -dr no &> antechamber.log ;rm -rf /tmp/${USER}/${LIGAND}/" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
+ else
+ net_charge=$(awk -v i=${LIGAND} '$0 ~ i {print $2}' ${CHARGE_FILE})
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/original/${LIGAND}.mol2 -fi mol2 -o ${RUNDIR}/resp/${LIGAND}.gau -fo gcrt -gv 1 -ge ${RUNDIR}/resp/${LIGAND}.gesp -ch ${RUNDIR}/resp/${LIGAND} -gm %mem=16Gb -gn %nproc=${NCORES} -s 2 -eq 1 -rn MOL -pf y -dr no -nc ${net_charge} &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
+ fi
# Run Gaussian to optimize structure and generate electrostatic potential grid
echo "g09 <${RUNDIR}/resp/${LIGAND}.gau>${RUNDIR}/resp/${LIGAND}.gout" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
# Read Gaussian output and write new optimized ligand with RESP charges
- echo "antechamber -i ${RUNDIR}/resp/${LIGAND}.gout -fi gout -o ${RUNDIR}/resp/${LIGAND}_resp.mol2 -fo mol2 -c resp -s 2 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
+ echo "mkdir -p /tmp/${USER}/${LIGAND}; cd /tmp/${USER}/${LIGAND} ; antechamber -i ${RUNDIR}/resp/${LIGAND}.gout -fi gout -o ${RUNDIR}/resp/${LIGAND}.mol2 -fo mol2 -c resp -s 2 -rn MOL -pf y -dr no -at gaff2 &> antechamber.log ; rm -rf /tmp/${USER}/${LIGAND}/" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
;;
esac
done
# Actually compute AM1-BCC charges
- if [ -f ${RUNDIR}/LigFlow_bcc.${first}.xargs ] ; then
- echo "cat ${RUNDIR}/LigFlow_bcc.${first}.xargs | xargs -P${NCORES} -I '{}' bash -c '{}' " >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
- echo "rm -rf ${RUNDIR}/LigFlow_bcc.${first}.xargs" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
- fi
+ case ${CHARGE} in
+ "bcc")
+ if [ -f ${RUNDIR}/LigFlow_bcc.${first}.xargs ] ; then
+ echo "cat ${RUNDIR}/LigFlow_bcc.${first}.xargs | xargs -P${NCORES} -I '{}' bash -c '{}' " >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
+ echo "rm -rf ${RUNDIR}/LigFlow_bcc.${first}.xargs" >> ${RUNDIR}/LigFlow.${JOB_SCHEDULLER,,}
+ fi
+ ;;
+ esac
if [ "${JOB_SCHEDULLER}" == "SLURM" ] ; then
@@ -329,7 +364,7 @@ LigFlow -l ligand.mol2 -p myproject [--bcc] [--resp]
[Options]
-h/--help : Show this help message and quit
- -hh/--fullhelp : Detailed help
+ -hh/--full-help : Detailed help
-l/--ligand : Ligands .mol2 input file.
-p/--project : ChemFlow project.
@@ -346,7 +381,7 @@ LigFlow -l ligand.mol2 -p myproject [--bcc] [--resp]
[Help]
-h/--help : Show this help message and quit
- -hh/--fullhelp : Detailed help
+ -hh/--full-help : Detailed help
[ Required ]
*-p/--project STR : ChemFlow project
@@ -361,6 +396,10 @@ LigFlow -l ligand.mol2 -p myproject [--bcc] [--resp]
--pbs/--slurm : Workload manager, PBS or SLURM
--header FILE : Header file provided to run on your cluster.
+[ Development ]
+ --charges-file FILE : Contains the net charges for all ligands in a library.
+ ( name charge ) ( CHEMBL123 -1 )
+
"
exit 0
}
@@ -403,7 +442,7 @@ while [[ $# -gt 0 ]]; do
;;
"-nc"|"--cores") # Number of Cores [1] (or cores/node)
NCORES="$2" # Same as above.
- NC_CHANGED="yes"
+ #NC_CHANGED="yes"
shift # past argument
;;
# HPC options
@@ -418,6 +457,12 @@ while [[ $# -gt 0 ]]; do
HEADER_FILE=$(abspath "$2")
shift
;;
+ # Features under Development
+ "--charges-file")
+ CHARGE_FILE=$(abspath "$2")
+ if [ ! -f ${CHARGE_FILE} ] ; then echo "Charge file \"${CHARGE_FILE}\" not found " ; exit 1 ; fi
+ shift
+ ;;
*)
unknown="$1" # unknown option
echo "Unknown flag \"$unknown\""
diff --git a/ChemFlow/src/ScoreFlow_functions.bash b/ChemFlow/src/ScoreFlow_functions.bash
index 0d0205e..1e24f91 100644
--- a/ChemFlow/src/ScoreFlow_functions.bash
+++ b/ChemFlow/src/ScoreFlow_functions.bash
@@ -175,16 +175,14 @@ ScoreFlow_rescore_vina() {
#
# Author: Dona de Francquen
#
-# PARAMETERS: ${mgltools_folder}
-# ${RUNDIR}
+# PARAMETERS: ${RUNDIR}
# ${LIGAND_LIST}
# ${DOCK_CENTER}
# ${DOCK_LENGHT}
#===============================================================================
# Prepare RECEPTOR
if [ ! -f ${RUNDIR}/receptor.pdbqt ] ; then
- ${mgltools_folder}/bin/python \
- ${mgltools_folder}/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_receptor4.py \
+ pythonsh $(command -v prepare_receptor4.py) \
-r ${RUNDIR}/receptor.mol2 \
-o ${RUNDIR}/receptor.pdbqt
fi
@@ -193,9 +191,8 @@ fi
for LIGAND in ${LIGAND_LIST[@]} ; do
# Prepare Ligands
if [ ! -f ${RUNDIR}/${LIGAND}/ligand.pdbqt ] ; then
- ${mgltools_folder}/bin/python \
- ${mgltools_folder}/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_ligand4.py \
- -l ${RUNDIR}/${LIGAND}/ligand.mol2 \
+ pythonsh $(command -v prepare_ligand4.py) \
+ -l ${RUNDIR}/${LIGAND}/ligand.mol2 \
-o ${RUNDIR}/${LIGAND}/ligand.pdbqt
fi
# Run vina
@@ -252,6 +249,10 @@ else
for LIGAND in ${LIGAND_LIST[@]} ; do
cd ${RUNDIR}/${LIGAND}
+ if [ ! -f ligand.frcmod ] ; then
+ parmchk2 -i ligand_gas.mol2 -o ligand.frcmod -f mol2 -s 2
+ fi
+
if [ ${RUN_ONLY} == 'yes' ] ; then
echo -e "cd ${RUNDIR}/${LIGAND}" > ScoreFlow.run
@@ -330,7 +331,7 @@ if [ ${CHARGE} != 'gas' ] ; then
if [ "$(grep ${LIGAND_NAME} ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.lst)" == ${LIGAND_NAME} ] ; then
echo "${CHARGE} charges found in ChemBase for ${LIGAND}"
- awk -v LIGAND=${LIGAND_NAME} '$0 ~ LIGAND {flag=1;next}/BOND/{flag=0}flag' ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.mol2 | awk '/1 MOL/&&!/TEMP/ {print $9}' > charges.dat
+ awk -v LIGAND=${LIGAND_NAME} '$0 ~ LIGAND {flag=1;next}/BOND/{flag=0}flag' ${CHEMFLOW_HOME}/ChemBase/${CHARGE}/ChemBase_${CHARGE}.mol2 | awk '/ MOL/&&!/TEMP/ {print $9}' > charges.dat
antechamber -i ligand_gas.mol2 -o ligand_${CHARGE}.mol2 -fi mol2 -fo mol2 -cf charges.dat -c rc -pf yes &> /dev/null
# Done
@@ -342,8 +343,8 @@ if [ ${CHARGE} != 'gas' ] ; then
if [ "${DONE_CHARGE}" == "false" ] && [ -f ${WORKDIR}/${PROJECT}.chemflow/LigFlow/${CHARGE}/${LIGAND_NAME}.mol2 ] ; then
echo "${CHARGE} charges found in LigFlow for ${LIGAND}"
- awk '/1 MOL/&&!/TEMP/ {print $9}' ${WORKDIR}/${PROJECT}.chemflow/LigFlow/${CHARGE}/${LIGAND_NAME}.mol2 > charges.dat
- antechamber -i ligand_gas.mol2 -o ligand_${CHARGE}.mol2 -fi mol2 -fo mol2 -cf charges.dat -c rc -pf yes &> /dev/null
+ awk '/ MOL/&&!/TEMP/ {print $9}' ${WORKDIR}/${PROJECT}.chemflow/LigFlow/${CHARGE}/${LIGAND_NAME}.mol2 > charges.dat
+ antechamber -i ligand_gas.mol2 -o ligand_${CHARGE}.mol2 -fi mol2 -fo mol2 -cf charges.dat -c rc -pf yes -dr no &> /dev/null
# Done
DONE_CHARGE="true"
@@ -441,8 +442,8 @@ for filename in ${scoreflow_protocol} ; do
eval echo \""${file}"\" > ${RUNDIR}/${filename}.in
done
-#mmgbsa input
-echo "$(cat ${CHEMFLOW_HOME}/templates/mmgbsa/GB2.template)" > ${RUNDIR}/GB2.in
+#mm(pb,gb)sa input
+echo "$(cat ${CHEMFLOW_HOME}/templates/mmgbsa/${SCORING_FUNCTION}.template)" > ${RUNDIR}/${SCORING_FUNCTION}.in
}
@@ -516,9 +517,14 @@ done" >> ${RUNDIR}/ScoreFlow.run
if [ ! -f MMPBSA.dat ] ; then
echo "rm -rf com.top rec.top ligand.top
-ante-MMPBSA.py -p ${init}.prmtop -c com.top -r rec.top -l ligand.top -n :MOL -s ':WAT,Na+,Cl-' --radii=mbondi2 &> ante_mmpbsa.job
-MMPBSA.py -O -i ../GB2.in -sp ${init}.prmtop -cp com.top -rp rec.top -lp ligand.top -o MMPBSA.dat -eo MMPBSA.csv -y ${TRAJECTORY} &> MMPBSA.job
-rm -rf reference.frc " >> ${RUNDIR}/ScoreFlow.run
+ante-MMPBSA.py -p ${init}.prmtop -c com.top -r rec.top -l ligand.top -n :MOL -s ':WAT,Na+,Cl-' --radii=mbondi2 &> ante_mmpbsa.job" >>${RUNDIR}/ScoreFlow.run
+# Read the input written as ${RUNDIR}/${SCORING_FUNCTION}.in; pass the solvated topology (-sp) only when WATER is yes.
+if [ "${WATER}" != "yes" ] ; then
+echo "MMPBSA.py -O -i ../${SCORING_FUNCTION}.in -cp com.top -rp rec.top -lp ligand.top -o MMPBSA.dat -eo MMPBSA.csv -y ${TRAJECTORY} &> MMPBSA.job" >>${RUNDIR}/ScoreFlow.run
+else
+echo "MMPBSA.py -O -i ../${SCORING_FUNCTION}.in -sp ${init}.prmtop -cp com.top -rp rec.top -lp ligand.top -o MMPBSA.dat -eo MMPBSA.csv -y ${TRAJECTORY} &> MMPBSA.job" >>${RUNDIR}/ScoreFlow.run
+fi
+echo "rm -rf reference.frc " >> ${RUNDIR}/ScoreFlow.run
fi
}
@@ -639,7 +645,7 @@ if [ -f ${RUNDIR}/ScoreFlow.csv ] ; then
rm -rf ${RUNDIR}/ScoreFlow.csv
fi
-SCOREFLOW_HEADER="DOCK_PROGRAM PROTOCOL RECEPTOR LIGAND POSE SCORE"
+SCOREFLOW_HEADER="SCORE_PROGRAM PROTOCOL RECEPTOR LIGAND POSE SCORE"
case ${SCORE_PROGRAM} in
"PLANTS")
@@ -750,13 +756,16 @@ JOB SCHEDULLER: ${JOB_SCHEDULLER}
OVERWRITE: ${OVERWRITE}
"
-echo -n "
-Continue [y/n]? "
-read opt
-case $opt in
-"Y"|"YES"|"Yes"|"yes"|"y") ;;
-*) echo "Exiting" ; exit 0 ;;
-esac
+if [ "${YESTOALL}" != 'yes' ] ; then
+ echo -n "
+ Continue [y/n]? "
+ read opt
+ case $opt in
+ "Y"|"YES"|"Yes"|"yes"|"y") ;;
+ *) echo "Exiting" ; exit 0 ;;
+ esac
+fi
+
}
@@ -815,18 +824,18 @@ ScoreFlow -r receptor.mol2 -l ligand.mol2 -p myproject --center X Y Z [--protoco
# For MMGBSA only
ScoreFlow -r receptor.pdb -l ligand.mol2 -p myproject [-protocol protocol-name] -sf mmgbsa
-[Help]
+[ Help ]
-h/--help : Show this help message and quit
-hh/--fullhelp : Detailed help
-[Required]
-*-p/--project STR : ChemFlow project
-*-r/--receptor FILE : Receptor MOL2 file
-*-l/--ligand FILE : Ligands MOL2 file
+[ Required ]
+ -p/--project STR : ChemFlow project
+ -r/--receptor FILE : Receptor MOL2 file
+ -l/--ligand FILE : Ligands MOL2 file
-[Optional]
+[ Optional ]
--protocol STR : Name for this specific protocol [default]
- -sf/--function STR : vina, chemplp, plp, plp95, mmgbsa [chemplp]
+ -sf/--function STR : vina, chemplp, plp, plp95, mmgbsa, mmpbsa [chemplp]
[ Charges for ligands - MMGBSA ]
--gas : Gasteiger-Marsili (default)
@@ -849,16 +858,22 @@ ScoreFlow -r receptor.pdb -l ligand.mol2 -p myproject [-protocol protocol-name]
--overwrite : Overwrite results
[ Rescoring with vina or plants ]
-Note: You can automatically get the center and radius/size for a particular ligand .mol2 file by using the ${CHEMFLOW_HOME}/bin/bounding_shape.py script
-*--center STR : xyz coordinates of the center of the binding site, separated by a space
-[ PLANTS ]
+
+ --center STR : xyz coordinates of the center of the binding site, separated by a space
+
+ [ PLANTS ]
--radius FLOAT : Radius of the spheric binding site [15]
-[ Vina ]
+
+ [ Vina ]
--size LIST : Size of the grid along the x, y and z axis, separated by a space [15 15 15]
--vina-mode STR : local_only (local search then score) or score_only [local_only]
[ Post Processing ]
--postprocess : Process ScoreFlow output for the specified project/protocol/receptor.
+
+Note: You can automatically get the center and radius/size
+ for a particular ligand .mol2 file using the bounding_shape.py script
+
_________________________________________________________________________________
"
exit 0
@@ -986,6 +1001,12 @@ while [[ $# -gt 0 ]]; do
# --archive)
# ARCHIVE='yes'
# ;;
+ "--yes")
+ YESTOALL='yes'
+ ;;
+ "--cuda-double")
+ CUDA_PRECISION="DOUBLE"
+ ;;
*)
unknown="$1" # unknown option
echo "Unknown flag \"$unknown\". RTFM"
diff --git a/ChemFlow/src/split_sdf.bash b/ChemFlow/src/split_sdf.bash
index 886746a..b0d1c40 100644
--- a/ChemFlow/src/split_sdf.bash
+++ b/ChemFlow/src/split_sdf.bash
@@ -1,4 +1,7 @@
-#!/bin/bashi
+#!/bin/bash
+#
+# Split SDF
+#
OLDIFS=${IFS}
IFS='%'
diff --git a/ChemFlow/templates/mmgbsa/GB2.template b/ChemFlow/templates/mmgbsa/mmgbsa.template
similarity index 100%
rename from ChemFlow/templates/mmgbsa/GB2.template
rename to ChemFlow/templates/mmgbsa/mmgbsa.template
diff --git a/ChemFlow/templates/mmgbsa/mmpbsa.template b/ChemFlow/templates/mmgbsa/mmpbsa.template
new file mode 100644
index 0000000..3ef9b50
--- /dev/null
+++ b/ChemFlow/templates/mmgbsa/mmpbsa.template
@@ -0,0 +1,7 @@
+Input file for running PB
+&general
+ verbose=1,keep_files=0,interval=10
+/
+&pb
+ istrng=0.000,inp=1,radiopt=0,indi=4,
+/
\ No newline at end of file
diff --git a/README.rst b/README.rst
index 49ff1e5..8d5fb22 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
ChemFlow |LOGO|
===============
-.. |LOGO| image:: https://user-images.githubusercontent.com/27850535/29564754-6b07a548-8743-11e7-9463-8626675b9481.png
+.. |LOGO| image:: CHEMFLOW_LOGO.PNG
:alt: Logo
:align: middle
@@ -29,4 +29,5 @@ We do not provide any of the licensed softwares used by ChemFlow. It is up to th
PLANTS_ and SPORES_ are both available under a free academic license.
.. _PLANTS: http://www.uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/pharmazie-und-biochemie/pharmazie/pharmazeutische-chemie/pd-dr-t-exner/research/plants.html
-.. _SPORES: http://www.mnf.uni-tuebingen.de/fachbereiche/pharmazie-und-biochemie/pharmazie/pharmazeutische-chemie/pd-dr-t-exner/research/spores.html
\ No newline at end of file
+.. _SPORES: http://www.mnf.uni-tuebingen.de/fachbereiche/pharmazie-und-biochemie/pharmazie/pharmazeutische-chemie/pd-dr-t-exner/research/spores.html
+
diff --git a/docs/CHEMFLOW_LOGO.PNG b/docs/CHEMFLOW_LOGO.PNG
new file mode 100644
index 0000000..7d2b742
Binary files /dev/null and b/docs/CHEMFLOW_LOGO.PNG differ
diff --git a/docs/HPC.rst b/docs/HPC.rst
new file mode 100644
index 0000000..333381c
--- /dev/null
+++ b/docs/HPC.rst
@@ -0,0 +1,117 @@
+.. highlight:: bash
+
+=======
+HPC Run
+=======
+
+Chem\ *Flow* was designed to profit from High Performance Computing (HPC) resources through SLURM or PBS schedulers (SGE coming soon).
+
+Usage
+=====
+HPC resources may be requested through **--slurm**, **--pbs** flags, followed by an appropriate TEMPLATE indicated by the **--header** flag.
+
+Sample TEMPLATES
+----------------
+Here are examples for this header file. One must always provide the HEADER for SLURM and PBS and edit them carefully.
+
+PBS - Using the **public** queue, asking 2 nodes, 28 cores/node, for 2h
+
+.. code-block:: bash
+
+ #! /bin/bash
+ #PBS -q public
+ #PBS -l nodes=2:ppn=28
+ #PBS -l walltime=2:00:00
+ #PBS -N myjob
+ ...
+
+SLURM - Using the **gpu** queue, asking 1 node with 4 GPUs, for 10 minutes.
+
+.. code-block:: bash
+
+ #! /bin/bash
+ #SBATCH -p gpu
+ #SBATCH -n 1
+ #SBATCH -t 00:10:00
+ #SBATCH --gres=gpu:4
+ #SBATCH --job-name=myjob
+ ...
+
+Additional configuration may be needed, such as loading the compiler, MPI and CUDA libraries, as well as any settings specific to proprietary software, such as Amber or Gaussian, that may differ from one's workstation installation.
+
+.. code-block:: bash
+
+ # Load modules
+ module load compilers/gnu
+ module load mpi/openmpi-3.0
+ module load compilers/cuda-10.1
+
+ # Path to amber.sh replace with your own
+ source ~/software/amber18/amber.sh
+
+ # Load Gaussian
+ module load gaussian
+ ...
+
+.. Tip::
+ Seek assistance from the system administrator to optimally configure the TEMPLATE files.
+
+Sample command lines:
+=====================
+
+Lig\ *Flow*:
+------------
+Prepare compounds with RESP charges, using 28 cores/node through the SLURM scheduler.
+
+.. code-block:: bash
+
+ LigFlow \
+ -p myproject \
+ -l compounds.mol2 \
+ --resp \
+ -nc 28 \
+ --slurm --header TEMPLATE.slurm
+
+Dock\ *Flow*:
+-------------
+Dock compounds using AutoDock Vina, with 16 cores/node through the PBS scheduler.
+
+.. code-block:: bash
+
+ # AutoDock Vina, 16 cores/node, PBS
+ DockFlow \
+ -p myproject \
+ --protocol vina \
+ -r receptor.mol2 \
+ -l compounds.mol2 \
+ --center 31.50 13.74 24.36 \
+ --size 11.83 14.96 12.71 \
+ -sf vina \
+ -nc 16 \
+ --pbs --header TEMPLATE.pbs
+
+Score\ *Flow*:
+--------------
+Standard Minimization and Molecular Dynamics Simulation in explicit solvent, with RESP charges for the ligand, followed by MM/PBSA binding energy calculation, using 4 cores and 4 GPUs/node, and double precision CUDA.
+
+.. code-block:: bash
+
+ ScoreFlow \
+ -p myproject \
+ --protocol MMPBSA \
+ -r receptor.pdb \
+ -l compounds.mol2 \
+ -sf mmpbsa \
+ --resp --md --water \
+ --cuda-double \
+ -nc 4 \
+ --slurm --header TEMPLATE.slurm
+
+
+.. Tip::
+
+ Use the **\\-\\-write-only** flag to run a test before launching High Throughput calculations.
+
+.. Warning::
+
+ Be aware that HPC systems commonly limit the number of submitted jobs; choose your options wisely.
diff --git a/docs/authors.rst b/docs/authors.rst
index 49db8a4..92d33e7 100644
--- a/docs/authors.rst
+++ b/docs/authors.rst
@@ -1,18 +1,35 @@
+.. highlight:: bash
+
=======
Credits
=======
Manager
-------
+@ Université de Strasbourg
-* Marco Cecchini
+Marco Cecchini
Development Lead
----------------
+@ Universidade Federal de Juiz de Fora
+
+Diego Enry Barreto Gomes
-* Diego Enry Barreto Gomes
-* Cedric Bouysset
+@ Université Côte D'Azur
+
+Cédric Bouysset
Contributors
------------
-* Dona de Francquen
+@ Conseil de l'Europe.
+
+Donatienne de Francquen
+
+@ Université de Strasbourg
+
+Gilberto Pereira
+
+Marion Sisquellas
+
+Adrien Cerdan
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index cb6e027..7a787cd 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -20,13 +20,13 @@
# -- Project information -----------------------------------------------------
project = 'ChemFlow'
-copyright = '2018, Diego Enry Barreto Gomes'
-author = 'Diego Enry Barreto Gomes, Dona de Francquen, Cedric Bouysset'
+copyright = '2016-2019, Diego Enry Barreto Gomes, Cedric Bouysset, Marco Cecchini'
+author = 'Diego Enry Barreto Gomes, Cedric Bouysset, Marco Cecchini'
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
-release = '0.9'
+release = '1.0'
# -- General configuration ---------------------------------------------------
@@ -101,7 +101,7 @@
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
-htmlhelp_basename = 'ChemFlowdoc'
+htmlhelp_basename = 'ChemFlowDoc'
# -- Options for LaTeX output ------------------------------------------------
@@ -129,7 +129,7 @@
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'ChemFlow.tex', 'ChemFlow Documentation',
- 'Diego Enry Barreto Gomes', 'manual'),
+ 'Diego E. B. Gomes, Cedric Bouysset, Marco Cecchini', 'manual'),
]
diff --git a/docs/contributing.rst b/docs/contributing.rst
index a1d6ea9..fa77fed 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -1,4 +1,4 @@
-.. highlight:: shell
+.. highlight:: bash
============
Contributing
@@ -38,8 +38,8 @@ and "help wanted" is open to whoever wants to implement it.
Write Documentation
~~~~~~~~~~~~~~~~~~~
-Double Decoupling Method (DDM) could always use more documentation, whether as part of the
-official Double Decoupling Method (DDM) docs, in docstrings, or even on the web in blog posts,
+ChemFlow could always use more documentation, whether as part of the
+official docs, in docstrings, or even on the web in blog posts,
articles, and such.
Submit Feedback
@@ -59,14 +59,14 @@ Get Started!
Ready to contribute? Here's how to set up `ChemFlow` for local development.
-1. Fork the `CHemFlow` repo on GitHub.
+1. Fork the `ChemFlow` repo on GitHub.
2. Clone your fork locally::
$ git clone git@github.com:IFMlab/ChemFlow.git
3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
- $ mkvirtualenv ddm
+ $ mkvirtualenv chemflow
$ cd ChemFlow/
$ python setup.py develop
diff --git a/docs/features.rst b/docs/features.rst
new file mode 100644
index 0000000..0b176d7
--- /dev/null
+++ b/docs/features.rst
@@ -0,0 +1,34 @@
+.. highlight:: bash
+
+========
+Features
+========
+Chem\ *Flow* was designed as a modular tool based on plain Bourne-again Shell (BASH) script, a ubiquitous environment and programming language in every UNIX environment. The code integrates freely available software for structure manipulation (Rdkit and openbabel), molecular docking (PLANTS and Autodock Vina), molecular dynamics (MD) simulation, binding free energy calculations (AmberTools18), and structure calculation (SQM). In addition, it contains optional routines for proprietary software Amber18 and Gaussian 09.
+
+High Throughput
+===============
+Chem\ *Flow* was tuned to optimally use the high performance and throughput of available computing resources, following architectural standards of SLURM and PBS queueing systems. The job distribution was designed to minimize I/O and maximize throughput. Simplified configuration files allow users to adapt this policy to the available topology.
+
+.. note:: We'll soon add support to Sun Grid Engine.
+
+Fault tolerance and Checkpointing
+=================================
+A big concern when dealing with high throughput data is to be able to diagnose and resume after unsuccessful execution. Chem\ *Flow* detects and reports failed jobs, and resubmits them all.
+
+.. warning:: One should **always** investigate the reasons of failure with proper care.
+
+Analysis and Reporting
+======================
+The major benefit of standardization is to facilitate analysis. Currently Chem\ *Flow* ships with protocols that perform analyses and report for some common scenarios for computational chemistry.
+
+#. Docking and virtual screening of compounds including prospection, validation and method comparison.
+
+#. MD simulations, contains protein stability (temperature, solvent, solvent mixture).
+
+#. Any two-molecule system contains “rescoring” and binding free energy estimation.
+
+.. note:: Analyses are mostly implemented with Bourne-again Shell (BASH) while the reporting tools are implemented as interactive Python Notebooks.
+
+Data curation and reproducibility
+=================================
+Data curation demands for industrial applications require compliance with ISO/IEC 17025, the standard for which laboratories must hold accreditation in order to be deemed technically competent. The Chem\ *Flow* standard is a readily accessible and familiar specification useful for data curation on drug discovery and molecular simulation projects.
diff --git a/docs/history.rst b/docs/history.rst
index 3fdbc39..7f6a042 100644
--- a/docs/history.rst
+++ b/docs/history.rst
@@ -1,6 +1,19 @@
+.. highlight:: bash
+
=======
History
=======
+1.0 (2019-10-23)
+----------------
+* ChemFlow community release !
+
+0.92 (2019-08-21)
+-----------------
+* ChemFlow community pre-release !
+
+0.91 (2019-02-05)
+-----------------
+* Documentation update
0.9 (2018-07-27)
------------------
diff --git a/docs/index.rst b/docs/index.rst
index 9dee729..32d5a03 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,16 +6,20 @@
****************************************
Welcome to Chem\ *Flow*'s documentation!
****************************************
-Chem\ *Flow* is a modular platform for computational chemistry workflows using high performance environments.
-The workflows address common **computational chemistry** tasks and are named with a prefix followed by *Flow*, Dock\ *Flow*, Score\ *Flow* and Lig\ *Flow*.
.. toctree::
- :maxdepth: 1
+ :maxdepth: 2
:caption: Contents:
readme
+ overview
+ workflows
+ features
+ HPC
installation
+ manual
tutorial
+ contributing
authors
.. usage
@@ -27,52 +31,3 @@ The workflows address common **computational chemistry** tasks and are named wit
.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`
-
-
-Workflows
-=========
-:Dock\ *Flow*: Covers docking and virtual screening of compound(s) against some single or multiple targets, with one, some or all of the implemented docking software.
-
-:Score\ *Flow*: Handles (re)scoring of (top) docking poses either with empirical (VinaSF, ChemPLP) or physics-based functions (MM/PBSA, MM/GBSA).
-
-:Lig\ *Flow*: Handles small molecule conversions, conformer search and compound parametrization through assignment to the General Amber Force-Field (GAFF2) and charge calculation through QM methods. It also probes the Chem\ **Base** to avoid redundancy.
-
-:Chem\ **Base**: is the Chem\ *Flow database for pre-calculated molecules, so far it spams nearly 9000 drug-like compounds from the *Chimioteque Nationale du CNRS* with QM optimized geometries and assigned parameters for GAFF.
-
-Implementation
-==============
-Chem\ *Flow* was designed as modular tool based on plain Bourne-again Shell (BASH) script, a ubiquitous environment and programming language in every UNIX environment. The code integrates freely available software for structure manipulation, molecular docking, molecular dynamics (MD) simulation, binding free energy calculations, and structure calculation. In addition, it contains optional routines for proprietary software.
-By design, the goal was to be as simple is possible and facilitate modifications and requires minimal installation. The code is containerized and modular to allow methods to be applied only in a well-defined way which is traceable and reproducible following the ISO/IEC 17025 guidelines for assays. Chem\ *Flow* ships with protocols for common experiments and curated benchmarks to assess the performance of methods.
-
-How ChemFlow was born
-=====================
-During virtual screening/ molecular dynamics study we were confronted with unintelligibly data from a previous collaborator and the challenge to produce our own. In fact that's an actually a very common scenario, everyone “\ *does their things their own way*”.
-
-We thought it would be valuable if we standardize the directory structure and file naming. The whole point of a standard is to facilitate documentation, comprehension, data retention and reproducibility, so that future users or applications will not have to figure out this structure or migrate to new formats.
-
-Features
-========
-Chem\ *Flow* was designed as modular tool based on plain Bourne-again Shell (BASH) script, a ubiquitous environment and programming language in every UNIX environment. The code integrates freely available software for structure manipulation (Rdkit and openbabel), molecular docking (PLANTS and Autodock Vina), molecular dynamics (MD) simulation, binding free energy calculations (AmberTools18), and structure calculation (SQM). In addition, it contains optional routines for proprietary software Amber18 and Gaussian 09.
-
-* High Throughput: Chem\ *Flow* was tuned to optimally use the high performance and throughput of available computing resources, following architectural standards of SLURM and PBS queueing systems (SGE coming soon). The job distribution was designed to minimize I/O and maximize throughput. Simplified configuration files allow them to adapt this policy to the available topology.
-
-* Fault tolerance and Checkpointing: A big concert when dealing with high throughput data is to be able to diagnose and resume after unsuccessful execution. So far, Chem\ *Flow* can only detect and report failed jobs, and resubmit them, letting the user investigate the reasons of failure.
-
-* Analysis and Reporting: The major benefit from standardization is to facilitate analysis. Currently Chem\ *Flow* ships with protocols perform, analyses and report for some common scenarios for computational chemistry. For docking and virtual screening of compounds including prospection, validation and method comparison. For MD simulations, contains protein stability (temperature, solvent, solvent mixture). For any two-molecule system contains “rescoring” and binding free energy estimation. Analysis is implemented with Bourne-again Shell (BASH) and but, for its graphic capabilities, the reporting tools are implemented as interactive Python Notebooks.
-
-* Data curation and reproducibility: Data curation demand for industrial applications require compliance with ISO/IEC 17025, the standard for which laboratories must hold accreditation in order to be deemed technically competent. The Chem\ *Flow* standard is a readily accessible and familiar specifications useful for data curation on drug discovery and molecular simulation projects.
-
-Workflows (extended)
-====================
-Chem\ *Flow* is a modular platform for computational chemistry workflows using high performance environments.
-The workflows address common **computational chemistry** tasks and are named with a prefix followed by *Flow*, Dock\ *Flow*, Score\ *Flow* and Lig\ *Flow*.
-
-* Dock\ *Flow* covers docking and virtual screening of compound(s) against some single or multiple targets, with one, some or all of the implemented docking software (so far Autodock Vina and PLANTS).
-
-* Score\ *Flow* on the other hand handles (re)scoring of (top) docking poses which is more expensive, Structural Interaction fingerprints (using IChem), VinaSF, ChemPLP, PLP, MM/GBSA (with or without MD, implicit/explicit solvent). Both DockFlow and ScoreFlow are implemented to comply with docking/scoring benchmarks so that user can test new search and scoring functions and directly compare with the competition (within ReportFlow).
-
-* Lig\ *Flow* handles conversions (such as smiles to mol2), conformer search and compound parameterization through assignment to the General Amber Force-Field (GAFF2) and charge calculation through QM methods. It also probes the ChemBase to avoid redundancy.
-
-* Chem\ **Base** is the Chem\ *Flow* database for pre-calculated molecules, so far it contains a nearly 9000 drug-like compounds from the “Chimioteque Nationale du CNRS” with QM optimized geometries and assigned parameters for GAFF.
-
-* MD\ *Flow** (not active) handles molecular dynamics simulation protocols and analysis and HGFlow is an application specific workflow designed for Host-Guest systems.
diff --git a/docs/installation.rst b/docs/installation.rst
index 85ac4e9..231d22c 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -1,76 +1,76 @@
.. highlight:: bash
-=========================
-Installation instructions
-=========================
+=======
+Install
+=======
Step 1 - Download ChemFlow sources.
-----------------------------------
-The sources for ChemFlow can be downloaded from the `Github repo`_.
+The sources for ChemFlow can be downloaded from the `Github repo`_.
-.. _Github repo: https://github.com/IFMlab/ChemFlow.git
+.. _Github repo: https://github.com/IFMlab/ChemFlow.git
-You can either clone the public repository:
+.. code-block:: bash
- ``git clone https://github.com/IFMlab/ChemFlow.git``
+ # Clone ChemFlow to the install location of your choice:
+ git clone https://github.com/IFMlab/ChemFlow.git
Step 2 - Install miniconda.
---------------------------
-* Download and install miniconda, python 3.
+Download and install the latest version of miniconda for python 3.x
https://docs.conda.io/en/latest/miniconda.html
-* Create an environment for ChemFlow.
-``conda create -n chemflow``
+.. code-block:: bash
+
+ # Download the latest version of miniconda for python 3.x
+ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+
+ # Install miniconda
+ chmod +x Miniconda3-latest-Linux-x86_64.sh
+ ./Miniconda3-latest-Linux-x86_64.sh
+
+ #Create an environment for ChemFlow
+ conda create -n chemflow
+
+ # Activate chemflow environment
+ conda activate chemflow
-Step 3 - Install the software dependencies
+Step 3 - Install the software dependencies.
--------------------------------------------
-* rdkit
- ``conda install -c rdkit rdkit``
+.. code-block:: bash
+
+ # rdkit
+ conda install -c rdkit rdkit
-* AmberTools
- ``conda install -c ambermd ambertools``
+ # AmberTools (use Amber18 if you have a licence)
+ conda install -c ambermd ambertools
-* AutoDockTools (required for Vina)
+ # AutoDockTools (required for Vina)
Download and install from: http://mgltools.scripps.edu/downloads
-* Autodock Vina
+ # Autodock Vina
Download and install from: http://vina.scripps.edu
-* PLANTS ( Requires free registration )
+ # PLANTS ( Requires free registration )
Download and install from: http://www.tcd.uni-konstanz.de/plants_download/
-* Amber18 (Optional, licence is required)
+ # Amber18 (Optional, licence is required)
Download and install from: http://ambermd.org
-* Gaussian (Optional, licence is required)
+ # Gaussian (Optional, licence is required. Required for RESP charges)
Download and install from: https://gaussian.com
+
Step 4 - Set PATHS
------------------
-* AutoDockTools - "Utilities24" must be in the system PATH:
- ``export PATH=${PATH}:[/home/user/myapps/]mgltools_x86_64Linux2_1.5.6/MGLToolsPckgs/AutoDockTools/Utilities24/``
-* PLANTS
- ``export PATH=${PATH}:[/home/user/myapps/]PLANTS1.2_64bit``
-* AutoDock Vina
- ``export PATH=${PATH}:[/home/user/myapps/]autodock_vina_1_1_2_linux_x86/bin/``
-
-If you choose to manually install Amber18 and/or Gaussian, make sure they're also on ${PATH}
-* Amber18
- source [/home/user/myapps/]amber18/amber.sh
-* Gaussian 09
- ``export g09root=[/home/user/myapps/]``
-
- ``export GAUSS_SCRDIR=/tmp``
-
- ``source $g09root/g09/bsd/g09.profile``
-
-# Instructions for the impacient:
+In addition to downloading the required software, you must be able to run them flawlessly.
+Set up the PATHS to their install locations, as follows, and add them to your .bashrc.
.. code-block:: bash
- # Please modify the following paths and add them to your .bashrc
+
# ChemFlow
export CHEMFLOW_HOME=~/software/ChemFlow/ChemFlow/
@@ -87,13 +87,20 @@ If you choose to manually install Amber18 and/or Gaussian, make sure they're als
export PATH="${PATH}:~/software/PLANTS/"
# Optional (paid software)
-
+
# Amber18 (Ambertools19 and Amber18)
source ~/software/amber18/amber.sh
-
+
# Gaussian 09
export g09root=~/software/
export GAUSS_SCRDIR=/tmp
source $g09root/g09/bsd/g09.profile
+
+Additional software for the tutorial
+------------------------------------
+To run the jupyter-notebook tutorial, you may also install some python modules.
+
+ ``conda install pandas seaborn``
+
\ No newline at end of file
diff --git a/docs/manual.rst b/docs/manual.rst
new file mode 100644
index 0000000..1d5585e
--- /dev/null
+++ b/docs/manual.rst
@@ -0,0 +1,378 @@
+.. highlight:: bash
+
+===========
+User Manual
+===========
+
+Lig\ *Flow*
+============
+Lig\ *Flow* handles the curation of compound libraries, stored as SMILES or MOL2 files, automating 3D conformer generation, compound parameterization and charge calculation. It also probes the Chem\ *Base* to avoid redundancy.
+
+Lig\ *Flow* does it all through a series of functions designed to prepare compounds for Dock\ *Flow* and Score\ *Flow*. Lig\ *Flow* supports resuming unfinished calculation.
+
+**.mol2** files are stored according to the following hierarchy, with file names determined by molecule name.
+
+
+.. code-block:: bash
+
+ |--${project}.chemflow
+ | |--LigFlow
+ | |--original/${compound}.mol2
+ | |--${charge}/${compound}.mol2 (gas, bcc or resp charges)
+
+**gas** - Gasteiger-Marsili charges ; **bcc** - Bond Charge Correction (AM1-BCC) ; **resp** - Restrained electrostatic fitted charges
+
+.. note:: Lig\ *Flow* uses /tmp/${molecule} during calculations, when running in parallel.
+
+
+
+Step 1a - Starting from a *.smi* file. (SMILES)
+----------------------------------------------
+Conversion of a SMILES library to 3D and conformer generation can be achieved through integration with RDKit, OpenBabel or Chemaxon's molconvert (licence required), pick your favorite. A 3D structure for each compound will be generated and stored as individual **${compound}.mol2** file.
+
+By default only the most probable tautomer for pH 7.0 and a single 3D conformer are generated, therefore users are highly encouraged to provide isomeric (ism) smiles or carefully inspect the output library to avoid mistakes.
+
+
+Step 1b - Starting from a *.mol2* file.
+---------------------------------------
+One should provide a complete .mol2 file, all-hydrogen, correct bond valences. PERIOD. LigFlow will split multimol2 files and store as individual **${compound}.mol2** files.
+
+
+.. tip:: Chemical library curation is a crucial step. Consider using a specialized tool for compound tautomer and pKa prediction.
+
+.. warning:: Lig\ *Flow* will **never** autogenerate names for your molecules, **never**. Make sure you provide proper input files.
+
+Step 2 - Compound parameterization
+----------------------------------
+Depending on the purpose, a different parameterization should take place. For docking, a Tripos .mol2 file suffices since Dock\ *Flow* has a specific routine to prepare it for the target software.
+
+If one however chooses to rescore a complex using more accurate free energy methods, Lig\ *Flow* automates the parameterization to the General Amber Force-Field (GAFF), and charge calculation through QM methods, either AM1 with BCC charges or HF/6-31G* with RESP charges. GAFF works great for small, drug-like molecules, but remember it's a **general** force field.
+
+.. tip:: For large screenings we recommend using less accurate BCC charges to prioritize compounds, then migrating to the more time-consuming HF/6-31G* with RESP charges.
+
+.. tip:: To improve accuracy one must carefully parameterize each molecule, search for warnings in the **${molecule}.frcmod** file.
+
+
+Usage
+-----
+To prepare a compound library for file **ligand.mol2**, for the project **myproject**, use the command below. Make sure to choose the appropriate charge model for your project. Refer to the **HPC** topic on how to use in a High Performance Environment.
+
+
+.. code-block:: bash
+
+ LigFlow -l ligand.mol2 -p myproject [--bcc] [--resp]
+
+
+Options
+-------
+The compound file name (.mol2 file) and project name are mandatory, and you're done. Check the advanced options below.
+
+.. code-block:: bash
+
+ [Help]
+ -h/--help : Show this help message and quit
+ -hh/--full-help : Detailed help
+
+ [Required]
+ -p/--project : ChemFlow project.
+ -l/--ligand : Ligands .mol2 input file.
+
+Advanced options
+----------------
+These options let you better control the execution, including charge calculation, and parallel (local) or HPC execution. Refer to the **HPC Run** topic for guidance on how to use High Performance Computers.
+
+
+.. code-block:: bash
+
+ [ Optional ]
+ --gas : Compute Gasteiger-Marsili charges
+ --bcc : Compute bcc charges
+ --resp : Compute resp charges
+
+ [ Parallel execution ]
+ -nc/--cores INT : Number of cores per node [8]
+ --pbs/--slurm : Workload manager, PBS or SLURM
+ --header FILE : Header file provided to run on your cluster.
+
+ [ Development ]
+ --charges-file FILE : Contains the net charges for all ligands in a library.
+ ( name charge ) ( CHEMBL123 -1 )
+
+
+.. note:: RESP charges require a GAUSSIAN 09+ licence.
+
+
+================================================================================
+
+
+Dock\ *Flow*
+============
+
+Dock\ *Flow* covers docking and Virtual High Throughput Screening (vHTS) of compound(s) against a target (receptor) through the so far implemented docking software: Autodock Vina and PLANTS. The vHTS is efficiently distributed on the available computational resources.
+
+
+Docking output files are stored according to the following hierarchy, with file names determined by molecule name.
+
+.. code-block:: bash
+
+ |--${project}.ChemFlow
+ | |--DockFlow
+ | |--${project}/${receptor}/${protocol}/${compound}/ligand.out
+ | |--${project}/${receptor}/${protocol}/${compound}/ligand.pdbqt (VINA)
+ | |--${project}/${receptor}/${protocol}/${compound}/ligand.mol2 (PLANTS)
+
+
+
+Usage
+------
+The user should first curate the compound library (.smi or .mol2) using Lig\ *Flow* then provide that same input file. Dock\ *Flow* only uses the molecule name from this file and gets all structural data from the Lig\ *Flow*-generated library. Refer to **HPC** topic on how to use in a High Performance Environment.
+
+
+.. code-block:: bash
+
+ DockFlow -r receptor.mol2 -l ligand.mol2 -p myproject --center X Y Z [--protocol protocol-name] [-n 10] [-sf chemplp]
+
+.. note:: Make sure to use the same *project* name and *protocol*.
+
+Options
+-------
+Dock\ *Flow* requires the receptor and "ligands" files, together with the center of the binding site.
+
+
+.. code-block:: bash
+
+ [Help]
+ -h/--help : Show this help message and quit
+ -hh/--fullhelp : Detailed help
+
+ [ Required ]
+ -p/--project STR : ChemFlow project
+ -r/--receptor FILE : Receptor MOL2 file
+ -l/--ligand FILE : Ligands MOL2 file
+ --center X Y Z : Binding site coordinates (space separated)
+
+Advanced options
+----------------
+These options let you better control the execution, including the scoring function and specific parameters for each implemented docking software. In addition, it has options to control the parallel (local) or HPC execution. Refer to the **HPC Run** topic for guidance on how to use High Performance Computers.
+
+.. code-block:: bash
+
+ [ Post Processing ]
+ --postprocess : Process DockFlow output for the specified
+ project/protocol/receptor.
+ --postprocess-all : Process all DockFlow outputs in a ChemFlow project.
+ -n/--n-poses INT : Number of docked poses to keep.
+ --archive : Compress the docking folder for a project/protocol/receptor.
+ --archive-all : Compress all docking folders in a ChemFLow project.
+
+ [ Optional ]
+ --protocol STR : Name for this specific protocol [default]
+ -n/--n-poses INT : Maximum number docking of poses per ligand [10]
+ -sf/--function STR : vina, chemplp, plp, plp95 [chemplp]
+
+ [ Parallel execution ]
+ -nc/--cores INT : Number of cores per node [${NCORES}]
+ --pbs/--slurm : Workload manager, PBS or SLURM
+ --header FILE : Header file provided to run on your cluster.
+
+ [ Additional ]
+ --overwrite : Overwrite results
+ --yes : Yes to all questions
+ _________________________________________________________________________________
+ [ Options for docking program ]
+
+ [ PLANTS ]
+ --radius FLOAT : Radius of the spheric binding site [15]
+ --speed INT : Search speed for Plants. 1, 2 or 4 [1]
+ --ants INT : Number of ants [20]
+ --evap_rate FLOAT : Evaporation rate of pheromones [0.15]
+ --iter_scaling FLOAT : Iteration scaling factor [1.0]
+ --cluster_rmsd FLOAT : RMSD similarity threshold between poses, in Å [2.0]
+ --water FILE : Path to a structural water molecule (.mol2)
+ --water_xyzr LIST : xyz coordinates and radius of the water sphere, separated by a space
+ _________________________________________________________________________________
+ [ Vina ]
+ --size LIST : Size of the grid along the x, y and z axis, separated by a space [15 15 15]
+ --exhaustiveness INT : Exhaustiveness of the global search [8]
+ --energy_range FLOAT : Max energy difference (kcal/mol) between the best and worst poses [3.00]
+ _________________________________________________________________________________
+
+
+Options to Postprocess and Archive
+----------------------------------
+
+Docking produces a number of poses and their associated energies, but each software does it their own way. --postprocess[-all] standardizes the output to two files: docked_ligands.mol2 and DockFlow.csv.
+
+.. code-block:: bash
+
+ # Directory structure
+ |--${project}.ChemFlow
+ | |--DockFlow
+ | |--${project}/${receptor}/${protocol}/docked_ligands.mol2
+ | |--${project}/${receptor}/${protocol}/DockFlow.csv
+
+
+
+================================================================================
+
+
+Score\ *Flow*
+=============
+Score\ *Flow* handles **rescoring** of molecular complexes such as protein-ligand systems using empirical or physics-based scoring functions in a High Throughput fashion. Computation is efficiently distributed on the available computational resources. Score\ *Flow* can resume calculations.
+
+**Empirical scoring functions** are the same as implemented in docking, Autodock Vina and PLANTS. While scoring, a local search and/or optimization is performed before producing the final score.
+
+**Physics-based scoring functions** can be currently obtained through MM/PBSA methods. The user can choose between Poisson-Boltzmann (PB) and Generalized Born solvation (GB) models and their parameters. Also, the user can perform different protocols, from a simple system **optimization** up to a full **molecular dynamics simulation** of the complex, also choosing to do it in implicit or explicit solvent. AmberTools16+ is the default simulation engine, and users can profit from GPUs with an Amber16+ licence.
+
+.. note:: Future implementations will address Machine Learning routines VinaRF and DLSCORE.
+
+Preparing the receptor:
+ While preparing the receptor one should carefully inspect the structure for missing atoms or residues and assign the proper protonation states for the sidechains. One should complete the missing atoms and decide to model the missing parts (normally flexible loops) or neutralize the terminals. Finally one should save the PDB following the amber PDB naming scheme.
+
+.. tip:: Use UCSF Chimera (https://www.cgl.ucsf.edu/chimera/) and its interfaces to PDB2PQR/PROPKA and Modeller.
+
+Preparing the ligand(s):
+ One should first curate the ligand library (.mol2) using Lig\ *Flow* then provide that same input file. Score\ *Flow* only uses the molecule name from this file and gets all structural data from the Lig\ *Flow*-generated library. MM/(PB,GB)SA requires either AM1-BCC or RESP charges for accuracy, make sure to prepare your Lig\ *Flow* library accordingly (--bcc or --resp).
+
+Usage:
+------
+Score\ *Flow* requires a **receptor** in PDB format and a **ligand** in MOL2. Score\ *Flow* creates a project folder named 'myproject'.chemflow/ScoreFlow. Refer to **HPC** topic on how to use in a High Performance Environment.
+
+.. code-block:: bash
+
+ # For VINA and PLANTS scoring functions:
+ ScoreFlow -r receptor.mol2 -l ligand.mol2 -p myproject --center X Y Z [--protocol protocol-name] [-sf vina]
+
+ # For MMGBSA only
+ ScoreFlow -r receptor.pdb -l ligand.mol2 -p myproject [--protocol protocol-name] -sf mmgbsa
+
+Post-Processing
+---------------
+
+Rescoring produces new energies for each complex in their own folder for each protocol. Also, each software (Vina/PLANTS/AMBER) does it their own way. --postprocess standardizes the output to a single file: **ScoreFlow.csv**.
+
+.. code-block:: bash
+
+ # Usage:
+ ScoreFlow -r receptor.pdb -l ligand.mol2 -p myproject [--protocol protocol-name] -sf mmgbsa --postprocess
+
+ # Directory structure:
+ |--${project}.chemflow
+ | |--ScoreFlow
+ | |--${project}/${receptor}/${protocol}/ScoreFlow.csv
+
+
+Options
+-------
+Score\ *Flow* requires the receptor and ligand files. In addition, when using Autodock Vina or PLANTS to **rescore**, one must provide the center of the binding site.
+
+.. code-block:: bash
+
+ [Help]
+ -h/--help : Show this help message and quit
+ -hh/--fullhelp : Detailed help
+
+ [Required]
+ -r/--receptor : Receptor .mol2 or .pdb file.
+ -l/--ligand : Ligands .mol2 input file.
+ -p/--project : ChemFlow project.
+
+Advanced Options
+----------------
+These options let you better control the execution, including the scoring function and specific parameters for each implemented software. In addition, there are options to control the parallel (local) or HPC execution. Refer to the **HPC Run** topic for guidance on how to use High Performance Computers.
+
+.. code-block:: bash
+
+ [ Required ]
+ -p/--project STR : ChemFlow project
+ -r/--receptor FILE : Receptor MOL2 file
+ -l/--ligand FILE : Ligands MOL2 file
+
+ [ Optional ]
+ --protocol STR : Name for this specific protocol [default]
+ -sf/--function STR : vina, chemplp, plp, plp95, mmgbsa, mmpbsa [chemplp]
+
+ [ Charges for ligands - MMGBSA ]
+ --gas : Gasteiger-Marsili (default)
+ --bcc : AM1-BCC charges
+ --resp : RESP charges (require gaussian)
+
+ [ Simulation - MMGBSA ]
+ --maxcyc INT : Maximum number of energy minimization steps for implicit solvent simulations [1000]
+ --water : Explicit solvent simulation
+ --md : Molecular dynamics
+
+ [ Parallel execution - MMGBSA ]
+ -nc/--cores INT : Number of cores per node [${NCORES}]
+ --pbs/--slurm : Workload manager, PBS or SLURM
+ --header FILE : Header file provided to run on your cluster.
+ --write-only : Write a template file (ScoreFlow.run.template) command without running.
+ --run-only : Run using the ScoreFlow.run.template file.
+
+ [ Additional ]
+ --overwrite : Overwrite results
+
+ [ Rescoring with vina or plants ]
+
+ --center STR : xyz coordinates of the center of the binding site, separated by a space
+
+ [ PLANTS ]
+ --radius FLOAT : Radius of the spheric binding site [15]
+
+ [ Vina ]
+ --size LIST : Size of the grid along the x, y and z axis, separated by a space [15 15 15]
+ --vina-mode STR : local_only (local search then score) or score_only [local_only]
+
+ [ Post Processing ]
+ --postprocess : Process ScoreFlow output for the specified project/protocol/receptor.
+
+
+.. note:: You can automatically get the center and radius/size
+ for a particular ligand .mol2 file using the bounding_shape.py script
+
+.. tip:: Score\ *Flow* automatically resumes incomplete calculations. To overwrite just use the flag **\\-\\-overwrite**
+
+
+Advanced Use
+------------
+One may want to further customize the rescoring, or advanced features of the system preparation such as addition of non-standard residues or co-factors to the receptor. With that in mind we implemented the flag **\\-\\-write-only**. After the modifications, rerun ScoreFlow using **\\-\\-run-only**.
+
+All input files will be written in the following scheme:
+
+**PROJECT**.chemflow/ScoreFlow/**PROTOCOL**/**receptor**/
+
+For instance when using MM/PBSA family of methods, features of AmberTools16+/Amber (tleap, sander/pmemd, cpptraj, MMPBSA.py) may be adjusted.
+
+System Setup
+ One can customize the system setup (**tleap.in**) inside a job, modify the default ions, add co-factors, change the waterbox shape etc.
+
+Simulation protocol
+ The procedures for each protocol can also be modified, the user must review "ScoreFlow.run.template".
+ This includes simulation details such as length, cut-offs, thermostat and barostat.
+
+One may also choose to directly modify the templates at e.g.:
+
+.. code-block:: bash
+
+ ${CHEMFLOW_HOME}/ChemFlow/templates/mmgbsa/
+ .
+ ├── explicit
+ │ ├── heat_npt.template
+ │ ├── heat_nvt.template
+ │ ├── min1.template
+ │ ├── min2.template
+ │ ├── min3.template
+ │ ├── min4.template
+ │ └── prod.template
+ ├── implicit
+ │ ├── md.template
+ │ └── min.template
+ ├── job_scheduller
+ │ ├── pbs.template
+ │ └── slurm.template
+ ├── mmgbsa.template
+ ├── mmpbsa.template
+ └── tleap
+ ├── tleap_explicit.template
+ └── tleap_implicit.template
+
diff --git a/docs/overview.rst b/docs/overview.rst
new file mode 100644
index 0000000..d12065c
--- /dev/null
+++ b/docs/overview.rst
@@ -0,0 +1,60 @@
+.. highlight:: bash
+
+========
+Overview
+========
+Chem\ *Flow* is a computational software composed of a series of tools within the domain of computational chemistry, and subdomain drug discovery and development.
+It was designed to contain simple and integrated workflows to perform common protocols in the early stages of drug discovery and protein design.
+
+Chem\ *Flow* is a modular platform for computational chemistry workflows using high performance environments.
+The workflows address common **computational chemistry** tasks and are named with a prefix followed by *Flow*, Dock\ *Flow*, Score\ *Flow* and Lig\ *Flow*.
+
+Chem\ *Flow* contains several features that set it apart from competition:
+
+#. Ready to use protocols for drug discovery and drug repurposing.
+
+#. Facilitated usage of high performance computing (HPC) resources.
+
+#. Checkpointing, resuming of calculations, error reporting.
+
+#. Report facility for each protocol.
+#. A database. (Chem\ **Base**)
+#. It's mostly written in BASH script! There's no space for fancy Python mumbo-jumbo.
+
+Why Chem\ *Flow*?
+=================
+
+During a virtual High Throughput computational chemistry study we were confronted with unintelligible data from a previous collaborator and the challenge to produce our own. This is actually a very common scenario we've been confronted with our whole careers.
+
+.. warning:: Everyone does things their own way !
+
+We found it would be valuable to do just like proprietary tools and **standardize** the directory structure and file naming for projects. Standardization facilitates documentation, comprehension, data retention and reproducibility, therefore future users or applications will not have to figure out this structure or migrate to new formats.
+
+
+A standardization effort
+------------------------
+Before being a software, Chem\ *Flow* is an initiative to fill a gap in the field by developing an open standard for execution and curation of computational chemistry data, to enable access to precomputed data and facilitate method development, validation and comparison.
+
+Implementation
+--------------
+
+Chem\ *Flow* was designed as a modular tool based on plain Bourne-again Shell (BASH) script, a ubiquitous environment and programming language in every UNIX environment.
+
+.. note:: BASH is the **default shell** for the most popular Linux distributions, and for MacOS.
+
+.. tip:: If you've got to learn a scripting language, go for BASH: it is easy and powerful.
+
+Middleware design
+-----------------
+The code integrates freely available softwares for structure manipulation (RDKIT and openbabel), molecular docking (PLANTS and Autodock Vina), molecular dynamics (MD) simulation, binding free energy calculations (AmberTools), and structure calculation (SQM). Chem\ *Flow* also contains optional routines for proprietary software Amber18 and Gaussian 09.
+
+As simple as possible
+---------------------
+
+By design, the goal was to make Chem\ *Flow* as simple as possible to require minimal installation and promote extension. The code is containerized and modular to allow methods to be applied only in a well-defined way which is **traceable and reproducible** following the ISO/IEC 17025 guidelines for assays. Chem\ *Flow* ships with protocols for common experiments and curated benchmarks to assess the performance of methods.
+
+How ChemFlow was born
+---------------------
+During a virtual screening / molecular dynamics study we were confronted with unintelligible data from a previous collaborator and the challenge to produce our own. In fact that's actually a very common scenario, everyone “\ *does their things their own way*”.
+
+We thought it would be valuable if we standardize the directory structure and file naming. The whole point of a standard is to facilitate documentation, comprehension, data retention and reproducibility, so that future users or applications will not have to figure out this structure or migrate to new formats.
diff --git a/docs/readme.rst b/docs/readme.rst
index 72a3355..73f2b57 100644
--- a/docs/readme.rst
+++ b/docs/readme.rst
@@ -1 +1,3 @@
+.. highlight:: bash
+
.. include:: ../README.rst
diff --git a/docs/usage.rst b/docs/usage.rst
index 342d421..f63d0e9 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -1,3 +1,5 @@
+.. highlight:: bash
+
=====
Usage
=====
diff --git a/docs/workflows.rst b/docs/workflows.rst
new file mode 100644
index 0000000..bd1043f
--- /dev/null
+++ b/docs/workflows.rst
@@ -0,0 +1,57 @@
+.. highlight:: bash
+
+=========
+Workflows
+=========
+Chem\ *Flow* workflows address common computational chemistry tasks and are named with a prefix followed by "*Flow*", they are: Dock\ *Flow*, Score\ *Flow*, and Lig\ *Flow*. Additional *Flows* will be released soon: MD\ *Flow*, HG\ *Flow* and Entropy\ *Flow* (from Pereira, G.); stay tuned for updates!
+
+.. hint:: The work\ *Flows* let you harness the power of your multicore machine or HPC resource.
+
+Dock\ *Flow*
+=============
+Dock\ *Flow* covers docking and virtual screening of one or multiple compounds against single or multiple targets, using the docking software implemented so far: Autodock Vina and PLANTS.
+
+Score\ *Flow*
+=============
+Score\ *Flow* handles the (re)scoring of complexes (such as docking poses), either with empirical (VinaSF, ChemPLP) or physics-based functions (MM/PBSA, MM/GBSA) with or without MD, implicit/explicit solvent.
+
+Lig\ *Flow*
+=============
+Lig\ *Flow* handles the curation of compound libraries, stored as SMILES or MOL2 files, automating 3D conformer generation, compound parameterization and charge calculation. It also probes the Chem\ *Base* to avoid redundancy.
+
+
+Handles small molecule conversions, conformer search and compound parametrization through assignment to the General Amber Force-Field (GAFF2) and charge calculation through QM methods. It also probes the Chem\ **Base** to avoid redundancy.
+
+Chem\ **Base**
+==============
+Chem\ **Base** is the Chem\ *Flow* database for pre-calculated molecules, so far it spans nearly 9000 drug-like compounds from the *Chimioteque Nationale du CNRS* with QM optimized geometries and assigned parameters for GAFF.
+
+
+MD\ *Flow* (unreleased)
+=======================
+Handles molecular dynamics simulation protocols and analysis.
+
+HG\ *Flow* (unreleased)
+=======================
+A full fledged workflow designed for Host-Guest systems such as molecular cages.
+
+.. hint:: Dock\ *Flow* and Score\ *Flow* were implemented to comply with docking/scoring benchmarks. One can test new search and scoring functions and directly compare with the competition (within Report\ *Flow*).
+
+.. Note:: Chem\ **Base** is the Chem\ *Flow* database for pre-calculated molecules, so far it contains nearly 9000 drug-like compounds from the “Chimioteque Nationale du CNRS” with HF 6-31G* QM optimized geometries and assigned parameters for GAFF2. Access requires proof of "Chimioteque" licence.
+
+
+
+Workflows (extended)
+====================
+Chem\ *Flow* is a modular platform for computational chemistry workflows using high performance environments.
+The workflows address common **computational chemistry** tasks and are named with a prefix followed by *Flow*, Dock\ *Flow*, Score\ *Flow* and Lig\ *Flow*.
+
+* Dock\ *Flow* covers docking and virtual screening of compound(s) against some single or multiple targets, with one, some or all of the implemented docking software (so far Autodock Vina and PLANTS).
+
+* Score\ *Flow* on the other hand handles (re)scoring of (top) docking poses which is more expensive, Structural Interaction fingerprints (using IChem), VinaSF, ChemPLP, PLP, MM/GBSA (with or without MD, implicit/explicit solvent). Both DockFlow and ScoreFlow are implemented to comply with docking/scoring benchmarks so that user can test new search and scoring functions and directly compare with the competition (within ReportFlow).
+
+* Lig\ *Flow* handles conversions (such as smiles to mol2), conformer search and compound parameterization through assignment to the General Amber Force-Field (GAFF2) and charge calculation through QM methods. It also probes the ChemBase to avoid redundancy.
+
+* Chem\ **Base** is the Chem\ *Flow* database for pre-calculated molecules, so far it contains nearly 9000 drug-like compounds from the “Chimioteque Nationale du CNRS” with QM optimized geometries and assigned parameters for GAFF.
+
+* MD\ *Flow* (not active) handles molecular dynamics simulation protocols and analysis, and HG\ *Flow* is an application-specific workflow designed for Host-Guest systems.
diff --git a/install.sh b/install.sh
index 3ea728b..47d5eb3 100755
--- a/install.sh
+++ b/install.sh
@@ -1,210 +1,31 @@
-#!/bin/bash
-###############################################################################
-## ChemFlow - Computational Chemistry is Great Again
-##
-## Description:
-## Install ChemFlow, set necessary variables, check for missing dependencies
-##
-## Author:
-## cbouy - Cedric Bouysset - cbouysset@unice.fr
-##
-###############################################################################
+# Chemflow installation.
-RELEASE="v0.6-beta"
-GUI_NAME="chemflow"
+cat << 'EOF'
+# Please modify the following paths and add them to your .bashrc
-# ChemFlow installation script
+# ChemFlow
+export CHEMFLOW_HOME=~/software/ChemFlow/ChemFlow/
+export PATH=${PATH}:${CHEMFLOW_HOME}/bin/
-not_on_path(){
- echo "[ $1 ] $2 is not on your PATH"
- if [ "$1" == "WARNING" ]; then
- let warning_count+=1
- else
- let error_count+=1
- fi
-}
+# MGLTools
+export PATH="${PATH}:~/software/mgltools_x86_64Linux2_1.5.6/bin/"
+export PATH="${PATH}:~/software/mgltools_x86_64Linux2_1.5.6/MGLToolsPckgs/AutoDockTools/Utilities24/"
-_install(){
- if [ -z "$1" ]; then
- echo "Installing ChemFlow in $DESTINATION..."
- # Move files if necessary
- if [ "$DESTINATION" != $(abspath "$PWD") ]; then
- echo "Copying files from $PWD to $DESTINATION/"
- cp -r "$PWD" "$DESTINATION/"
- COPY=1
- fi
- # Create environment variable and add to .bashrc
- CHEMFLOW_HOME="$DESTINATION/ChemFlow"
- echo -e "\n# ChemFlow" >> ~/.bashrc
- echo "export CHEMFLOW_HOME=\"$CHEMFLOW_HOME\"" >> ~/.bashrc
- echo "export PATH=\$PATH:\$CHEMFLOW_HOME/bin" >> ~/.bashrc
- echo "ChemFlow successfully installed !"
- else
- echo "ChemFlow would be installed in $DESTINATION"
- if [ "$DESTINATION" != $(abspath "$PWD") ]; then
- echo "Would copy files from $PWD to $DESTINATION/"
- COPY=1
- fi
- fi
-}
+# Autodock Vina
+export PATH="${PATH}:~/software/autodock_vina_1_1_2_linux_x86/bin/"
-_update(){
- echo "ChemFlow has already been installed on your system."
- if [ -z "$1" ]; then
- echo "Updating installation to $DESTINATION..."
- # Move files if necessary
- if [ "$DESTINATION" != $(abspath "$PWD") ]; then
- echo "Copying files from $PWD to $DESTINATION/"
- cp -r "$PWD" "$DESTINATION/"
- COPY=1
- fi
- # Backup
- cp ~/.bashrc ~/.bashrc.bak
- # Replace with new path (not using sed -i because it's not available on all sed versions)
- CHEMFLOW_HOME="$DESTINATION/ChemFlow"
- sed -e 's?export CHEMFLOW_HOME=".*"?export CHEMFLOW_HOME="'$DESTINATION'\/ChemFlow"?g' ~/.bashrc > ~/.bashrc.new && mv ~/.bashrc.new ~/.bashrc
- echo "Update successful"
- else
- echo "ChemFlow would be updated to $DESTINATION"
- if [ "$DESTINATION" != $(abspath "$PWD") ]; then
- echo "Would copy files from $PWD to $DESTINATION/"
- COPY=1
- fi
- fi
-}
+# PLANTS
+export PATH="${PATH}:~/software/PLANTS/"
-_install_gui(){
- if [ -z "$1" ]; then
- echo "Installing GUI from release $RELEASE"
- #TODO: uncomment when repository made public
- #wget -P /tmp/ https://github.com/IFMlab/ChemFlow/releases/download/${RELEASE}/${GUI_NAME}
- #mv /tmp/${GUI_NAME} ${CHEMFLOW_HOME}/bin/
- else
- echo "Would download GUI from release $RELEASE"
- fi
-}
+# Optional (paid software)
-_check(){
- # Check if programs are on PATH
- echo "Checking softwares available on your PATH..."
- source ~/.bashrc
+# Amber18 (Ambertools19 and Amber18)
+source ~/software/amber18/amber.sh
- ## Core tools
- if [ -z "$(command -v perl)" ] ; then not_on_path ERROR Perl ; fi
+# Gaussian 09
+export g09root=~/software/
+export GAUSS_SCRDIR=/tmp
+source $g09root/g09/bsd/g09.profile
- ## Python modules
- _pyv=($(python -V 2>&1))
- if [ "$(echo ${_pyv[1]} | cut -d. -f1)" -lt 3 ]; then
- echo "[ ERROR ] Python 3 is not your current Python version"
- let error_count+=1
- fi
- python -c 'import pandas' 2>/dev/null
- if [ "$?" -eq 1 ]; then
- echo "[ ERROR ] Pandas is not installed in your current Python environment"
- let error_count+=1
- fi
- python -c 'import rdkit' 2>/dev/null
- if [ "$?" -eq 1 ]; then
- echo "[ ERROR ] RDKit is not installed in your current Python environment"
- let error_count+=1
- fi
- ## Softwares
- if [ -z "$(command -v babel)" ] ; then not_on_path ERROR OpenBabel ; fi
- if [ -z "$mgltools_folder" ]; then
- mgltools_folder=$(find /home /bin /opt /soft* -type d -name 'MGLToolsPckgs' 2>/dev/null | sed 's/\/MGLToolsPckgs//' | head -1)
- if [ -z "$mgltools_folder" ]; then
- echo "[ WARNING ] MGLTools could not be found. Please install it and run this script if you plan on using Vina."
- let warning_count+=1
- else
- if [ -z "$1" ]; then
- # Add to .bashrc
- echo "# MGLTools for ChemFlow" >> ~/.bashrc
- echo "export mgltools_folder=$mgltools_folder" >> ~/.bashrc
- else
- echo "Would set MGLTools directory to $mgltools_folder"
- fi
- fi
- fi
- if [ -z "$(command -v PLANTS1.2_64bit)" ]; then not_on_path WARNING PLANTS; fi
- if [ -z "$(command -v vina)" ]; then not_on_path WARNING Vina; fi
- if [ -z "$(command -v sander)" ] ; then not_on_path WARNING AmberTools; fi
- if [ -z "$(command -v g09)" ] ; then not_on_path WARNING Gaussian09; fi
- if [ -z "$(command -v IChem)" ] ; then not_on_path WARNING IChem; fi
-
- # ChemFlow
- if [ ! -x "$CHEMFLOW_HOME/bin/DockFlow" ]; then
- echo "[ ERROR ] Binaries in $CHEMFLOW_HOME are not executable"
- let error_count+=1
- fi
-}
-
-_help(){
-echo "\
-Usage: $0
- -h|--help : show this help message and quit
- -d|--destination STR : install ChemFlow at the specified destination
- --gui : install GUI from release $RELEASE (not working yet)
- --debug : only verify the installation, don't do anything
-"
-}
-
-# CLI
-while [[ $# -gt 0 ]]; do
- key="$1"
- case ${key} in
- "-h"|"--help")
- _help
- exit 0
- ;;
- "--gui")
- GUI=1
- ;;
- "-d"|"--destination")
- if [ -w "$2" ] && [ -d "$2" ] && [ ! -f "$2/ChemFlow" ]; then
- DESTINATION="$2"
- shift
- else
- echo "$2 is not writable, is not a directory, or already exists"
- exit 1
- fi
- ;;
- "--debug")
- echo "DEBUG mode activated"
- DEBUG=1
- ;;
- *)
- echo "Unknown flag \"$1\""
- exit 1
- ;;
- esac
- shift
-done
-
-# Main
-
-warning_count=0
-error_count=0
-cd $(dirname $0)
-source ~/.bashrc
-source ./ChemFlow/src/ChemFlow_functions.bash
-
-if [ -z "$DESTINATION" ]; then DESTINATION="$PWD"; fi
-DESTINATION=$(abspath $DESTINATION)
-if [ -z "$CHEMFLOW_HOME" ]
-then
- _install $DEBUG
-else
- _update $DEBUG
-fi
-if [ ! -z "$GUI" ]; then
- _install_gui $DEBUG
-fi
-_check $DEBUG
-
-echo "Installation finished with $error_count error(s) and $warning_count warning(s)."
-if [ ! -z "$COPY" ]; then
- echo "ChemFlow was installed in $DESTINATION. You can safely remove the directory $PWD"
-fi
-echo "Don't forget to run the following command to use ChemFlow right away:"
-echo "source ~/.bashrc"
+EOF
diff --git a/install_old.sh b/install_old.sh
new file mode 100755
index 0000000..12313ae
--- /dev/null
+++ b/install_old.sh
@@ -0,0 +1,210 @@
+#!/bin/bash
+###############################################################################
+## ChemFlow - Computational Chemistry is Great Again
+##
+## Description:
+## Install ChemFlow, set necessary variables, check for missing dependencies
+##
+## Author:
+## cbouy - Cedric Bouysset - cbouysset@unice.fr
+##
+###############################################################################
+
+RELEASE="v0.7-beta"
+GUI_NAME="chemflow"
+
+# ChemFlow installation script
+
+not_on_path(){
+ echo "[ $1 ] $2 is not on your PATH"
+ if [ "$1" == "WARNING" ]; then
+ let warning_count+=1
+ else
+ let error_count+=1
+ fi
+}
+
+_install(){
+ if [ -z "$1" ]; then
+ echo "Installing ChemFlow in $DESTINATION..."
+ # Move files if necessary
+ if [ "$DESTINATION" != $(abspath "$PWD") ]; then
+ echo "Copying files from $PWD to $DESTINATION/"
+ cp -r "$PWD" "$DESTINATION/"
+ COPY=1
+ fi
+ # Create environment variable and add to .bashrc
+ CHEMFLOW_HOME="$DESTINATION/ChemFlow"
+ echo -e "\n# ChemFlow" >> ~/.bashrc
+ echo "export CHEMFLOW_HOME=\"$CHEMFLOW_HOME\"" >> ~/.bashrc
+ echo "export PATH=\$PATH:\$CHEMFLOW_HOME/bin" >> ~/.bashrc
+ echo "ChemFlow successfully installed !"
+ else
+ echo "ChemFlow would be installed in $DESTINATION"
+ CHEMFLOW_HOME="$DESTINATION/ChemFlow"
+ if [ "$DESTINATION" != $(abspath "$PWD") ]; then
+ echo "Would copy files from $PWD to $DESTINATION/"
+ COPY=1
+ fi
+ fi
+}
+
+_update(){
+ echo "ChemFlow has already been installed on your system."
+ if [ -z "$1" ]; then
+ echo "Updating installation to $DESTINATION..."
+ # Move files if necessary
+ if [ "$DESTINATION" != $(abspath "$PWD") ]; then
+ echo "Copying files from $PWD to $DESTINATION/"
+ cp -r "$PWD" "$DESTINATION/"
+ COPY=1
+ fi
+ # Backup
+ cp ~/.bashrc ~/.bashrc.bak
+ # Replace with new path (not using sed -i because it's not available on all sed versions)
+ CHEMFLOW_HOME="$DESTINATION/ChemFlow"
+ sed -e 's?export CHEMFLOW_HOME=".*"?export CHEMFLOW_HOME="'$DESTINATION'\/ChemFlow"?g' ~/.bashrc > ~/.bashrc.new && mv ~/.bashrc.new ~/.bashrc
+ echo "Update successful"
+ else
+ echo "ChemFlow would be updated to $DESTINATION"
+ CHEMFLOW_HOME="$DESTINATION/ChemFlow"
+ if [ "$DESTINATION" != $(abspath "$PWD") ]; then
+ echo "Would copy files from $PWD to $DESTINATION/"
+ COPY=1
+ fi
+ fi
+}
+
+_install_gui(){
+ if [ -z "$1" ]; then
+ echo "Installing GUI from release $RELEASE"
+ wget -P ${CHEMFLOW_HOME}/bin/ https://github.com/IFMlab/ChemFlow/releases/download/${RELEASE}/${GUI_NAME}
+ else
+ echo "Would download GUI from release $RELEASE"
+ fi
+}
+
+_check(){
+ # Check if programs are on PATH
+ echo "Checking softwares available on your PATH..."
+ source ~/.bashrc
+
+ ## Core tools
+ if [ -z "$(command -v perl)" ] ; then not_on_path ERROR Perl ; fi
+
+ ## Python modules
+ _pyv=($(python -V 2>&1))
+ if [ "$(echo ${_pyv[1]} | cut -d. -f1)" -lt 3 ]; then
+ echo "[ ERROR ] Python 3 is not your current Python version"
+ let error_count+=1
+ fi
+ python -c 'import pandas' 2>/dev/null
+ if [ "$?" -eq 1 ]; then
+ echo "[ ERROR ] Pandas is not installed in your current Python environment"
+ let error_count+=1
+ fi
+ python -c 'import rdkit' 2>/dev/null
+ if [ "$?" -eq 1 ]; then
+ echo "[ ERROR ] RDKit is not installed in your current Python environment"
+ let error_count+=1
+ fi
+
+ ## Softwares
+ if [ -z "$(command -v babel)" ] ; then not_on_path ERROR OpenBabel ; fi
+ if [ -z "$mgltools_folder" ]; then
+ mgltools_folder=$(find /home /bin /opt /soft* -type d -name 'MGLToolsPckgs' 2>/dev/null | sed 's/\/MGLToolsPckgs//' | head -1)
+ if [ -z "$mgltools_folder" ]; then
+ echo "[ WARNING ] MGLTools could not be found. Please install it and run this script if you plan on using Vina."
+ let warning_count+=1
+ else
+ if [ -z "$1" ]; then
+ # Add to .bashrc
+ echo "# MGLTools for ChemFlow" >> ~/.bashrc
+ echo "export mgltools_folder=$mgltools_folder" >> ~/.bashrc
+ else
+ echo "Would set MGLTools directory to $mgltools_folder"
+ fi
+ fi
+ fi
+ if [ -z "$(command -v PLANTS1.2_64bit)" ]; then not_on_path WARNING PLANTS; fi
+ if [ -z "$(command -v vina)" ]; then not_on_path WARNING Vina; fi
+ if [ -z "$(command -v sander)" ] ; then not_on_path WARNING AmberTools; fi
+ if [ -z "$(command -v g09)" ] ; then not_on_path WARNING Gaussian09; fi
+ if [ -z "$(command -v IChem)" ] ; then not_on_path WARNING IChem; fi
+
+ # ChemFlow
+ if [ ! -x "$CHEMFLOW_HOME/bin/DockFlow" ]; then
+ echo "[ ERROR ] Binaries in $CHEMFLOW_HOME are not executable"
+ let error_count+=1
+ fi
+}
+
+_help(){
+echo "\
+Usage: $0
+ -h|--help : show this help message and quit
+ -d|--destination STR : install ChemFlow at the specified destination
+ --gui : install GUI from release $RELEASE
+ --debug : only verify the installation, don't do anything
+"
+}
+
+# CLI
+while [[ $# -gt 0 ]]; do
+ key="$1"
+ case ${key} in
+ "-h"|"--help")
+ _help
+ exit 0
+ ;;
+ "--gui")
+ GUI=1
+ ;;
+ "-d"|"--destination")
+ if [ -w "$2" ] && [ -d "$2" ] && [ ! -f "$2/ChemFlow" ]; then
+ DESTINATION="$2"
+ shift
+ else
+ echo "$2 is not writable, is not a directory, or already exists"
+ exit 1
+ fi
+ ;;
+ "--debug")
+ echo "DEBUG mode activated"
+ DEBUG=1
+ ;;
+ *)
+ echo "Unknown flag \"$1\""
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+# Main
+
+warning_count=0
+error_count=0
+cd $(dirname $0)
+source ~/.bashrc
+source ./ChemFlow/src/ChemFlow_functions.bash
+
+if [ -z "$DESTINATION" ]; then DESTINATION="$PWD"; fi
+DESTINATION=$(abspath $DESTINATION)
+if [ -z "$CHEMFLOW_HOME" ]
+then
+ _install $DEBUG
+else
+ _update $DEBUG
+fi
+if [ ! -z "$GUI" ]; then
+ _install_gui $DEBUG
+fi
+_check $DEBUG
+
+echo "Installation finished with $error_count error(s) and $warning_count warning(s)."
+if [ ! -z "$COPY" ]; then
+ echo "ChemFlow was installed in $DESTINATION. You can safely remove the directory $PWD"
+fi
+echo "Don't forget to run the following command to use ChemFlow right away:"
+echo "source ~/.bashrc"
diff --git a/tutorial/TUTORIAL.rst b/tutorial/TUTORIAL.rst
index 16f8065..d477c51 100644
--- a/tutorial/TUTORIAL.rst
+++ b/tutorial/TUTORIAL.rst
@@ -48,7 +48,13 @@ First for the b1-b7 from an undisclosed article (b1 = 1DWC crystal. b2-7 = Build
.. code-block:: bash
- python $(which SmilesTo3D.py) -i ligands.smi -o ligands.sdf --hydrogen -v
+ # Make sure you activate your ChemFlow environment
+ conda activate chemflow
+
+ # Convert SMILES into 3D SDF structures.
+ SmilesTo3D.py -i ligands.smi -o ligands.sdf -v
+
+ # Then into .MOL2 files.
babel -isdf ligands.sdf -omol2 ligands.mol2
The second set, with ligands from crystal structures, we also have the affinities.
@@ -56,14 +62,18 @@ We superimposed 1DWC 1DWB 1DWD 1D3D 1D3P 1D3Q 1D3T (1DWC as reference) and saved
Hydrogens were added using SPORES (from PLANTS). (SPORES_64bit \\-\\-mode complete)
Now the Decoys from `DUD-E database `_.
+
Download, and get the first 14.
+
wget http://dude.docking.org/targets/thrb/decoys_final.ism
+
head -n 14 decoys_final.ism > decoys.smi
+
[ WARNING ] On DUD-E the "field separator" is a SPACE instead of "\t", so you MUST specify it in SmilesTo3D.py.
.. code-block:: bash
- python $(which SmilesTo3D.py) -i decoys.smi -o decoys.sdf --hydrogen -v -d " "
+ SmilesTo3D.py -i decoys.smi -o decoys.sdf -v -d " "
babel -isdf decoys.sdf -omol2 decoys.mol2
To keep it simple, let's merge all compounds into a single mol2 file.
@@ -84,15 +94,17 @@ To perform this action run:
LigFlow -p tutorial -l compounds.mol2
-In addition Lig\ *Flow* can be used to build up a compound database with **advanced** charges such as AM1-BCC and RESP and their associated
-optimized structures, we'll see it's use latter to compute appropriate charges for the free energy calculations.
+In addition Lig\ *Flow* can be used to build up a compound database with **advanced** charges such as AM1-BCC (--bcc) and RESP (--resp) and their associated optimized structures, we'll see its use later to compute appropriate charges for the free energy calculations.
+
+
Since these calculations are computationally expensive we recomend the users to use a cluster/supercomputer. In the examples bellow
-we demonstrate how to derive the AM1-BCC and RESP charges using the two most widespread queueing systems in supercomputers (PBS and SLURM)
+we demonstrate how to derive the AM1-BCC and RESP charges using the two most widespread queueing systems in supercomputers (using --pbs for PBS or --slurm for SLURM).
+For now, let's just generate **AM1-BCC** charges.
.. code-block:: bash
- LigFlow -p tutorial -l compounds.mol2 --bcc --pbs
- LigFlow -p tutorial -l compounds.mol2 --resp --slurm
+ LigFlow -p tutorial -l compounds.mol2 --bcc
+
If a compound already exists in the ChemBase (Chem\ *Flow* database), Lig\ *Flow* won't compute the charges for this compound.
@@ -112,13 +124,13 @@ For PLANTS it's enough to have only the center.
.. code-block:: bash
- python $CHEMFLOW_HOME/bin/bounding_shape.py reference_ligand.mol2 --shape sphere --padding 8.0
+ bounding_shape.py reference_ligand.mol2 --shape sphere --padding 8.0
For VINA you need the center AND the lenghts of X, Y and Z.
.. code-block:: bash
- python $CHEMFLOW_HOME/bin/bounding_shape.py reference_ligand.mol2 --shape box --padding 8.0
+ bounding_shape.py reference_ligand.mol2 --shape box --padding 8.0
Step 4: Run Dock\ *Flow* to predict the docking poses.
------------------------------------------------------
@@ -162,11 +174,19 @@ When tou are done, you can postprocess (\\-\\-postprocess) the results. Here, we
Score\ *Flow*
*************
-Step 6: Run Score\ *Flow* to rescore the previous docking poses (best 3 for each ligand)
+Rescoring through the MMGBSA method, using two protocols in **implicit solvent**: first just minimization, then a 1 ns MD simulation. To obtain results with better correlation with experimental binding affinities you may use **RESP** charges.
+
+Step 6.1: Run Lig\ *Flow* to compute **RESP** charges.
+----------------------------------------------------------------------------------------
+
+.. code-block:: bash
+
+ LigFlow -p tutorial -l compounds.mol2 --resp
+
+Step 6.2: Run Score\ *Flow* to rescore the previous docking poses (best 3 for each ligand)
----------------------------------------------------------------------------------------
Here, we only keep on with plants results (tutorial.chemflow/DockFlow/plants/receptor/docked_ligands.mol2).
-Rescoring through the MMGBSA method, using two protocols in **implicit solvent** first just minimization, then 1ns md simulation :
.. code-block:: bash