// // clusterconf.svl Clustering Tool for Conformers // // 19-oct-2021 (kk) db_RMSD bug-fixed: added multiple ligand check // 18-oct-2021 (kk) db_RMSD bug-fixed: obtain coordinates from the same // atoms from which SMILES was extracted // 06-oct-2021 (kk) db_RMSD: support moe type field // 19-aug-2021 (kk) RMSD: use ligand rkey instead of ckey // 22-jul-2021 (kk) db_RMSD: changed input args and use only ligand atoms // 21-jul-2021 (kk) reviced code // 22-jul-2015 (ti) db_RMSD bug fixed: if no atoms matches query SMILES, // skip this entry // 12-nov-2013 (ao) replace cattok with tok_cat // 13-jul-2010 (ad) fixed panel database open bug // 09-jul-2009 (na) added argument for batch // 12-nov-2008 (na) patched for unassigned Cluster for the same conformer // with center of cluster // 14-feb-2008 (sn) bug-fixed for terminalatoms || mask_list // 28-dec-2007 (sn) Add sd_RMSD function // 09-feb-2005 (rk) bug-fixed for terminalatoms || mask_list // 09-feb-2005 (rk) replaced dbv_ViewKeys with dbv_DefaultView terminalatoms // 26-aug-2003 (jg) revised for symmetry handling // 24-aug-2001 (jg) bug-fixed and add 'method' option // 14-aug-2001 (jg) Created // // COPYRIGHT (C) 2001-2021 MOLSIS INC. ALL RIGHTS RESERVED. // // PERMISSION TO USE, COPY, MODIFY AND DISTRIBUTE THIS SOFTWARE IS HEREBY // GRANTED PROVIDED THAT: (1) UNMODIFIED OR FUNCTIONALLY EQUIVALENT CODE // DERIVED FROM THIS SOFTWARE MUST CONTAIN THIS NOTICE; (2) ALL CODE DERIVED // FROM THIS SOFTWARE MUST ACKNOWLEDGE THE AUTHOR(S) AND INSTITUTION(S); (3) // THE NAMES OF THE AUTHOR(S) AND INSTITUTION(S) NOT BE USED IN ADVERTISING // OR PUBLICITY PERTAINING TO THE DISTRIBUTION OF THE SOFTWARE WITHOUT // SPECIFIC, WRITTEN PRIOR PERMISSION; (4) ALL CODE DERIVED FROM THIS SOFTWARE // BE EXECUTED WITH THE MOLECULAR OPERATING ENVIRONMENT (MOE) LICENSED FROM // MOLSIS INC. // // MOLSIS INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS // SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, // AND IN NO EVENT SHALL MOLSIS INC. BE LIABLE FOR ANY // SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER // RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF // CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. // #set title 'Conformer Clustering' #set class 'MOE:interactive' #set version 'MOLSIS 2021.10.19' #set main 'ClusterConf' function db_ImportSD; function Superpose; function RenderStick; const CCG_VER = 0; //--------------------------------------------------------------------- // GUI //--------------------------------------------------------------------- const panel = [ title: 'Conformer Clustering', name: 'confcluster', windowName: 'Conformer Clustering', text: ['Clustering', 'Cancel'], // Shell button onTrigger: ['return', 'exit'], Hbox: [ Option: [ title: 'Conformer File:', name: 'mdbfile', onTrigger: 'return' ], Button: [ name: 'browse', text: 'Open', font: 'mediumBold', onTrigger:'return' ] ], Option: [title: 'Mol Field:', name: 'molfield'], Text: [title: 'RMSD:', name: 'rmsd', len: 4, sensitive: 1, type : 'real'], Radio: [ name: 'method', title: 'Method:', sensitive: 1, options: ['Minimize Cluster', 'Maximize Cluster'], bubbleHelp: 'Minimize Cluster:Each entry belongs to the cluster\n' ' with the nearest cluster center.\n' 'Maximize Cluster:Make largest cluster.' ], Separator: [], Hbox: [ Option: [ name: 'display', title : 'Display Conformers:', titleFont: 'medium', options: ['ALL', 'NONE'], sensitive: 0, onTrigger: 'return' ], Text: [name: 'n_conf', len: 5, sensitive: 0, type: 'int'], Checkbox: [ name: 'heavyatoms', title:' Heavy Atoms Only', titleFont: 'medium', sensitive: 0, onTrigger: 'return' ] ], Radio: [ name: 'color', title: 'Color:', titleFont: 'medium', options: ['Cluster', 'Element'], sensitive: 0, onTrigger: 'return' ], Radio: [ name: 'center', title: 'Cluster Center:', titleFont: 'medium', options: ['Only', 'Stick', 'Select'], sensitive: 0, onTrigger: 'return' ] ]; local function atom_pairs atoms local n = length atoms; local idx = igen n; local i2 = cat apt rep [idx, dec idx]; local i1 = cat app igen dec idx; return apt get [[atoms],tr [i1,i2]]; endfunction local function _calc_rmsd [a_pos, b] local idx = aPrioZQH b; local eq_atoms = [b] || (igen maxE idx == [idx]); // equalatoms eq_atoms = eq_atoms | (app length eq_atoms > 1); local n_eq_atoms = app length eq_atoms; local j, k, c, pos, rmsd, rmsd0, m, n, check; rmsd0 = sqrt ((add cat sqr (a_pos - aPos b)) / length b); for j = 1, length n_eq_atoms loop c = eq_atoms(j); local pairs = atom_pairs c; local term_atoms = (c | (aHeavyValence c == 1)); //terminalatoms if length (aBonds term_atoms) == length (uniq aBonds term_atoms) then term_atoms = term_atoms || apt eqL [ aBonds term_atoms, uniq aBonds term_atoms ]; else term_atoms = []; endif pairs = cat [pairs, term_atoms, app reverse term_atoms]; loop for k = 1, length pairs loop pos = aPos pairs(k); aSetPos [rotl pairs(k), pos]; rmsd(k) = sqrt ((add cat sqr (a_pos - aPos b)) / length b); aSetPos [pairs(k), pos]; endloop if minE rmsd < rmsd0 then rmsd0 = minE rmsd; m = indexof [rmsd0, rmsd]; aSetPos [rotl pairs(m), aPos pairs(m)]; check = 1; else check = 0; endif until check === 0 endloop endloop return rmsd0; endfunction //--------------------------------------------------------------------- // Clustering main code //--------------------------------------------------------------------- #if CCG_VER function conf_NormalizeAtoms,conf_ExpandAutomorphisms; const COMPARATORS = tr [ ['max', 'x_max'], ['min', 'x_min'] ]; local function ConfClustering [mdbfile, molfield, ent, rmsd, method, usecompf, efield, maxormin] apt db_EnsureField [mdbfile, ['Cluster', 'Center', 'RMSD'], ['int', 'int', 'float'] ]; local n = length ent; local comp_fcn = get [ COMPARATORS(2), indexof [maxormin, COMPARATORS(1)] ]; local i, j, mol, chains, atoms,pos,ConfDistMatrix, E; local entries = ent; local dbmol = [], confs = []; for i = 1, n loop mol = first db_ReadFields [mdbfile, ent(i), molfield]; if usecompf then E(i) = cat db_ReadFields [mdbfile, ent(i), efield]; endif chains = mol_Create mol; atoms = cat cAtoms chains; pos(i) = aPos atoms; conf_NormalizeAtoms atoms; aSetForceRS [atoms, 0]; local hatoms = atoms | aAtomicNumber atoms > 1; dbmol(i) = mol_Extract hatoms; confs(i) = first conf_ExpandAutomorphisms [ dbmol(i), nest mol_aPos dbmol(i), 0 ]; oDestroy chains; endloop local atomcount = length first first pos; // calculate RMSD matrix for all combination of conformers ConfDistMatrix=[]; local x,y; for y = 1, n loop for x = 1, dec y loop local rmsds = []; for i = 1, length confs(x) loop for j = 1, length confs(y) loop rmsds(i)(j) = first Superpose [[confs(x)(i), confs(y)(j)]]; endloop endloop ConfDistMatrix(x)(y) = sqrt min cat cat rmsds; ConfDistMatrix(y)(x) = ConfDistMatrix(x)(y); endloop ConfDistMatrix(y)(y) = 0; endloop // find all pairs within RMSD threashold local f_RMSD_Matrix = ConfDistMatrix < rmsd; i = 1; local orig_E = E; local orig_CMD = ConfDistMatrix; local ClusterCenter, ClusterConfList, f_Entry, f_Cluster; // get cluster center and entry list in each cluster while length f_RMSD_Matrix loop f_Entry = app add f_RMSD_Matrix; if usecompf then f_Entry = E; endif // find an entry that defines a cluster center f_Entry = put [ rep [0, length f_RMSD_Matrix], call [comp_fcn, f_Entry], 1 ]; ClusterCenter(i) = ent | f_Entry; // get conformers in the cluster ClusterConfList(i) = ent | cat (f_RMSD_Matrix | f_Entry); // strip entries in the cluster above from original entry list // do same process for f_RMSD_Matrix f_Cluster = not cat (f_RMSD_Matrix | f_Entry); ent = ent | f_Cluster; if usecompf then E = E | f_Cluster; endif f_RMSD_Matrix = f_RMSD_Matrix || [f_Cluster]; f_RMSD_Matrix = f_RMSD_Matrix | f_Cluster; i = i + 1; endloop // get RMSD from each Cluster Center ent = entries; ConfDistMatrix = ConfDistMatrix | indexof [ent, ClusterCenter]; ConfDistMatrix = perm [ConfDistMatrix, pack indexof [ent, ClusterCenter]]; ConfDistMatrix = apt mput [ConfDistMatrix, (ConfDistMatrix >= rmsd), 0]; //------------------------------------------------------------------- // Remove double-counted entry and assign only one center for each entry // pr ConfDistMatrix; pr n; local function RMSD_clusterclean [ConfDistMatrix, n, method] local i; ConfDistMatrix = tr ConfDistMatrix; local dist_list, ent_idx; if method === 'Minimize Cluster' then for i = 1, n loop dist_list = ConfDistMatrix(i) | ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = first ( (indexof [dist_list, ConfDistMatrix(i)]) | dist_list == minE dist_list ); ConfDistMatrix(i) = rep [0, length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = minE dist_list; endif endloop else for i = 1, n loop dist_list = ConfDistMatrix(i) | ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = indexof [first dist_list, ConfDistMatrix(i)]; ConfDistMatrix(i) = rep [0, length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = first dist_list; endif endloop endif ConfDistMatrix = tr ConfDistMatrix; return ConfDistMatrix; endfunction ConfDistMatrix = RMSD_clusterclean [ConfDistMatrix, n, method]; // unify the enties for each cluster. At this stage, if we are using // a float field rather than RMSD to define the cluster centres, we // need to move the cluster centre. // SHOULD WE THEN LOOP BACK AND RECALCULATE CLUSTER MEMBERS? for i = 1, length ClusterCenter loop ClusterConfList(i) = cat [ClusterCenter(i), ent | ConfDistMatrix(i)]; if usecompf then local ClusterE = orig_E | indexof [ ent, cat [ClusterCenter(i), ClusterConfList(i)] ]; ClusterE = perm [ ClusterE, pack indexof [ent, ClusterConfList(i)] ]; ClusterCenter(i) = get [ ClusterConfList(i), call [comp_fcn, ClusterE] ]; endif endloop // Moving the cluster centre means we have to reaquire the distance // matrix data for the output RMSD field if usecompf then orig_CMD = orig_CMD | indexof [ent, ClusterCenter]; orig_CMD = tr perm [orig_CMD, pack indexof [ent, ClusterCenter]]; for i = 1, n loop ConfDistMatrix(i) = mget [ orig_CMD(i), (apt indexof [ent(i), ClusterConfList]) ]; endloop else ConfDistMatrix = add ConfDistMatrix; endif //----------------------------------------------------------- // output data for each entry local k; print '------------------------'; write ['{t:} {n:}\n' , ['RMSD = ', rmsd]]; for i = 1, length ClusterCenter loop for j = 1, length ClusterConfList(i) loop if iadd (ClusterCenter == ClusterConfList(i)(j)) then k = 1; else k = 0; endif db_Write [mdbfile, ClusterConfList(i)(j), tag [['Cluster', 'Center'], [i, k]] ]; endloop write [ '{t:} {n:}\n', [ 'Cluster_', token swrite ['{c:}', i], ' = ', length ClusterConfList(i) ]]; endloop for i = 1, n loop db_Write [mdbfile, ent(i), tag ['RMSD', ConfDistMatrix(i)]]; endloop return [ClusterCenter, ClusterConfList]; endfunction ////// // db_conf_cluster.svl Cluster conformations of a database of molecules // // 19-aug-2015 (fk) singleton clusters also assigned // 05-jun-2007 (sg) added an option to select cluster centre based on float // 14-dec-2006 (ah) created based on clusterconf.svl & db_conf_cluster.svl // 04-dec-2002 (db) use Superpose to compute RMSD // 10-oct-2002 (db) fixed the vector of wrong length bug // 20-jan-2002 (cw) added comments...included with CCG support local function check_fields [mdb, mseq, E] local [fieldnames, fieldtypes] = db_Fields mdb; local molfieldnames = fieldnames | (fieldtypes == 'molecule'); [fieldnames, fieldtypes] = [fieldnames, fieldtypes] || [not (fieldtypes == 'molecule')]; if isnull mseq then if anytrue (fieldnames == 'mseq') then mseq = 'mseq'; else mseq = first fieldnames; endif elseif allfalse (fieldnames == mseq) then mseq = first fieldnames; endif if isnull E then if anytrue (fieldnames == 'E') then E = 'E'; else E = first (fieldnames | fieldtypes == 'float'); endif elseif allfalse (fieldnames == E) then E = first (fieldnames | fieldtypes == 'float'); endif return [mseq, E, fieldnames, molfieldnames]; endfunction global function db_conf_cluster [mdb, mseq, efield, method, rmsd] if MOE_BATCH and anytrue app isnull argument [] then exit 'Usage db_conf_cluster [mdb, mseq, method, rmsd]'; endif if istrue indexof [storage mdb, ['tok', 'int']] then mdb = db_Open [mdb, 'read-write']; else mdb = dbv_DefaultView []; endif local mdbfile = db_Filename mdb; local fieldnames, molfieldnames, ents, data, m, opts, mfield; [mseq, efield, fieldnames, molfieldnames] = check_fields [mdb, mseq, efield]; const PANEL = [ title: 'Database Conformer Clustering', name: 'db_conf_cluster', windowName: 'Database Conformer Clustering', text: ['OK', 'Cancel'], // Shell button onTrigger: ['return', 'exit'], Hbox: [ name: 'box1', extendH: 1, FSBText: [ name: 'mdb', title: 'Database', bubbleHelp: 'Database for conformation clustering.', font: 'mediumBold', len: 30, onTrigger: 'return' ], Button: [ name: 'browse', text: 'Browse...', font: 'mediumBold', onTrigger: 'return' ] ], Hbox: [ name: 'box2', Option: [ name: 'mfield', title: 'Molecule Field', font: 'mediumBold', bubbleHelp: 'Molecule Field' ], Option: [ name: 'mseq', title: 'Molecule Identifier', font: 'mediumBold', bubbleHelp: 'Field to use for compound identifiers' ] ], Hbox: [ name: 'box3', Checkbox: [ title: 'Comparison Field', name: 'usecompf', bubbleHelp: 'Use a specific float field to\n' 'define cluster centres, rather\n' 'than cluster size. If disabled,\n' 'default comparison function is max\n' 'cluster size.' ], Option: [ name: 'efield', font: 'mediumBold', bubbleHelp: 'Specify comparison Field' ], Option: [ name: 'maxormin', title: 'Comparison Function', font: 'mediumBold', text: ['min', 'max'], bubbleHelp: 'High or low values form\n' 'cluster centres' ] ], Hbox: [ name: 'box4', Text: [ title: 'Select where RMSD >', name: 'rmsd', len:6, sensitive: 1, type : 'real' ], Label: [ text: 'A between Conformations', font: 'mediumBold' ] ], Radio: [ name: 'method', title: 'Method:', options: ['Minimize Cluster', 'Maximize Cluster'], sensitive: 1, bubbleHelp: 'Minimize Cluster: Each entry belongs to the cluster\n' 'with the nearest cluster center.\n' 'Maximize Cluster: Make largest cluster.' ], Radio: [ name: 'superpose', title: 'Template Fitting:', options: ['Superpose', 'Absolute Positions'], sensitive: 1, bubbleHelp: 'Superpose the atoms that match the selected atoms.\n' 'Use the absolute positions that match the selected atoms.' ] ]; if anytrue app isnull [method, rmsd] then local wkey = WindowCreate PANEL; if isnull method then method = 'Highest';endif WindowSetData [wkey, [mdb: db_Filename mdb]]; WindowSetAttr [wkey, [mseq: [text: fieldnames]]]; WindowSetAttr [wkey, [mfield: [text: molfieldnames]]]; WindowSetAttr [wkey, [efield: [text: fieldnames]]]; WindowSetData [wkey, [efield: efield]]; WindowSetData [wkey, [mseq: mseq]]; WindowSetData [wkey, [rmsd: 1]]; WindowShow wkey; // sg Set up a child task to monitor the usecompf checkbox if second task_fork [master:'parent', statics:'share'] == 'child' then task_prio 0; task_idle 1; local baval; loop sleep 0.15; baval = WindowGetData [wkey, 'usecompf']; if baval.usecompf === 1 then WindowSetAttr [wkey, [efield: [sensitive: 1]]]; WindowSetAttr [wkey, [maxormin: [sensitive: 1]]]; else WindowSetAttr [wkey, [efield: [sensitive: 0]]]; WindowSetAttr [wkey, [maxormin: [sensitive: 0]]]; WindowSetData [wkey, [maxormin: 'max']]; endif endloop endif loop local [value, trigger] = WindowWait wkey; if trigger === 'browse' then mdbfile = FilePrompt [ title: 'Select a Database', filter: '*.mdb' ]; WindowSetData[wkey, [mdb: mdbfile]]; mdb = db_Open mdbfile; dbv_Open mdbfile; [mseq, efield, fieldnames, molfieldnames] = check_fields [ mdb, mseq, efield ]; WindowSetAttr [wkey, [mseq: [text: fieldnames]]]; WindowSetAttr [wkey, [mfield: [text: molfieldnames]]]; WindowSetData [wkey, [mseq: mseq]]; elseif trigger === 'db_conf_cluster' then if value.db_conf_cluster === 'OK' then mdb = db_Open value.mdb; method = value.method; mseq = value.mseq; rmsd = value.rmsd; endif break; endif endloop else value.method = method; value.rmsd = rmsd; value.mfield = db_FirstFieldType [mdb, 'molecule']; endif local paneloff = [ 'db_conf_cluster', 'box1', 'box2', 'box3', 'box4', 'method', 'superpose' ]; apt WindowSetAttr [wkey, [tag [paneloff, [[sensitive:0]]]]]; local entries = db_Entries mdb; local mseqs = db_ReadColumn [mdb, mseq]; entries = entries [x_sort mseqs]; local split_ents = split [entries, mtoc m_uniq sort mseqs]; for ents in split_ents loop // if there are > entry/molecule, perform clustering if length ents > 1 then ConfClustering [ mdbfile, value.mfield, ents, value.rmsd, value.method, value.usecompf, value.efield, value.maxormin ]; // if there is only 1 entry/molecule assign cluster, center anyway elseif length ents === 1 then // make sure the fields are created already apt db_EnsureField [mdb, ['Cluster', 'Center', 'RMSD'], ['int', 'int', 'float'] ]; db_Write [mdb, ents, tag ['Cluster', 1]]; // assign cluster #1 db_Write [mdb, ents, tag ['Center', 1]]; // assign center db_Write [mdb, ents, tag ['RMSD', 0]]; // RMSD = 0 endif endloop db_Close mdb; endfunction #else global function ConfClustering [mdbfile, molfield, rmsd, method] local RMSD_FOR_SAMECONF=0.0000001; // patch for the same conformer // (sufficiently small valule) write['ConfClustering [\'{t:}\',\n \'{t:}\', {n:}, \'{t:}\']\n', db_Filename mdbfile, molfield, rmsd, method]; if iadd (first db_Fields mdbfile == 'Cluster') === 0 then db_CreateField [mdbfile, 'Cluster', 'int']; endif if iadd (first db_Fields mdbfile == 'Center') === 0 then db_CreateField [mdbfile, 'Center', 'int']; endif if iadd (first db_Fields mdbfile == 'RMSD') === 0 then db_CreateField [mdbfile, 'RMSD', 'float']; endif local ent = db_Entries mdbfile; local n = length ent; local i, j,chain, atoms, pos, mols, ConfDistMatrix; atoms = Atoms []; for i = 1, n loop chain = mol_Create cat db_ReadFields [mdbfile, ent(i), molfield]; atoms = cat cAtoms chain; oDestroy (atoms | (aAtomicNumber atoms == 1)); atoms = cat cAtoms chain; if length SelectedAtoms [] > 0 then atoms = SelectedAtoms []; endif mols(i) = atoms; endloop // calculate RMSD matrix for all combination of conformers for i = 1, n loop for j = 1, n loop ConfDistMatrix(i)(j) = _calc_rmsd [aPos mols(i), mols(j)]; if i<>j and ConfDistMatrix(i)(j)===0 then ConfDistMatrix(i)(j)=RMSD_FOR_SAMECONF; // patch for the same conformer endif endloop endloop Close [force: 1]; // find all pairs within RMSD threashold local f_RMSD_Matrix = ConfDistMatrix < rmsd; i = 1; local ClusterCenter, ClusterConfList, f_Entry, f_Cluster; // get cluster center and entry list in each cluster while length f_RMSD_Matrix loop f_Entry = app add f_RMSD_Matrix; // find an entry with the largest number of neighbors // and define it as a cluster center f_Entry = put [rep[0, length f_RMSD_Matrix], x_max f_Entry, 1]; ClusterCenter(i) = ent | f_Entry; // get conformers in the cluster ClusterConfList(i) = ent | cat (f_RMSD_Matrix | f_Entry); // strip entries in the cluster above from original entry list // do same process for f_RMSD_Matrix f_Cluster = not cat (f_RMSD_Matrix | f_Entry); ent = ent | f_Cluster; f_RMSD_Matrix = f_RMSD_Matrix || [f_Cluster]; f_RMSD_Matrix = f_RMSD_Matrix | f_Cluster; i = i + 1; endloop // get RMSD from each Cluster Center ent = db_Entries mdbfile; ConfDistMatrix = ConfDistMatrix | indexof [ent, ClusterCenter]; ConfDistMatrix = perm [ConfDistMatrix,pack indexof [ent, ClusterCenter]]; ConfDistMatrix = apt mput [ConfDistMatrix, (ConfDistMatrix >= rmsd), 0]; //------------------------------------------------------------------------- // Remove double-counted entry and assign only one center for each entry // ConfDistMatrix = tr ConfDistMatrix; local dist_list, ent_idx; if method === 'Minimize Cluster' then for i = 1,n loop dist_list = ConfDistMatrix(i) | ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = first ((indexof [dist_list, ConfDistMatrix(i)]) | dist_list == minE dist_list ); ConfDistMatrix(i) = rep [0, length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = minE dist_list; endif endloop else for i = 1,n loop dist_list = ConfDistMatrix(i) | ConfDistMatrix(i)>0; if length dist_list > 0 then ent_idx = indexof [first dist_list,ConfDistMatrix(i)]; ConfDistMatrix(i) = rep [0, length ConfDistMatrix(i)]; ConfDistMatrix(i)(ent_idx) = first dist_list; endif endloop endif ConfDistMatrix = tr ConfDistMatrix; for i = 1, length ClusterCenter loop ClusterConfList(i) = cat [ClusterCenter(i), ent | ConfDistMatrix(i)]; endloop ConfDistMatrix = add ConfDistMatrix; //------------------------------------------------------------------------- // output data for each entry local k; print '------------------------'; write ['{t:} {n:}\n' , ['RMSD = ', rmsd]]; for i = 1, length ClusterCenter loop for j = 1, length ClusterConfList(i) loop if iadd (ClusterCenter == ClusterConfList(i)(j)) then k = 1; else k = 0; endif db_Write [mdbfile, ClusterConfList(i)(j), tag [['Cluster','Center'], [i, k]] ]; endloop write ['{t:} {n:}\n', [ 'Cluster_',token swrite ['{c:}',i],' = ', length ClusterConfList(i) ]]; endloop print '------------------------'; for i = 1, n loop if ConfDistMatrix(i) === RMSD_FOR_SAMECONF then ConfDistMatrix(i) = 0.0; // patch for the same conformer endif db_Write [mdbfile, ent(i), tag ['RMSD', ConfDistMatrix(i)]]; endloop return [ClusterCenter, ClusterConfList]; endfunction global function ConfClusterDisplay [CenterFlag, ColScalar, ClusterCenter, ClusterConfList, heavyatoms, display, color, center] local i; local atoms = Atoms []; local chains = Chains []; if display === 'ALL' then aSetHidden [atoms, 0]; else i = atoi token drop [string display, 8]; aSetHidden [cat cAtoms (chains | not(ColScalar == i)), 1]; aSetHidden [cat cAtoms (chains | (ColScalar == i)), 0]; endif if color === 'Cluster' then aSetScalar [cAtoms chains, ColScalar / maxE ColScalar]; aSetColorBy [atoms, 'scalar']; else aSetColorBy [atoms, 'element']; endif if center === 'Select' then aSetBondLook [atoms, 'line']; if display === 'ALL' then aSetSelected [cat cAtoms (chains | CenterFlag), 1]; else aSetSelected [atoms,0]; aSetSelected [cat cAtoms (chains |(CenterFlag and (ColScalar == i))), 1 ]; endif aSetSelected [SelectedAtoms [] | (aAtomicNumber SelectedAtoms [] == 1), not heavyatoms ]; elseif center === 'Stick' then aSetBondLook [cat cAtoms (chains | CenterFlag), 'cylinder']; aSetSelected [atoms, 0]; else aSetBondLook [atoms, 'line']; aSetSelected [atoms, 0]; aSetHidden [cat cAtoms (chains | not CenterFlag), 1]; endif atoms = cat cAtoms uniq aChain (atoms | not aHidden atoms); aSetHidden [atoms | (aAtomicNumber atoms == 1), heavyatoms]; View []; endfunction global function RMSD [] local res = Residues []; res = res | rClassRLS res == 'lig'; local atoms = SelectedAtoms []; if isnull atoms then atoms = cat rAtoms first res; endif local a, b, sm, a_pos, i, rmsd; sm = sm_Extract [atoms, aPrioSMI atoms]; a = uniq cat sm_MatchAtoms [sm, atoms]; a_pos = aPos a; for i = 1, length res loop b = uniq cat sm_MatchAtoms [sm, cat rAtoms res(i)]; if isnull b then continue; endif rmsd = _calc_rmsd [a_pos, b]; write ['RMSD ({n:}) = {n:}\n', i, rmsd]; endloop endfunction const DB_RMSD_DEFAULTS = [ molfld : '', esel : 0, ent : [], verbose : 1 ]; //global function db_RMSD [mdb, selected_ent, ent] global function db_RMSD [mdbfn, opt] opt = tagcat [opt, DB_RMSD_DEFAULTS]; local mdb, allents, ent, enum, molfield; if isnull mdbfn then mdb = dbv_DefaultView []; elseif isscalarnum mdbfn then // mdbfn is dbkey mdb = mdbfn; else // mdbfn is filename(?) mdb = db_Open [mdbfn, 'read-write']; endif allents = db_Entries mdb; if istrue opt.esel then ent = dbv_SelectedEntries mdb; if isnull ent then exit 'No selected entries.'; endif endif if isnull opt.ent then ent = allents; else ent = opt.ent; endif enum = igen db_nEntries mdb; enum = enum | m_join [allents, ent]; local [flds, typs] = db_Fields mdb; local mflds = flds | m_join [typs, ['molecule', 'moe']]; if isfalse opt.molfld then molfield = first mflds; else molfield = opt.molfld; endif if isfalse (molfield == mflds) then exit twrite ['"{}" is not a molecule field.', molfield]; endif local is_moe = eqL [db_FieldType [mdb, molfield], 'moe']; local atoms, a, b, sm, a_pos, i, rmsd, chain, mol; atoms = SelectedAtoms []; if isnull atoms then atoms = _Atoms '$$ligand'; if isnull atoms then exit 'There are no ligand atoms in the MOE window.'; elseif length uniq aMoleculeNumber atoms > 1 then exit 'Multiple ligands are loaded in MOE.'; endif else atoms = join [atoms, _Atoms '$$ligand']; // get only ligand atoms if isnull atoms then exit 'Selected atoms seem not to be ligand atoms.'; elseif length uniq aMoleculeNumber atoms > 1 then exit 'Multiple ligands are selected.'; endif endif sm = sm_Extract [atoms, aPrioSMI atoms]; // a = uniq cat sm_MatchAtoms [sm, _Atoms '$$ligand']; a = uniq cat sm_MatchAtoms [sm, atoms]; a_pos = aPos a; if istrue opt.verbose then write ['Query SMILES: "{}"\n', sm]; endif local rmsdfld = tok_cat ['RMSD_', molfield]; db_EnsureField [mdb, rmsdfld, 'float']; local pdata = SystemPush []; for i = 1, length ent loop mol = cat db_ReadFields [mdb, ent(i), molfield]; if is_moe then mol = moe_CreateMOL mol; endif chain = mol_Create mol; // b = uniq cat sm_MatchAtoms [sm, cat cAtoms chain]; b = uniq cat sm_MatchAtoms [sm, _Atoms '$$ligand']; if istrue b then // if there are no matched atoms, skip this entry rmsd = _calc_rmsd [a_pos, b]; if istrue opt.verbose then write ['Entry No.{}: RMSD = {.2f} A\n', enum(i), rmsd]; endif db_Write [mdb, ent(i), tag [rmsdfld, rmsd]]; else if istrue opt.verbose then write ['Entry No.{}: No matched atoms\n', enum(i)]; endif endif oDestroy chain; endloop SystemPop pdata; db_Close mdb; if istrue opt.verbose then write ['Finished.\n']; endif endfunction global function db_RMSD_1vsM [db1, mseq1, xtalfield, db2, mseq2, dockfield, rmsdfield] local no1 = db_ReadColumn [db1, mseq1]; local ent1 = db_Entries db1; local no2 = db_ReadColumn [db2, mseq2]; local ent2 = db_Entries db2; db_EnsureField [db2, rmsdfield, 'float']; local i; for i = 1, length ent1 loop local mol1 = cat db_ReadFields [db1, ent1(i), xtalfield]; local chain1 = mol_Create mol1; local mask = m_join [no2, no1(i)]; if orE mask then local ent2_sub = ent2 | mask; db_RMSD [db2, [], ent2_sub]; endif oDestroy chain1; endloop endfunction //(sn) global function sd_RMSD src_sdf local tmp_mdb = tok_cat [fbase src_sdf, '_superposeRMSD.mdb']; local molfield = 'mol'; db_Open [tmp_mdb, 'create']; db_ImportSD [tmp_mdb, src_sdf, molfield , [], [], [], [file_filed: 0]]; apt db_CreateField [tmp_mdb, ['pair_mol','sRMSD','xRMSD'], ['molecule','float','float'] ]; local ent = db_Entries tmp_mdb; local n = length ent; local i, chain, atoms; chain = mol_Create cat db_ReadFields [tmp_mdb, first ent, molfield]; atoms = cat cAtoms chain; oDestroy (atoms | (aAtomicNumber atoms == 1)); atoms = cat cAtoms chain; local a_pos = aPos atoms; oDestroy chain; write ['{}\n\n', 'sd_RMSD Results']; write ['{}\n', '#conf, RMSD']; for i = 1, n loop chain = mol_Create cat db_ReadFields [tmp_mdb, ent(i), molfield]; atoms = cat cAtoms chain; oDestroy (atoms | (aAtomicNumber atoms == 1)); atoms = cat cAtoms chain; local b = atoms; local xrmsd = _calc_rmsd [a_pos, b]; local b_pos = aPos b; local srmsd0 = xrmsd; local cnt = 0; loop cnt = inc cnt; local [srmsd, R, t] = Superpose [[a_pos, b_pos]]; R = app add (tr R(1) * R[2]); t = t(2) - app add (tr R * t[1]); local b_pos0 = app add (R * [b_pos - t]); aSetPos [b, b_pos0]; srmsd = _calc_rmsd [a_pos, b]; b_pos = aPos b; if abs (srmsd0 - srmsd) < 0.0001 then break; else srmsd0 = srmsd; endif until cnt > 5 endloop b_pos = select [b_pos0, b_pos, srmsd0 < srmsd]; aSetPos [b, b_pos]; srmsd = sqrt ((add cat sqr (a_pos - b_pos)) / length b); db_Write [tmp_mdb, ent(i), [pair_mol: mol_Extract b, sRMSD: srmsd, xRMSD: xrmsd] ]; write ['{n:5},{n:8.4f}\n',i,srmsd]; oDestroy chain; endloop endfunction const USAGE = #token Usage: ClusterConf [ mdbfile, molfield, rmsd threshold, method ] mdbfile : 'filename.mdb' molfield: 'field name for cluster in mdbfile' rmsd threshold: threshold value for clustering (angstrom) method : 'Minimize Cluster' (default) or 'Maximize Cluster' # ; global function ClusterConf args local ClusterCenter, ClusterConfList, CenterFlag, ColScalar; local mdb, mdbfile, molfield; mdb = first args; if isnull args then mdb = dbv_ViewKeys []; if MOE_BATCH then write USAGE; return []; endif elseif MOE_BATCH then // for moebatch Close [force: 1]; if isnull args(2) then args(2) = db_FirstFieldType [args(1), 'molecule']; endif if isnull args(3) then args(3) = 1.0; endif if isnull args(4) then args(4) = 'Minimize Cluster'; endif ConfClustering args; // [mdbfile, molfield, rmsd, method] Close [force:1]; return []; endif if mdb === [] then mdb = ''; mdbfile = ''; molfield = ''; else mdbfile = app db_Filename mdb; molfield = db_Fields mdbfile; molfield = molfield(1) | molfield(2) == 'molecule'; endif local wkey = WindowCreate panel; WindowShow wkey; WindowSetAttr [wkey,[ mdbfile: [text: mdbfile], molfield: [text: molfield] ]]; WindowSetData [wkey,[rmsd: 1]]; loop local [value, trigger] = WindowWait wkey; if trigger === 'mdbfile' then molfield = db_Fields value.mdbfile; molfield = molfield(1) | molfield(2) == 'molecule'; WindowSetAttr [wkey,[ mdbfile: [text: mdbfile], molfield: [text: molfield] ]]; elseif trigger === 'browse' then mdb = dbv_Open []; mdbfile = cat [db_Filename mdb, mdbfile]; molfield = db_Fields mdb; molfield = molfield(1) | molfield(2) == 'molecule'; WindowSetAttr [wkey,[ mdbfile: [text: mdbfile], molfield: [text: molfield] ]]; WindowSetData [wkey,[mdbfile: mdbfile(1)]]; elseif trigger === 'confcluster' then if value.confcluster === 'Clustering' then Close [force:1]; [ClusterCenter,ClusterConfList] = ConfClustering [ value.mdbfile, value.molfield, value.rmsd, value.method ]; Close [force: 1]; CenterFlag = db_ReadColumn [value.mdbfile, 'Center']; ColScalar = db_ReadColumn [value.mdbfile, 'Cluster']; app db_CreateMolecule db_ReadColumn [ value.mdbfile, value.molfield ]; local n = igen length ClusterCenter; local i; local ClusterNumList = []; for i = 1, length ClusterCenter loop ClusterNumList(i) = tok_cat [ 'Cluster_', token swrite ['{c:}',n(i)] ]; endloop ClusterNumList = cat ['ALL', ClusterNumList]; WindowSetAttr [wkey, [ confcluster : [ text: ['Recalc.', 'Cancel'], onTrigger: ['return', 'exit'] ], mdbfile : [sensitive: 0, titleFont: 'medium'], browse : [sensitive: 0], molfield : [sensitive: 0, titleFont: 'medium'], rmsd : [sensitive: 0, titleFont: 'medium'], method : [sensitive: 0, titleFont: 'medium'], display : [ sensitive: 1, titleFont: 'mediumBold', options: ClusterNumList ], heavyatoms : [sensitive: 1, titleFont: 'mediumBold'], color : [sensitive: 1, titleFont: 'mediumBold'], center : [sensitive: 1, titleFont: 'mediumBold'] ]]; WindowSetData [wkey, [ n_conf : iadd app length ClusterConfList ]]; ConfClusterDisplay [CenterFlag, ColScalar, ClusterCenter, ClusterConfList, value.heavyatoms, 'ALL', 'Cluster', 'Only' ]; elseif value.confcluster === 'Recalc.' then Close [force: 1]; WindowSetData [wkey, [ display : 'ALL', color : 'Cluster', center : 'Only', n_conf : iadd app length ClusterConfList ]]; WindowSetAttr [wkey, [ confcluster : [ text: ['Clustering','Cancel'], onTrigger: ['return','exit'] ], mdbfile : [sensitive: 1, titleFont: 'mediumBold'], browse : [sensitive: 1], molfield : [sensitive: 1, titleFont: 'mediumBold'], rmsd : [sensitive: 1, titleFont: 'mediumBold'], method : [sensitive: 1, titleFont: 'mediumBold'], heavyatoms : [sensitive: 0, titleFont: 'medium'], display : [sensitive: 0, titleFont: 'medium'], color : [sensitive: 0, titleFont: 'medium'], center : [sensitive: 0, titleFont: 'medium'] ]]; endif else if value.display === 'ALL' then WindowSetData [wkey, [ n_conf : iadd app length ClusterConfList ]]; else WindowSetData [wkey, [ n_conf : length ClusterConfList ( atoi token drop [string value.display, 8] )]]; endif ConfClusterDisplay [CenterFlag, ColScalar, ClusterCenter, ClusterConfList, value.heavyatoms, value.display, value.color, value.center ]; if value.confcluster === 'Clustering' then Close [force: 1]; endif endif endloop endfunction #endif