def name_to_smiles(name: str) -> str:
    """Convert from chemical name to SMILES string using chemical identifier resolver.

    This script is useful to combine with ``df.apply`` from pandas, hence it does not
    throw exceptions for invalid molecules but instead returns an empty string for
    easy subsequent postprocessing of the dataframe. The same applies to inputs that
    cannot be turned into a request at all (e.g. ``None`` or ``NaN`` cells) and to
    any failure while contacting the resolver.

    Args:
        name: Name or nickname of compound.

    Returns:
        SMILES string corresponding to chemical name, or an empty string if the
        lookup did not succeed.
    """
    # Local import: keeps this function independent of the module-level import list.
    from urllib.parse import quote

    try:
        # Percent-encode the whole name (spaces become "%20" as before). Characters
        # such as "#", "?" or "%" would otherwise be interpreted as URL syntax and
        # silently change the query. "/" is left untouched (the default of ``quote``)
        # so identifiers containing slashes produce the same URL as previously.
        # Encoding inside the ``try`` also turns non-string input into "" instead of
        # raising.
        encoded_name = quote(name)
        url = "http://cactus.nci.nih.gov/chemical/structure/" + encoded_name + "/smiles"

        # NOTE(review): hostname checking and certificate verification are switched
        # off for this context. Kept as-is to preserve behavior; confirm whether
        # this is still required.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

        with urllib.request.urlopen(url, context=ctx) as web:
            smiles = web.read().decode("utf8")
    except Exception:
        # Deliberate best-effort: any failure is reported as "no result".
        return ""

    # A payload containing HTML markup is treated as an unsuccessful lookup.
    if "</div>" in smiles:
        return ""

    return smiles
@lru_cache(maxsize=None)
@_disk_cache
def _molecule_to_fingerprint_features(
    molecule: str | Chem.Mol,
    encoder: BaseFingerprintTransformer,
) -> np.ndarray:
    """Compute the fingerprint of one molecule with the given encoder.

    Results are memoized in memory via ``lru_cache`` (unbounded). The function is
    additionally wrapped by ``_disk_cache``, which is defined elsewhere in the
    module (presumably a persistent cache -- confirm against its definition).

    Args:
        molecule: SMILES string or molecule object.
        encoder: Instance of the fingerprint class to be used for computation.

    Returns:
        Array of fingerprint features.
    """
    # The encoder is called with a list, so the single molecule is wrapped in one.
    single_item_batch = [molecule]
    fingerprint = encoder.transform(single_item_batch)
    return fingerprint
def smiles_to_fingerprint_features(
    smiles: Sequence[str],
    encoding: SubstanceEncoding,
    prefix: str | None = None,
    kwargs_conformer: dict | None = None,
    kwargs_fingerprint: dict | None = None,
) -> pd.DataFrame:
    """Compute molecular fingerprints for a list of SMILES strings.

    The keyword-argument dictionaries passed by the caller are never modified;
    the function works on copies.

    Args:
        smiles: Sequence of SMILES strings.
        encoding: Encoding used to transform SMILES to fingerprints.
        prefix: Name prefix for each descriptor (e.g., nBase --> <prefix>_nBase).
        kwargs_conformer: kwargs for conformer generator
        kwargs_fingerprint: kwargs for fingerprint generator

    Returns:
        Dataframe containing fingerprints for each SMILES string.
    """
    # Copy the dictionaries: the deprecated MORGAN_FP branch below updates
    # ``kwargs_fingerprint`` in place, which must not leak into the caller's object.
    kwargs_fingerprint = dict(kwargs_fingerprint) if kwargs_fingerprint else {}
    kwargs_conformer = dict(kwargs_conformer) if kwargs_conformer else {}

    # Map deprecated encodings onto their replacements (with a warning)
    if encoding is SubstanceEncoding.MORGAN_FP:
        warnings.warn(
            f"Substance encoding '{encoding.name}' is deprecated and will be disabled "
            f"in a future version. Use '{SubstanceEncoding.ECFP.name}' "
            f"with 'fp_size' 1024 and 'radius' 4 instead.",
            DeprecationWarning,
        )
        encoding = SubstanceEncoding.ECFP
        kwargs_fingerprint.update({"fp_size": 1024, "radius": 4})

    elif encoding is SubstanceEncoding.RDKIT:
        warnings.warn(
            f"Substance encoding '{encoding.name}' is deprecated and will be disabled "
            f"in a future version. Use '{SubstanceEncoding.RDKIT2DDESCRIPTORS.name}' "
            f"instead.",
            DeprecationWarning,
        )
        encoding = SubstanceEncoding.RDKIT2DDESCRIPTORS

    fingerprint_cls = get_fingerprint_class(encoding)
    fingerprint_encoder = fingerprint_cls(**kwargs_fingerprint)

    # Encoders that need conformers get molecule objects with generated conformers;
    # all others are fed the raw SMILES strings.
    if fingerprint_encoder.requires_conformers:
        mol_list = ConformerGenerator(**kwargs_conformer).transform(
            MolFromSmilesTransformer().transform(smiles)
        )
    else:
        mol_list = smiles

    # One (cached) encoder call per molecule, joined into a single array
    features = np.concatenate(
        [
            _molecule_to_fingerprint_features(mol, fingerprint_encoder)
            for mol in mol_list
        ]
    )

    # Column names follow the pattern "<prefix>_<ENCODING>_<suffix>"
    name = f"{encoding.name}_"
    prefix = prefix + "_" if prefix else ""
    feature_names_out = fingerprint_encoder.get_feature_names_out()

    # If every feature name contains "fingerprint", only the part following that
    # word is used as suffix; otherwise the feature names are used unchanged.
    no_descriptor_names = all("fingerprint" in f for f in feature_names_out)
    suffixes = [
        f.split("fingerprint")[1] if no_descriptor_names else f
        for f in feature_names_out
    ]
    col_names = [prefix + name + suffix for suffix in suffixes]
    df = pd.DataFrame(features, columns=col_names, dtype=DTypeFloatNumpy)

    return df
def get_fingerprint_class(encoding: SubstanceEncoding) -> BaseFingerprintTransformer:
    """Look up the fingerprint class belonging to a substance encoding.

    Apart from one special case, the class is located by comparing
    ``<encoding name> + "Fingerprint"`` case-insensitively against the names
    exposed by the ``fingerprints`` module.

    Args:
        encoding: A substance encoding.

    Raises:
        ValueError: If no fingerprint class for the specified encoding is found.

    Returns:
        The fingerprint class.
    """
    # Special case: this encoding is mapped to its class directly instead of
    # going through the name-based lookup below.
    if encoding is SubstanceEncoding.RDKITFINGERPRINT:
        return fingerprints.RDKitFingerprint

    wanted = (encoding.name + "Fingerprint").casefold()
    matches = (
        candidate for candidate in dir(fingerprints) if candidate.casefold() == wanted
    )

    try:
        cls_name = next(matches)
    except StopIteration as e:
        raise ValueError(
            f"No fingerprint class exists for the specified encoding '{encoding.name}'."
        ) from e

    return getattr(fingerprints, cls_name)
def get_canonical_smiles(smiles: str) -> str:
    """Return the "canonical" representation of the given SMILES.

    Args:
        smiles: The SMILES string to be converted.

    Raises:
        ValueError: If parsing or re-serializing the SMILES fails for any reason.

    Returns:
        The SMILES string produced by writing the parsed molecule back out.
    """
    try:
        # Round trip: parse the input, then serialize the parsed molecule again.
        molecule = Chem.MolFromSmiles(smiles)
        canonical = Chem.MolToSmiles(molecule)
        return canonical
    except Exception:
        raise ValueError(f"The SMILES '{smiles}' does not appear to be valid.")