{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Interpretability" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from molgraph.chemistry import MolecularGraphEncoder\n", "from molgraph.chemistry import Featurizer\n", "from molgraph.chemistry import features\n", "\n", "from molgraph.models import GradientActivationMapping\n", "from molgraph.models import IntegratedSaliencyMapping\n", "\n", "import tensorflow as tf\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "np.set_printoptions(suppress=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Construct a `MolecularGraphEncoder`" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "MolecularGraphEncoder(atom_encoder=Featurizer(features=[Symbol(allowable_set=['[OOV:0]', 'C', 'N', 'O'], ordinal=False, oov_size=1), Hybridization(allowable_set=['[OOV:0]', 'SP', 'SP2', 'SP3'], ordinal=False, oov_size=1), HydrogenDonor(), HydrogenAcceptor(), Hetero()]), bond_encoder=Featurizer(features=[BondType(allowable_set=['AROMATIC', 'DOUBLE', 'SINGLE', 'TRIPLE'], ordinal=False, oov_size=0), Rotatable()]), positional_encoding_dim=None, self_loops=False)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Atom-level features. oov_size=1 adds the '[OOV:0]' entry visible in this cell's output.\n", "atom_encoder = Featurizer(\n", "    [\n", "        features.Symbol({'C', 'N', 'O'}, oov_size=1),\n", "        features.Hybridization({'SP', 'SP2', 'SP3'}, oov_size=1),\n", "        features.HydrogenDonor(),\n", "        features.HydrogenAcceptor(),\n", "        features.Hetero(),\n", "    ]\n", ")\n", "\n", "# Bond-level features.\n", "bond_encoder = Featurizer(\n", "    [\n", "        features.BondType({'SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC'}),\n", "        features.Rotatable(),\n", "    ]\n", ")\n", "\n", "# Combine both featurizers; positional encodings are left unset (None).\n", "mol_encoder = MolecularGraphEncoder(\n", "    atom_encoder,\n", "    bond_encoder,\n", "    positional_encoding_dim=None,\n", ")\n", "mol_encoder" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain dataset" ] }, { "cell_type": "code", 
"execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Compound ID | \n", "ESOL predicted log solubility in mols per litre | \n", "Minimum Degree | \n", "Molecular Weight | \n", "Number of H-Bond Donors | \n", "Number of Rings | \n", "Number of Rotatable Bonds | \n", "Polar Surface Area | \n", "measured log solubility in mols per litre | \n", "smiles | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Amigdalin | \n", "-0.974 | \n", "1 | \n", "457.432 | \n", "7 | \n", "3 | \n", "7 | \n", "202.32 | \n", "-0.77 | \n", "OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)... | \n", "
| 1 | \n", "Fenfuram | \n", "-2.885 | \n", "1 | \n", "201.225 | \n", "1 | \n", "2 | \n", "2 | \n", "42.24 | \n", "-3.30 | \n", "Cc1occc1C(=O)Nc2ccccc2 | \n", "
| 2 | \n", "citral | \n", "-2.579 | \n", "1 | \n", "152.237 | \n", "0 | \n", "0 | \n", "4 | \n", "17.07 | \n", "-2.06 | \n", "CC(C)=CCCC(C)=CC(=O) | \n", "