{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Layers and models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from molgraph.chemistry import (\n",
    "    MolecularGraphEncoder,\n",
    "    Featurizer,\n",
    "    features,\n",
    ")\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Construct a `MolecularGraphEncoder` from an atom `Featurizer` and a bond `Featurizer`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Atom-level featurizer; handed to MolecularGraphEncoder as its first argument.\n",
    "atom_encoder = Featurizer(\n",
    "    [\n",
    "        features.Symbol({'C', 'N', 'O'}, oov_size=1),\n",
    "        features.Hybridization({'SP', 'SP2', 'SP3'}, oov_size=1),\n",
    "        features.HydrogenDonor(),\n",
    "        features.HydrogenAcceptor(),\n",
    "        features.Hetero(),\n",
    "    ]\n",
    ")\n",
    "\n",
    "# Bond-level featurizer; handed to MolecularGraphEncoder as its second argument.\n",
    "bond_encoder = Featurizer(\n",
    "    [\n",
    "        features.BondType({'SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC'}),\n",
    "        features.Rotatable(),\n",
    "    ]\n",
    ")\n",
    "\n",
    "encoder = MolecularGraphEncoder(atom_encoder, bond_encoder)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Obtain the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
| \n", " | Compound ID | \n", "ESOL predicted log solubility in mols per litre | \n", "Minimum Degree | \n", "Molecular Weight | \n", "Number of H-Bond Donors | \n", "Number of Rings | \n", "Number of Rotatable Bonds | \n", "Polar Surface Area | \n", "measured log solubility in mols per litre | \n", "smiles | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Amigdalin | \n", "-0.974 | \n", "1 | \n", "457.432 | \n", "7 | \n", "3 | \n", "7 | \n", "202.32 | \n", "-0.77 | \n", "OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)... | \n", "
| 1 | \n", "Fenfuram | \n", "-2.885 | \n", "1 | \n", "201.225 | \n", "1 | \n", "2 | \n", "2 | \n", "42.24 | \n", "-3.30 | \n", "Cc1occc1C(=O)Nc2ccccc2 | \n", "
| 2 | \n", "citral | \n", "-2.579 | \n", "1 | \n", "152.237 | \n", "0 | \n", "0 | \n", "4 | \n", "17.07 | \n", "-2.06 | \n", "CC(C)=CCCC(C)=CC(=O) | \n", "