{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Chemistry" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "from molgraph import chemistry\n", "\n", "import tensorflow as tf\n", "\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# Convert SMILES representation of molecule to an RDKit molecule\n", "rdkit_mol = chemistry.molecule_from_string(\n", " 'OCC1OC(C(C1O)O)n1cnc2c1ncnc2N')\n", "\n", "print(type(rdkit_mol))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `chemistry.features` (level 1)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Symbol: O\n", "BondType: SINGLE\n" ] } ], "source": [ "# Obtain RDKit atom and bond via the RDKit API\n", "atom = rdkit_mol.GetAtoms()[0]\n", "bond = rdkit_mol.GetBonds()[0]\n", "\n", "symbol_feature = chemistry.features.Symbol()\n", "bondtype_feature = chemistry.features.BondType()\n", "\n", "print('Symbol: ', symbol_feature(atom))\n", "print('BondType:', bondtype_feature(bond))\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Arguments of a `chemistry.Feature`\n", "\n", "- `allowable_set` specifies what features should be considered\n", "- `oov_size` specifies the number of bins allotted to \"out-of-vocabulary\" features (based on `allowable_set`) \n", "- `ordinal` specifies if encoding should be ordinal or not (nominal)\n", "\n", "Importantly, the arguments above will only take effect when wrapped in a `chemistry.Encoding`, which occurs automatically inside a `chemistry.Featurizer` or `chemistry.Tokenizer`." 
] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BondType(allowable_set={'DOUBLE', 'TRIPLE', 'AROMATIC', 'SINGLE'}, ordinal=False, oov_size=0)\n", "BondType(allowable_set=['SINGLE', 'DOUBLE'], ordinal=False, oov_size=1)\n" ] } ], "source": [ "print(chemistry.features.BondType())\n", "print(chemistry.features.BondType(allowable_set=['SINGLE', 'DOUBLE'], oov_size=1, ordinal=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### `chemistry.Featurizer` (level 2)\n", "\n", "A `chemistry.Featurizer` (or `chemistry.Tokenizer`) can be built from a list of `chemistry.Feature`s. Note: if `ordinal=False` (default), `allowable_set` will be sorted internally via `sort()`. " ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Symbol: O\n", "Hybridization: SP3\n", "[0. 0. 0. 1. 0. 0. 0. 1.]\n" ] } ], "source": [ "atom_encoder = chemistry.Featurizer([\n", " chemistry.features.Symbol(['C', 'N', 'O'], oov_size=1),\n", " chemistry.features.Hybridization(['SP', 'SP2', 'SP3'], oov_size=1)\n", "])\n", "\n", "print('Symbol: ', atom.GetSymbol())\n", "print('Hybridization:', atom.GetHybridization().name)\n", "\n", "# first and fifth bins are allotted to OOVs\n", "print(atom_encoder(atom))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Create a custom `chemistry.Feature`" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AtomMass: 15.999\n", "Symbol: O\n", "[0. 1. 0. 0. 
0.]\n" ] } ], "source": [ "class AtomMass(chemistry.Feature):\n", " def __call__(self, x):\n", " mass = x.GetMass()\n", " if mass < 5:\n", " return 'x<5'\n", " elif mass < 40:\n", " return '5,\n", " node_feature=,\n", " edge_src=,\n", " edge_dst=,\n", " edge_feature=,\n", " node_position=)\n" ] } ], "source": [ "atom_encoder = chemistry.Featurizer([\n", " chemistry.features.Symbol(allowable_set={'C', 'N', 'O'})\n", "])\n", "bond_encoder = chemistry.Featurizer([\n", " chemistry.features.BondType(allowable_set={'SINGLE', 'DOUBLE'})\n", "])\n", "\n", "mol_encoder = chemistry.MolecularGraphEncoder(\n", " atom_encoder=atom_encoder, # not default, required\n", " bond_encoder=bond_encoder, # not default, optional\n", " positional_encoding_dim=16, # default\n", " self_loops=False, # default\n", " molecule_from_string_fn=chemistry.molecule_from_string # default\n", ")\n", "\n", "print(mol_encoder(rdkit_mol))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here, a list of molecules are encoded as a `GraphTensor`" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GraphTensor(\n", " sizes=,\n", " node_feature=,\n", " edge_src=,\n", " edge_dst=,\n", " edge_feature=,\n", " node_position=)\n" ] } ], "source": [ "smiles = [\n", " 'OCC1OC(C(C1O)O)n1cnc2c1ncnc2N',\n", " 'C(C(=O)O)N',\n", " 'C1=CC(=CC=C1CC(C(=O)O)N)O'\n", "]\n", "# Uses multiprocessing by default\n", "graph_tensor = mol_encoder(smiles, processes=8)\n", "print(graph_tensor)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Inspect generated `GraphTensor`" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "node_feature: tf.Tensor(\n", "[[0. 0. 1.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 
0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]\n", " [0. 0. 1.]], shape=(37, 3), dtype=float32)\n", "\n", "edge_feature: tf.Tensor(\n", "[[0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 1.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 
1.]], shape=(76, 2), dtype=float32)\n", "\n", "edge_dst: tf.Tensor(\n", "[ 1 0 2 1 3 6 2 4 3 5 9 4 6 8 2 5 7 6 5 4 10 13 9 11\n", " 10 12 11 13 17 9 12 14 13 15 14 16 15 17 12 16 18 17 20 23 19 21 22 20\n", " 20 19 25 29 24 26 25 27 36 26 28 27 29 24 28 30 29 31 30 32 35 31 33 34\n", " 32 32 31 26], shape=(76,), dtype=int32)\n", "\n", "edge_src: tf.Tensor(\n", "[ 0 1 1 2 2 2 3 3 4 4 4 5 5 5 6 6 6 7 8 9 9 9 10 10\n", " 11 11 12 12 12 13 13 13 14 14 15 15 16 16 17 17 17 18 19 19 20 20 20 21\n", " 22 23 24 24 25 25 26 26 26 27 27 28 28 29 29 29 30 30 31 31 31 32 32 32\n", " 33 34 35 36], shape=(76,), dtype=int32)\n", "\n" ] } ], "source": [ "print('node_feature:', graph_tensor.node_feature, end='\\n\\n')\n", "print('edge_feature:', graph_tensor.edge_feature, end='\\n\\n')\n", "print('edge_dst:', graph_tensor.edge_dst, end='\\n\\n')\n", "print('edge_src:', graph_tensor.edge_src, end='\\n\\n')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Extract the second subgraph (glycine)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAASwAAAEsCAIAAAD2HxkiAAAbm0lEQVR4nO3deVQUV74H8G83NIJCVAxu4BaDazAqGmJcYhA3Hm6JxuQcwfGpEEft8SQn4pIEYk5mMOPMaDTO0xPfCOSZJ76JEY06KifEqNEoMUpEFHE0QTGhFWSVpfu+P6pEVJZu6O7bNN/P4Y+i+nbVD+VLVVfde0sjhAARyaOVXQBRS8cQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASScYQEknGEBJJxhASSeYquwBqZgoLC8+ePVteXj5w4EBfX1/Z5TgDHgnJAp988omfn9/EiROnTZvWo0ePt99+W3ZFzoAhJHN9++23S5Ysee+99woLC0tKSt55551169bt2bNHdl3NnkYIIbsGah5MJtOpU6dGjBihfGs0Gr28vBYuXLhhwwa5hTV3PBKSubRabXUCAbi4uLi5uZWUlEgsyTkwhNRIZ86cuXv37ujRo2UX0uzxdJQao6qqaty4cQaD4dy5c66uvMbeJPznI4uZTKbIyMhz586lpKQwgU3Hf0GyTFFR0bx58w4dOnTgwIHAwEDZ5TgDhpAscOnSpVdeeaW8vPy7774bOHCg7HKcBD8TkrlSUlJmzJhRVlY2f/78du3aKSt9fHzeeustqXU1ezwSkrlu3LgRFBQEIDs7u3plt27d5FXkJHgkJJKM9wmJJGMIiSRjCIkkYwiJJGMIiSRjCIkkYwiJJOPNerLQvXswGFBRgSefxBNPyK7GGfBISOY5cQKRkejZEx4e6NYNvXujbVs88QSmT8f27aislF1fM8YeM9SQ27cxfz7qn0vG3x/btoEDfBuFIaR65eRg/HhkZqrfjh6NyZPRrRvc3JCXh6NHsW8fSksBwN0dO3ZgxgyJxTZTDCHVzWjEiy/i+HEA6N4diYkYM+bRNjk5mD8fhw4BQOvW+OEH9O1r7zqbOX4mpLp98omaQB8fHD1aSwIB+Plh716MHw8ApaVYuNCuFToFHgmpDiYT/P1x9SoAJCZizpz6GufkoH9/FBcDwOnTGDbMHhU6Cx4JqQ7HjqkJ7NwZr73WQGM/P7z6qrocH2/bwpwOQ0h1UE5EAYSGwpzZnF5++dE3knkYQqpDWpq6MHSoWe2rm6Wn87ahRRhCqkNenrrg52dW+y5doNMBQFUV8vNtVZUzYgipDnfvqgteXua+pW1bdYEhtARDSHXQ3v/dMP/6udGoLri4WL8e58UQUh3uT2qIwkKz2guBoiJ1uX17W1TkrBhCqkPHjurC9etmtc/JQVUVALRq9eC8lMzAEFIdqm+4nz1rVvsfflAXBg8265YG3ccQUh1eeEFd2L/frFsOu3erC6NG2aokJ8UQUh1GjECfPgBgMODzzxtofOMG/u//1OXf/c6mdTkfhpDqoNHgD39Ql99+G7/8UmfLqipERUF5ZO/EiXjmGXuU50TYgZvqZjRi7FgcOwYAvXohKamWntkGAyIj1XNRLy+cO4devexdZzPHEFK9fv4Z48fj8mUA0GoxeTLCwtC9O9zdkZuLo0eRlISCAgBwd8fOnZg6VWq5zRJDSA3Jy0NEBA4erK9N797Yvp2XZBqHISTzHDmCbdtw+DBu336w0s0No0Zh5kzMnw83N3nFNW8MIVnCaMSvv8JgQHk5OnZEp05wd5ddU7PHEBJJxlsU1JCrV3Hy5IORTY8zGnHyJM6csWNNToUhpIb88Y8YMaK
+eUdLSjBiBIKD7ViTU2EIiSRjCIkkYwiJJGMIiSRjCIkkYwiJJGMIiSRjCIkkYwiJJGMIiSRjCIkkYwiJJGMIiSRjCIkkYwiJJGMIiSRjCIkkYwgJACoqKqqUZyqR3TGELd2tW7feffddX1/fXpw5WxKGsEXbs2dPr169Dh061LNnz0pzHr1ENsAQtmgvvvhidnb2qVOngjlNkzx8mGOL1q5du3bVj8UmSXgkJJKMISSSjCEkkowhJJKMISSSjFdHW7o9e/ZUVFRkZmbeu3dv165dAGbMmOHqyl8M++G/dUu3Zs2agoICAB06dFixYgWA0NBQhtCe+G/d0qWlpckuoaXjZ0IiyRhCIskYQmpAlFbrqtFs02jqalCk0bhqNO21/F1qJP7DUQOMJpNRCCFEXQ2EEEYhjCaTPatyJvJDOH/+/GHDhkVERMguhEgOySH8/PPPExMTKysrMzMz5VZCJIvMEN6+fXvZsmVvvfXWgAEDJJZBJJfMEC5btszDw+Odd96RWAORdNJu1h88ePCzzz7bu3dvmzZtZNVA5AjkHAlLS0sXL148e/bssLAwKQUQOQ45IVy5cqXBYPjLX/4iZe9EDkXC6Wh6evqmTZv69u27Zs0aZc3p06cLCwujoqLWrl3LKU+opZHzmXDBggVS9kvkgCSEMCAgYMuWLTXXvP7669nZ2Y+sJGoh5PeYIWrhHGI84YYNGzj9M7VYDhHCjh07yi6BSBqejhJJxhASScYQEklmwWdCZdCmRoO6x1hb3JKIzD0SJibCxQUuLli0qIGWqalqy0Z0C125Uo2uVovU1Ppa9uihtuTjZam5c9DTUSGwZAl424JaAgcNIYALF/DXv8ougsj2HDSELi4AsGYNrl2TXAmRrTloCH/3OwAoLW34IyhRc+egIZw6FSNHAsDBg/jnP2VXQ2RLDhpCABs2QJlOdulS3L0ruxoim3HcEAYGQpmLNDcXsbGSiyGyHccNIYC4OLRtCwAbN+KHH2RXQ2QbFo+i+OorjB9fX4P8/MZX84hOnfDee3jrLRiNeOMNnDwJPu+AnI/FIczJQU6OLSqpnV6PhAScO4fTp/H3v2PxYvvtmmr1/vvvr1u3zmQyjRw5Mjk52d3dXXZFzZ7FIfT1Rb9+9TXIz7fmqaOrKzZtwpgxEAKrV+OVV9C5s9U23kyZTKbPPvvs8OHDVVVVgYGBb7zxhqenpx32e+DAgblz5+bl5SnfHj58uH///n/+858nTJhgh71XVVXt2rXryJEj5eXl/fv3//3vf9++fXs77NcehHkSEgQgABEV1UDLr79WW4aGmrntB1asUN+7Z89D6+fOVdf/53+qa7p3V9dUVlq8l+bu9ddf12q1U6ZMCQ8P9/T0HDRoUFFRke12N3/+fACTJk1SnqGt0WjCwsLWrl07cOBA5Vdo1KhRALy8vGxXQ2Vl5YQJEzw9PefMmbNo0SIfHx9fX9+8vDzb7dGemkcIDQbRoYMAhFYrTp0SogWH8F//+heAdevWKd8ePXpUo9F88MEHNtqd0Wh88cUXlbBptdohQ4ZcuHCh+qX4+PhOnTopr+p0ulu3btmojPT09D59+pw7d075NiMjw8XF5d1337XR7uyseYRQCLF5s/rSyJHCZGq5IVy4cKGXl1dpaWn1mpEjRw4aNMgW+0pNTX322WeVjI0YMeLs2bOPt8nPz1++fLlOpwPQrl27uLi4e/fu2aKYRzz11FOvvPKKHXZkB83mamNUFIKCAOD48Rbdh+bKlSuDBg3y8PCoXvPcc89lZ2dbdy85OTkREREvvfTSuXPn/Pz84uPjjx8/Pnjw4MdbtmvXbu3atT/99FNYWFhBQcGKFSsCAgJ27dpl3XoeYTKZ8vPznWae6GYTQq0WGzeqtyhWrXp0lFNODj76COXlUkqzq9zc3CeffLLmGh8fn5KSkrtW6lVUWloaGxvr7++fmJjo4eERExNz+fLliIgITb0DtPv06bN3797Dhw8PHDgwKyv
r1VdfHT9+/IULF6xS0uP27NmTn5//2muv2Wj7dtZsQghg+HBERgJAVhZycx96aflyREfD3x8JCVJKs59WrVpVVFTUXFNeXq7RaFq1atXELQshdu3aNWDAgPfff7+8vHzWrFkZGRmxsbE1j7r1CwkJOXv27JYtW5588skjR44MGTIkKirKYDA0sbBH5ObmLl68ePr06SEhIdbdsix2DeG9e/jHPzB9OoYORVAQFi2CpX8r//hH1Do94sKFGDQIv/yCuXMRHIzz561SryPy9fX99ddfa6757bffOnTo0MT7dWlpaaNHj3711VevX78eGBj47bffJiUl9ejR4/GWJSUlmzZtevnll3/88cfHX9XpdJGRkZcuXdLr9SaTaevWrX379t2wYYPRaGxKedVu3rw5btw45QzZKht0CGZ+dmz6hZkbN8SQIQIQXl4iIEB07CgA0arVQ9dg6rkwU+0f/1DbPHJhxmgU8fHCx0e9iBoeLn791cwfrjlZvny5q6vrrzV+Nn9//+Dg4EZv8ObNm5GRkVqtFkCXLl22bNliNBprbVlRUbFixQpvb+/WrVsD+OKLL+rfckZGxsSJE5Vfs/79+x88eLDRRSqOHj3apUuXUaNG3b59u4mbcij2C+F//IcAxJtviuJiIYQwmcSaNQIQPj6ipERtY04ITSYxdmwtIVTcuSOio4WbmwBE+/YiLk6Ul5v5IzYPFy9edHFxmTNnjhKVDRs2ANixY0cjNlVeXr5+/fonnngCgJubm16vv3v3bv1viY2NPXz4cGZmpjkhVCQnJ/fq1UuJYlhYWHZ2diNKFUJs2bJFOcyWO9n/qPkh3LdPDB4sBg8WH37YQMszZ9SWS5c+tH7/frFy5UNrjEbh7y8AceiQumbjRhEYKAIDxTff1LeL8+fVZoGBoqqqlgaZmSI0VE1p377iq68aqLl52bx5s6urq5+fX9++fQFERUWZTCZLN5KcnPzUU081LhvKxVgzQyjup93Ly6s67YWFhRZV++mnnwJwcXF5qobnn3/eoo04LHNDaCMzZghAbN9uk40fPiwGDFCjGBIi7t9kdgZXr17dunXrxo0bz5w5Y+l7L168OGnSJCV+/fr1O3DggKVbsDSEihs3blSf93bt2rWe897HXbp0KekxycnJllbumCSHcMwYAYi9e221/YoKsX69aNtWAEKnE3q9KCiw1b4c3+3bt/V6vdL7zNvbe/369ZWN6uvQuBAqTp8+/cILLyh/AoYNG3b8+PFGbMTJyAzhb78JDw/h5ibu3LHtjgwGodcLFxcBiA4dxPr1tZ/EOrHKykrlzgEAV1fXyMjIpnS8bEoIhRAmkykpKal79+4ANBrNrFmzrl+/3uhinIDMEOr1AhCRkXbaXVqaGD1aPTsdOlQcPWqn/Up35MiRZ555Rjn4jBs37vz5803cYBNDqCgqKlqxYoVye9PT0/N///Y3UVbWxMKaKWkh/Oc/hUYj/PyEna82JyeLnj3VKIaFiX//2657t7PLly/PmjVLid/TTz+dlJRklc1aJYSKn3/+OTw8HIAhMFB06ybi45u+zWZHTgj37xfu7sLHR87FkpISERcnPD0FIDw8RHS0sOVIIDmKiopiYmKU40ybNm1iYmKs2K/aiiFUpKemioAA9U9jcLBo8rG6eZEQwo8/Fjqd8PYWtXXKt5+cHBEeLjQaAQhfXxEfLyy/zu+Iao4w0mq14eHhVhxhtHr16kc6e3z00UfW2bTS2ULpw+HEnS1qY9cQlpWJefPUe3cXL9pzz3U6dUo8/7z6J/i558R338kuqGlOnjz5/PPPK/F47rnnvrP2z3Pjxo0zD7PyGMKanS3atXPCzha1sV8Is7LUM44pUxzrPoHJJOLjRefOAhAajQgPF7m5smuy3C+//BIeHq6MdfD19Y2Pj2/EHXxHcemS2sEKEH36iH37ZBdkW3YKYXGxerNO+QPXvv1DXw12hbNPhTExolUrAYg2bURMjLDL2FQrKCkpiYu
LU6aZad26dXR0tE1nu7CfRzpb/PST7IJsRSOEaEr/bzMVF2PGjDpfDQlBdLQdqmhYVhbefBP79gFAUFDF6tX/mjJliuyi6rN37169Xn/t2jUAYWFhGzdu7Nmzp+SarKiyEps3IyYGd+9Cp8OiRVizRp2L1pnI/ivgiI4cEQEBYtSobQCCg4ObfmPNFtLS0pQZlgAMHTr0qBPf93T2zhYMYe0qKsTHH29SJtVzdXXV6/V3bN2vx2x5eXl6vd7FxQVAhw4d1q9fX+Vcv5S1++EHtZcjIIYMaaCPf7PCENanZmfL9u3bN7qzpbVUVFRUDz7S6XR6vb7Aoa5x2cEjnS2uXpVdkBUwhA17ZNjB/v37pZShTLarlBESEpKRkSGlDPlKS0VcnPDyetDZwsKBUY6GITRXcnJy7969lQyEhYVduXLFbrvOzMwMDQ1Vdt23b9+vnGx8ZOM4UWeLFhTCoqKiZcuWde3aVafT+fv7Kw9UsGgLj58NNjgUvYnu3LlT83w4Li7O+caVN8n334sRI9Sz0+HDxYkTZr3r6lWxdauIiRGLF4s//EF88IHYuVMYDPW9JSVFxMeL+HjRYOeEkyfVlmaPk24pITQajSNHjnz22WeTkpK+/vrr5cuXo8Y81hZ5fFIWW1wXUQYf+fj44H7vs99++83qe3EGFnW22LdPneno8S8XFzFlikhPr/2N1TM1fP11A/VERaktExLM/AlaSgjv3bu3du3amvMjBQUFjRo1qtEbPHPmzEjlid6AMj2ZNcpUpaSkBAQEKBsPDg6unv6d6qR0tnB3f9DZ4pGBUZWVap/J6i93d9Gtm+jSRbi6PhTF//qvWrbPEFpdVVVVr169pk2b1pSNKINTlXkBlcGp165da2JhWVlZ1YOPevfuba3BRy1FVpaYOlXNwIIFD730+uvqeqV3+IkTonpyjdJSkZwsgoIeRPGTTx7dMkNoRT/99FNycvL06dN9fHyscoQpKSmJiYlRZsht3bp1TExMzQdFmK+4uDgmJkaZPlQZfFTWUge5NlVKiggMFJmZD9b8938/OPrVNZOfySSWLXvQ7JHzUobQioYNG6YE5s0337TiELua/aeVqWnNv+pjMpni4+M7d+6sHFHDw8Nzm2MXcodVXq5+YgTEpk0NNA4LU1tOn/7QeobQ6o4fP+7t7R0REWHdzaamplY/NSUoKOjkyZMNvuX7778fMWKE8pbhw4efMPP6Hplvxw41Ff36iQbnd7t6VWi16ofDmh8uGEJb0Ov1bm5uVr+waf6Y2pycHOcZfOTI5sxRU9HgnLmKkBC1/ZYtD1baMoTN6YEwTXT58uWa31bHz7p70Wq1ERERmZmZ0dHROp0uMTHx6aefjo2NLa/xyKg7d+6sWLGif//+iYmJ7u7u0dHRFy9ebPDJR9RIx4+rC2PGmNW+utl339mknseZGdbmzmAweHl5hYeHZ2Rk5ObmJiQkeHh4zJ4926Y7rTnPkr+//7Zt20wm09tvv63cfAcQFhZ21Sl6Pzquqiq1Vw1g7pxiu3er7UeOfLCy+kj4xRfi1q36viIieDpap5SUlOqZ/9zc3ObMmWPr/i6Kr776SpmvHoAy9AGAh4fH7t277bD3ls5gUCOh0TT8gVDxzTfqWwYMeLCyOoTmf/F09HHBwcHp6em3bt3KysrKz89PTExUOqDZWmhoaHp6+qpVqzQajdFo1Gq1s2fPvnv37vTp0+2w95aupERd8PBQHzHboDZt1IXiYpuU9BhX++zGcXTq1Em5amJPOp3uww8/DA8Pz8rKGjx4cLdu3excQMvl6akulJXBZDIrh9W5rfVvdHQ0+vWr7+3x8UhNtaTElhdCifr169ev/v8/srq2baHRQAgIgYICeHs3/JY7d9SFdu1qeXXSJIwdW9/bT560NIQt6HSUWiIXF9x/OqK5z4VOT1cX+vSxSUmPYQjJ2d2fiNXcWw4nTqgLQUE2qecxDCE5u/uP7Mb//E/DjW/exJEjAODiggk
TbFhVDQwhObvZs9GxIwCcP48dOxpovGoVqqoAICwM9po8kiG0nytXrhw7duz69euyC2lhWrXCypXq8uLF+P77Oltu3oyEBADQ6RATY4/aADCE9mEwGMaOHevv7z969OiePXtOmzatpPo6ONmBXo9x4wCgoADBwfjwQ9y+/VCDK1cwdy6WLIHSjTE2FkOG2K06htAe5s2bl5aWtm/fvuLi4s8///zgwYPLli2TXVRLotXiyy/VD4clJXjnHXTqhCFDEBaGCRPQpw/8/ZGQ8CCBq1bZtTzLOwKRZbKysgB88MEH1WuWLFmi0+kcZzbhlqKiQnz8sfDxqbOj2aBB4uDB2t9ry1EUvFlvc2fOnAEwbdq06jVTp07dtGnTjz/++NJLL8mrq+XR6bB0KRYsQGoqUlLw738jLw86HTp1Qr9+mDQJw4bV2aVmwQIEBwPA0083sJeZM+HvDwDDhplZF0Noczk5OQD8/Pyq1yjLN27ckFZTS+bhgcmTMXmyZe+q53lGjwgJQUiIRdvmZ0KbKyoqAtC6devqNW3atAFQWFgorSZyJAyhzXXp0gXA7RqX4/Ly8gB07dpVWk3kSBhCm+vevTuACzU6LmZkZABQ5kokstNDQluy4uLiLl26TJ48OSkpCYDJZBo3blx2dva1a9e0Zo5wI6fGXwKb8/T0jI2N3bVr19y5cxMSEmbOnPnNN9989NFHTKDjO3bs2MyZMwMCAsaOHbt27drS0lJb7IVHQjvZvn37p59+mpub26NHD71ez2H1jm/nzp0RERHz5s0bOnRodnb2xo0bJ06cuHv3bqvviCEkqt3WrVsBREZGKt/GxsauWbPGYDB4mzMy2BK8T0hUu+r4VdNoNLaYlpIhJKpPWVlZQUFBamrq+vXrly5d2r59e6vvgqejRPVZtWrVn/70JwBjxoz58ssvGUIieysrKyssLLx48eLSpUsBpKWlubm5WXcXDCGRWQ4dOjRx4sT9+/dPtrTfaUN4q4qodmVlZVXKVBcAAC8vL9imxy9DSFS7uXPnhoaG5ubmAsjLy4uNjXV3dx9j5lNlLMEQEtVu9erVN2/e9PX19fb27tSpU1pa2s6dO5Xu+NbFz4REdRJCnD9//vr1697e3sOHD2/VqpUt9sIQEknG01EiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyRhCIskYQiLJGEIiyf4f3oDvImYsotsAAAAASUVORK5CYII=", "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from molgraph.chemistry import vis\n", "\n", "index = 1\n", "\n", "# visualize the second molecule of the GraphTensor as reference\n", "vis.visualize_molecule(\n", " molecule=smiles[index], \n", " atom_index=True, \n", " bond_index=True\n", ")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"GraphTensor(\n", " sizes=,\n", " node_feature=,\n", " edge_src=,\n", " edge_dst=,\n", " edge_feature=,\n", " node_position=)\n", "\n", "node_feature: tf.Tensor(\n", "[[1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]], shape=(5, 3), dtype=float32)\n", "\n", "edge_feature: tf.Tensor(\n", "[[0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [0. 1.]], shape=(8, 2), dtype=float32)\n", "\n", "edge_dst: tf.Tensor([1 4 0 2 3 1 1 0], shape=(8,), dtype=int32)\n", "\n", "edge_src: tf.Tensor([0 0 1 1 1 2 3 4], shape=(8,), dtype=int32)\n", "\n" ] } ], "source": [ "print(graph_tensor[index], end='\\n\\n')\n", "print('node_feature:', graph_tensor[index].node_feature, end='\\n\\n')\n", "print('edge_feature:', graph_tensor[index].edge_feature, end='\\n\\n')\n", "print('edge_dst:', graph_tensor[index].edge_dst, end='\\n\\n')\n", "print('edge_src:', graph_tensor[index].edge_src, end='\\n\\n')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Separate `GraphTensor` (its subgraphs) into separate rows" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GraphTensor(\n", " sizes=,\n", " node_feature=,\n", " edge_src=,\n", " edge_dst=,\n", " edge_feature=,\n", " node_position=)\n", "\n", "node_feature: \n", "\n", "edge_feature: \n", "\n", "edge_dst: \n", "\n", "edge_src: \n", "\n", "graph_indicator: None\n" ] } ], "source": [ "graph_tensor = graph_tensor.separate()\n", "print(graph_tensor, end='\\n\\n')\n", "print('node_feature:', graph_tensor.node_feature, end='\\n\\n')\n", "print('edge_feature:', graph_tensor.edge_feature, end='\\n\\n')\n", "print('edge_dst:', graph_tensor.edge_dst, end='\\n\\n')\n", "print('edge_src:', graph_tensor.edge_src, end='\\n\\n')\n", "print('graph_indicator:', graph_tensor.graph_indicator)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Merge 
`GraphTensor` (its subgraphs) into a single disjoint graph" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GraphTensor(\n", " sizes=,\n", " node_feature=,\n", " edge_src=,\n", " edge_dst=,\n", " edge_feature=,\n", " node_position=)\n", "\n", "node_feature: tf.Tensor(\n", "[[0. 0. 1.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [1. 0. 0.]\n", " [0. 0. 1.]\n", " [0. 0. 1.]\n", " [0. 1. 0.]\n", " [0. 0. 1.]], shape=(37, 3), dtype=float32)\n", "\n", "edge_feature: tf.Tensor(\n", "[[0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 1.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 
1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [1. 0.]\n", " [0. 1.]\n", " [0. 1.]\n", " [0. 1.]], shape=(76, 2), dtype=float32)\n", "\n", "edge_dst: tf.Tensor(\n", "[ 1 0 2 1 3 6 2 4 3 5 9 4 6 8 2 5 7 6 5 4 10 13 9 11\n", " 10 12 11 13 17 9 12 14 13 15 14 16 15 17 12 16 18 17 20 23 19 21 22 20\n", " 20 19 25 29 24 26 25 27 36 26 28 27 29 24 28 30 29 31 30 32 35 31 33 34\n", " 32 32 31 26], shape=(76,), dtype=int32)\n", "\n", "edge_src: tf.Tensor(\n", "[ 0 1 1 2 2 2 3 3 4 4 4 5 5 5 6 6 6 7 8 9 9 9 10 10\n", " 11 11 12 12 12 13 13 13 14 14 15 15 16 16 17 17 17 18 19 19 20 20 20 21\n", " 22 23 24 24 25 25 26 26 26 27 27 28 28 29 29 29 30 30 31 31 31 32 32 32\n", " 33 34 35 36], shape=(76,), dtype=int32)\n", "\n" ] } ], "source": [ "graph_tensor = graph_tensor.merge()\n", "print(graph_tensor, end='\\n\\n')\n", "print('node_feature:', graph_tensor.node_feature, end='\\n\\n')\n", "print('edge_feature:', graph_tensor.edge_feature, end='\\n\\n')\n", "print('edge_dst:', graph_tensor.edge_dst, end='\\n\\n')\n", "print('edge_src:', graph_tensor.edge_src, end='\\n\\n')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 4 }