Skip to content

API Reference

Main Classes

ScipyKernel

On-the-fly heat diffusion kernel using scipy matrix exponentiation.

Source code in tiedie/kernel_scipy.py
class ScipyKernel:
    """On-the-fly heat diffusion kernel using scipy matrix exponentiation."""

    def __init__(self, network_file):
        """Build the diffusion kernel from a network file.

        Input:
            network_file - a tab-delimited file in .sif network format:
            <source> <interaction> <target>

        Returns:
            Kernel object.
        """
        self.labels = {}
        # The number of rows and columns for each kernel
        self.ncols = {}
        self.nrows = {}

        # parse the network, build indexes
        edges, nodes, node_out_degrees = self.parse_net(network_file)
        num_nodes = len(nodes)
        node_order = list(nodes)
        index2node = {}
        node2index = {}
        for i, node in enumerate(node_order):
            index2node[i] = node
            node2index[node] = i

        # SCIPY builds a sparse matrix from parallel (row, col, data)
        # triplet arrays; array.array gives compact C-typed storage
        row = array('i')
        col = array('i')
        data = array('f')

        # diagonal entries hold each node's degree (graph Laplacian)
        for i in range(num_nodes):
            degree = node_out_degrees.get(index2node[i], 0)
            row.append(i)
            col.append(i)
            data.append(degree)

        # Off-diagonal entries: -1 for each direction of every edge
        # (-1 is the negative of the adjacency matrix, per the Laplacian).
        # Iterate the edge set directly instead of scanning all node pairs:
        # the previous O(n^2) scan was prohibitively slow on large networks.
        for source, target in edges:
            i = node2index[source]
            j = node2index[target]
            if i == j:
                continue  # self-loops contribute only to the diagonal
            row.append(i)
            col.append(j)
            data.append(-1)

        # Build the graph laplacian: the CSC matrix provides a sparse matrix
        # format that can be exponentiated efficiently
        L = coo_matrix((data, (row, col)), shape=(num_nodes, num_nodes)).tocsc()
        time_T = -0.1
        self.laplacian = L
        self.index2node = index2node
        # Matrix exponential via scipy's Pade approximation: accurate but
        # computationally expensive -- O(n^2), n = # of features, in memory
        # as well.
        self.kernel = expm(time_T * L)
        self.labels = node_order

    def get_labels(self):
        """Return the set of all node/gene labels used by this kernel object."""
        return set(self.labels)

    def print_laplacian(self):
        """Debug helper: print the Laplacian as tab-separated label/label/value triples."""
        cx = self.laplacian.tocoo()
        for i, j, v in zip(cx.row, cx.col, cx.data):
            print('\t'.join([self.index2node[i], self.index2node[j], str(v)]))

    def parse_net(self, network):
        """Parse a .sif network, using just the first and third columns
        to build an undirected graph. Store the node degrees in an index
        while we're at it.

        Input:
            network: path to a tab-delimited .sif file

        Returns:
            (edges, nodes, degrees): edges holds both orientations of each
            undirected edge; degrees counts each undirected edge once per
            endpoint.
        """
        edges = set()
        nodes = set()
        degrees = {}
        # context manager so the file handle is closed even on error
        # (previously the handle returned by open() leaked)
        with open(network) as fh:
            for line in fh:
                parts = line.rstrip().split('\t')
                source = parts[0]
                target = parts[2]

                # if inputting a multi-graph, count duplicate edges only once
                if (source, target) in edges:
                    continue

                edges.add((source, target))
                edges.add((target, source))
                nodes.add(source)
                nodes.add(target)

                degrees[source] = degrees.get(source, 0) + 1
                degrees[target] = degrees.get(target, 0) + 1

        return (edges, nodes, degrees)

    def kernel_multiply_one(self, vector):
        """Multiply the kernel by the supplied input heat vector.

        Input:
            vector: a hash mapping gene labels to floating point values

        Returns:
            A hash of diffused heats, indexed by the same names as the
            input vector.
        """
        # Order the input heats by the kernel's fixed label order; genes
        # missing from the network default to zero heat. (Renamed from
        # 'array' to avoid shadowing the array.array type imported above.)
        heats = [vector.get(label, 0) for label in self.labels]

        # sparse matrix-vector product
        value = self.kernel * heats

        # convert back to a hash of diffused heats
        return {label: float(value[idx]) for idx, label in enumerate(self.labels)}

    def diffuse(self, vector, reverse=False):
        """Diffuse input heats over the kernel.

        Input:
            vector: {'gene1': heat1, 'gene2': heat2, ...}
            reverse: unused; kept for interface compatibility with other diffusers

        Returns:
            Diffused heat vector as a hash
        """
        return self.kernel_multiply_one(vector)

Functions

__init__(network_file)

Input:

network_file - a tab-delimited file in .sif network format:
<source> <interaction> <target>

Returns:

Type Description

Kernel object.

Source code in tiedie/kernel_scipy.py
def __init__(self, network_file):
    """Build an on-the-fly heat diffusion kernel from a .sif network.

    Input:

        network_file - a tab-delimited file in .sif network format:
        <source> <interaction> <target>

    Returns:
        Kernel object.

    """

    self.labels = {}
    # The number of rows and columns for each kernel
    self.ncols = {}
    self.nrows = {}

    # parse the network, build indexes
    edges, nodes, node_out_degrees = self.parse_net(network_file)
    num_nodes = len(nodes)
    node_order = list(nodes)
    index2node = {}
    node2index = {}
    for i in range(0, num_nodes):
        index2node[i] = node_order[i]
        node2index[node_order[i]] = i

    # construct the diagonals
    # SCIPY uses row and column indexes to build the matrix
    # row and columns are just indexes: the data column stores
    # the actual entries of the matrix
    row = array('i')
    col = array('i')
    data = array('f')
    # build the diagonals, including the out-degree
    for i in range(0, num_nodes):
        # diag entries: out degree
        degree = 0
        if index2node[i] in node_out_degrees:
            degree = node_out_degrees[index2node[i]]
        # append the out-degree to the end of the data array
        # (insert at len() is equivalent to append)
        data.insert(len(data), degree)
        # build the diagonals
        row.insert(len(row), i)
        col.insert(len(col), i)

    # add off-diagonal edges
    # NOTE(review): this scans all node pairs -- O(n^2) in the number of
    # nodes; iterating the edge set directly would be far cheaper
    for i in range(0, num_nodes):
        for j in range(0, num_nodes):
            if i == j:
                continue
            if (index2node[i], index2node[j]) not in edges:
                continue
            # append index to i-th row, j-th column
            row.insert(len(row), i)
            col.insert(len(col), j)
            # -1 for laplacian: i.e. the negative of the adjacency matrix
            data.insert(len(data), -1)

    # Build the graph laplacian: the CSC matrix provides a sparse matrix format
    # that can be exponentiated efficiently
    L = coo_matrix((data, (row, col)), shape=(num_nodes, num_nodes)).tocsc()
    time_T = -0.1
    self.laplacian = L
    self.index2node = index2node
    # this is the matrix exponentiation calculation.
    # Uses the Pade approximation for accuracy. Computationally expensive:
    # O(n^2), n = # of features, in memory as well.
    self.kernel = expm(time_T * L)
    self.labels = node_order

diffuse(vector, reverse=False)

Diffuse input heats over the set of kernels, add to this object

Input

{'gene1': float(heat1) 'gene2' : float(heat2) ... }

Returns:

Type Description

Diffused heat vector

Source code in tiedie/kernel_scipy.py
def diffuse(self, vector, reverse=False):
    """Diffuse the input heats over this kernel.

    Input:
        vector: hash of {gene: float heat}
        reverse: unused; kept for interface compatibility

    Returns:
        Hash of diffused heats
    """
    # single kernel: simply delegate to the multiply routine
    return self.kernel_multiply_one(vector)

get_labels()

Return the set of all node/gene labels used by this kernel object

Source code in tiedie/kernel_scipy.py
def get_labels(self):
    """Return the set of all node/gene labels used by this kernel object."""
    return set(self.labels)

kernel_multiply_one(vector)

Multiply the specified kernel by the supplied input heat vector.

Input

vector: A hash mapping gene labels to floating point values kernel: a single index for a specific kernel

Returns:

Type Description

A hash of diffused heats, indexed by the same names as the input vector

Source code in tiedie/kernel_scipy.py
def kernel_multiply_one(self, vector):
    """Multiply the kernel matrix by the supplied input heat vector.

    Input:
        vector: A hash mapping gene labels to floating point values

    Returns:
        A hash of diffused heats, indexed by the same names as the
        input vector
    """
    # Order the input heats by the kernel's label order; genes that are
    # not in the network default to zero heat.
    ordered = [vector.get(label, 0) for label in self.labels]

    # sparse matrix-vector product
    diffused = self.kernel * ordered

    # convert back to a hash of diffused heats
    return {label: float(diffused[idx]) for idx, label in enumerate(self.labels)}

parse_net(network)

Parse .sif network, using just the first and third columns to build an undirected graph. Store the node out-degrees in an index while we're at it.

Source code in tiedie/kernel_scipy.py
def parse_net(self, network):
    """Parse a .sif network, using just the first and third columns
    to build an undirected graph. Store the node degrees
    in an index while we're at it.

    Input:
        network: path to a tab-delimited .sif file

    Returns:
        (edges, nodes, degrees): edges holds both orientations of each
        undirected edge; degrees counts each undirected edge once per
        endpoint.
    """
    edges = set()
    nodes = set()
    degrees = {}
    # use a context manager so the file handle is closed even on error
    # (the original leaked the handle returned by open())
    with open(network) as fh:
        for line in fh:
            parts = line.rstrip().split('\t')
            source = parts[0]
            target = parts[2]

            # if inputting a multi-graph, count duplicate edges only once
            if (source, target) in edges:
                continue

            edges.add((source, target))
            edges.add((target, source))
            nodes.add(source)
            nodes.add(target)

            degrees[source] = degrees.get(source, 0) + 1
            degrees[target] = degrees.get(target, 0) + 1

    return (edges, nodes, degrees)

print_laplacian()

Debug function

Source code in tiedie/kernel_scipy.py
def print_laplacian(self):
    """Debug helper: dump the Laplacian as tab-separated (label, label, value) triples."""
    coo = self.laplacian.tocoo()
    for r, c, val in zip(coo.row, coo.col, coo.data):
        print('\t'.join([self.index2node[r], self.index2node[c], str(val)]))

Kernel

Pre-computed heat diffusion kernel for network analysis.

Source code in tiedie/kernel.py
class Kernel:
    """Pre-computed heat diffusion kernel for network analysis."""

    def __init__(self, kernel_files):
        """Load one or more pre-computed kernel matrices.

        Input:
            kernel_files - colon-separated list of tab-delimited matrix
            files, each with both a header and first-row labels, in the
            same order.

        Returns:
            Kernel object.
        """
        # Multiple kernels are supported: linearity of the diffusion kernel
        # lets us diffuse over each one and sum the results.

        # kernel matrices, indexed by file name
        self.kernels = {}
        # header labels for each kernel: used for lookup
        self.labels = {}
        # The number of rows and columns for each kernel
        self.ncols = {}
        self.nrows = {}
        # parse each kernel file
        for kernel in kernel_files.split(':'):
            # numpy's genfromtxt format. Relatively memory-intensive
            # FIXME: add option for matlab .mat compressed file format
            # slice off the header row and the label column
            self.kernels[kernel] = genfromtxt(kernel, delimiter='\t')[1:, 1:]
            self.labels[kernel] = None
            # read only the header line for the column labels; the context
            # manager guarantees the handle is closed
            with open(kernel) as fh:
                header = fh.readline()
            if header:
                self.labels[kernel] = header.rstrip().split('\t')[1:]

            # NOTE(review): the label row/column were sliced off above, so
            # these look one less than the stored matrix dimensions --
            # preserved as-is for compatibility; verify against callers
            # before changing.
            self.ncols[kernel] = self.kernels[kernel].shape[1] - 1
            self.nrows[kernel] = self.kernels[kernel].shape[0] - 1

    def get_labels(self):
        """Return the set of all node/gene labels used by this kernel object."""
        all_labels = set()
        for kernel in self.labels:
            all_labels |= set(self.labels[kernel])

        return all_labels

    def kernel_multiply_one(self, kernel, vector):
        """Multiply the specified kernel by the supplied input heat vector.

        Input:
            vector: a hash mapping gene labels to floating point values
            kernel: the index (file name) of a specific kernel

        Returns:
            A hash of diffused heats, indexed by the same names as the
            input vector.
        """
        labels = self.labels[kernel]
        # order the input heats by the kernel's label order; genes missing
        # from the kernel default to zero heat
        ordered = [vector.get(label, 0) for label in labels]

        # matrix-vector product
        value = dot(self.kernels[kernel], ordered)

        # convert back to a hash of diffused heats
        return {label: float(value[idx]) for idx, label in enumerate(labels)}

    def add_vectors(self, vector_list):
        """Element-wise sum of hash/float-valued vectors.

        Input:
            vector_list: a list of {key: float} hashes

        Returns:
            One hash mapping each key to the sum of its values across
            all input vectors.
        """
        # accumulator renamed so it no longer shadows the builtin sum()
        totals = {}
        for vec in vector_list:
            for key, val in vec.items():
                totals[key] = totals.get(key, 0) + val

        return totals

    def diffuse(self, vector, reverse=False):
        """Diffuse input heats over the set of kernels and sum the results.

        Input:
            vector: {'gene1': heat1, 'gene2': heat2, ...}
            reverse: unused; kept for interface compatibility

        Returns:
            Diffused heat vector as a hash
        """
        # diffuse separately on each kernel (if more than one) and sum:
        # diffusion is linear, so the per-kernel results simply add
        return_vectors = [self.kernel_multiply_one(k, vector) for k in self.kernels]
        return self.add_vectors(return_vectors)

Functions

__init__(kernel_files)

Input:

kernel_file - a tab-delimited matrix file with both a header
and first-row labels, in the same order.

Returns:

Type Description

Kernel object.

Source code in tiedie/kernel.py
def __init__(self, kernel_files):
    """Load one or more pre-computed kernel matrix files.

    Input:

        kernel_files - colon-separated list of tab-delimited matrix files,
        each with both a header and first-row labels, in the same order.

    Returns:
            Kernel object.
    """

    # Multiple kernels are supported. Linearity of the diffusion kernel allows
    # this feature.

    # store kernel object
    self.kernels = {}
    # Store the header line for each kernel: will be used for lookup
    self.labels = {}
    # The number of rows and columns for each kernel
    self.ncols = {}
    self.nrows = {}
    # parse each kernel file
    for kernel in kernel_files.split(':'):
        # numpy's genfromtxt format. Relatively memory-intensive
        # FIXME: add option for matlab .mat compressed file format
        # slice off the header row and the label column
        self.kernels[kernel] = genfromtxt(kernel, delimiter='\t')[1:, 1:]
        self.labels[kernel] = None
        # NOTE(review): the file is re-opened just to read the header line,
        # and the loop breaks after the first line
        fh = open(kernel)
        # get the header line
        for line in fh:
            self.labels[kernel] = line.rstrip().split('\t')[1:]
            break
        fh.close()

        # NOTE(review): the label row/column were already sliced off above,
        # so these look one less than the stored matrix dimensions --
        # confirm against callers before changing
        self.ncols[kernel] = self.kernels[kernel].shape[1] - 1
        self.nrows[kernel] = self.kernels[kernel].shape[0] - 1

add_vectors(vector_list)

Sum vectors: Add hash / float-valued vectors

Source code in tiedie/kernel.py
def add_vectors(self, vector_list):
    """Element-wise sum of hash/float-valued vectors.

    Input:
        vector_list: a list of {key: float} hashes

    Returns:
        One hash mapping each key to the sum of its values across all
        input vectors.
    """
    # accumulator renamed so it no longer shadows the builtin sum()
    totals = {}
    for vec in vector_list:
        for key, val in vec.items():
            totals[key] = totals.get(key, 0) + val

    return totals

diffuse(vector, reverse=False)

Diffuse input heats over the set of kernels, add to this object

Input

{'gene1': float(heat1) 'gene2' : float(heat2) ... }

Returns:

Type Description

Diffused heat vector

Source code in tiedie/kernel.py
def diffuse(self, vector, reverse=False):
    """Diffuse input heats over every constituent kernel and sum the results.

    Input:
        vector: hash of {gene: float heat}
        reverse: unused; kept for interface compatibility

    Returns:
        Diffused heat vector as a hash
    """
    # diffusion is linear: diffuse on each kernel, then add the results
    partials = [self.kernel_multiply_one(name, vector) for name in self.kernels]
    return self.add_vectors(partials)

get_labels()

Return the set of all node/gene labels used by this kernel object

Source code in tiedie/kernel.py
def get_labels(self):
    """Return the set of all node/gene labels used by this kernel object."""
    combined = set()
    for kernel_name in self.labels:
        combined |= set(self.labels[kernel_name])

    return combined

kernel_multiply_one(kernel, vector)

Multiply the specified kernel by the supplied input heat vector.

Input

vector: A hash mapping gene labels to floating point values kernel: a single index for a specific kernel

Returns:

Type Description

A hash of diffused heats, indexed by the same names as the input vector

Source code in tiedie/kernel.py
def kernel_multiply_one(self, kernel, vector):
    """Multiply the specified kernel by the supplied input heat vector.

    Input:
        vector: A hash mapping gene labels to floating point values
        kernel: the index (file name) of a specific kernel

    Returns:
        A hash of diffused heats, indexed by the same names as the
        input vector
    """
    labels = self.labels[kernel]
    # order heats by the kernel's label order, defaulting missing genes to 0
    ordered = [vector.get(label, 0) for label in labels]

    # matrix-vector product
    diffused = dot(self.kernels[kernel], ordered)

    # convert back to a hash of diffused heats
    return {label: float(diffused[idx]) for idx, label in enumerate(labels)}

PprDiffuser

Personalized PageRank diffusion for network analysis.

Source code in tiedie/ppr.py
class PprDiffuser:
    """Personalized PageRank diffusion for network analysis."""

    def __init__(self, network):
        """PprDiffuser: object to perform the Personalized PageRank Algorithm.

        Builds forward and reversed NetworkX DiGraphs from a network hash,
        which can then be used to diffuse heat vectors over the network.

        Input:
            - network : a network hash object; entries appear to be
              network[source] = [(interaction, target)] pairs, matching the
              format documented for NetBalancedPermuter -- confirm at callers
        """

        # create a directed graph with NetworkX
        self.G = nx.DiGraph()
        # create a reversed graph for diffusion from the 'target' set
        self.G_reversed = nx.DiGraph()
        # convert network format to a networkX Graph object;
        # i is ignored here -- only the endpoints are used
        for source in network:
            for i, t in network[source]:
                self.G.add_edge(source, t)
                self.G_reversed.add_edge(t, source)

    def personal_page_rank(self, p_vector, reverse=False):
        """Get the personal PageRank of the supplied input vector.

        Input:
            - p_vector: A hash-map of input values for a selection (or all) nodes
            (if supplied nodes aren't in the graph, they will be ignored)
            - reverse: run PageRank over the reversed graph when True

        Output:
            - A vector of diffused heats in hash-map (key,value) format
        """
        input_pvec = None
        # without initializing this vector the initial probabilities will be
        # flat and this will be equivalent to standard page rank
        if p_vector:
            input_pvec = {}
            # doesn't seem to be necessary for a non-zero epsilon now, but
            # leave this as a place holder
            epsilon = 0.0
            for node in self.G.nodes(data=False):
                if node in p_vector:
                    input_pvec[node] = p_vector[node]
                else:
                    input_pvec[node] = epsilon

        # 0.85 is the conventional PageRank damping factor.
        # NOTE(review): nx.pagerank_numpy was deprecated in NetworkX 2.6 and
        # removed in 3.0, so this raises AttributeError on modern NetworkX;
        # nx.pagerank(G, alpha=0.85, personalization=input_pvec) is the likely
        # replacement -- confirm numerics before switching.
        if reverse:
            return nx.pagerank_numpy(self.G_reversed, 0.85, input_pvec)
        else:
            return nx.pagerank_numpy(self.G, 0.85, input_pvec)

    def diffuse(self, p_vector, reverse=False):
        """Diffuse: perform generalized diffusion from the supplied input vector.

        Input:
            - p_vector: A hash-map of input values for a selection (or all) nodes
            (if supplied nodes aren't in the graph, they will be ignored)
            - reverse: diffuse over the reversed graph when True

        Output:
            - A vector of diffused heats in hash-map (key,value) format
        """
        return self.personal_page_rank(p_vector, reverse)

Functions

__init__(network)

PprDiffuser: object to perform the Personalized PageRank Algorithm. This method creates the diffuser as a networkx DiGraph() object, which can then be used to diffuse vectors over the network

Input
  • network : a network hash object
Source code in tiedie/ppr.py
def __init__(self, network):
    """Create a Personalized PageRank diffuser from a network hash.

    Builds a forward NetworkX DiGraph plus a reversed copy for diffusion
    from the 'target' set.

    Input:
        - network : a network hash object
    """
    self.G = nx.DiGraph()
    self.G_reversed = nx.DiGraph()
    # each entry is an (interaction, target) pair; only endpoints are used
    for src in network:
        for _interaction, tgt in network[src]:
            self.G.add_edge(src, tgt)
            self.G_reversed.add_edge(tgt, src)

diffuse(p_vector, reverse=False)

Diffuse: perform generalized diffusion from the supplied input vector

Input
  • p_vector: A hash-map of input values for a selection (or all) nodes (if supplied nodes aren't in the graph, they will be ignored)
Output
  • A vector of diffused heats in hash-map (key,value) format
Source code in tiedie/ppr.py
def diffuse(self, p_vector, reverse=False):
    """Perform generalized diffusion from the supplied input vector.

    Input:
        - p_vector: A hash-map of input values for a selection (or all) nodes
        (if supplied nodes aren't in the graph, they will be ignored)
        - reverse: diffuse over the reversed graph when True

    Output:
        - A vector of diffused heats in hash-map (key,value) format
    """
    # thin wrapper: PageRank is the diffusion engine here
    return self.personal_page_rank(p_vector, reverse)

personal_page_rank(p_vector, reverse=False)

Personal_Page_Rank: Get the personal pagerank of the supplied input vector

Input
  • p_vector: A hash-map of input values for a selection (or all) nodes (if supplied nodes aren't in the graph, they will be ignored)
Output
  • A vector of diffused heats in hash-map (key,value) format
Source code in tiedie/ppr.py
def personal_page_rank(self, p_vector, reverse=False):
    """Get the personal PageRank of the supplied input vector.

    Input:
        - p_vector: A hash-map of input values for a selection (or all) nodes
        (if supplied nodes aren't in the graph, they will be ignored)
        - reverse: run PageRank over the reversed graph when True

    Output:
        - A vector of diffused heats in hash-map (key,value) format
    """
    input_pvec = None
    # without initializing this vector the initial probabilities will be flat
    # and this will be equivalent to standard page rank
    if p_vector:
        input_pvec = {}
        # doesn't seem to be necessary for a non-zero epsilon now, but
        # leave this as a place holder
        epsilon = 0.0
        for node in self.G.nodes(data=False):
            if node in p_vector:
                input_pvec[node] = p_vector[node]
            else:
                input_pvec[node] = epsilon

    # 0.85 is the conventional PageRank damping factor.
    # NOTE(review): nx.pagerank_numpy was deprecated in NetworkX 2.6 and
    # removed in 3.0; on modern NetworkX use
    # nx.pagerank(G, alpha=0.85, personalization=input_pvec) -- confirm
    # numerics before switching.
    if reverse:
        return nx.pagerank_numpy(self.G_reversed, 0.85, input_pvec)
    else:
        return nx.pagerank_numpy(self.G, 0.85, input_pvec)

NetBalancedPermuter

Encapsulates the permutation logic for an input heat set. Permutes node scores with other nodes of similar network degree by sorting all nodes by degree and binning them into blocks of a set size. Permutations are done only within blocks, so that the degree distribution of input nodes is preserved.

Source code in tiedie/permute.py
class NetBalancedPermuter:
    """Encapsulates the permutation logic for an input heat set. Permutes
    node scores with other nodes of similar network degree by sorting
    all nodes by degree, and binning them into blocks of a set size.
    Permutations are done only within blocks, so that the degree distribution
    of input nodes is preserved.
    """

    def __init__(self, network, up_set):
        """Input:
            network: net[source] = [(i, t)]
            up_set: up_set[node] = score
        """
        # store node-degrees for all nodes in the network
        self.degrees = {}

        # the set of initial nodes to permute within blocks
        self.nodes = up_set.keys()
        # heuristic: a block needs to be significantly larger than the input set
        BLOCK_MULTIPLE = 10
        # guard against an empty input set: a zero block size would make
        # permute_one never flush (and previously risked a modulo-by-zero)
        self.block_size = max(1, len(self.nodes) * BLOCK_MULTIPLE)
        # save the scores as (node, score-string) tuple keys
        self.scores = {}
        for node in self.nodes:
            self.scores[(node, str(up_set[node]))] = 1

        # compute the total (in + out) degree of each node in the network
        for source in network:
            if source not in self.degrees:
                self.degrees[source] = 0

            for i, target in network[source]:
                self.degrees[source] += 1

                if target not in self.degrees:
                    self.degrees[target] = 0
                # add a degree for the incoming edge
                self.degrees[target] += 1

        # reverse-sort all network nodes by degree
        self.sorted_degrees = sorted(
            self.degrees.items(), key=lambda x: x[1], reverse=True
        )

    def permute_block(self, block):
        """Randomly shuffle a block of nodes with random.shuffle.

        Input:
            block: an array of node labels

        Returns:
            A hash mapping each original node to the node to swap it with.
        """
        # shuffle a copy so the caller's list is untouched
        shuffled = copy(block)
        shuffle(shuffled)
        # renamed from 'map' to avoid shadowing the builtin
        mapping = {}
        for idx, original in enumerate(block):
            mapping[original] = shuffled[idx]

        return mapping

    def _permute_and_score(self, block, permuted_scores):
        """Shuffle one block and record scores for any input nodes in it."""
        mapping = self.permute_block(block)
        for input_node, score in self.scores:
            if input_node in mapping:
                permuted_scores[mapping[input_node]] = float(score)

    def permute_one(self):
        """Generate one permutation of scores for all input nodes.

        Returns:
            A hash of { permuted node : score }.
        """
        permuted_scores = {}
        # current bin of similar-degree nodes
        block = []
        for network_node, degree in self.sorted_degrees:
            block.append(network_node)
            # permute and flush whenever the block is full
            if len(block) == self.block_size:
                self._permute_and_score(block, permuted_scores)
                block = []

        # BUG FIX: the original discarded the final partial block, so input
        # nodes falling in the low-degree tail were silently never permuted
        if block:
            self._permute_and_score(block, permuted_scores)

        return permuted_scores

    def permute(self, iterations):
        """Generate an array of random permutations of node scores.

        Input:
            iterations: the number of permutations to generate

        Returns:
            An array of hashes, each mapping permuted nodes to scores.
        """
        return [self.permute_one() for _ in range(iterations)]

Functions

__init__(network, up_set)

Input: network: net[source] = [(i, t)] up_set: up_set[node] = score

Source code in tiedie/permute.py
def __init__(self, network, up_set):
    """Input:
    network: net[source] = [(i, t)]
    up_set: up_set[node] = score
    """

    # store node-degrees for all nodes in the network
    self.degrees = {}

    # the set of initial nodes to permute within blocks: save it to the
    # instantiated object here
    # NOTE(review): this is a live dict view, not a copy -- it changes if
    # the caller later mutates up_set
    self.nodes = up_set.keys()
    # heuristic: block needs to be significantly larger than the input set size
    # NOTE(review): an empty up_set yields block_size 0, which would make
    # permute_one's modulo test divide by zero -- confirm callers never
    # pass an empty set
    BLOCK_MULTIPLE = 10
    self.block_size = len(self.nodes) * BLOCK_MULTIPLE
    self.scores = {}
    for node in self.nodes:
        # save the scores as (node, score-string) tuple keys
        self.scores[(node, str(up_set[node]))] = 1

    # Compute total (in + out) degree for each node in the network
    for source in network:
        if source not in self.degrees:
            self.degrees[source] = 0

        for i, target in network[source]:
            self.degrees[source] += 1

            if target not in self.degrees:
                self.degrees[target] = 0
            # add a degree for the incoming edge
            self.degrees[target] += 1

    # reverse-sort the degrees of all nodes in the network.
    self.sorted_degrees = sorted(
        self.degrees.items(), key=lambda x: x[1], reverse=True
    )

permute(iterations)

Generate an array of random permutations of node scores.

Input

iteration: the number of permutations to generate

Returns:

Type Description

an array of hashes: each hash indexes the nodes to permuted scores

Source code in tiedie/permute.py
def permute(self, iterations):
    """Generate an array of random permutations of node scores.

    Input:
        iterations: the number of permutations to generate

    Returns:
        an array of hashes: each hash indexes the nodes to permuted scores
    """
    # one independent permutation per iteration
    return [self.permute_one() for _ in range(iterations)]

permute_block(block)

Take a block of nodes and randomly shuffle using python's random.shuffle method.

Input:

An array of node labels

Returns:

Type Description

A hash mapping the original nodes to the nodes to swap with each.

Source code in tiedie/permute.py
def permute_block(self, block):
    """Take a block of nodes and randomly shuffle using python's random.shuffle method.

    Input:
        block: an array of node labels

    Returns:
        A hash mapping the original nodes to the nodes to swap with each.
    """
    # shuffle a copy so the caller's list is left untouched
    shuffled = copy(block)
    shuffle(shuffled)
    # pair each original label with the label now occupying its slot
    return {original: swapped for original, swapped in zip(block, shuffled)}

permute_one()

Generate one permutation of scores for all nodes, and return a hash of { node : score } for each.

Source code in tiedie/permute.py
def permute_one(self):
    """Generate one permutation of scores for all nodes, and return a hash of { node : score }
    for each.
    """
    permuted_scores = {}
    block = []
    counted = 0
    # walk nodes from highest to lowest degree so each block groups
    # nodes of comparable connectivity
    for label, _degree in self.sorted_degrees:
        block.append(label)
        counted += 1
        # every <block_size> nodes, shuffle the block and move the input
        # scores onto the swapped-in nodes
        if counted % self.block_size == 0:
            swap = self.permute_block(block)
            # keys of self.scores are (node, score-as-string) tuples
            for scored_node, score_str in self.scores:
                if scored_node in swap:
                    permuted_scores[swap[scored_node]] = float(score_str)
            block = []

    return permuted_scores

Utility Functions

Parse input heats file in form

Returns:

Type Description
  • Two hashes: one indexing by gene and storing the input heats, and one storing the input signs
Source code in tiedie/util.py
def parse_heats(file, network_nodes=None):
    """Parse input heats file in form:
        <gene> <heat> <perturbation/activity sign (+/-)>

    Input:
        file: path to a tab-separated heats file; each line has either two
            columns (<gene> <heat>) or three (<gene> <heat> <+/->)
        network_nodes: optional set of node names; three-column entries for
            genes not in this set are skipped with a warning on stderr

    Returns:
        - Two hashes: one indexing by gene and storing the input heats, and one storing the input signs

    Raises:
        Exception: if the file can't be opened, a line is malformed, a heat
            value is not numeric, or a sign is not '+' or '-'
    """

    heats = {}
    signs = {}
    try:
        fh = open(file)
    except OSError as e:
        raise Exception("Error: can't open file: " + file) from e

    # 'with' guarantees the handle is closed even when a parse error below
    # raises; the original leaked the file on those paths
    with fh:
        for lineno, line in enumerate(fh, start=1):
            parts = line.rstrip().split('\t')

            # tolerate blank lines (e.g. a trailing empty line at EOF),
            # which previously crashed with an IndexError
            if parts == ['']:
                continue

            if len(parts) < 2:
                raise Exception(
                    'Error: expected at least 2 tab-separated columns on line '
                    + str(lineno)
                )

            if len(parts) > 2:
                # exactly-three unpack: >3 columns raise ValueError, matching
                # the original behavior
                prot, heat, sign = parts

                # provide a warning if node not in the network
                if network_nodes and prot not in network_nodes:
                    sys.stderr.write(
                        'Warning: input heat node '
                        + prot
                        + ' not in the network and will be ignored...\n'
                    )
                    continue

                # input validation for heat values
                try:
                    heats[prot] = float(heat)
                except ValueError as e:
                    raise Exception(
                        'Error: non float heat value on line '
                        + str(lineno)
                        + ' gene '
                        + prot
                    ) from e

                # input validation for input signs
                if sign != '+' and sign != '-':
                    raise Exception(
                        'Error: invalid value for heat sign on line '
                        + str(lineno)
                        + ': '
                        + sign
                    )

                signs[prot] = sign
            else:
                # two-column form: heat only, no sign recorded
                heats[parts[0]] = float(parts[1])

    return (heats, signs)

Build a directed network from a .sif file.

Inputs

A network in .sif format, tab-separated (`<source> <interaction> <target>`)

Returns:

Name Type Description

A network in hash key format, i.e. convert two lines of a file:

`<source>  <interaction1>  <target1>` and `<source>  <interaction2>  <target2>`

To

{'source': set( (interaction1, target1), (interaction2, target2) )}

Source code in tiedie/util.py
def parse_net(network):
    """Build a directed network from a .sif file.

    Inputs:
        A network in .sif format, tab-separated (<source> <interaction> <target>)

    Returns:
        A network in hash key format, i.e. convert two lines of a file:
            <source>    <interaction1>  <target1>
            <source>    <interaction2>  <target2>
        To:
            {'source': set( (interaction, target1), (interaction, target2) )}
    """
    net = {}
    # context manager closes the file even on a parse error; the original
    # never closed the handle at all
    with open(network) as fh:
        for line in fh:
            parts = line.rstrip().split('\t')
            source = parts[0]
            interaction = parts[1]
            target = parts[2]

            # group all out-edges of a source under one set
            net.setdefault(source, set()).add((interaction, target))

    return net

Normalize absolute value sum of data hash to 1000

Source code in tiedie/util.py
def normalize_heats(data):
    """Normalize absolute value sum of data hash to 1000.

    Input:
        data: hash of { event : numeric value }

    Returns:
        hash of { event : normalized value }; values are non-negative and
        sum to 1000 (input signs are discarded). Empty input yields {}.

    Raises:
        ZeroDivisionError: if the absolute values of a non-empty input sum
            to zero (unchanged from the original behavior).
    """
    FACTOR = 1000
    # 'total' avoids shadowing the builtin sum(); the original also built a
    # 'signs' hash that was never returned -- dead code, removed
    total = sum(abs(val) for val in data.values())
    return {event: FACTOR * abs(val) / total for event, val in data.items()}

Take a network in hash-key format and return a set containing the nodes in it.

Source code in tiedie/util.py
def get_network_nodes(network):
    """Take a network in hash-key format and return a set containing the
    nodes in it.
    """
    # every source key is a node, as is every target of each out-edge
    nodes = set(network)
    for edges in network.values():
        nodes.update(target for _interaction, target in edges)
    return nodes

Use the min(diffused1, diffused2) function to return a list of genes that fall above that cutoff. Input: diffused heats for each set, and the numeric cutoff value

Returns:

Type Description

a list of genes above the cutoff, a hash of minimum heat values

Source code in tiedie/util.py
def filter_linkers(up_heats_diffused, down_heats_diffused, cutoff):
    """Use the min(diffused1, diffused2) function to return a list of genes
    that fall above that cutoff.

    Input:
        up_heats_diffused: hash of diffused heats from the upstream set
        down_heats_diffused: hash of diffused heats from the downstream set,
            or None when diffusing from a single source
        cutoff: numeric heat threshold

    Returns:
        a list of genes above the cutoff, a hash of minimum heat values
    """
    if down_heats_diffused is None:
        # trivially: if this is a single list of diffused values, just return it.
        # Materialize the keys as a list so both branches return the same
        # type (the original returned a dict_keys view here).
        return (list(up_heats_diffused.keys()), up_heats_diffused)

    linkers = {}
    filtered = []
    for node in up_heats_diffused:
        if node not in down_heats_diffused:
            # it doesn't make the cut if it's not in both sets
            continue
        # a node's linker heat is the weaker of its two diffused heats
        min_heat = min(up_heats_diffused[node], down_heats_diffused[node])
        linkers[node] = min_heat
        if min_heat > cutoff:
            filtered.append(node)

    return (filtered, linkers)

Find a heat threshold that yields a linker set of the given size.

Parameters:

Name Type Description Default
source_set

Set of source node names.

required
target_set

Set of target node names.

required
up_heat_diffused

Dict mapping nodes to upstream diffused heat values.

required
down_heat_diffused

Dict mapping nodes to downstream diffused heat values.

required
size

Relative size of the linker set (compared to input set size).

required

Returns:

Type Description

Tuple of (cutoff_threshold, relevance_score).

Example
find_linker_cutoff(
    source_set={"A", "B"},
    target_set={"X", "Y"},
    up_heat_diffused={"A": 1.0, "B": 1.1, "C": 0.5, "D": 0.4},
    down_heat_diffused={"X": 2.0, "Y": 2.1, "C": 0.7, "D": 0.5},
    size=0.2
)
# Returns: (0.4999, 0.1667)
Source code in tiedie/util.py
def find_linker_cutoff(
    source_set, target_set, up_heat_diffused, down_heat_diffused, size
):
    """Find a heat threshold that yields a linker set of the given size.

    Args:
        source_set: Set of source node names.
        target_set: Set of target node names.
        up_heat_diffused: Dict mapping nodes to upstream diffused heat values.
        down_heat_diffused: Dict mapping nodes to downstream diffused heat values.
        size: Relative size of the linker set (compared to input set size).

    Returns:
        Tuple of (cutoff_threshold, relevance_score); (0, 0) when the
        multi-source search fails.
    """
    # a missing downstream diffusion means a single-source run (i.e. the
    # HotNet algorithm rather than TieDIE, for comparison)
    if down_heat_diffused is None:
        cutoff, score = find_linker_cutoff_single(
            source_set, up_heat_diffused, size
        )
        return (cutoff, score)

    try:
        cutoff, score = find_linker_cutoff_multi(
            source_set,
            target_set,
            up_heat_diffused,
            down_heat_diffused,
            size,
        )
    except Exception as e:  # noqa: BLE001 - intentional fallback with warning
        global _linker_cutoff_error_count, _linker_cutoff_error_shown
        _linker_cutoff_error_count += 1
        # surface the failure once, then stay quiet on repeats
        if not _linker_cutoff_error_shown:
            warnings.warn(
                f'find_linker_cutoff failed: {e}',
                stacklevel=2,
            )
            _linker_cutoff_error_shown = True
        return (0, 0)

    return (cutoff, score)

Extract edges where both endpoints are in the given node subset.

Parameters:

Name Type Description Default
network

Dict mapping source nodes to sets of (interaction, target) tuples.

required
subnet_nodes

Set of nodes to use for edge selection.

required

Returns:

Type Description

Set of (source, target) edge tuples where both nodes are in subnet_nodes.

Example
network = {
    'S1': {('a>', 'T1')},
    'S2': {('a>', 'T2')},
    'T1': {('t|', 'T2')},
    'T2': {('a>', 'T1')},
    'T3': {('t>', 'G5')},
}
connected_subnets(network, {'S1', 'T1', 'T2', 'T3', 'G5'})
# Returns: {('S1', 'T1'), ('T1', 'T2'), ('T2', 'T1')}
Source code in tiedie/util.py
def connected_subnets(network, subnet_nodes):
    """Extract edges where both endpoints are in the given node subset,
    restricted to the largest connected component.

    Args:
        network: Dict mapping source nodes to sets of (interaction, target) tuples.
        subnet_nodes: Set of nodes to use for edge selection.

    Returns:
        Set of (source, target) edge tuples where both nodes are in
        subnet_nodes and lie in the largest connected component of the
        induced undirected subgraph.
    """
    edgelist = set()
    ugraph = set()

    for s in network:
        for i, t in network[s]:
            # ignore self-links
            if s == t:
                continue
            if s in subnet_nodes and t in subnet_nodes:
                edgelist.add((s, t))
                # keep only one direction per node pair for the undirected graph
                if (t, s) not in edgelist:
                    ugraph.add((s, t))

    # use networkx to find the largest connected sub graph
    G = nx.Graph()
    G.add_edges_from(list(ugraph))

    # BUG FIX: nx.connected_components() yields components in no particular
    # order; the original took the first yielded component, which is not
    # necessarily the biggest. Select the largest explicitly; an empty
    # graph falls back to the empty set.
    largest = max(nx.connected_components(G), key=len, default=set())

    # keep every directed edge whose endpoints both lie in that component
    validated_edges = set()
    for s, t in edgelist:
        if s in largest and t in largest:
            validated_edges.add((s, t))

    return validated_edges

Map undirected edges to the network to form a subnetwork in the hash-key directed network format

Input

edge_list: edges in (s, t) format; network: network in {source: set( (interaction, target), ... )} format

Returns:

Type Description

Subnetwork in the data structure format of network input

Source code in tiedie/util.py
def map_ugraph_to_network(edge_list, network):
    """Map undirected edges to the network to form a subnetwork
    in the hash-key directed network format

    Input:
        edge_list: edges in (s,t) format
        network: network in {source:set( (int, target), ... )

    Returns:
        Subnetwork in the data structure format of network input
    """

    subnetwork = {}

    def _copy_matching(src, dst):
        # copy every directed edge src -> dst present in the full network
        for interaction, neighbor in network.get(src, ()):
            if neighbor == dst:
                subnetwork.setdefault(src, set()).add((interaction, dst))

    # each undirected edge may exist in either (or both) directions
    for s, t in edge_list:
        _copy_matching(s, t)
        _copy_matching(t, s)

    return subnetwork