"""
Module that defines the schema for interactions, pvalues, networks and their metadata
"""
import numpy as np
import pandas as pd
from schematics.exceptions import ValidationError
from schematics.models import Model
from schematics.types import (
BaseType,
DateType,
DictType,
FloatType,
IntType,
ListType,
ModelType,
StringType,
UnionType,
)
class InteractionmatrixType(BaseType):
    """
    DataType that describes the expected structure of an interaction matrix

    Parameters
    ----------
    symm : bool, optional
        True if interaction matrix is expected to be symmetric
        Default value is False
    *args, **kwargs
        Forwarded unchanged to ``BaseType.__init__``
    """

    def __init__(self, symm=False, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.symm = symm

    def validate_isdataframe(self, value):
        """
        Check whether the object is a pandas DataFrame

        Raises
        ------
        ValidationError
            If `value` is not a `pd.DataFrame` instance
        """
        if not isinstance(value, pd.DataFrame):
            raise ValidationError(
                "Interaction matrix must be a `pd.DataFrame` instance"
            )

    def validate_symmetry(self, value):
        """
        Check whether the interaction matrix is symmetric

        The check only runs when the type was created with ``symm=True``.

        Raises
        ------
        ValidationError
            If the matrix is not square or differs from its transpose
            (compared with `np.allclose`)
        """
        if not self.symm:
            return
        # A wrong container type is reported by `validate_isdataframe`; bail out
        # here so that `.shape` on an arbitrary object does not raise an
        # unrelated AttributeError.
        # NOTE(review): assumes the remaining validate_* hooks are still invoked
        # after one of them fails - confirm against the schematics version in use.
        if not isinstance(value, pd.DataFrame):
            return
        n_rows, n_cols = value.shape
        # The shape test must come first: comparing a non-square matrix with
        # its transpose is not meaningful.
        if n_rows != n_cols or not np.allclose(value, value.T):
            raise ValidationError("Interaction matrix is not symmetric")

    def validate_data(self, value):
        """
        Check whether the interaction matrix holds integer or floating point data

        Raises
        ------
        ValidationError
            If the dtype of the underlying array is neither an integer nor a
            floating point type
        """
        # See `validate_symmetry` for why non-DataFrames are skipped here.
        if not isinstance(value, pd.DataFrame):
            return
        dtype = value.values.dtype
        # `dtype == float` / `dtype == int` only match float64 and the platform
        # default integer; `issubdtype` also accepts e.g. int32 and float32.
        if not (
            np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.floating)
        ):
            raise ValidationError("Invalid data. Interactions must be int or float")
class CorrelationmatrixType(InteractionmatrixType):
    """
    DataType that describes the expected structure of a correlation matrix

    The type is always created with ``symm=True``; every other argument is
    forwarded to ``InteractionmatrixType.__init__``.
    """

    def __init__(self, *args, **kwargs):
        # `symm` is the first positional parameter of the parent initializer.
        # Passing it by keyword together with `*args` raises
        # "got multiple values for argument 'symm'" as soon as one positional
        # argument is supplied, so it is passed positionally instead.
        super().__init__(True, *args, **kwargs)

    def validate_data_range(self, value):
        """
        Check whether all entries of the correlation matrix lie in [-1, 1]

        Raises
        ------
        ValidationError
            If the largest entry is above 1 or the smallest entry is below -1
        """
        # Non-DataFrames have no `.values`, and `max()`/`min()` raise on a
        # zero-size array; neither case can be range-checked, so skip them.
        if not isinstance(value, pd.DataFrame) or value.empty:
            return
        data = value.values
        if data.max() > 1 or data.min() < -1:
            raise ValidationError("Correlation matrix must be bound by -1 and 1")
class PvaluematrixType(InteractionmatrixType):
    """DataType that describes the expected structure of a pvalue matrix"""

    def validate_data_range(self, value):
        """
        Check whether all entries of the pvalue matrix lie in [0, 1]

        Raises
        ------
        ValidationError
            If the largest entry is above 1 or the smallest entry is below 0
        """
        # Non-DataFrames have no `.values`, and `max()`/`min()` raise on a
        # zero-size array; neither case can be range-checked, so skip them.
        if not isinstance(value, pd.DataFrame) or value.empty:
            return
        data = value.values
        if data.max() > 1 or data.min() < 0:
            raise ValidationError("Pvalue matrix must be bound by 0 and 1")
class PublicationModel(Model):
    """
    Schema of the publication metadata

    All three fields are mandatory.
    """

    # Publication date
    date = DateType(required=True)
    # One dictionary with string values per author
    authors = ListType(
        DictType(StringType),
        required=True,
    )
    # Identifier is stored as a string
    pubmed_id = StringType(required=True)
class ChildrenmapType(BaseType):
    """
    DataType for the children map dictionary

    Keys must be strings and every value must be a list of strings.
    """

    def validate_keys(self, value):
        """
        Check that every key of the children map is a string

        Raises
        ------
        ValidationError
            If at least one key is not a `str`
        """
        if not all(isinstance(key, str) for key in value.keys()):
            raise ValidationError("Children map must have string keys")

    def validate_values(self, value):
        """
        Check that every value of the children map is a list of strings

        Raises
        ------
        ValidationError
            If a value is not a `list` or contains a non-`str` element
        """
        for children in value.values():
            is_str_list = isinstance(children, list) and all(
                isinstance(child, str) for child in children
            )
            if not is_str_list:
                raise ValidationError(
                    "Children map must have lists of strings as values"
                )
class NodeModel(Model):
    """
    Schema of a single node of the network

    Every field except `abundance` is mandatory.
    """

    # Node identifier, at least two characters long
    id = StringType(required=True, min_length=2)
    lineage = StringType(required=True)
    name = StringType(required=True)
    taxid = IntType(required=True)
    # Taxonomic rank of the node.
    # NOTE(review): the pattern has no end anchor - confirm whether schematics
    # matches the whole string or only a prefix.
    taxlevel = StringType(
        required=True,
        regex=r"(Kingdom|Phylum|Class|Order|Family|Genus|Species)",
    )
    abundance = FloatType()
    children = ListType(StringType, required=True)
class NodesModel(Model):
    """
    Schema of the node collection of the network

    Wraps a mandatory list in which every entry follows `NodeModel`.
    """

    nodes = ListType(
        ModelType(NodeModel),
        required=True,
    )
class LinkModel(Model):
    """
    Schema of a single link of the network

    `weight`, `source` and `target` are mandatory; `pvalue` is optional.
    """

    pvalue = FloatType()
    weight = FloatType(required=True)
    # Both endpoints are strings of at least two characters
    source = StringType(required=True, min_length=2)
    target = StringType(required=True, min_length=2)
class LinksModel(Model):
    """
    Schema of the link collection of the network

    Wraps a mandatory list in which every entry follows `LinkModel`.
    """

    links = ListType(
        ModelType(LinkModel),
        required=True,
    )
[docs]class ElistType(BaseType):
"""DataType that describes the expected structure of an edge list"""