Module similaritymeasures.similaritymeasures

from __future__ import division
import numpy as np
from scipy.spatial import distance
# MIT License
#
# Copyright (c) 2018,2019 Charles Jekel
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


def poly_area(x, y):
    r"""
    A function that computes the polygonal area via the shoelace formula.

    This function allows you to compute the area of any polygon that is not
    self-intersecting. This is also known as Gauss's area formula. See
    https://en.wikipedia.org/wiki/Shoelace_formula

    Parameters
    ----------
    x : ndarray (1-D)
        the x locations of a polygon
    y : ndarray (1-D)
        the y locations of the polygon

    Returns
    -------
    area : float
        the calculated polygonal area

    Notes
    -----
    The x and y locations need to be ordered such that the first vertex
    of the polygon corresponds to x[0] and y[0], the second vertex to
    x[1] and y[1], and so forth.


    Thanks to Mahdi for this one-line implementation
    https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
    """
    return 0.5*np.abs(np.dot(x, np.roll(y, 1))-np.dot(y, np.roll(x, 1)))

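# Example usage (illustrative values): the unit square, with vertices
# ordered counter-clockwise, has a shoelace area of 1.0.
# >>> poly_area(np.array([0., 1., 1., 0.]), np.array([0., 0., 1., 1.]))
# 1.0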

def is_simple_quad(ab, bc, cd, da):
    r"""
    Returns True if a quadrilateral is simple

    This function performs cross products at the vertices of a quadrilateral.
    It is possible to use the results to decide whether a quadrilateral is
    simple or complex. This function returns True if the quadrilateral is
    simple, and False if the quadrilateral is complex. A complex quadrilateral
    is a self-intersecting quadrilateral.

    Parameters
    ----------
    ab : array_like
        [x, y] vector from the first vertex to the second (edge AB)
    bc : array_like
        [x, y] vector from the second vertex to the third (edge BC)
    cd : array_like
        [x, y] vector from the third vertex to the fourth (edge CD)
    da : array_like
        [x, y] vector from the fourth vertex back to the first (edge DA)

    Returns
    -------
    simple : bool
        True if quadrilateral is simple, False if complex
    """
    #   Compute all four cross products
    temp0 = np.cross(ab, bc)
    temp1 = np.cross(bc, cd)
    temp2 = np.cross(cd, da)
    temp3 = np.cross(da, ab)
    cross = np.array([temp0, temp1, temp2, temp3])
    #   Check that the majority of cross products are non-positive or
    #   non-negative; they don't all need to point in the same 'Z' direction
    if sum(cross > 0) < sum(cross < 0):
        crossTF = cross <= 0
    else:
        crossTF = cross >= 0
    return sum(crossTF) > 2


def makeQuad(x, y):
    r"""
    Calculate the area from the x and y locations of a quadrilateral

    This function first constructs a simple quadrilateral from the x and y
    locations of the vertices of the quadrilateral. The function then
    calculates the shoelace area of the simple quadrilateral.

    Parameters
    ----------
    x : array_like
        the x locations of a quadrilateral
    y : array_like
        the y locations of a quadrilateral

    Returns
    -------
    area : float
        Area of quadrilateral via shoelace formula

    Notes
    -----
    This function rearranges the vertices of a quadrilateral until the
    quadrilateral is "Simple" (meaning non-complex). Once a simple
    quadrilateral is found, the area of the quadrilateral is calculated
    using the shoelace formula.
    """

    # check to see if the provided point order is valid
    # I need to get the values of the cross products of ABxBC, BCxCD, CDxDA,
    # DAxAB, thus I need to create the following arrays AB, BC, CD, DA

    AB = [x[1]-x[0], y[1]-y[0]]
    BC = [x[2]-x[1], y[2]-y[1]]
    CD = [x[3]-x[2], y[3]-y[2]]
    DA = [x[0]-x[3], y[0]-y[3]]

    isQuad = is_simple_quad(AB, BC, CD, DA)

    if isQuad is False:
        # attempt to rearrange the first two points
        x[1], x[0] = x[0], x[1]
        y[1], y[0] = y[0], y[1]
        AB = [x[1]-x[0], y[1]-y[0]]
        BC = [x[2]-x[1], y[2]-y[1]]
        CD = [x[3]-x[2], y[3]-y[2]]
        DA = [x[0]-x[3], y[0]-y[3]]

        isQuad = is_simple_quad(AB, BC, CD, DA)

        if isQuad is False:
            # place the second and first points back where they were, and
            # swap the second and third points
            x[2], x[0], x[1] = x[0], x[1], x[2]
            y[2], y[0], y[1] = y[0], y[1], y[2]
            AB = [x[1]-x[0], y[1]-y[0]]
            BC = [x[2]-x[1], y[2]-y[1]]
            CD = [x[3]-x[2], y[3]-y[2]]
            DA = [x[0]-x[3], y[0]-y[3]]

            isQuad = is_simple_quad(AB, BC, CD, DA)

    # calculate the area via shoelace formula
    area = poly_area(x, y)
    return area

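# Example usage (illustrative values): these vertices trace a complex
# "bowtie" quadrilateral; makeQuad reorders them into the unit square and
# returns its shoelace area.
# >>> makeQuad([0.0, 1.0, 1.0, 0.0], [0.0, 1.0, 0.0, 1.0])
# 1.0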

def get_arc_length(dataset):
    r"""
    Obtain arc length distances between every point in 2-D space

    Obtains the total arc length of a curve in 2-D space (a curve of x and y)
    as well as the arc lengths between each two consecutive data points of the
    curve.

    Parameters
    ----------
    dataset : ndarray (2-D)
        The dataset of the curve in 2-D space.

    Returns
    -------
    arcLength : float
        The sum of all consecutive arc lengths
    arcLengths : array_like
        A list of the arc lengths between data points

    Notes
    -----
    Your x locations of data points should be dataset[:, 0], and the y
    locations of the data points should be dataset[:, 1]
    """
    #   split the dataset into two discrete datasets, each of length m-1
    m = len(dataset)
    a = dataset[0:m-1, :]
    b = dataset[1:m, :]
    #   use scipy.spatial to compute the euclidean distance
    dataDistance = distance.cdist(a, b, 'euclidean')
    #   this returns a matrix of the euclidean distance between all points
    #   the arc length is simply the sum of the diagonal of this matrix
    arcLengths = np.diagonal(dataDistance)
    arcLength = sum(arcLengths)
    return arcLength, arcLengths


def area_between_two_curves(exp_data, num_data):
    r"""
    Calculates the area between two curves.

    This calculates the area according to the algorithm in [1]_. Each curve is
    constructed from discretized data points in 2-D space, e.g. each curve
    consists of x and y data points.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.

    Returns
    -------
    area : float
        The area between exp_data and num_data curves.

    References
    ----------
    .. [1] Jekel, C. F., Venter, G., Venter, M. P., Stander, N., & Haftka, R.
        T. (2018). Similarity measures for identifying material parameters from
        hysteresis loops using inverse analysis. International Journal of
        Material Forming. https://doi.org/10.1007/s12289-018-1421-8

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.
    """
    # Calculate the area between two curves using quadrilaterals
    # Consider the test data to be data from an experimental test as exp_data
    # Consider the computer simulation (results from numerical model) to be
    # num_data
    #
    # Example on formatting the test and history data:
    # Curve1 = [xi1, eta1]
    # Curve2 = [xi2, eta2]
    # exp_data = np.zeros([len(xi1), 2])
    # num_data = np.zeros([len(xi2), 2])
    # exp_data[:,0] = xi1
    # exp_data[:,1] = eta1
    # num_data[:, 0] = xi2
    # num_data[:, 1] = eta2
    #
    # then you can calculate the area as
    # area = area_between_two_curves(exp_data, num_data)

    n_exp = len(exp_data)
    n_num = len(num_data)

    # the length of exp_data must be larger than the length of num_data
    if n_exp < n_num:
        temp = num_data.copy()
        num_data = exp_data.copy()
        exp_data = temp.copy()
        n_exp = len(exp_data)
        n_num = len(num_data)

    # get the arc length data of the curves
    # arcexp_data, _ = get_arc_length(exp_data)
    _, arcsnum_data = get_arc_length(num_data)

    # let's find the largest gap between points in num_data, and then
    # linearly interpolate between these points such that num_data
    # becomes the same length as exp_data
    for i in range(0, n_exp-n_num):
        a = num_data[0:n_num-1, 0]
        b = num_data[1:n_num, 0]
        nIndex = np.argmax(arcsnum_data)
        newX = (b[nIndex] + a[nIndex])/2.0
        #   the interpolation model messes up if x2 < x1 so we do a quick check
        if a[nIndex] < b[nIndex]:
            newY = np.interp(newX, [a[nIndex], b[nIndex]],
                             [num_data[nIndex, 1], num_data[nIndex+1, 1]])
        else:
            newY = np.interp(newX, [b[nIndex], a[nIndex]],
                             [num_data[nIndex+1, 1], num_data[nIndex, 1]])
        num_data = np.insert(num_data, nIndex+1, newX, axis=0)
        num_data[nIndex+1, 1] = newY

        _, arcsnum_data = get_arc_length(num_data)
        n_num = len(num_data)

    # Calculate the quadrilateral area, by looping through all of the quads
    area = []
    for i in range(1, n_exp):
        tempX = [exp_data[i-1, 0], exp_data[i, 0], num_data[i, 0],
                 num_data[i-1, 0]]
        tempY = [exp_data[i-1, 1], exp_data[i, 1], num_data[i, 1],
                 num_data[i-1, 1]]
        area.append(makeQuad(tempX, tempY))
    return np.sum(area)


def get_length(x, y, norm_seg_length=True):
    r"""
    Compute arc lengths of an x y curve.

    Computes the arc length between each pair of consecutive points of an
    xy curve, the cumulative arc length, and the total arc length. The
    euclidean distance is used to determine the arc length.

    Parameters
    ----------
    x : array_like
        the x locations of a curve
    y : array_like
        the y locations of a curve
    norm_seg_length : boolean
        Whether to scale each segment's x and y components by the maximum
        absolute x and y values of the curve.

    Returns
    -------
    le : ndarray (1-D)
        the euclidean distance between each pair of consecutive points
    le_total : float
        the total arc length distance of a curve
    le_cum : ndarray (1-D)
        the cumulative sum of euclidean distances between consecutive points

    Examples
    --------
    >>> le, le_total, le_cum = get_length(x, y)

    """
    n = len(x)

    if norm_seg_length:
        xmax = np.max(np.abs(x))
        ymax = np.max(np.abs(y))

        # if your max x or y value is zero... you'll get np.inf
        # as your curve length based measure
        if xmax == 0:
            xmax = 1e-15
        if ymax == 0:
            ymax = 1e-15
    else:
        ymax = 1.0
        xmax = 1.0

    le = np.zeros(n)
    le[0] = 0.0
    l_sum = np.zeros(n)
    l_sum[0] = 0.0
    for i in range(0, n-1):
        le[i+1] = np.sqrt((((x[i+1]-x[i])/xmax)**2)+(((y[i+1]-y[i])/ymax)**2))
        l_sum[i+1] = l_sum[i]+le[i+1]
    return le, np.sum(le), l_sum


def curve_length_measure(exp_data, num_data):
    r"""
    Compute the curve length based distance between two curves.

    This computes the curve length similarity measure according to [1]_. This
    implementation follows the OF2 form, which is a self normalizing form
    based on the average value.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.

    Returns
    -------
    r : float
        curve length based similarity distance

    Notes
    -----
    This uses the OF2 method from [1]_.

    References
    ----------
    .. [1] A Andrade-Campos, R De-Carvalho, and R A F Valente. Novel criteria
        for determination of material model parameters. International Journal
        of Mechanical Sciences, 54(1):294-305, 2012. ISSN 0020-7403. DOI
        https://doi.org/10.1016/j.ijmecsci.2011.11.010 URL:
        http://www.sciencedirect.com/science/article/pii/S0020740311002451

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> r = curve_length_measure(exp_data, num_data)

    """
    x_e = exp_data[:, 0]
    y_e = exp_data[:, 1]
    x_c = num_data[:, 0]
    y_c = num_data[:, 1]

    _, le_nj, le_sum = get_length(x_e, y_e)
    _, lc_nj, lc_sum = get_length(x_c, y_c)

    xmean = np.mean(x_e)
    ymean = np.mean(y_e)

    n = len(x_e)

    r_sq = np.zeros(n)
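    # for each point on the exp curve, find the arc-length-equivalent point
    # on the num curve, then accumulate the squared log relative deviations
    # in x and y (the OF2 form)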
    for i in range(0, n):
        lieq = le_sum[i]*(lc_nj/le_nj)
        xtemp = np.interp(lieq, lc_sum, x_c)
        ytemp = np.interp(lieq, lc_sum, y_c)

        r_sq[i] = np.log(1.0 + (np.abs((xtemp-x_e[i])/xmean)))**2 + \
            np.log(1.0 + (np.abs((ytemp-y_e[i])/ymean)))**2
    return np.sqrt(np.sum(r_sq))


def frechet_dist(exp_data, num_data, p=2):
    r"""
    Compute the discrete Frechet distance

    Compute the Discrete Frechet Distance between two N-D curves according to
    [1]_. The Frechet distance is often described as the walking dog problem.
    From Wikipedia: "In mathematics, the Frechet distance is a measure of
    similarity between curves that takes into account the location and
    ordering of the points along the curves. It is named after Maurice
    Frechet."
    https://en.wikipedia.org/wiki/Fr%C3%A9chet_distance

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (P, N) shape, where P
        is the number of data points, and N is the number of dimensions
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use. Default is p=2 (Euclidean).
        The Manhattan distance is p=1.

    Returns
    -------
    df : float
        discrete Frechet distance

    References
    ----------
    .. [1] Thomas Eiter and Heikki Mannila. Computing discrete Frechet
        distance. Technical report, 1994.
        http://www.kr.tuwien.ac.at/staff/eiter/et-archive/cdtr9464.pdf
        http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.90.937&rep=rep1&type=pdf

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    Thanks to Arbel Amir for the issue, and Sen ZHANG for the iterative code
    https://github.com/cjekel/similarity_measures/issues/6

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> df = frechet_dist(exp_data, num_data)

    """
    n = len(exp_data)
    m = len(num_data)
    c = distance.cdist(exp_data, num_data, metric='minkowski', p=p)
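    # ca[i, j] stores the coupling measure: the smallest possible maximum
    # pointwise distance over all order-preserving pairings of the first
    # i+1 points of exp_data with the first j+1 points of num_data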
    ca = np.ones((n, m))
    ca = np.multiply(ca, -1)
    ca[0, 0] = c[0, 0]
    for i in range(1, n):
        ca[i, 0] = max(ca[i-1, 0], c[i, 0])
    for j in range(1, m):
        ca[0, j] = max(ca[0, j-1], c[0, j])
    for i in range(1, n):
        for j in range(1, m):
            ca[i, j] = max(min(ca[i-1, j], ca[i, j-1], ca[i-1, j-1]),
                           c[i, j])
    return ca[n-1, m-1]


def normalizeTwoCurves(x, y, w, z):
    """
    Normalize two curves for PCM method.

    This normalizes the two curves for PCM method following [1]_.

    Parameters
    ----------
    x : array_like (1-D)
        x locations for first curve
    y : array_like (1-D)
        y locations for first curve
    w : array_like (1-D)
        x locations for second curve
    z : array_like (1-D)
        y locations for second curve

    Returns
    -------
    xi : array_like
        normalized x locations for first curve
    eta : array_like
        normalized y locations for first curve
    xiP : array_like
        normalized x locations for second curve
    etaP : array_like
        normalized y locations for second curve

    References
    ----------
    .. [1] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: doi:10.2514/6.2012-5580.

    Examples
    --------
    >>> xi, eta, xiP, etaP = normalizeTwoCurves(x,y,w,z)

    """
    minX = np.min(x)
    maxX = np.max(x)
    minY = np.min(y)
    maxY = np.max(y)

    xi = (x - minX) / (maxX - minX)
    eta = (y - minY) / (maxY - minY)
    xiP = (w - minX) / (maxX - minX)
    etaP = (z - minY) / (maxY - minY)
    return xi, eta, xiP, etaP

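# Example usage (illustrative values): the first curve is scaled onto the
# unit square, and the second curve is scaled by the same factors.
# >>> x, y = np.array([0., 1., 2.]), np.array([0., 10., 20.])
# >>> w, z = np.array([0., 2.]), np.array([0., 20.])
# >>> xi, eta, xiP, etaP = normalizeTwoCurves(x, y, w, z)
# >>> xi  # array([0. , 0.5, 1. ]); xiP and etaP are both array([0., 1.])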

def pcm(exp_data, num_data, norm_seg_length=False):
    """
    Compute the Partial Curve Mapping area.

    Computes the Partial Curve Mapping (PCM) similarity measure as proposed by
    [1]_.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.
    norm_seg_length : boolean
        Whether to scale each segment's x and y components by the maximum
        absolute x and y values of the curve. The default value is False,
        which more closely follows the algorithm proposed in [1]_. Versions
        prior to 0.6.0 used `norm_seg_length=True`.

    Returns
    -------
    p : float
        pcm area measure

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    PCM distance was changed in version 0.6.0. To get the same results from
    previous versions, set `norm_seg_length=True`.

    References
    ----------
    .. [1] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: doi:10.2514/6.2012-5580.

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> p = pcm(exp_data, num_data)
    """
    # normalize the curves to the experimental data
    xi1, eta1, xi2, eta2 = normalizeTwoCurves(exp_data[:, 0], exp_data[:, 1],
                                              num_data[:, 0], num_data[:, 1])
    # compute the arc lengths of each curve
    le, le_nj, le_sum = get_length(xi1, eta1, norm_seg_length)
    lc, lc_nj, lc_sum = get_length(xi2, eta2, norm_seg_length)
    # scale each segment to the total polygon length
    le = le / le_nj
    le_sum = le_sum / le_nj
    lc = lc / lc_nj
    lc_sum = lc_sum / lc_nj
    # right now exp_data is curve a, and num_data is curve b
    # make sure a is shorter than a'; if not, swap the definitions
    if lc_nj > le_nj:
        # compute the arc lengths of each curve
        le, le_nj, le_sum = get_length(xi2, eta2, norm_seg_length)
        lc, lc_nj, lc_sum = get_length(xi1, eta1, norm_seg_length)
        # scale each segment to the total polygon length
        le = le / le_nj
        le_sum = le_sum / le_nj
        lc = lc / lc_nj
        lc_sum = lc_sum / lc_nj
        # swap xi1, eta1 with xi2, eta2
        xi1OLD = xi1.copy()
        eta1OLD = eta1.copy()
        xi1 = xi2.copy()
        eta1 = eta2.copy()
        xi2 = xi1OLD.copy()
        eta2 = eta1OLD.copy()

    n_sum = len(le_sum)

    min_offset = 0.0
    max_offset = le_nj - lc_nj

    # make sure the curves aren't the same length
    # if they are the same length, don't loop 200 times
    if min_offset == max_offset:
        offsets = [min_offset]
        pcm_dists = np.zeros(1)
    else:
        offsets = np.linspace(min_offset, max_offset, 200)
        pcm_dists = np.zeros(200)

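    # sweep a range of arc-length offsets between the two curves; the PCM
    # measure is the smallest accumulated distance over all offsets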
    for i, offset in enumerate(offsets):
        # create linear interpolation model for num_data based on arc length
        # evaluate linear interpolation model based on xi and eta of exp data
        xitemp = np.interp(le_sum+offset, lc_sum, xi2)
        etatemp = np.interp(le_sum+offset, lc_sum, eta2)

        d = np.sqrt((eta1-etatemp)**2 + (xi1-xitemp)**2)
        d1 = d[:-1]

        d2 = d[1:n_sum]

        v = 0.5*(d1+d2)*le_sum[1:n_sum]
        pcm_dists[i] = np.sum(v)
    return np.min(pcm_dists)


def dtw(exp_data, num_data, metric='euclidean', **kwargs):
    r"""
    Compute the Dynamic Time Warping distance.

    This computes a generic Dynamic Time Warping (DTW) distance and follows
    the algorithm from [1]_. This can use all distance metrics that are
    available in scipy.spatial.distance.cdist.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimmensions
    num_data : array_like
        Curve from your numerical data. num_data is of (P, N) shape, where P
        is the number of data points, and N is the number of dimmensions
    metric : str or callable, optional
        The distance metric to use. Default='euclidean'. Refer to the
        documentation for scipy.spatial.distance.cdist. Some examples:
        'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
        'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
        'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
        'wminkowski', 'yule'.
    **kwargs : dict, optional
        Extra arguments to `metric`: refer to each metric documentation in
        scipy.spatial.distance.
        Some examples:

        p : scalar
            The p-norm to apply for Minkowski, weighted and unweighted.
            Default: 2.

        w : ndarray
            The weight vector for metrics that support weights (e.g.,
            Minkowski).

        V : ndarray
            The variance vector for standardized Euclidean.
            Default: var(vstack([XA, XB]), axis=0, ddof=1)

        VI : ndarray
            The inverse of the covariance matrix for Mahalanobis.
            Default: inv(cov(vstack([XA, XB].T))).T

        out : ndarray
            The output array. If not None, the distance matrix Y is stored in
            this array.

    Returns
    -------
    r : float
        DTW distance.
    d : ndarray (2-D)
        Cumulative distance matrix

    Notes
    -----
    The DTW distance is d[-1, -1].

    This has O(M * P) computational cost.

    The latest scipy.spatial.distance.cdist information can be found at
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    DTW is a non-metric distance, which means DTW doesn't hold the triangle
    inequality.
    https://en.wikipedia.org/wiki/Triangle_inequality

    References
    ----------
    .. [1] Senin, P., 2008. Dynamic time warping algorithm review. Information
        and Computer Science Department University of Hawaii at Manoa Honolulu,
        USA, 855, pp.1-23.
        http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> r, d = dtw(exp_data, num_data)

    The euclidean distance is used by default. You can use metric and **kwargs
    to specify different types of distance metrics. The following example uses
    the city block or Manhattan distance between points.

    >>> r, d = dtw(exp_data, num_data, metric='cityblock')

    """
    c = distance.cdist(exp_data, num_data, metric=metric, **kwargs)

    d = np.zeros(c.shape)
    d[0, 0] = c[0, 0]
    n, m = c.shape
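    # d[i, j] accumulates the minimum total cost of aligning the first i+1
    # points of exp_data with the first j+1 points of num_data; each step
    # advances along one curve or along both at once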
    for i in range(1, n):
        d[i, 0] = d[i-1, 0] + c[i, 0]
    for j in range(1, m):
        d[0, j] = d[0, j-1] + c[0, j]
    for i in range(1, n):
        for j in range(1, m):
            d[i, j] = c[i, j] + min((d[i-1, j], d[i, j-1], d[i-1, j-1]))
    return d[-1, -1], d


def dtw_path(d):
    r"""
    Calculates the optimal DTW path from a given DTW cumulative distance
    matrix.

    This function returns the optimal DTW path using the backtracking
    algorithm described in [1]_. The path lists the paired indices from
    each curve being compared.

    Parameters
    ----------
    d : ndarray (2-D)
        Cumulative distance matrix.

    Returns
    -------
    path : ndarray (2-D)
        The optimal DTW path.

    Notes
    -----
    Note that path[:, 0] represents the indices from exp_data, while
    path[:, 1] represents the indices from the num_data.

    References
    ----------
    .. [1] Senin, P., 2008. Dynamic time warping algorithm review. Information
        and Computer Science Department University of Hawaii at Manoa Honolulu,
        USA, 855, pp.1-23.
        http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf

    Examples
    --------
    First calculate the DTW cumulative distance matrix.

    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> r, d = dtw(exp_data, num_data)

    Now you can calculate the optimal DTW path

    >>> path = dtw_path(d)

    You can visualize the path on the cumulative distance matrix using the
    following code.

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.imshow(d.T, origin='lower')
    >>> plt.plot(path[:, 0], path[:, 1], '-k')
    >>> plt.colorbar()
    >>> plt.show()

    """
    path = []
    i, j = d.shape
    i = i - 1
    j = j - 1
    # backtracking starts from the last point,
    # and ends at d[0, 0]
    path.append((i, j))
    while i > 0 or j > 0:
        if i == 0:
            j = j - 1
        elif j == 0:
            i = i - 1
        else:
            temp_step = min([d[i-1, j], d[i, j-1], d[i-1, j-1]])
            if d[i-1, j] == temp_step:
                i = i - 1
            elif d[i, j-1] == temp_step:
                j = j - 1
            else:
                i = i - 1
                j = j - 1
        path.append((i, j))
    path = np.array(path)
    # reverse the order of path, such that it starts with [0, 0]
    return path[::-1]


def mae(exp_data, num_data):
    """
    Compute the Mean Absolute Error (MAE).

    This computes the mean of the absolute differences between two curves.
    Each curve must have the same number of data points and the same
    dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MAE.
    """
    c = np.abs(exp_data - num_data)
    return np.mean(c)

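# Example usage (illustrative values): the absolute differences are
# 0, 1, 0, and 2, so the MAE is 0.75.
# >>> mae(np.array([[0., 0.], [1., 1.]]), np.array([[0., 1.], [1., 3.]]))
# 0.75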

def mse(exp_data, num_data):
    """
    Compute the Mean Squared Error (MSE).

    This computes the mean of the squared differences between two curves.
    Each curve must have the same number of data points and the same dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MSE.
    """
    c = np.square(exp_data - num_data)
    return np.mean(c)
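# Example usage (illustrative values): the squared differences are
# 0, 1, 0, and 4, so the MSE is 1.25.
# >>> mse(np.array([[0., 0.], [1., 1.]]), np.array([[0., 1.], [1., 3.]]))
# 1.25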

Functions

def area_between_two_curves(exp_data, num_data)

Calculates the area between two curves.

This calculates the area according to the algorithm in [1]_. Each curve is constructed from discretized data points in 2-D space, e.g. each curve consists of x and y data points.

Parameters

exp_data : ndarray (2-D)
Curve from your experimental data.
num_data : ndarray (2-D)
Curve from your numerical data.

Returns

area : float
The area between exp_data and num_data curves.

References

.. [1] Jekel, C. F., Venter, G., Venter, M. P., Stander, N., & Haftka, R. T. (2018). Similarity measures for identifying material parameters from hysteresis loops using inverse analysis. International Journal of Material Forming. https://doi.org/10.1007/s12289-018-1421-8

Notes

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

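Examples

A minimal usage sketch (the curves below are illustrative; any 2-D ndarrays with x in column 0 and y in column 1 work):

>>> x = np.linspace(0.0, 1.0, 100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = x**2
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = x**2 + 0.1
>>> area = area_between_two_curves(exp_data, num_data)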
def curve_length_measure(exp_data, num_data)

Compute the curve length based distance between two curves.

This computes the curve length similarity measure according to [1]_. This implementation follows the OF2 form, which is a self normalizing form based on the average value.

Parameters

exp_data : ndarray (2-D)
Curve from your experimental data.
num_data : ndarray (2-D)
Curve from your numerical data.

Returns

r : float
curve length based similarity distance

Notes

This uses the OF2 method from [1]_.

References

.. [1] A Andrade-Campos, R De-Carvalho, and R A F Valente. Novel criteria for determination of material model parameters. International Journal of Mechanical Sciences, 54(1):294-305, 2012. ISSN 0020-7403. DOI https://doi.org/10.1016/j.ijmecsci.2011.11.010 URL: http://www.sciencedirect.com/science/article/pii/S0020740311002451

Examples

>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r = curve_length_measure(exp_data, num_data)
def dtw(exp_data, num_data, metric='euclidean', **kwargs)

Compute the Dynamic Time Warping distance.

This computes a generic Dynamic Time Warping (DTW) distance and follows the algorithm from [1]_. This can use all distance metrics that are available in scipy.spatial.distance.cdist.

Parameters

exp_data : array_like
Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
num_data : array_like
Curve from your numerical data. num_data is of (P, N) shape, where P is the number of data points, and N is the number of dimensions
metric : str or callable, optional
The distance metric to use. Default='euclidean'. Refer to the documentation for scipy.spatial.distance.cdist. Some examples: 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
**kwargs : dict, optional

Extra arguments to metric: refer to each metric documentation in scipy.spatial.distance. Some examples:

p : scalar The p-norm to apply for Minkowski, weighted and unweighted. Default: 2.

w : ndarray The weight vector for metrics that support weights (e.g., Minkowski).

V : ndarray The variance vector for standardized Euclidean. Default: var(vstack([XA, XB]), axis=0, ddof=1)

VI : ndarray The inverse of the covariance matrix for Mahalanobis. Default: inv(cov(vstack([XA, XB].T))).T

out : ndarray The output array. If not None, the distance matrix Y is stored in this array.

Returns

r : float
DTW distance.
d : ndarray (2-D)
Cumulative distance matrix

Notes

The DTW distance is d[-1, -1].

This has O(M * P) computational cost.

The latest scipy.spatial.distance.cdist information can be found at https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

DTW is a non-metric distance, which means DTW doesn't hold the triangle inequality. https://en.wikipedia.org/wiki/Triangle_inequality

References

.. [1] Senin, P., 2008. Dynamic time warping algorithm review. Information and Computer Science Department University of Hawaii at Manoa Honolulu, USA, 855, pp.1-23. http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf

Examples

>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r, d = dtw(exp_data, num_data)

The euclidean distance is used by default. You can use metric and **kwargs to specify different types of distance metrics. The following example uses the city block or Manhattan distance between points.

>>> r, d = dtw(exp_data, num_data, metric='cityblock')
def dtw_path(d)

Calculates the optimal DTW path from a given DTW cumulative distance matrix.

This function returns the optimal DTW path using the backtracking algorithm described in [1]_. The path lists the paired indices from each curve being compared.

Parameters

d : ndarray (2-D)
Cumulative distance matrix.

Returns

path : ndarray (2-D)
The optimal DTW path.

Notes

Note that path[:, 0] represents the indices from exp_data, while path[:, 1] represents the indices from the num_data.

References

.. [1] Senin, P., 2008. Dynamic time warping algorithm review. Information and Computer Science Department University of Hawaii at Manoa Honolulu, USA, 855, pp.1-23. http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf

Examples

First calculate the DTW cumulative distance matrix.

>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r, d = dtw(exp_data, num_data)

Now you can calculate the optimal DTW path

>>> path = dtw_path(d)

You can visualize the path on the cumulative distance matrix using the following code.

>>> import matplotlib.pyplot as plt
>>> plt.figure()
>>> plt.imshow(d.T, origin='lower')
>>> plt.plot(path[:, 0], path[:, 1], '-k')
>>> plt.colorbar()
>>> plt.show()
def frechet_dist(exp_data, num_data, p=2)

Compute the discrete Frechet distance

Compute the Discrete Frechet Distance between two N-D curves according to [1]_. The Frechet distance is often described as the walking dog problem. From Wikipedia: "In mathematics, the Frechet distance is a measure of similarity between curves that takes into account the location and ordering of the points along the curves. It is named after Maurice Frechet." https://en.wikipedia.org/wiki/Fr%C3%A9chet_distance

Parameters

exp_data : array_like
Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
num_data : array_like
Curve from your numerical data. num_data is of (P, N) shape, where P is the number of data points, and N is the number of dimensions
p : float, 1 <= p <= infinity
Which Minkowski p-norm to use. Default is p=2 (Euclidean). The Manhattan distance is p=1.

Returns

df : float
discrete Frechet distance

References

.. [1] Thomas Eiter and Heikki Mannila. Computing discrete Frechet distance. Technical report, 1994. http://www.kr.tuwien.ac.at/staff/eiter/et-archive/cdtr9464.pdf http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.90.937&rep=rep1&type=pdf

Notes

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

Thanks to Arbel Amir for the issue, and Sen ZHANG for the iterative code https://github.com/cjekel/similarity_measures/issues/6

Examples

>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> df = frechet_dist(exp_data, num_data)
def get_arc_length(dataset)

Obtain arc length distances between every point in 2-D space

Obtains the total arc length of a curve in 2-D space (a curve of x and y) as well as the arc lengths between each two consecutive data points of the curve.

Parameters

dataset : ndarray (2-D)
The dataset of the curve in 2-D space.

Returns

arcLength : float
The sum of all consecutive arc lengths
arcLengths : array_like
A list of the arc lengths between data points

Notes

Your x locations of data points should be dataset[:, 0], and the y locations of the data points should be dataset[:, 1]

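Examples

A minimal usage sketch (illustrative values): a single 3-4-5 segment has a total arc length of 5.0.

>>> dataset = np.array([[0.0, 0.0], [3.0, 4.0]])
>>> arc_length, arc_lengths = get_arc_length(dataset)  # arc_length == 5.0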
def get_length(x, y, norm_seg_length=True)

Compute arc lengths of an x y curve.

Computes the arc length between each pair of consecutive points of an xy curve, the cumulative arc length, and the total arc length. The euclidean distance is used to determine the arc length.

Parameters

x : array_like
the x locations of a curve
y : array_like
the y locations of a curve
norm_seg_length : boolean
Whether to divide the segment length of each curve by the maximum.

Returns

le : ndarray (1-D)
the euclidean distance between every two points
le_total : float
the total arc length distance of a curve
le_cum : ndarray (1-D)
the cumulative sum of euclidean distances between every two points

Examples

>>> le, le_total, le_cum = get_length(x, y)
Expand source code
def get_length(x, y, norm_seg_length=True):
    r"""
    Compute arc lengths of an x y curve.

    Computes the arc length between each pair of consecutive points on an xy
    curve, the cumulative arc length along the curve, and the total arc length
    of the curve. The Euclidean distance is used to determine the arc length.

    Parameters
    ----------
    x : array_like
        the x locations of a curve
    y : array_like
        the y locations of a curve
    norm_seg_length : boolean
        Whether to divide the segment length of each curve by the maximum.

    Returns
    -------
    le : ndarray (1-D)
        the euclidean distance between every two points
    le_total : float
        the total arc length distance of a curve
    le_cum : ndarray (1-D)
        the cumulative sum of euclidean distances between every two points

    Examples
    --------
    >>> le, le_total, le_cum = get_length(x, y)

    """
    n = len(x)

    if norm_seg_length:
        xmax = np.max(np.abs(x))
        ymax = np.max(np.abs(y))

        # if your max x or y value is zero... you'll get np.inf
        # as your curve length based measure
        if xmax == 0:
            xmax = 1e-15
        if ymax == 0:
            ymax = 1e-15
    else:
        ymax = 1.0
        xmax = 1.0

    le = np.zeros(n)
    le[0] = 0.0
    l_sum = np.zeros(n)
    l_sum[0] = 0.0
    for i in range(0, n-1):
        le[i+1] = np.sqrt((((x[i+1]-x[i])/xmax)**2)+(((y[i+1]-y[i])/ymax)**2))
        l_sum[i+1] = l_sum[i]+le[i+1]
    return le, np.sum(le), l_sum
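
To make the norm_seg_length flag concrete, here is an added example (the 3-4-5 data is made up). Without normalization the raw Euclidean segment lengths are returned; with it, each segment's components are first divided by max|x| and max|y|:

>>> x = np.array([0.0, 3.0, 3.0])  # made-up curve
>>> y = np.array([0.0, 0.0, 4.0])
>>> le, le_total, le_cum = get_length(x, y, norm_seg_length=False)
>>> le
array([0., 3., 4.])
>>> float(le_total)
7.0
>>> le, le_total, le_cum = get_length(x, y)  # default divides by xmax=3, ymax=4
>>> le
array([0., 1., 1.])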
def is_simple_quad(ab, bc, cd, da)

Returns True if a quadrilateral is simple

This function performs cross products at the vertices of a quadrilateral. It is possible to use the results to decide whether a quadrilateral is simple or complex. This function returns True if the quadrilateral is simple, and False if the quadrilateral is complex. A complex quadrilateral is a self-intersecting quadrilateral.

Parameters

ab : array_like
[x, y] vector of the first edge, from the first vertex to the second
bc : array_like
[x, y] vector of the second edge, from the second vertex to the third
cd : array_like
[x, y] vector of the third edge, from the third vertex to the fourth
da : array_like
[x, y] vector of the fourth edge, from the fourth vertex back to the first

Returns

simple : bool
True if quadrilateral is simple, False if complex
Expand source code
def is_simple_quad(ab, bc, cd, da):
    r"""
    Returns True if a quadrilateral is simple

    This function performs cross products at the vertices of a quadrilateral.
    It is possible to use the results to decide whether a quadrilateral is
    simple or complex. This function returns True if the quadrilateral is
    simple, and False if the quadrilateral is complex. A complex quadrilateral
    is a self-intersecting quadrilateral.

    Parameters
    ----------
    ab : array_like
        [x, y] vector of the first edge, from the first vertex to the second
    bc : array_like
        [x, y] vector of the second edge, from the second vertex to the third
    cd : array_like
        [x, y] vector of the third edge, from the third vertex to the fourth
    da : array_like
        [x, y] vector of the fourth edge, from the fourth vertex back to the first

    Returns
    -------
    simple : bool
        True if quadrilateral is simple, False if complex
    """
    #   Compute all four cross products
    temp0 = np.cross(ab, bc)
    temp1 = np.cross(bc, cd)
    temp2 = np.cross(cd, da)
    temp3 = np.cross(da, ab)
    cross = np.array([temp0, temp1, temp2, temp3])
    #   See that majority of cross products is non-positive or non-negative
    #   They don't necessarily need to lie in the same 'Z' direction
    if sum(cross > 0) < sum(cross < 0):
        crossTF = cross <= 0
    else:
        crossTF = cross >= 0
    return sum(crossTF) > 2
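
Two added illustrations (the edge vectors are made up). Note that the arguments are the edge vectors of the quadrilateral, not the vertex coordinates themselves: the edges of a unit square traversed in order form a simple quadrilateral, while a "bowtie" ordering of the same four corners is complex:

>>> bool(is_simple_quad([1, 0], [0, 1], [-1, 0], [0, -1]))  # square edges
True
>>> bool(is_simple_quad([1, 0], [-1, 1], [1, 0], [-1, -1]))  # bowtie edges
False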
def mae(exp_data, num_data)

Compute the Mean Absolute Error (MAE).

This computes the mean of absolute values of distances between two curves. Each curve must have the same number of data points and the same dimension.

Parameters

exp_data : array_like
Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
num_data : array_like
Curve from your numerical data. num_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions

Returns

r : float
MAE.
Expand source code
def mae(exp_data, num_data):
    """
    Compute the Mean Absolute Error (MAE).

    This computes the mean of absolute values of distances between two curves.
    Each curve must have the same number of data points and the same dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MAE.
    """
    c = np.abs(exp_data - num_data)
    return np.mean(c)
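
A tiny added check (made-up data): the curves differ in a single coordinate by 3 across six entries, so the mean absolute difference is 3/6 = 0.5.

>>> exp_data = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])  # made-up curves
>>> num_data = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 3.0]])
>>> float(mae(exp_data, num_data))
0.5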
def makeQuad(x, y)

Calculate the area from the x and y locations of a quadrilateral

This function first constructs a simple quadrilateral from the x and y locations of the vertices of the quadrilateral. The function then calculates the shoelace area of the simple quadrilateral.

Parameters

x : array_like
the x locations of a quadrilateral
y : array_like
the y locations of a quadrilateral

Returns

area : float
Area of quadrilateral via shoelace formula

Notes

This function rearranges the vertices of a quadrilateral until the quadrilateral is "Simple" (meaning non-complex). Once a simple quadrilateral is found, the area of the quadrilateral is calculated using the shoelace formula.

Expand source code
def makeQuad(x, y):
    r"""
    Calculate the area from the x and y locations of a quadrilateral

    This function first constructs a simple quadrilateral from the x and y
    locations of the vertices of the quadrilateral. The function then
    calculates the shoelace area of the simple quadrilateral.

    Parameters
    ----------
    x : array_like
        the x locations of a quadrilateral
    y : array_like
        the y locations of a quadrilateral

    Returns
    -------
    area : float
        Area of quadrilateral via shoelace formula

    Notes
    -----
    This function rearranges the vertices of a quadrilateral until the
    quadrilateral is "Simple" (meaning non-complex). Once a simple
    quadrilateral is found, the area of the quadrilateral is calculated
    using the shoelace formula.
    """

    # check to see if the provided point order is valid
    # I need to get the values of the cross products of ABxBC, BCxCD, CDxDA,
    # DAxAB, thus I need to create the following arrays AB, BC, CD, DA

    AB = [x[1]-x[0], y[1]-y[0]]
    BC = [x[2]-x[1], y[2]-y[1]]
    CD = [x[3]-x[2], y[3]-y[2]]
    DA = [x[0]-x[3], y[0]-y[3]]

    isQuad = is_simple_quad(AB, BC, CD, DA)

    # is_simple_quad returns a numpy bool, so test truthiness with `not`
    if not isQuad:
        # attempt to rearrange the first two points
        x[1], x[0] = x[0], x[1]
        y[1], y[0] = y[0], y[1]
        AB = [x[1]-x[0], y[1]-y[0]]
        BC = [x[2]-x[1], y[2]-y[1]]
        CD = [x[3]-x[2], y[3]-y[2]]
        DA = [x[0]-x[3], y[0]-y[3]]

        isQuad = is_simple_quad(AB, BC, CD, DA)

        if not isQuad:
            # place the second and first points back where they were, and
            # swap the second and third points
            x[2], x[0], x[1] = x[0], x[1], x[2]
            y[2], y[0], y[1] = y[0], y[1], y[2]
            AB = [x[1]-x[0], y[1]-y[0]]
            BC = [x[2]-x[1], y[2]-y[1]]
            CD = [x[3]-x[2], y[3]-y[2]]
            DA = [x[0]-x[3], y[0]-y[3]]

            isQuad = is_simple_quad(AB, BC, CD, DA)

    # calculate the area via shoelace formula
    area = poly_area(x, y)
    return area
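
An added illustration (coordinates made up): the corners of the unit square supplied in a self-intersecting "bowtie" order are reordered internally until a simple quadrilateral is found, and the correct area of 1.0 is recovered.

>>> float(makeQuad([0, 1, 0, 1], [0, 0, 1, 1]))  # bowtie vertex order
1.0

Be aware that the swaps above happen in place, so list arguments may come back with their entries reordered.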
def mse(exp_data, num_data)

Compute the Mean Squared Error (MSE).

This computes the mean of squared distances between two curves. Each curve must have the same number of data points and the same dimension.

Parameters

exp_data : array_like
Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
num_data : array_like
Curve from your numerical data. num_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions

Returns

r : float
MSE.
Expand source code
def mse(exp_data, num_data):
    """
    Compute the Mean Squared Error (MSE).

    This computes the mean of squared distances between two curves.
    Each curve must have the same number of data points and the same dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MSE.
    """
    c = np.square(exp_data - num_data)
    return np.mean(c)
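
Using the same made-up curves as the mae example above, the single difference of 3 is squared, giving 9/6 = 1.5. Squaring means MSE penalizes one large deviation more heavily than MAE does.

>>> exp_data = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])  # made-up curves
>>> num_data = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 3.0]])
>>> float(mse(exp_data, num_data))
1.5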
def normalizeTwoCurves(x, y, w, z)

Normalize two curves for the PCM method.

This normalizes the two curves for the PCM method following [1]_.

Parameters

x : array_like (1-D)
x locations for first curve
y : array_like (1-D)
y locations for first curve
w : array_like (1-D)
x locations for second curve
z : array_like (1-D)
y locations for second curve

Returns

xi : array_like
normalized x locations for first curve
eta : array_like
normalized y locations for first curve
xiP : array_like
normalized x locations for second curve
etaP : array_like
normalized y locations for second curve

References

.. [1] Katharina Witowski and Nielen Stander. "Parameter Identification of Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation Technology, Integration, and Operations (ATIO) Conference and 14th AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference, Aviation Technology, Integration, and Operations (ATIO) Conferences. doi: 10.2514/6.2012-5580.

Examples

>>> xi, eta, xiP, etaP = normalizeTwoCurves(x,y,w,z)
Expand source code
def normalizeTwoCurves(x, y, w, z):
    """
    Normalize two curves for the PCM method.

    This normalizes the two curves for the PCM method following [1]_.

    Parameters
    ----------
    x : array_like (1-D)
        x locations for first curve
    y : array_like (1-D)
        y locations for first curve
    w : array_like (1-D)
        x locations for second curve
    z : array_like (1-D)
        y locations for second curve

    Returns
    -------
    xi : array_like
        normalized x locations for first curve
    eta : array_like
        normalized y locations for first curve
    xiP : array_like
        normalized x locations for second curve
    etaP : array_like
        normalized y locations for second curve

    References
    ----------
    .. [1] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: 10.2514/6.2012-5580.

    Examples
    --------
    >>> xi, eta, xiP, etaP = normalizeTwoCurves(x,y,w,z)

    """
    minX = np.min(x)
    maxX = np.max(x)
    minY = np.min(y)
    maxY = np.max(y)

    xi = (x - minX) / (maxX - minX)
    eta = (y - minY) / (maxY - minY)
    xiP = (w - minX) / (maxX - minX)
    etaP = (z - minY) / (maxY - minY)
    return xi, eta, xiP, etaP
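
A small added example (arrays made up): both curves are scaled by the bounding box of the first curve, so the first curve always maps onto the unit square while the second may not.

>>> x = np.array([0.0, 2.0])  # made-up curves
>>> y = np.array([0.0, 4.0])
>>> w = np.array([1.0, 2.0])
>>> z = np.array([2.0, 4.0])
>>> xi, eta, xiP, etaP = normalizeTwoCurves(x, y, w, z)
>>> xi
array([0., 1.])
>>> xiP
array([0.5, 1. ])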
def pcm(exp_data, num_data, norm_seg_length=False)

Compute the Partial Curve Mapping area.

Computes the Partial Curve Mapping (PCM) similarity measure as proposed by [1]_.

Parameters

exp_data : ndarray (2-D)
Curve from your experimental data.
num_data : ndarray (2-D)
Curve from your numerical data.
norm_seg_length : boolean
Whether to divide the segment length of each curve by the maximum. The default value is False, which more closely follows the algorithm proposed in [1]_. Versions prior to 0.6.0 used norm_seg_length=True.

Returns

p : float
pcm area measure

Notes

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

PCM distance was changed in version 0.6.0. To get the same results from previous versions, set norm_seg_length=True.

References

.. [1] Katharina Witowski and Nielen Stander. "Parameter Identification of Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation Technology, Integration, and Operations (ATIO) Conference and 14th AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference, Aviation Technology, Integration, and Operations (ATIO) Conferences. doi: 10.2514/6.2012-5580.

Examples

>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> p = pcm(exp_data, num_data)
Expand source code
def pcm(exp_data, num_data, norm_seg_length=False):
    """
    Compute the Partial Curve Mapping area.

    Computes the Partial Curve Mapping (PCM) similarity measure as proposed by
    [1]_.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.
    norm_seg_length : boolean
        Whether to divide the segment length of each curve by the maximum. The
        default value is False, which more closely follows the algorithm
        proposed in [1]_. Versions prior to 0.6.0 used `norm_seg_length=True`.

    Returns
    -------
    p : float
        pcm area measure

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    PCM distance was changed in version 0.6.0. To get the same results from
    previous versions, set `norm_seg_length=True`.

    References
    ----------
    .. [1] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: 10.2514/6.2012-5580.

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> p = pcm(exp_data, num_data)
    """
    # normalize the curves to the experimental data
    xi1, eta1, xi2, eta2 = normalizeTwoCurves(exp_data[:, 0], exp_data[:, 1],
                                              num_data[:, 0], num_data[:, 1])
    # compute the arc lengths of each curve
    le, le_nj, le_sum = get_length(xi1, eta1, norm_seg_length)
    lc, lc_nj, lc_sum = get_length(xi2, eta2, norm_seg_length)
    # scale each segment to the total polygon length
    le = le / le_nj
    le_sum = le_sum / le_nj
    lc = lc / lc_nj
    lc_sum = lc_sum / lc_nj
    # right now exp_data is curve a, and num_data is curve b
    # if curve b is longer than curve a, swap the definitions so that the
    # le variables always refer to the longer of the two curves
    if lc_nj > le_nj:
        # compute the arc lengths of each curve
        le, le_nj, le_sum = get_length(xi2, eta2, norm_seg_length)
        lc, lc_nj, lc_sum = get_length(xi1, eta1, norm_seg_length)
        # scale each segment to the total polygon length
        le = le / le_nj
        le_sum = le_sum / le_nj
        lc = lc / lc_nj
        lc_sum = lc_sum / lc_nj
        # swap xi1, eta1 with xi2, eta2
        xi1OLD = xi1.copy()
        eta1OLD = eta1.copy()
        xi1 = xi2.copy()
        eta1 = eta2.copy()
        xi2 = xi1OLD.copy()
        eta2 = eta1OLD.copy()

    n_sum = len(le_sum)

    min_offset = 0.0
    max_offset = le_nj - lc_nj

    # make sure the curves aren't the same length
    # if they are the same length, don't loop 200 times
    if min_offset == max_offset:
        offsets = [min_offset]
        pcm_dists = np.zeros(1)
    else:
        offsets = np.linspace(min_offset, max_offset, 200)
        pcm_dists = np.zeros(200)

    for i, offset in enumerate(offsets):
        # create linear interpolation model for num_data based on arc length
        # evaluate linear interpolation model based on xi and eta of exp data
        xitemp = np.interp(le_sum+offset, lc_sum, xi2)
        etatemp = np.interp(le_sum+offset, lc_sum, eta2)

        d = np.sqrt((eta1-etatemp)**2 + (xi1-xitemp)**2)
        d1 = d[:-1]

        d2 = d[1:n_sum]

        v = 0.5*(d1+d2)*le_sum[1:n_sum]
        pcm_dists[i] = np.sum(v)
    return np.min(pcm_dists)
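
An added sanity check (curve made up): for two identical curves the normalized arc lengths match, the offset sweep collapses to the single offset 0.0, and every interpolated distance is zero, so the PCM measure is exactly 0.0.

>>> curve = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]])  # made-up curve
>>> float(pcm(curve, curve.copy()))
0.0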
def poly_area(x, y)

A function that computes the polygonal area via the shoelace formula.

This function allows you to take the area of any polygon that is not self intersecting. This is also known as Gauss's area formula. See https://en.wikipedia.org/wiki/Shoelace_formula

Parameters

x : ndarray (1-D)
the x locations of a polygon
y : ndarray (1-D)
the y locations of the polygon

Returns

area : float
the calculated polygonal area

Notes

The x and y locations need to be ordered such that the first vertex of the polygon corresponds to x[0] and y[0], the second vertex to x[1] and y[1], and so forth

Thanks to Mahdi for this one line code https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates

Expand source code
def poly_area(x, y):
    r"""
    A function that computes the polygonal area via the shoelace formula.

    This function allows you to take the area of any polygon that is not self
    intersecting. This is also known as Gauss's area formula. See
    https://en.wikipedia.org/wiki/Shoelace_formula

    Parameters
    ----------
    x : ndarray (1-D)
        the x locations of a polygon
    y : ndarray (1-D)
        the y locations of the polygon

    Returns
    -------
    area : float
        the calculated polygonal area

    Notes
    -----
    The x and y locations need to be ordered such that the first vertex
    of the polygon corresponds to x[0] and y[0], the second vertex to
    x[1] and y[1], and so forth


    Thanks to Mahdi for this one line code
    https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
    """
    return 0.5*np.abs(np.dot(x, np.roll(y, 1))-np.dot(y, np.roll(x, 1)))
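
A closing added example (coordinates made up): for the unit square listed counter-clockwise, the shoelace sum evaluates to 0.5 * |0 - 2| = 1.0.

>>> sq_x = np.array([0.0, 1.0, 1.0, 0.0])  # made-up square
>>> sq_y = np.array([0.0, 0.0, 1.0, 1.0])
>>> float(poly_area(sq_x, sq_y))
1.0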