similaritymeasures.similaritymeasures

  1from __future__ import division
  2import numpy as np
  3from scipy.spatial import distance
  4# MIT License
  5#
  6# Copyright (c) 2018,2019 Charles Jekel
  7#
  8# Permission is hereby granted, free of charge, to any person obtaining a copy
  9# of this software and associated documentation files (the "Software"), to deal
 10# in the Software without restriction, including without limitation the rights
 11# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12# copies of the Software, and to permit persons to whom the Software is
 13# furnished to do so, subject to the following conditions:
 14#
 15# The above copyright notice and this permission notice shall be included in
 16# all copies or substantial portions of the Software.
 17#
 18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 24# SOFTWARE.
 25
 26
 27def poly_area(x, y):
 28    r"""
 29    A function that computes the polygonal area via the shoelace formula.
 30
 31    This function allows you to take the area of any polygon that is not self
 32    intersecting. This is also known as Gauss's area formula. See
 33    https://en.wikipedia.org/wiki/Shoelace_formula
 34
 35    Parameters
 36    ----------
 37    x : ndarray (1-D)
 38        the x locations of a polygon
 39    y : ndarray (1-D)
 40        the y locations of the polygon
 41
 42    Returns
 43    -------
 44    area : float
 45        the calculated polygonal area
 46
 47    Notes
 48    -----
 49    The x and y locations need to be ordered such that the first vertex
 50    of the polynomial correspounds to x[0] and y[0], the second vertex
 51    x[1] and y[1] and so forth
 52
 53
 54    Thanks to Mahdi for this one line code
 55    https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
 56    """
 57    return 0.5*np.abs(np.dot(x, np.roll(y, 1))-np.dot(y, np.roll(x, 1)))
 58
 59
 60def is_simple_quad(ab, bc, cd, da):
 61    r"""
 62    Returns True if a quadrilateral is simple
 63
 64    This function performs cross products at the vertices of a quadrilateral.
 65    It is possible to use the results to decide whether a quadrilateral is
 66    simple or complex. This function returns True if the quadrilateral is
 67    simple, and False if the quadrilateral is complex. A complex quadrilateral
 68    is a self-intersecting quadrilateral.
 69
 70    Parameters
 71    ----------
 72    ab : array_like
 73        [x, y] location of the first vertex
 74    bc : array_like
 75        [x, y] location of the second vertex
 76    cd : array_like
 77        [x, y] location of the third vertex
 78    da : array_like
 79        [x, y] location of the fourth vertex
 80
 81    Returns
 82    -------
 83    simple : bool
 84        True if quadrilateral is simple, False if complex
 85    """
 86    #   Compute all four cross products
 87    temp0 = np.cross(ab, bc)
 88    temp1 = np.cross(bc, cd)
 89    temp2 = np.cross(cd, da)
 90    temp3 = np.cross(da, ab)
 91    cross = np.array([temp0, temp1, temp2, temp3])
 92    #   See that majority of cross products is non-positive or non-negative
 93    #   They don't necessarily need to lie in the same 'Z' direction
 94    if sum(cross > 0) < sum(cross < 0):
 95        crossTF = cross <= 0
 96    else:
 97        crossTF = cross >= 0
 98    return sum(crossTF) > 2
 99
100
def makeQuad(x, y):
    r"""
    Calculate the area from the x and y locations of a quadrilateral

    Rearranges the four vertices until they describe a simple
    (non-self-intersecting) quadrilateral, then returns the shoelace area of
    that quadrilateral.

    Parameters
    ----------
    x : array_like
        the x locations of a quadrilateral
    y : array_like
        the y locations of a quadrilateral

    Returns
    -------
    area : float
        Area of quadrilateral via shoelace formula

    Notes
    -----
    x and y may be reordered in place while searching for a simple vertex
    ordering; pass copies if the original ordering must be preserved.
    """
    def _edge_vectors(xs, ys):
        # AB, BC, CD, DA edge vectors for the current vertex ordering
        return ([xs[1] - xs[0], ys[1] - ys[0]],
                [xs[2] - xs[1], ys[2] - ys[1]],
                [xs[3] - xs[2], ys[3] - ys[2]],
                [xs[0] - xs[3], ys[0] - ys[3]])

    if not is_simple_quad(*_edge_vectors(x, y)):
        # first attempt: swap the first two vertices (in place)
        x[0], x[1] = x[1], x[0]
        y[0], y[1] = y[1], y[0]
        if not is_simple_quad(*_edge_vectors(x, y)):
            # undo the first swap and instead swap the (original) second and
            # third vertices; equivalent to rotating the first three entries
            x[0], x[1], x[2] = x[1], x[2], x[0]
            y[0], y[1], y[2] = y[1], y[2], y[0]

    # area of the (hopefully now simple) quadrilateral via shoelace formula
    return poly_area(x, y)
166
167
def get_arc_length(dataset):
    r"""
    Obtain arc length distances between every point in 2-D space

    Obtains the total arc length of a curve in 2-D space (a curve of x and y)
    as well as the arc lengths between each two consecutive data points of the
    curve.

    Parameters
    ----------
    dataset : ndarray (2-D)
        The dataset of the curve in 2-D space.

    Returns
    -------
    arcLength : float
        The sum of all consecutive arc lengths
    arcLengths : ndarray (1-D)
        The arc lengths between consecutive data points

    Notes
    -----
    Your x locations of data points should be dataset[:, 0], and the y
    locations of the data points should be dataset[:, 1]
    """
    # euclidean distance between each pair of consecutive points; this
    # replaces the previous distance.cdist call, which built a full
    # (m-1) x (m-1) distance matrix only to read its diagonal
    # (O(m^2) time and memory for an O(m) result)
    deltas = np.diff(dataset, axis=0)
    arcLengths = np.sqrt(np.sum(deltas * deltas, axis=1))
    arcLength = np.sum(arcLengths)
    return arcLength, arcLengths
204
205
def area_between_two_curves(exp_data, num_data):
    r"""
    Calculates the area between two curves.

    This calculates the area according to the algorithm in [1]_. Each curve is
    constructed from discretized data points in 2-D space, e.g. each curve
    consists of x and y data points.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.

    Returns
    -------
    area : float
        The area between exp_data and num_data curves.

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    .. [1] Jekel, C. F., Venter, G., Venter, M. P., Stander, N., & Haftka, R.
        T. (2018). Similarity measures for identifying material parameters from
        hysteresis loops using inverse analysis. International Journal of
        Material Forming. https://doi.org/10.1007/s12289-018-1421-8
    """
    # Calculate the area between two curves using quadrilaterals
    # Consider the test data to be data from an experimental test as exp_data
    # Consider the computer simulation (results from numerical model) to be
    # num_data
    #
    # Example on formatting the test and history data:
    # Curve1 = [xi1, eta1]
    # Curve2 = [xi2, eta2]
    # exp_data = np.zeros([len(xi1), 2])
    # num_data = np.zeros([len(xi2), 2])
    # exp_data[:,0] = xi1
    # exp_data[:,1] = eta1
    # num_data[:, 0] = xi2
    # num_data[:, 1] = eta2
    #
    # then you can calculate the area as
    # area = area_between_two_curves(exp_data, num_data)

    n_exp = len(exp_data)
    n_num = len(num_data)

    # the length of exp_data must be larger than the length of num_data
    if n_exp < n_num:
        # swap the curves so exp_data always refers to the longer one
        temp = num_data.copy()
        num_data = exp_data.copy()
        exp_data = temp.copy()
        n_exp = len(exp_data)
        n_num = len(num_data)

    # get the arc length data of the curves
    # arcexp_data, _ = get_arc_length(exp_data)
    _, arcsnum_data = get_arc_length(num_data)

    # let's find the largest gap between point the num_data, and then
    # linearally interpolate between these points such that the num_data
    # becomes the same length as the exp_data
    for i in range(0, n_exp-n_num):
        # x values of consecutive point pairs in num_data
        a = num_data[0:n_num-1, 0]
        b = num_data[1:n_num, 0]
        # split the segment with the largest arc length at its x midpoint
        nIndex = np.argmax(arcsnum_data)
        newX = (b[nIndex] + a[nIndex])/2.0
        #   the interpolation model messes up if x2 < x1 so we do a quick check
        if a[nIndex] < b[nIndex]:
            newY = np.interp(newX, [a[nIndex], b[nIndex]],
                             [num_data[nIndex, 1], num_data[nIndex+1, 1]])
        else:
            newY = np.interp(newX, [b[nIndex], a[nIndex]],
                             [num_data[nIndex+1, 1], num_data[nIndex, 1]])
        # np.insert fills the whole new row with newX; the y column of that
        # row is then overwritten with the interpolated newY
        num_data = np.insert(num_data, nIndex+1, newX, axis=0)
        num_data[nIndex+1, 1] = newY

        # recompute segment arc lengths for the next refinement iteration
        _, arcsnum_data = get_arc_length(num_data)
        n_num = len(num_data)

    # Calculate the quadrilateral area, by looping through all of the quads
    area = []
    for i in range(1, n_exp):
        # quad from consecutive points on each curve; makeQuad reorders the
        # vertices if needed so the quad is simple before taking its area
        tempX = [exp_data[i-1, 0], exp_data[i, 0], num_data[i, 0],
                 num_data[i-1, 0]]
        tempY = [exp_data[i-1, 1], exp_data[i, 1], num_data[i, 1],
                 num_data[i-1, 1]]
        area.append(makeQuad(tempX, tempY))
    return np.sum(area)
299
300
def get_length(x, y, norm_seg_length=True):
    r"""
    Compute arc lengths of an x y curve.

    Computes the arc length of a xy curve, the cumulative arc length of an xy,
    and the total xy arc length of a xy curve. The euclidean distance is used
    to determine the arc length.

    Parameters
    ----------
    x : array_like
        the x locations of a curve
    y : array_like
        the y locations of a curve
    norm_seg_length : boolean
        Whether to divide the segment length of each curve by the maximum.

    Returns
    -------
    le : ndarray (1-D)
        the euclidean distance between every two points
    le_total : float
        the total arc length distance of a curve
    le_cum : ndarray (1-D)
        the cumulative sum of euclidean distances between every two points

    Examples
    --------
    >>> le, le_total, le_cum = get_length(x, y)

    """
    n = len(x)

    if norm_seg_length:
        xmax = np.max(np.abs(x))
        ymax = np.max(np.abs(y))

        # if your max x or y value is zero... you'll get np.inf
        # as your curve length based measure
        if xmax == 0:
            xmax = 1e-15
        if ymax == 0:
            ymax = 1e-15
    else:
        ymax = 1.0
        xmax = 1.0

    # vectorized segment lengths replacing the previous python-level loop;
    # le[0] stays 0.0 so the cumulative sum at index i is the arc length up
    # to point i (this also handles n == 0 and n == 1 without indexing errors)
    le = np.zeros(n)
    if n > 1:
        le[1:] = np.sqrt((np.diff(x)/xmax)**2 + (np.diff(y)/ymax)**2)
    # np.cumsum accumulates sequentially, matching the original running sum
    l_sum = np.cumsum(le)
    return le, np.sum(le), l_sum
356
357
def curve_length_measure(exp_data, num_data):
    r"""
    Compute the curve length based distance between two curves.

    Implements the curve length similarity measure of [2]_ in its OF2 form,
    which is self normalizing based on the mean values of the experimental
    curve.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.

    Returns
    -------
    r : float
        curve length based similarity distance

    Notes
    -----
    x values are expected in exp_data[:, 0] and num_data[:, 0]; y values in
    exp_data[:, 1] and num_data[:, 1].

    Examples
    --------
    >>> exp_data = np.random.random((100, 2))
    >>> num_data = np.random.random((100, 2))
    >>> r = curve_length_measure(exp_data, num_data)

    .. [2] A Andrade-Campos, R De-Carvalho, and R A F Valente. Novel criteria
        for determination of material model parameters. International Journal
        of Mechanical Sciences, 54(1):294-305, 2012. ISSN 0020-7403. DOI
        https://doi.org/10.1016/j.ijmecsci.2011.11.010 URL:
        http://www.sciencedirect.com/science/article/pii/S0020740311002451

    """
    exp_x = exp_data[:, 0]
    exp_y = exp_data[:, 1]
    num_x = num_data[:, 0]
    num_y = num_data[:, 1]

    _, exp_total, exp_cum = get_length(exp_x, exp_y)
    _, num_total, num_cum = get_length(num_x, num_y)

    # rescale the experimental cumulative arc lengths onto the numerical
    # curve's total length, then sample the numerical curve at those lengths
    lieq = exp_cum * (num_total / exp_total)
    x_interp = np.interp(lieq, num_cum, num_x)
    y_interp = np.interp(lieq, num_cum, num_y)

    # OF2: self-normalize residuals by the experimental mean values
    x_mean = np.mean(exp_x)
    y_mean = np.mean(exp_y)

    r_sq = np.log(1.0 + (np.abs(x_interp - exp_x) / x_mean)) ** 2
    r_sq = r_sq + np.log(1.0 + (np.abs(y_interp - exp_y) / y_mean)) ** 2
    return np.sqrt(np.sum(r_sq))
427
428
def frechet_dist(exp_data, num_data, p=2):
    r"""
    Compute the discrete Frechet distance

    Compute the Discrete Frechet Distance between two N-D curves according to
    [3]_. The Frechet distance has been defined as the walking dog problem.
    From Wikipedia: "In mathematics, the Frechet distance is a measure of
    similarity between curves that takes into account the location and
    ordering of the points along the curves. It is named after Maurice Frechet.
    https://en.wikipedia.org/wiki/Fr%C3%A9chet_distance

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimmensions
    num_data : array_like
        Curve from your numerical data. num_data is of (P, N) shape, where P
        is the number of data points, and N is the number of dimmensions
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use. Default is p=2 (Eculidean).
        The manhattan distance is p=1.

    Returns
    -------
    df : float
        discrete Frechet distance

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    Thanks to Arbel Amir for the issue, and Sen ZHANG for the iterative code
    https://github.com/cjekel/similarity_measures/issues/6

    Examples
    --------
    >>> exp_data = np.random.random((100, 2))
    >>> num_data = np.random.random((100, 2))
    >>> df = frechet_dist(exp_data, num_data)

    .. [3] Thomas Eiter and Heikki Mannila. Computing discrete Frechet
        distance. Technical report, 1994.
        http://www.kr.tuwien.ac.at/staff/eiter/et-archive/cdtr9464.pdf
        http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.90.937&rep=rep1&type=pdf
    """
    n = len(exp_data)
    m = len(num_data)
    c = distance.cdist(exp_data, num_data, metric='minkowski', p=p)
    # every entry of ca is assigned below, so a single zero allocation
    # replaces the previous two-pass np.ones(...) * -1 initialization
    ca = np.zeros((n, m))
    ca[0, 0] = c[0, 0]
    # first column and first row each have a single admissible predecessor
    for i in range(1, n):
        ca[i, 0] = max(ca[i-1, 0], c[i, 0])
    for j in range(1, m):
        ca[0, j] = max(ca[0, j-1], c[0, j])
    # interior cells: coupling measure recurrence of Eiter & Mannila
    for i in range(1, n):
        for j in range(1, m):
            ca[i, j] = max(min(ca[i-1, j], ca[i, j-1], ca[i-1, j-1]),
                           c[i, j])
    return ca[n-1, m-1]
501
502
def normalizeTwoCurves(x, y, w, z):
    """
    Normalize two curves for the PCM method.

    Normalizes both curves using the extents of the FIRST curve, following
    [4]_. The second curve is scaled with the first curve's minima and
    ranges, so its normalized values may fall outside [0, 1].

    Parameters
    ----------
    x : array_like (1-D)
        x locations for first curve
    y : array_like (1-D)
        y locations for first curve
    w : array_like (1-D)
        x locations for second curve
    z : array_like (1-D)
        y locations for second curve

    Returns
    -------
    xi : array_like
        normalized x locations for first curve
    eta : array_like
        normalized y locations for first curve
    xiP : array_like
        normalized x locations for second curve
    etaP : array_like
        normalized y locations for second curve

    Examples
    --------
    >>> xi, eta, xiP, etaP = normalizeTwoCurves(x,y,w,z)

    .. [4] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: doi:10.2514/6.2012-5580.
    """
    # extents of the first curve drive the normalization of both curves
    x_min = np.min(x)
    y_min = np.min(y)
    x_range = np.max(x) - x_min
    y_range = np.max(y) - y_min

    xi = (x - x_min) / x_range
    eta = (y - y_min) / y_range
    xiP = (w - x_min) / x_range
    etaP = (z - y_min) / y_range
    return xi, eta, xiP, etaP
553
554
def pcm(exp_data, num_data, norm_seg_length=False):
    """
    Compute the Partial Curve Mapping area.

    Computes the Partial Cuve Mapping (PCM) similarity measure as proposed by
    [5]_.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.
    norm_seg_length : boolean
        Whether to divide the segment length of each curve by the maximum. The
        default value is false, which more closely follows the algorithm
        proposed in [5]_. Versions prior to 0.6.0 used `norm_seg_length=True`.

    Returns
    -------
    p : float
        pcm area measure

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    PCM distance was changed in version 0.6.0. To get the same results from
    previous versions, set `norm_seg_length=True`.

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> p = pcm(exp_data, num_data)

    .. [5] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: doi:10.2514/6.2012-5580.
    """
    # normalize the curves to the experimental data
    xi1, eta1, xi2, eta2 = normalizeTwoCurves(exp_data[:, 0], exp_data[:, 1],
                                              num_data[:, 0], num_data[:, 1])
    # compute the arc lengths of each curve
    le, le_nj, le_sum = get_length(xi1, eta1, norm_seg_length)
    lc, lc_nj, lc_sum = get_length(xi2, eta2, norm_seg_length)
    # scale each segment to the total polygon length
    le = le / le_nj
    le_sum = le_sum / le_nj
    lc = lc / lc_nj
    lc_sum = lc_sum / lc_nj
    # right now exp_data is curve a, and num_data is curve b
    # make sure a is shorter than a', if not swap the defintion
    if lc_nj > le_nj:
        # compute the arc lengths of each curve
        le, le_nj, le_sum = get_length(xi2, eta2, norm_seg_length)
        lc, lc_nj, lc_sum = get_length(xi1, eta1, norm_seg_length)
        # scale each segment to the total polygon length
        le = le / le_nj
        le_sum = le_sum / le_nj
        lc = lc / lc_nj
        lc_sum = lc_sum / lc_nj
        # swap xi1, eta1 with xi2, eta2
        xi1OLD = xi1.copy()
        eta1OLD = eta1.copy()
        xi1 = xi2.copy()
        eta1 = eta2.copy()
        xi2 = xi1OLD.copy()
        eta2 = eta1OLD.copy()

    n_sum = len(le_sum)

    # candidate arc-length offsets for sliding the shorter curve along the
    # longer one; the minimum area over all offsets is returned
    min_offset = 0.0
    max_offset = le_nj - lc_nj

    # make sure the curves aren't the same length
    # if they are the same length, don't loop 200 times
    if min_offset == max_offset:
        offsets = [min_offset]
        pcm_dists = np.zeros(1)
    else:
        offsets = np.linspace(min_offset, max_offset, 200)
        pcm_dists = np.zeros(200)

    for i, offset in enumerate(offsets):
        # create linear interpolation model for num_data based on arc length
        # evaluate linear interpolation model based on xi and eta of exp data
        xitemp = np.interp(le_sum+offset, lc_sum, xi2)
        etatemp = np.interp(le_sum+offset, lc_sum, eta2)

        # pointwise distance between the curves at matched arc lengths
        d = np.sqrt((eta1-etatemp)**2 + (xi1-xitemp)**2)
        d1 = d[:-1]

        d2 = d[1:n_sum]

        # trapezoid-style area contribution for each segment
        v = 0.5*(d1+d2)*le_sum[1:n_sum]
        pcm_dists[i] = np.sum(v)
    return np.min(pcm_dists)
667
668
def dtw(exp_data, num_data, metric='euclidean', **kwargs):
    r"""
    Compute the Dynamic Time Warping distance.

    Generic Dynamic Time Warping (DTW) following the algorithm review in
    [6]_. Any distance metric available in scipy.spatial.distance.cdist may
    be used for the pointwise cost.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data, of (M, N) shape, where M is the
        number of data points and N is the number of dimmensions.
    num_data : array_like
        Curve from your numerical data, of (P, N) shape, where P is the
        number of data points and N is the number of dimmensions.
    metric : str or callable, optional
        The distance metric to use. Default='euclidean'. Refer to the
        documentation for scipy.spatial.distance.cdist. Some examples:
        'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
        'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
        'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
        'wminkowski', 'yule'.
    **kwargs : dict, optional
        Extra arguments forwarded to scipy.spatial.distance.cdist, e.g.
        p (Minkowski p-norm, default 2), w (weight vector), V (variance
        vector for standardized Euclidean), VI (inverse covariance matrix
        for Mahalanobis), out (output array for the distance matrix).

    Returns
    -------
    r : float
        DTW distance; equal to d[-1, -1].
    d : ndarray (2-D)
        Cumulative distance matrix.

    Notes
    -----
    This has O(M, P) computational cost.

    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    DTW is a non-metric distance, which means DTW doesn't hold the triangle
    inequality.
    https://en.wikipedia.org/wiki/Triangle_inequality

    The latest scipy.spatial.distance.cdist information can be found at
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

    Examples
    --------
    >>> exp_data = np.random.random((100, 2))
    >>> num_data = np.random.random((100, 2))
    >>> r, d = dtw(exp_data, num_data)

    Use metric and **kwargs for other distances, e.g. the Manhattan distance:

    >>> r, d = dtw(exp_data, num_data, metric='cityblock')

    .. [6] Senin, P., 2008. Dynamic time warping algorithm review. Information
        and Computer Science Department University of Hawaii at Manoa Honolulu,
        USA, 855, pp.1-23.
        http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf
    """
    # pointwise cost matrix between the two curves
    cost = distance.cdist(exp_data, num_data, metric=metric, **kwargs)
    n_rows, n_cols = cost.shape

    # cumulative-cost matrix; borders have a single admissible predecessor
    d = np.zeros_like(cost)
    d[0, 0] = cost[0, 0]
    for row in range(1, n_rows):
        d[row, 0] = d[row - 1, 0] + cost[row, 0]
    for col in range(1, n_cols):
        d[0, col] = d[0, col - 1] + cost[0, col]

    # interior cells: cheapest of the three admissible predecessors
    for row in range(1, n_rows):
        for col in range(1, n_cols):
            prev = min((d[row - 1, col],
                        d[row, col - 1],
                        d[row - 1, col - 1]))
            d[row, col] = cost[row, col] + prev
    return d[-1, -1], d
784
785
def dtw_path(d):
    r"""
    Calculate the optimal DTW path from a cumulative distance matrix.

    Returns the optimal DTW path using the back propagation algorithm
    defined in [7]_. The path lists the matched index pairs from the two
    curves being compared.

    Parameters
    ----------
    d : ndarray (2-D)
        Cumulative distance matrix (second return value of ``dtw``).

    Returns
    -------
    path : ndarray (2-D)
        The optimal DTW path, starting at [0, 0].

    Notes
    -----
    path[:, 0] holds indices into exp_data, path[:, 1] indices into
    num_data.

    .. [7] Senin, P., 2008. Dynamic time warping algorithm review. Information
        and Computer Science Department University of Hawaii at Manoa Honolulu,
        USA, 855, pp.1-23.
        http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf

    Examples
    --------
    First calculate the DTW cumulative distance matrix.

    >>> exp_data = np.random.random((100, 2))
    >>> num_data = np.random.random((100, 2))
    >>> r, d = dtw(exp_data, num_data)

    Now you can calculate the optimal DTW path

    >>> path = dtw_path(d)

    You can visualize the path on the cumulative distance matrix.

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.imshow(d.T, origin='lower')
    >>> plt.plot(path[:, 0], path[:, 1], '-k')
    >>> plt.colorbar()
    >>> plt.show()

    """
    # back propagation starts from the last cell and walks to d[0, 0]
    i = d.shape[0] - 1
    j = d.shape[1] - 1
    steps = [(i, j)]
    while (i, j) != (0, 0):
        if i == 0:
            j -= 1
        elif j == 0:
            i -= 1
        else:
            up = d[i - 1, j]
            left = d[i, j - 1]
            diag = d[i - 1, j - 1]
            smallest = min([up, left, diag])
            # tie-break order matches the forward recursion: up, left, diag
            if up == smallest:
                i -= 1
            elif left == smallest:
                j -= 1
            else:
                i -= 1
                j -= 1
        steps.append((i, j))
    # reverse so the path starts with [0, 0]
    return np.array(steps)[::-1]
873
874
def mae(exp_data, num_data):
    """
    Compute the Mean Absolute Error (MAE).

    Returns the mean of the absolute element-wise differences between two
    curves. Each curve must have the same number of data points and the same
    dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MAE.
    """
    return np.mean(np.abs(exp_data - num_data))
898
899
def mse(exp_data, num_data):
    """
    Compute the Mean Squared Error (MSE).

    This computes the mean of squared distances between two curves.
    Each curve must have the same number of data points and the same dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MSE.
    """
    # element-wise squared differences averaged over all points/dimensions
    c = np.square(exp_data - num_data)
    return np.mean(c)
def poly_area(x, y):
    r"""
    Compute the area of a polygon with the shoelace formula.

    Works for any non-self-intersecting polygon (Gauss's area formula).
    See https://en.wikipedia.org/wiki/Shoelace_formula

    Parameters
    ----------
    x : ndarray (1-D)
        the x locations of a polygon
    y : ndarray (1-D)
        the y locations of the polygon

    Returns
    -------
    area : float
        the calculated polygonal area

    Notes
    -----
    The vertices must be ordered around the polygon boundary: the first
    vertex is (x[0], y[0]), the second is (x[1], y[1]), and so on.

    Thanks to Mahdi for this one line code
    https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
    """
    # pair each vertex with its cyclic predecessor and sum the signed
    # cross terms; the absolute value makes the result orientation-free
    x_prev = np.roll(x, 1)
    y_prev = np.roll(y, 1)
    twice_signed_area = np.sum(x * y_prev) - np.sum(y * x_prev)
    return 0.5 * np.abs(twice_signed_area)

A function that computes the polygonal area via the shoelace formula.

This function allows you to take the area of any polygon that is not self intersecting. This is also known as Gauss's area formula. See https://en.wikipedia.org/wiki/Shoelace_formula

Parameters
  • x (ndarray (1-D)): the x locations of a polygon
  • y (ndarray (1-D)): the y locations of the polygon
Returns
  • area (float): the calculated polygonal area
Notes

The x and y locations need to be ordered such that the first vertex of the polygon corresponds to x[0] and y[0], the second vertex to x[1] and y[1], and so forth

Thanks to Mahdi for this one line code https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates

def is_simple_quad(ab, bc, cd, da):
    r"""
    Returns True if a quadrilateral is simple

    Takes the cross product of each edge with the next edge (cyclically).
    A simple (non-self-intersecting) quadrilateral has a consistent turn
    direction at a majority of its vertices; a complex quadrilateral does
    not. Returns True for simple, False for complex.

    Parameters
    ----------
    ab : array_like
        [x, y] location of the first vertex
    bc : array_like
        [x, y] location of the second vertex
    cd : array_like
        [x, y] location of the third vertex
    da : array_like
        [x, y] location of the fourth vertex

    Returns
    -------
    simple : bool
        True if quadrilateral is simple, False if complex
    """
    # cross product at each vertex: each edge with the cyclically-next edge
    edges = [ab, bc, cd, da]
    crosses = np.array([np.cross(edges[k], edges[(k + 1) % 4])
                        for k in range(4)])
    # majority vote on the sign of the cross products; they do not all need
    # to point in the same 'Z' direction, just a majority of them
    if sum(crosses > 0) < sum(crosses < 0):
        consistent = crosses <= 0
    else:
        consistent = crosses >= 0
    return sum(consistent) > 2

Returns True if a quadrilateral is simple

This function performs cross products at the vertices of a quadrilateral. It is possible to use the results to decide whether a quadrilateral is simple or complex. This function returns True if the quadrilateral is simple, and False if the quadrilateral is complex. A complex quadrilateral is a self-intersecting quadrilateral.

Parameters
  • ab (array_like): [x, y] location of the first vertex
  • bc (array_like): [x, y] location of the second vertex
  • cd (array_like): [x, y] location of the third vertex
  • da (array_like): [x, y] location of the fourth vertex
Returns
  • simple (bool): True if quadrilateral is simple, False if complex
def makeQuad(x, y):
    r"""
    Calculate the area from the x and y locations of a quadrilateral

    This function first constructs a simple quadrilateral from the x and y
    locations of the vertices of the quadrilateral. The function then
    calculates the shoelace area of the simple quadrilateral.

    Parameters
    ----------
    x : array_like
        the x locations of a quadrilateral
    y : array_like
        the y locations of a quadrilateral

    Returns
    -------
    area : float
        Area of quadrilateral via shoelace formula

    Notes
    -----
    This function rearranges the vertices of a quadrilateral until the
    quadrilateral is "Simple" (meaning non-complex). Once a simple
    quadrilateral is found, the area of the quadrilateral is calculated
    using the shoelace formula.

    The vertex order of ``x`` and ``y`` is permuted *in place* during the
    search, so the caller's sequences may be reordered after this call.
    """

    # check to see if the provide point order is valid
    # I need to get the values of the cross products of ABxBC, BCxCD, CDxDA,
    # DAxAB, thus I need to create the following arrays AB, BC, CD, DA

    AB = [x[1]-x[0], y[1]-y[0]]
    BC = [x[2]-x[1], y[2]-y[1]]
    CD = [x[3]-x[2], y[3]-y[2]]
    DA = [x[0]-x[3], y[0]-y[3]]

    isQuad = is_simple_quad(AB, BC, CD, DA)

    if isQuad is False:
        # attempt to rearrange the first two points
        x[1], x[0] = x[0], x[1]
        y[1], y[0] = y[0], y[1]
        # recompute the edge vectors for the new vertex order
        AB = [x[1]-x[0], y[1]-y[0]]
        BC = [x[2]-x[1], y[2]-y[1]]
        CD = [x[3]-x[2], y[3]-y[2]]
        DA = [x[0]-x[3], y[0]-y[3]]

        isQuad = is_simple_quad(AB, BC, CD, DA)

        if isQuad is False:
            # place the second and first points back where they were, and
            # swap the second and third points
            x[2], x[0], x[1] = x[0], x[1], x[2]
            y[2], y[0], y[1] = y[0], y[1], y[2]
            AB = [x[1]-x[0], y[1]-y[0]]
            BC = [x[2]-x[1], y[2]-y[1]]
            CD = [x[3]-x[2], y[3]-y[2]]
            DA = [x[0]-x[3], y[0]-y[3]]

            # NOTE(review): result of this final check is not used; the area
            # is computed from whatever ordering was reached at this point
            isQuad = is_simple_quad(AB, BC, CD, DA)

    # calculate the area via shoelace formula
    area = poly_area(x, y)
    return area

Calculate the area from the x and y locations of a quadrilateral

This function first constructs a simple quadrilateral from the x and y locations of the vertices of the quadrilateral. The function then calculates the shoelace area of the simple quadrilateral.

Parameters
  • x (array_like): the x locations of a quadrilateral
  • y (array_like): the y locations of a quadrilateral
Returns
  • area (float): Area of quadrilateral via shoelace formula
Notes

This function rearranges the vertices of a quadrilateral until the quadrilateral is "Simple" (meaning non-complex). Once a simple quadrilateral is found, the area of the quadrilateral is calculated using the shoelace formula.

def get_arc_length(dataset):
    r"""
    Obtain arc length distances between every point in 2-D space

    Obtains the total arc length of a curve in 2-D space (a curve of x and y)
    as well as the arc lengths between each two consecutive data points of the
    curve.

    Parameters
    ----------
    dataset : ndarray (2-D)
        The dataset of the curve in 2-D space.

    Returns
    -------
    arcLength : float
        The sum of all consecutive arc lengths
    arcLengths : array_like
        A list of the arc lengths between data points

    Notes
    -----
    Your x locations of data points should be dataset[:, 0], and the y
    locations of the data points should be dataset[:, 1]
    """
    # Compute the euclidean distance between consecutive points directly.
    # This is O(m) in time and memory; the previous implementation built the
    # full (m-1) x (m-1) cdist matrix only to read its diagonal, which was
    # O(m^2) for the same result.
    arcLengths = np.sqrt(np.sum(np.diff(dataset, axis=0)**2, axis=1))
    arcLength = sum(arcLengths)
    return arcLength, arcLengths

Obtain arc length distances between every point in 2-D space

Obtains the total arc length of a curve in 2-D space (a curve of x and y) as well as the arc lengths between each two consecutive data points of the curve.

Parameters
  • dataset (ndarray (2-D)): The dataset of the curve in 2-D space.
Returns
  • arcLength (float): The sum of all consecutive arc lengths
  • arcLengths (array_like): A list of the arc lengths between data points
Notes

Your x locations of data points should be dataset[:, 0], and the y locations of the data points should be dataset[:, 1]

def area_between_two_curves(exp_data, num_data):
    r"""
    Calculates the area between two curves.

    This calculates the area according to the algorithm in [1]_. Each curve is
    constructed from discretized data points in 2-D space, e.g. each curve
    consists of x and y data points.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.

    Returns
    -------
    area : float
        The area between exp_data and num_data curves.

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    .. [1] Jekel, C. F., Venter, G., Venter, M. P., Stander, N., & Haftka, R.
        T. (2018). Similarity measures for identifying material parameters from
        hysteresis loops using inverse analysis. International Journal of
        Material Forming. https://doi.org/10.1007/s12289-018-1421-8
    """
    # Calculate the area between two curves using quadrilaterals
    # Consider the test data to be data from an experimental test as exp_data
    # Consider the computer simulation (results from numerical model) to be
    # num_data
    #
    # Example on formatting the test and history data:
    # Curve1 = [xi1, eta1]
    # Curve2 = [xi2, eta2]
    # exp_data = np.zeros([len(xi1), 2])
    # num_data = np.zeros([len(xi2), 2])
    # exp_data[:,0] = xi1
    # exp_data[:,1] = eta1
    # num_data[:, 0] = xi2
    # num_data[:, 1] = eta2
    #
    # then you can calculate the area as
    # area = area_between_two_curves(exp_data, num_data)

    n_exp = len(exp_data)
    n_num = len(num_data)

    # the length of exp_data must be larger than the length of num_data
    # (copies are taken so the caller's arrays are not modified by the swap)
    if n_exp < n_num:
        temp = num_data.copy()
        num_data = exp_data.copy()
        exp_data = temp.copy()
        n_exp = len(exp_data)
        n_num = len(num_data)

    # get the arc length data of the curves
    # arcexp_data, _ = get_arc_length(exp_data)
    _, arcsnum_data = get_arc_length(num_data)

    # let's find the largest gap between point the num_data, and then
    # linearally interpolate between these points such that the num_data
    # becomes the same length as the exp_data
    for i in range(0, n_exp-n_num):
        # a and b are the x values of the segment endpoints; the segment to
        # refine is the one with the largest arc length
        a = num_data[0:n_num-1, 0]
        b = num_data[1:n_num, 0]
        nIndex = np.argmax(arcsnum_data)
        # insert the x midpoint of the longest segment
        newX = (b[nIndex] + a[nIndex])/2.0
        #   the interpolation model messes up if x2 < x1 so we do a quick check
        if a[nIndex] < b[nIndex]:
            newY = np.interp(newX, [a[nIndex], b[nIndex]],
                             [num_data[nIndex, 1], num_data[nIndex+1, 1]])
        else:
            newY = np.interp(newX, [b[nIndex], a[nIndex]],
                             [num_data[nIndex+1, 1], num_data[nIndex, 1]])
        # np.insert duplicates newX into both columns; the y column is then
        # overwritten with the interpolated value
        num_data = np.insert(num_data, nIndex+1, newX, axis=0)
        num_data[nIndex+1, 1] = newY

        # recompute segment lengths after the insertion
        _, arcsnum_data = get_arc_length(num_data)
        n_num = len(num_data)

    # Calculate the quadrilateral area, by looping through all of the quads
    area = []
    for i in range(1, n_exp):
        tempX = [exp_data[i-1, 0], exp_data[i, 0], num_data[i, 0],
                 num_data[i-1, 0]]
        tempY = [exp_data[i-1, 1], exp_data[i, 1], num_data[i, 1],
                 num_data[i-1, 1]]
        area.append(makeQuad(tempX, tempY))
    return np.sum(area)

Calculates the area between two curves.

This calculates the area according to the algorithm in 1. Each curve is constructed from discretized data points in 2-D space, e.g. each curve consists of x and y data points.

Parameters
  • exp_data (ndarray (2-D)): Curve from your experimental data.
  • num_data (ndarray (2-D)): Curve from your numerical data.
Returns
  • area (float): The area between exp_data and num_data curves.
Notes

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.


  1. Jekel, C. F., Venter, G., Venter, M. P., Stander, N., & Haftka, R. T. (2018). Similarity measures for identifying material parameters from hysteresis loops using inverse analysis. International Journal of Material Forming. https://doi.org/10.1007/s12289-018-1421-8 

def get_length(x, y, norm_seg_length=True):
    r"""
    Compute arc lengths of an x y curve.

    Returns the per-segment euclidean distances of an xy curve, their total,
    and their cumulative sum.

    Parameters
    ----------
    x : array_like
        the x locations of a curve
    y : array_like
        the y locations of a curve
    norm_seg_length : boolean
        Whether to divide the segment length of each curve by the maximum.

    Returns
    -------
    le : ndarray (1-D)
        the euclidean distance between every two points
    le_total : float
        the total arc length distance of a curve
    le_cum : ndarray (1-D)
        the cumulative sum of euclidean distances between every two points

    Examples
    --------
    >>> le, le_total, le_cum = get_length(x, y)

    """
    num_pts = len(x)

    if norm_seg_length:
        x_scale = np.max(np.abs(x))
        y_scale = np.max(np.abs(y))
        # guard against a zero max coordinate, which would otherwise
        # produce np.inf in the normalized lengths
        if x_scale == 0:
            x_scale = 1e-15
        if y_scale == 0:
            y_scale = 1e-15
    else:
        x_scale = 1.0
        y_scale = 1.0

    # first entry stays 0.0; the rest are the scaled segment lengths
    seg = np.zeros(num_pts)
    dx = np.diff(x) / x_scale
    dy = np.diff(y) / y_scale
    seg[1:] = np.sqrt(dx**2 + dy**2)
    return seg, np.sum(seg), np.cumsum(seg)

Compute arc lengths of an x y curve.

Computes the arc length of a xy curve, the cumulative arc length of an xy, and the total xy arc length of a xy curve. The euclidean distance is used to determine the arc length.

Parameters
  • x (array_like): the x locations of a curve
  • y (array_like): the y locations of a curve
  • norm_seg_length (boolean): Whether to divide the segment length of each curve by the maximum.
Returns
  • le (ndarray (1-D)): the euclidean distance between every two points
  • le_total (float): the total arc length distance of a curve
  • le_cum (ndarray (1-D)): the cumulative sum of euclidean distances between every two points
Examples
>>> le, le_total, le_cum = get_length(x, y)
def curve_length_measure(exp_data, num_data):
    r"""
    Compute the curve length based distance between two curves.

    Implements the curve-length similarity measure of [2]_, in the OF2
    form, which self-normalizes using the average values of the
    experimental curve.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.

    Returns
    -------
    r : float
        curve length based similarity distance

    Notes
    -----
    This uses the OF2 method from [2]_.

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> r = curve_length_measure(exp_data, num_data)


    .. [2] A Andrade-Campos, R De-Carvalho, and R A F Valente. Novel criteria
        for determination of material model parameters. International Journal
        of Mechanical Sciences, 54(1):294-305, 2012. ISSN 0020-7403. DOI
        https://doi.org/10.1016/j.ijmecsci.2011.11.010 URL:
        http://www.sciencedirect.com/science/article/pii/S0020740311002451

    """
    exp_x, exp_y = exp_data[:, 0], exp_data[:, 1]
    num_x, num_y = num_data[:, 0], num_data[:, 1]

    # normalized arc-length coordinates of both curves
    _, exp_total, exp_cum = get_length(exp_x, exp_y)
    _, num_total, num_cum = get_length(num_x, num_y)

    # map each experimental arc-length station onto the numerical curve
    scale = num_total / exp_total
    stations = exp_cum * scale

    x_on_num = np.interp(stations, num_cum, num_x)
    y_on_num = np.interp(stations, num_cum, num_y)

    # OF2: log-relative deviations, normalized by the experimental means
    x_bar = np.mean(exp_x)
    y_bar = np.mean(exp_y)

    devs = np.log(1.0 + (np.abs(x_on_num - exp_x) / x_bar))**2
    devs += np.log(1.0 + (np.abs(y_on_num - exp_y) / y_bar))**2
    return np.sqrt(np.sum(devs))

Compute the curve length based distance between two curves.

This computes the curve length similarity measure according to 1. This implementation follows the OF2 form, which is a self normalizing form based on the average value.

Parameters
  • exp_data (ndarray (2-D)): Curve from your experimental data.
  • num_data (ndarray (2-D)): Curve from your numerical data.
Returns
  • r (float): curve length based similarity distance
Notes

This uses the OF2 method from 2.

Examples
>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r = curve_length_measure(exp_data, num_data)

  1. A Andrade-Campos, R De-Carvalho, and R A F Valente. Novel criteria for determination of material model parameters. International Journal of Mechanical Sciences, 54(1):294-305, 2012. ISSN 0020-7403. DOI https://doi.org/10.1016/j.ijmecsci.2011.11.010 URL: http://www.sciencedirect.com/science/article/pii/S0020740311002451 

  2. A Andrade-Campos, R De-Carvalho, and R A F Valente. Novel criteria for determination of material model parameters. International Journal of Mechanical Sciences, 54(1):294-305, 2012. ISSN 0020-7403. DOI https://doi.org/10.1016/j.ijmecsci.2011.11.010 URL: http://www.sciencedirect.com/science/article/pii/S0020740311002451 

def frechet_dist(exp_data, num_data, p=2):
    r"""
    Compute the discrete Frechet distance

    Computes the Discrete Frechet Distance between two N-D curves following
    [3]_. The Frechet distance is often described as the walking-dog problem:
    it measures similarity between curves while respecting the location and
    ordering of the points along them. It is named after Maurice Frechet.
    https://en.wikipedia.org/wiki/Fr%C3%A9chet_distance

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (P, N) shape, where P
        is the number of data points, and N is the number of dimensions
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use. Default is p=2 (Euclidean).
        The Manhattan distance is p=1.

    Returns
    -------
    df : float
        discrete Frechet distance

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    Thanks to Arbel Amir for the issue, and Sen ZHANG for the iterative code
    https://github.com/cjekel/similarity_measures/issues/6

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> df = frechet_dist(exp_data, num_data)

    .. [3] Thomas Eiter and Heikki Mannila. Computing discrete Frechet
        distance. Technical report, 1994.
        http://www.kr.tuwien.ac.at/staff/eiter/et-archive/cdtr9464.pdf
        http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.90.937&rep=rep1&type=pdf
    """
    n = len(exp_data)
    m = len(num_data)
    # all pairwise point-to-point distances
    dist_mat = distance.cdist(exp_data, num_data, metric='minkowski', p=p)
    # dynamic-programming table of coupling distances; -1 marks "unfilled"
    dp = np.full((n, m), -1.0)
    dp[0, 0] = dist_mat[0, 0]
    # first column and first row: only one monotone path reaches each cell
    for r in range(1, n):
        dp[r, 0] = max(dp[r - 1, 0], dist_mat[r, 0])
    for s in range(1, m):
        dp[0, s] = max(dp[0, s - 1], dist_mat[0, s])
    # interior: best predecessor among left, down, and diagonal moves
    for r in range(1, n):
        for s in range(1, m):
            best_prev = min(dp[r - 1, s], dp[r, s - 1], dp[r - 1, s - 1])
            dp[r, s] = max(best_prev, dist_mat[r, s])
    return dp[-1, -1]

Compute the discrete Frechet distance

Compute the Discrete Frechet Distance between two N-D curves according to 1. The Frechet distance has been defined as the walking dog problem. From Wikipedia: "In mathematics, the Frechet distance is a measure of similarity between curves that takes into account the location and ordering of the points along the curves. It is named after Maurice Frechet. https://en.wikipedia.org/wiki/Fr%C3%A9chet_distance

Parameters
  • exp_data (array_like): Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
  • num_data (array_like): Curve from your numerical data. num_data is of (P, N) shape, where P is the number of data points, and N is the number of dimensions
  • p (float, 1 <= p <= infinity): Which Minkowski p-norm to use. Default is p=2 (Euclidean). The Manhattan distance is p=1.
Returns
  • df (float): discrete Frechet distance
Notes

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

Thanks to Arbel Amir for the issue, and Sen ZHANG for the iterative code https://github.com/cjekel/similarity_measures/issues/6

Examples
>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> df = frechet_dist(exp_data, num_data)

  1. Thomas Eiter and Heikki Mannila. Computing discrete Frechet distance. Technical report, 1994. http://www.kr.tuwien.ac.at/staff/eiter/et-archive/cdtr9464.pdf http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.90.937&rep=rep1&type=pdf 

def normalizeTwoCurves(x, y, w, z):
    """
    Normalize two curves for the PCM method.

    Rescales both curves following [4]_ using the bounding box of the
    *first* curve (x, y): the first curve is mapped onto the unit square,
    and the second curve (w, z) is transformed with the same affine map
    (so it may land outside [0, 1]).

    Parameters
    ----------
    x : array_like (1-D)
        x locations for first curve
    y : array_like (1-D)
        y locations for first curve
    w : array_like (1-D)
        x locations for second curve
    z : array_like (1-D)
        y locations for second curve

    Returns
    -------
    xi : array_like
        normalized x locations for first curve
    eta : array_like
        normalized y locations for first curve
    xiP : array_like
        normalized x locations for second curve
    etaP : array_like
        normalized y locations for second curve


    Examples
    --------
    >>> xi, eta, xiP, etaP = normalizeTwoCurves(x,y,w,z)

    .. [4] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: doi:10.2514/6.2012-5580.
    """
    # bounding box of the first curve defines the affine map
    x_low = np.min(x)
    x_range = np.max(x) - x_low
    y_low = np.min(y)
    y_range = np.max(y) - y_low

    xi = (x - x_low) / x_range
    eta = (y - y_low) / y_range
    # apply the *same* transform to the second curve
    xiP = (w - x_low) / x_range
    etaP = (z - y_low) / y_range
    return xi, eta, xiP, etaP

Normalize two curves for PCM method.

This normalizes the two curves for PCM method following 1.

Parameters
  • x (array_like (1-D)): x locations for first curve
  • y (array_like (1-D)): y locations for first curve
  • w (array_like (1-D)): x locations for second curve curve
  • z (array_like (1-D)): y locations for second curve
Returns
  • xi (array_like): normalized x locations for first curve
  • eta (array_like): normalized y locations for first curve
  • xiP (array_like): normalized x locations for second curve
  • etaP (array_like): normalized y locations for second curve
Examples
>>> xi, eta, xiP, etaP = normalizeTwoCurves(x,y,w,z)

  1. Katharina Witowski and Nielen Stander. "Parameter Identification of Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation Technology, Integration, and Operations (ATIO) Conference and 14th AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference, Aviation Technology, Integration, and Operations (ATIO) Conferences. doi: doi:10.2514/6.2012-5580. 

def pcm(exp_data, num_data, norm_seg_length=False):
    """
    Compute the Partial Curve Mapping area.

    Computes the Partial Curve Mapping (PCM) similarity measure as proposed by
    [5]_.

    Parameters
    ----------
    exp_data : ndarray (2-D)
        Curve from your experimental data.
    num_data : ndarray (2-D)
        Curve from your numerical data.
    norm_seg_length : boolean
        Whether to divide the segment length of each curve by the maximum. The
        default value is false, which more closely follows the algorithm
        proposed in [5]_. Versions prior to 0.6.0 used `norm_seg_length=True`.

    Returns
    -------
    p : float
        pcm area measure

    Notes
    -----
    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    PCM distance was changed in version 0.6.0. To get the same results from
    previous versions, set `norm_seg_length=True`.

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> p = pcm(exp_data, num_data)

    .. [5] Katharina Witowski and Nielen Stander. "Parameter Identification of
        Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation
        Technology, Integration, and Operations (ATIO) Conference and 14th
        AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference,
        Aviation Technology, Integration, and Operations (ATIO) Conferences.
        doi: doi:10.2514/6.2012-5580.
    """
    # normalize the curves to the experimental data
    xi1, eta1, xi2, eta2 = normalizeTwoCurves(exp_data[:, 0], exp_data[:, 1],
                                              num_data[:, 0], num_data[:, 1])
    # compute the arc lengths of each curve
    le, le_nj, le_sum = get_length(xi1, eta1, norm_seg_length)
    lc, lc_nj, lc_sum = get_length(xi2, eta2, norm_seg_length)
    # scale each segment to the total polygon length
    le = le / le_nj
    le_sum = le_sum / le_nj
    lc = lc / lc_nj
    lc_sum = lc_sum / lc_nj
    # right now exp_data is curve a, and num_data is curve b
    # make sure a is shorter than a', if not swap the defintion
    if lc_nj > le_nj:
        # compute the arc lengths of each curve
        le, le_nj, le_sum = get_length(xi2, eta2, norm_seg_length)
        lc, lc_nj, lc_sum = get_length(xi1, eta1, norm_seg_length)
        # scale each segment to the total polygon length
        le = le / le_nj
        le_sum = le_sum / le_nj
        lc = lc / lc_nj
        lc_sum = lc_sum / lc_nj
        # swap xi1, eta1 with xi2, eta2
        xi1OLD = xi1.copy()
        eta1OLD = eta1.copy()
        xi1 = xi2.copy()
        eta1 = eta2.copy()
        xi2 = xi1OLD.copy()
        eta2 = eta1OLD.copy()

    n_sum = len(le_sum)

    # range of arc-length offsets at which the shorter curve can be placed
    # along the longer one
    min_offset = 0.0
    max_offset = le_nj - lc_nj

    # make sure the curves aren't the same length
    # if they are the same length, don't loop 200 times
    if min_offset == max_offset:
        offsets = [min_offset]
        pcm_dists = np.zeros(1)
    else:
        offsets = np.linspace(min_offset, max_offset, 200)
        pcm_dists = np.zeros(200)

    for i, offset in enumerate(offsets):
        # create linear interpolation model for num_data based on arc length
        # evaluate linear interpolation model based on xi and eta of exp data
        xitemp = np.interp(le_sum+offset, lc_sum, xi2)
        etatemp = np.interp(le_sum+offset, lc_sum, eta2)

        d = np.sqrt((eta1-etatemp)**2 + (xi1-xitemp)**2)
        d1 = d[:-1]

        d2 = d[1:n_sum]

        # trapezoid-style accumulation of point separations
        # NOTE(review): the weight here is the *cumulative* length le_sum,
        # not the per-segment length le — confirm against [5]_ / upstream
        v = 0.5*(d1+d2)*le_sum[1:n_sum]
        pcm_dists[i] = np.sum(v)
    return np.min(pcm_dists)

Compute the Partial Curve Mapping area.

Computes the Partial Curve Mapping (PCM) similarity measure as proposed by 1.

Parameters
  • exp_data (ndarray (2-D)): Curve from your experimental data.
  • num_data (ndarray (2-D)): Curve from your numerical data.
  • norm_seg_length (boolean): Whether to divide the segment length of each curve by the maximum. The default value is false, which more closely follows the algorithm proposed in [1]_. Versions prior to 0.6.0 used norm_seg_length=True.
Returns
  • p (float): pcm area measure
Notes

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

PCM distance was changed in version 0.6.0. To get the same results from previous versions, set norm_seg_length=True.

Examples
>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> p = pcm(exp_data, num_data)

  1. Katharina Witowski and Nielen Stander. "Parameter Identification of Hysteretic Models Using Partial Curve Mapping", 12th AIAA Aviation Technology, Integration, and Operations (ATIO) Conference and 14th AIAA/ISSMO Multidisciplinary Analysis and Optimization Conference, Aviation Technology, Integration, and Operations (ATIO) Conferences. doi: doi:10.2514/6.2012-5580. 

def dtw(exp_data, num_data, metric='euclidean', **kwargs):
def dtw(exp_data, num_data, metric='euclidean', **kwargs):
    r"""
    Compute the Dynamic Time Warping distance.

    This computes a generic Dynamic Time Warping (DTW) distance and follows
    the algorithm from [6]_. This can use all distance metrics that are
    available in scipy.spatial.distance.cdist.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (P, N) shape, where P
        is the number of data points, and N is the number of dimensions
    metric : str or callable, optional
        The distance metric to use. Default='euclidean'. Refer to the
        documentation for scipy.spatial.distance.cdist. Some examples:
        'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
        'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
        'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
        'wminkowski', 'yule'.
    **kwargs : dict, optional
        Extra arguments to `metric`: refer to each metric documentation in
        scipy.spatial.distance.
        Some examples:

        p : scalar
            The p-norm to apply for Minkowski, weighted and unweighted.
            Default: 2.

        w : ndarray
            The weight vector for metrics that support weights (e.g.,
            Minkowski).

        V : ndarray
            The variance vector for standardized Euclidean.
            Default: var(vstack([XA, XB]), axis=0, ddof=1)

        VI : ndarray
            The inverse of the covariance matrix for Mahalanobis.
            Default: inv(cov(vstack([XA, XB].T))).T

        out : ndarray
            The output array
            If not None, the distance matrix Y is stored in this array.

    Returns
    -------
    r : float
        DTW distance.
    d : ndarray (2-D)
        Cumulative distance matrix

    Notes
    -----
    The DTW distance is d[-1, -1].

    This has O(M*P) computational cost.

    The latest scipy.spatial.distance.cdist information can be found at
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

    Your x locations of data points should be exp_data[:, 0], and the y
    locations of the data points should be exp_data[:, 1]. Same for num_data.

    DTW is a non-metric distance, which means DTW doesn't hold the triangle
    inequality.
    https://en.wikipedia.org/wiki/Triangle_inequality

    Examples
    --------
    >>> # Generate random experimental data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> exp_data = np.zeros((100, 2))
    >>> exp_data[:, 0] = x
    >>> exp_data[:, 1] = y
    >>> # Generate random numerical data
    >>> x = np.random.random(100)
    >>> y = np.random.random(100)
    >>> num_data = np.zeros((100, 2))
    >>> num_data[:, 0] = x
    >>> num_data[:, 1] = y
    >>> r, d = dtw(exp_data, num_data)

    The euclidean distance is used by default. You can use metric and **kwargs
    to specify different types of distance metrics. The following example uses
    the city block or Manhattan distance between points.

    >>> r, d = dtw(exp_data, num_data, metric='cityblock')

    .. [6] Senin, P., 2008. Dynamic time warping algorithm review. Information
        and Computer Science Department University of Hawaii at Manoa Honolulu,
        USA, 855, pp.1-23.
        http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf
    """
    # pairwise local cost between every exp_data point and every num_data point
    c = distance.cdist(exp_data, num_data, metric=metric, **kwargs)

    d = np.zeros(c.shape)
    n, m = c.shape
    # cells on the first row/column are reachable only along the matrix edge,
    # so their cumulative cost is a running sum of the local costs
    d[:, 0] = np.cumsum(c[:, 0])
    d[0, :] = np.cumsum(c[0, :])
    # each interior cell takes the cheapest of the three admissible
    # predecessor moves (insertion, deletion, match)
    for i in range(1, n):
        for j in range(1, m):
            d[i, j] = c[i, j] + min(d[i-1, j], d[i, j-1], d[i-1, j-1])
    return d[-1, -1], d

Compute the Dynamic Time Warping distance.

This computes a generic Dynamic Time Warping (DTW) distance and follows the algorithm from 1. This can use all distance metrics that are available in scipy.spatial.distance.cdist.

Parameters
  • exp_data (array_like): Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
  • num_data (array_like): Curve from your numerical data. num_data is of (P, N) shape, where P is the number of data points, and N is the number of dimensions
  • metric (str or callable, optional): The distance metric to use. Default='euclidean'. Refer to the documentation for scipy.spatial.distance.cdist. Some examples: 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
  • **kwargs (dict, optional): Extra arguments to metric: refer to each metric documentation in scipy.spatial.distance. Some examples:

    p : scalar The p-norm to apply for Minkowski, weighted and unweighted. Default: 2.

    w : ndarray The weight vector for metrics that support weights (e.g., Minkowski).

    V : ndarray The variance vector for standardized Euclidean. Default: var(vstack([XA, XB]), axis=0, ddof=1)

    VI : ndarray The inverse of the covariance matrix for Mahalanobis. Default: inv(cov(vstack([XA, XB].T))).T

    out : ndarray The output array If not None, the distance matrix Y is stored in this array.

Returns
  • r (float): DTW distance.
  • d (ndarray (2-D)): Cumulative distance matrix
Notes

The DTW distance is d[-1, -1].

This has O(M, P) computational cost.

The latest scipy.spatial.distance.cdist information can be found at https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html

Your x locations of data points should be exp_data[:, 0], and the y locations of the data points should be exp_data[:, 1]. Same for num_data.

The euclidean distance is used by default; other distance metrics can be selected through the metric parameter.

DTW is a non-metric distance, which means DTW doesn't hold the triangle inequality. https://en.wikipedia.org/wiki/Triangle_inequality

Examples
>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r, d = dtw(exp_data, num_data)

The euclidean distance is used by default. You can use metric and **kwargs to specify different types of distance metrics. The following example uses the city block or Manhattan distance between points.

>>> r, d = dtw(exp_data, num_data, metric='cityblock')

  1. Senin, P., 2008. Dynamic time warping algorithm review. Information and Computer Science Department University of Hawaii at Manoa Honolulu, USA, 855, pp.1-23. http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf 

def dtw_path(d):
def dtw_path(d):
    r"""
    Calculate the optimal DTW path from a cumulative distance matrix.

    Starting at the terminal cell of ``d``, the warping path is traced
    backwards to ``d[0, 0]`` using the back propagation scheme defined in
    [7]_, always stepping to the cheapest admissible predecessor cell. The
    path details the index from each curve that is being compared.

    Parameters
    ----------
    d : ndarray (2-D)
        Cumulative distance matrix.

    Returns
    -------
    path : ndarray (2-D)
        The optimal DTW path, ordered so that it begins at [0, 0].

    Notes
    -----
    Note that path[:, 0] represents the indices from exp_data, while
    path[:, 1] represents the indices from the num_data.

    .. [7] Senin, P., 2008. Dynamic time warping algorithm review. Information
        and Computer Science Department University of Hawaii at Manoa Honolulu,
        USA, 855, pp.1-23.
        http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf

    Examples
    --------
    First calculate the DTW cumulative distance matrix, then extract the path.

    >>> r, d = dtw(exp_data, num_data)
    >>> path = dtw_path(d)
    """
    n_rows, n_cols = d.shape
    i = n_rows - 1
    j = n_cols - 1
    # collect cells while walking backwards from the last cell to the origin
    steps = [(i, j)]
    while (i, j) != (0, 0):
        if i == 0:
            # pinned to the first row: only a horizontal move remains
            j -= 1
        elif j == 0:
            # pinned to the first column: only a vertical move remains
            i -= 1
        else:
            up = d[i - 1, j]
            left = d[i, j - 1]
            diag = d[i - 1, j - 1]
            cheapest = min(up, left, diag)
            # tie-break priority: vertical, then horizontal, then diagonal
            if up == cheapest:
                i -= 1
            elif left == cheapest:
                j -= 1
            else:
                i -= 1
                j -= 1
        steps.append((i, j))
    # the walk was end-to-start; flip so the path begins at [0, 0]
    steps.reverse()
    return np.array(steps)

Calculates the optimal DTW path from a given DTW cumulative distance matrix.

This function returns the optimal DTW path using the back propagation algorithm that is defined in 1. This path details the index from each curve that is being compared.

Parameters
  • d (ndarray (2-D)): Cumulative distance matrix.
Returns
  • path (ndarray (2-D)): The optimal DTW path.
Notes

Note that path[:, 0] represents the indices from exp_data, while path[:, 1] represents the indices from the num_data.

Examples

First calculate the DTW cumulative distance matrix.

>>> # Generate random experimental data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> exp_data = np.zeros((100, 2))
>>> exp_data[:, 0] = x
>>> exp_data[:, 1] = y
>>> # Generate random numerical data
>>> x = np.random.random(100)
>>> y = np.random.random(100)
>>> num_data = np.zeros((100, 2))
>>> num_data[:, 0] = x
>>> num_data[:, 1] = y
>>> r, d = dtw(exp_data, num_data)

Now you can calculate the optimal DTW path

>>> path = dtw_path(d)

You can visualize the path on the cumulative distance matrix using the following code.

>>> import matplotlib.pyplot as plt
>>> plt.figure()
>>> plt.imshow(d.T, origin='lower')
>>> plt.plot(path[:, 0], path[:, 1], '-k')
>>> plt.colorbar()
>>> plt.show()

  1. Senin, P., 2008. Dynamic time warping algorithm review. Information and Computer Science Department University of Hawaii at Manoa Honolulu, USA, 855, pp.1-23. http://seninp.github.io/assets/pubs/senin_dtw_litreview_2008.pdf 

def mae(exp_data, num_data):
def mae(exp_data, num_data):
    """
    Compute the Mean Absolute Error (MAE).

    The curves are compared point by point: the mean of the absolute
    element-wise differences is returned. Both curves must have the same
    number of data points and the same dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MAE.
    """
    return np.mean(np.abs(exp_data - num_data))

Compute the Mean Absolute Error (MAE).

This computes the mean of absolute values of distances between two curves. Each curve must have the same number of data points and the same dimension.

Parameters
  • exp_data (array_like): Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
  • num_data (array_like): Curve from your numerical data. num_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
Returns
  • r (float): MAE.
def mse(exp_data, num_data):
def mse(exp_data, num_data):
    """
    Compute the Mean Squared Error (MSE).

    This computes the mean of squared distances between two curves.
    Each curve must have the same number of data points and the same dimension.

    Parameters
    ----------
    exp_data : array_like
        Curve from your experimental data. exp_data is of (M, N) shape, where
        M is the number of data points, and N is the number of dimensions
    num_data : array_like
        Curve from your numerical data. num_data is of (M, N) shape, where M
        is the number of data points, and N is the number of dimensions

    Returns
    -------
    r : float
        MSE.
    """
    c = np.square(exp_data - num_data)
    return np.mean(c)

Compute the Mean Squared Error (MSE).

This computes the mean of squared distances between two curves. Each curve must have the same number of data points and the same dimension.

Parameters
  • exp_data (array_like): Curve from your experimental data. exp_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
  • num_data (array_like): Curve from your numerical data. num_data is of (M, N) shape, where M is the number of data points, and N is the number of dimensions
Returns
  • r (float): MSE.