prepareBoxplotData.ts 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  /**
   * Sorts a numeric array into ascending order.
   *
   * The sort happens in place: the array passed in is reordered and the very
   * same reference is returned, so callers that need the input preserved must
   * pass a copy.
   *
   * @param arr Numbers to sort; mutated by this call.
   * @return The same array instance, now in ascending order.
   */
  function asc<T extends number[]>(arr: T): T {
      // An explicit numeric comparator is required because the default
      // Array.prototype.sort ordering compares elements as strings.
      const byAscendingValue = (left: number, right: number): number => left - right;
      arr.sort(byAscendingValue);
      return arr;
  }
  /**
   * Computes the p-quantile of an ascending-sorted array, interpolating
   * linearly between the two neighbouring elements when the target rank is
   * not a whole number.
   *
   * @param ascArr Values already sorted in ascending order.
   * @param p Quantile to compute, e.g. 0.25, 0.5 or 0.75.
   * @return The (possibly interpolated) quantile value.
   */
  function quantile(ascArr: number[], p: number): number {
      // 1-based fractional rank of the requested quantile.
      const rank = (ascArr.length - 1) * p + 1;
      const lowerRank = Math.floor(rank);
      const fraction = rank - lowerRank;
      // Unary plus coerces the element at the lower rank to a number.
      const lowerValue = +ascArr[lowerRank - 1];

      if (!fraction) {
          // The rank lands exactly on an element: nothing to interpolate.
          return lowerValue;
      }
      return lowerValue + fraction * (ascArr[lowerRank] - lowerValue);
  }
  /**
   * Helper method for preparing box plot data from raw samples.
   *
   * See:
   * <https://en.wikipedia.org/wiki/Box_plot#cite_note-frigge_hoaglin_iglewicz-2>
   * <http://stat.ethz.ch/R-manual/R-devel/library/grDevices/html/boxplot.stats.html>
   *
   * @param rawData One array of numbers per box, like
   * [
   *     [12,232,443], (raw data set for the first box)
   *     [3843,5545,1232], (raw data set for the second box)
   *     ...
   * ]
   * Each data set is copied before it is sorted, so `rawData` itself is
   * left untouched.
   * @param opt Optional settings; may be omitted entirely.
   * @param opt.boundIQR Multiplier of the interquartile range (Q3 - Q1) used
   * to bound the whiskers. Defaults to 1.5, i.e. the low bound is
   * Q1 - 1.5 * (Q3 - Q1) and the high bound is Q3 + 1.5 * (Q3 - Q1), each
   * clamped to the data set's own min/max. Values outside the bounds are
   * reported as outliers. If 'none' or 0 is passed, the bounds are not used:
   * the whiskers span min..max and no outliers are produced.
   * @param opt.layout When 'vertical', each outlier is emitted as
   * [value, boxIndex]; otherwise as [boxIndex, value].
   * @return {
   *     boxData: one [low, Q1, Q2, Q3, high] entry per data set
   *     outliers: one [boxIndex, value] pair per outlier (see opt.layout)
   *     axisData: the index of each data set as a string ('0', '1', ...)
   * }
   */
  export default function (
      rawData: number[][],
      opt?: {
          boundIQR?: number | 'none',
          layout?: 'horizontal' | 'vertical'
      }
  ): {
      boxData: number[][]
      outliers: number[][]
      axisData: string[]
  } {
      // Work on a local instead of reassigning the parameter.
      const settings = opt || {};
      const boxData: number[][] = [];
      const outliers: number[][] = [];
      const axisData: string[] = [];

      const boundIQR = settings.boundIQR;
      const useExtreme = boundIQR === 'none' || boundIQR === 0;
      // Loop-invariant IQR multiplier. The cast keeps the original runtime
      // coercion; when boundIQR is 'none' the value is never used because
      // `useExtreme` short-circuits the bound computation below.
      const iqrFactor = boundIQR == null ? 1.5 : (boundIQR as number);
      const isVertical = settings.layout === 'vertical';

      for (let i = 0; i < rawData.length; i++) {
          axisData.push(String(i));

          // Sort a copy so the caller's data set keeps its original order.
          const ascList = asc(rawData[i].slice());

          const Q1 = quantile(ascList, 0.25);
          const Q2 = quantile(ascList, 0.5);
          const Q3 = quantile(ascList, 0.75);

          const min = ascList[0];
          const max = ascList[ascList.length - 1];

          let low = min;
          let high = max;
          if (!useExtreme) {
              // Whiskers never extend past the actual data range.
              const bound = iqrFactor * (Q3 - Q1);
              low = Math.max(min, Q1 - bound);
              high = Math.min(max, Q3 + bound);
          }

          boxData.push([low, Q1, Q2, Q3, high]);

          for (let j = 0; j < ascList.length; j++) {
              const dataItem = ascList[j];
              if (dataItem < low || dataItem > high) {
                  // Coordinate order follows the layout of the chart.
                  outliers.push(isVertical ? [dataItem, i] : [i, dataItem]);
              }
          }
      }

      return {
          boxData: boxData,
          outliers: outliers,
          axisData: axisData
      };
  }