Regression Example

A two-dimensional regression analysis models one data variable as a function of another. The resulting model produces a trend line that summarizes and extrapolates observed data. This example uses parametric regression models to predict IMDB users’ film ratings based on Rotten Tomatoes critics’ ratings. The available regression methods are linear, logarithmic, exponential, power, quadratic, and polynomial (with an adjustable polynomial order). Alternatively, see the loess regression example for a non-parametric approach to scatter plot smoothing.

Vega JSON Specification

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "description": "A scatter plot with trend line calculated via user-configurable regression methods.",
  "padding": 5,
  "width": 500,
  "height": 500,
  "autosize": "pad",

  "signals": [
    {
      "name": "method", "value": "linear",
      "bind": {"input": "select", "options": [
        "linear", "log", "exp", "pow", "quad", "poly"
      ]}
    },
    {
      "name": "polyOrder", "value": 3,
      "bind": {"input": "range", "min": 1, "max": 10, "step": 1}
    },
    {
      "name": "groupby", "value": "none",
      "bind": {"input": "select", "options": ["none", "genre"]}
    }
  ],

  "data": [
    {
      "name": "movies",
      "url": "data/movies.json",
      "transform": [
        {
          "type": "filter",
          "expr": "datum['Rotten Tomatoes Rating'] != null && datum['IMDB Rating'] != null"
        }
      ]
    },
    {
      "name": "trend",
      "source": "movies",
      "transform": [
        {
          "type": "regression",
          "groupby": [{"signal": "groupby === 'genre' ? 'Major Genre' : 'foo'"}],
          "method": {"signal": "method"},
          "order": {"signal": "polyOrder"},
          "extent": {"signal": "domain('x')"},
          "x": "Rotten Tomatoes Rating",
          "y": "IMDB Rating",
          "as": ["u", "v"]
        }
      ]
    }
  ],

  "scales": [
    {
      "name": "x",
      "type": "linear",
      "domain": {"data": "movies", "field": "Rotten Tomatoes Rating"},
      "range": "width"
    },
    {
      "name": "y",
      "type": "linear",
      "domain": {"data": "movies", "field": "IMDB Rating"},
      "range": "height"
    }
  ],

  "marks": [
    {
      "type": "symbol",
      "from": {"data": "movies"},
      "encode": {
        "enter": {
          "x": {"scale": "x", "field": "Rotten Tomatoes Rating"},
          "y": {"scale": "y", "field": "IMDB Rating"},
          "fillOpacity": {"value": 0.5},
          "size": {"value": 16}
        }
      }
    },
    {
      "type": "group",
      "from": {
        "facet": {
          "data": "trend",
          "name": "curve",
          "groupby": "Major Genre"
        }
      },
      "marks": [
        {
          "type": "line",
          "from": {"data": "curve"},
          "encode": {
            "enter": {
              "x": {"scale": "x", "field": "u"},
              "y": {"scale": "y", "field": "v"},
              "stroke": {"value": "firebrick"}
            }
          }
        }
      ]
    }
  ]
}