Regression Example

A two-dimensional regression analysis models one data variable as a function of another. The resulting model produces a trend line that summarizes and extrapolates observed data. This example uses parametric regression models to predict IMDB users’ film ratings based on Rotten Tomatoes critics’ ratings. The regression options range from linear regression to other functions such as logarithmic, quadratic, and polynomial regression. Alternatively, see the loess regression example for a non-parametric approach to scatter plot smoothing.

Vega JSON Specification

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "padding": 5,
  "width": 500,
  "height": 500,
  "autosize": "pad",
  "signals": [
    {
      "name": "method",
      "value": "linear",
      "bind": {
        "input": "select",
        "options": [
          "linear",
          "log",
          "exp",
          "pow",
          "quad",
          "poly"
        ]
      }
    },
    {
      "name": "polyOrder",
      "value": 3,
      "bind": {
        "input": "range",
        "min": 1,
        "max": 10,
        "step": 1
      }
    },
    {
      "name": "groupby",
      "value": "none",
      "bind": {
        "input": "select",
        "options": [
          "none",
          "genre"
        ]
      }
    }
  ],
  "data": [
    {
      "name": "movies",
      "url": "data/movies.json",
      "transform": [
        {
          "type": "filter",
          "expr": "datum.Rotten_Tomatoes_Rating != null && datum.IMDB_Rating != null"
        }
      ]
    },
    {
      "name": "trend",
      "source": "movies",
      "transform": [
        {
          "type": "regression",
          "groupby": [
            {
              "signal": "groupby === 'genre' ? 'Major_Genre' : 'foo'"
            }
          ],
          "method": {
            "signal": "method"
          },
          "order": {
            "signal": "polyOrder"
          },
          "extent": {
            "signal": "domain('x')"
          },
          "x": "Rotten_Tomatoes_Rating",
          "y": "IMDB_Rating",
          "as": [
            "u",
            "v"
          ]
        }
      ]
    }
  ],
  "scales": [
    {
      "name": "x",
      "type": "linear",
      "domain": {
        "data": "movies",
        "field": "Rotten_Tomatoes_Rating"
      },
      "range": "width"
    },
    {
      "name": "y",
      "type": "linear",
      "domain": {
        "data": "movies",
        "field": "IMDB_Rating"
      },
      "range": "height"
    }
  ],
  "marks": [
    {
      "type": "symbol",
      "from": {
        "data": "movies"
      },
      "encode": {
        "enter": {
          "x": {
            "scale": "x",
            "field": "Rotten_Tomatoes_Rating"
          },
          "y": {
            "scale": "y",
            "field": "IMDB_Rating"
          },
          "fillOpacity": {
            "value": 0.5
          },
          "size": {
            "value": 16
          }
        }
      }
    },
    {
      "type": "group",
      "from": {
        "facet": {
          "data": "trend",
          "name": "curve",
          "groupby": "Major_Genre"
        }
      },
      "marks": [
        {
          "type": "line",
          "from": {
            "data": "curve"
          },
          "encode": {
            "enter": {
              "x": {
                "scale": "x",
                "field": "u"
              },
              "y": {
                "scale": "y",
                "field": "v"
              },
              "stroke": {
                "value": "firebrick"
              }
            }
          }
        }
      ]
    }
  ]
}