Binned Scatter Plot Example

A binned scatter plot is a more scalable alternative to the standard scatter plot. The data points are grouped into bins, and an aggregate statistic is used to summarize each bin. Here we use a circular area encoding to depict the count of records, visualizing the density of data points. For higher bin counts color might instead be used, though with some loss of perceptual comparison accuracy.

Vega JSON Specification <>

{
  "$schema": "https://vega.github.io/schema/vega/v3.0.json",
  "width": 200,
  "height": 200,
  "padding": 5,
  "autosize": "pad",

  "data": [
    {
      "name": "source",
      "url": "data/cars.json",
      "transform": [
        {
          "type": "filter",
          "expr": "datum['Horsepower'] != null && datum['Miles_per_Gallon'] != null && datum['Acceleration'] != null"
        }
      ]
    },
    {
      "name": "summary",
      "source": "source",
      "transform": [
        {
          "type": "extent", "field": "Horsepower",
          "signal": "hp_extent"
        },
        {
          "type": "bin", "field": "Horsepower", "maxbins": 10,
          "extent": {"signal": "hp_extent"},
          "as": ["hp0", "hp1"]
        },
        {
          "type": "extent", "field": "Miles_per_Gallon",
          "signal": "mpg_extent"
        },
        {
          "type": "bin", "field": "Miles_per_Gallon", "maxbins": 10,
          "extent": {"signal": "mpg_extent"},
          "as": ["mpg0", "mpg1"]
        },
        {
          "type": "aggregate",
          "groupby": ["hp0", "hp1", "mpg0", "mpg1"]
        }
      ]
    }
  ],

  "scales": [
    {
      "name": "x",
      "type": "linear",
      "round": true,
      "nice": true,
      "zero": true,
      "domain": {"data": "source", "field": "Horsepower"},
      "range": "width"
    },
    {
      "name": "y",
      "type": "linear",
      "round": true,
      "nice": true,
      "zero": true,
      "domain": {"data": "source", "field": "Miles_per_Gallon"},
      "range": "height"
    },
    {
      "name": "size",
      "type": "linear",
      "zero": true,
      "domain": {"data": "summary", "field": "count"},
      "range": [0,360]
    }
  ],

  "axes": [
    {
      "scale": "x",
      "grid": true,
      "domain": false,
      "orient": "bottom",
      "tickCount": 5,
      "title": "Horsepower"
    },
    {
      "scale": "y",
      "grid": true,
      "domain": false,
      "orient": "left",
      "titlePadding": 5,
      "title": "Miles_per_Gallon"
    }
  ],

  "legends": [
    {
      "size": "size",
      "title": "Count",
      "format": "s",
      "encode": {
        "symbols": {
          "update": {
            "strokeWidth": {"value": 2},
            "stroke": {"value": "#4682b4"},
            "shape": {"value": "circle"}
          }
        }
      }
    }
  ],

  "marks": [
    {
      "name": "marks",
      "type": "symbol",
      "from": {"data": "summary"},
      "encode": {
        "update": {
          "x": {"scale": "x", "signal": "(datum.hp0 + datum.hp1) / 2"},
          "y": {"scale": "y", "signal": "(datum.mpg0 + datum.mpg1) / 2"},
          "size": {"scale": "size", "field": "count"},
          "shape": {"value": "circle"},
          "strokeWidth": {"value": 2},
          "stroke": {"value": "#4682b4"},
          "fill": {"value": "transparent"}
        }
      }
    }
  ]
}