Bokehの図形モデル

Bokehの図形モデル#

import helper.magics
from bokeh.plotting import figure
import pandas as pd
import numpy as np
from bokeh.models import ColumnDataSource
from bokeh.layouts import column, row
from bokeh.io import output_notebook, show, reset_output

output_notebook()

Loading BokehJS ...

モデルの属性#

図形オブジェクトはモデルクラスModelを継承しており、図形を描画する際に直接図形オブジェクトを作成する必要はありません。figureオブジェクトの描画メソッドを呼び出すことで、対応する図形オブジェクトを作成し、それをチャートに追加することができます。例えば、figure.line()はLineオブジェクトを作成し、figure.scatter()はScatterオブジェクトを作成します。

図形オブジェクトの属性はデータソースの列と接続することができます。例えば、データソースの列を使用して各散布点の塗りつぶし色を指定することができます。本書で提供されているshow_glyph_info()を使用すると、図形の各属性のタイプを確認することができます。以下のプログラムはScatter図形の各属性を表示します。

from bokeh.models import Scatter
from helper.bokeh import show_glyph_info

%omit 40 show_glyph_info(Scatter)

angle: 
------
AngleSpec(units_default="rad")

angle_units: 
------------
NotSerialized(Enum("deg", "rad", "grad", "turn"))

decorations: 
------------
List(Instance(Decoration))

fill_alpha: 
-----------
AlphaSpec(
    String,
    Float,
    Instance(Value),
    Instance(Field),
    Instance(Expr),
    Struct(value=Float, transform=Instance(Transform)),
    Struct(field=String, transform=Instance(Transform)),
    Struct(expr=Instance(Expression), transform=Instance(Transform)),
)

fill_color: 
-----------
ColorSpec(
    String,
    Nullable(Color),
    Instance(Value),
    Instance(Field),
    Instance(Expr),
    Struct(value=Nullable(Color), transform=Instance(Transform)),
    Struct(field=String, transform=Instance(Transform)),
    Struct(expr=Instance(Expression), transform=Instance(Transform)),
)

hatch_alpha: 
------------
...

上記のプログラムは、塗りつぶし色属性fill_colorのタイプを以下のように表示します：

ColorSpec(
    String,
    Nullable(Color),
    Instance(Value),
    Instance(Field),
    Instance(Expr),
    Struct(value=Nullable(Color), transform=Instance(Transform)),
    Struct(field=String, transform=Instance(Transform)),
    Struct(expr=Instance(Expression), transform=Instance(Transform)),
)

タイプ名にSpecが含まれている場合、その属性はデータソースと接続できることを示します。その値は複数のタイプを持つことができます：例えば、文字列タイプString、色タイプColor、データソースのフィールドField等々。

色タイプColorは、色を表す文字列またはタプルを受け取ることができます。例えば、以下の4つはすべて色を表す値です：

circle.fill_color = "red" #named color
circle.fill_color = "#a240a2"   #hexadecimal color value
circle.fill_color = (100, 100, 255) #tuple holding the red, green and blue components
circle.fill_color = (100, 100, 255, 0.5) #the trailing float is the opacity

ColorSpecタイプの属性を文字列で設定する場合、まず文字列を色名として解釈し、失敗した場合はデータソースの列名として解釈します。必ずデータソースの列名として解釈させたい場合はfieldオブジェクトを使います。クラスメソッドdataspecs()を使用して、データソースと接続できるすべての属性を取得することができます。

次のセクションでは、一般的な図形の描画方法を詳細に説明します。

散布点Scatter#

Scatter は散布図を描画するモデルで、次の属性はデータソースと連携できます。

from bokeh.models import Scatter

# Names of the Scatter properties returned by the dataspecs() class method.
Scatter.dataspecs().keys()

dict_keys(['x', 'y', 'size', 'angle', 'line_color', 'line_alpha', 'line_width', 'line_join', 'line_cap', 'line_dash', 'line_dash_offset', 'fill_color', 'fill_alpha', 'hatch_color', 'hatch_alpha', 'hatch_scale', 'hatch_pattern', 'hatch_weight', 'marker'])

Bokehの列挙型の取り得る値はbokeh.core.enumsモジュールで定義されています。例えば、以下はmarker属性のすべての候補値を取得する方法です：

from bokeh.core import enums

%col 4 list(enums.MarkerType)

['asterisk',             'circle',               'circle_cross',         'circle_dot',          
 'circle_x',             'circle_y',             'cross',                'dash',                
 'diamond',              'diamond_cross',        'diamond_dot',          'dot',                 
 'hex',                  'hex_dot',              'inverted_triangle',    'plus',                
 'square',               'square_cross',         'square_dot',           'square_pin',          
 'square_x',             'star',                 'star_dot',             'triangle',            
 'triangle_dot',         'triangle_pin',         'x',                    'y']                   

散布点を使用してデータを表示する場合、通常データソースには色や散布点の形状を直接表すデータは保存されていません。この場合、データ変換器を使用してデータソースのデータを色や形状などのデータに変換できます。以下では、散布点を使用して3種類のアヤメのがく片サイズの分布を表示します。

❶factor_mark()は分類データを散布点の形状に変換できます。その3つのパラメータは、データソースの列名、散布点形状リスト、分類データリストです。❷factor_cmap()は分類データを色に変換できます。ここでは、分類カラーリストCategory10_10からランダムに3色を選択しています。❸scatter()メソッドを呼び出す際、パラメータlegend_fieldを使用して凡例に対応する列名を指定できます。パラメータcolorを使用して色を設定すると、line_colorとfill_color属性が同時に設定されます。パラメータalphaもcolorと同様に、line_alphaとfill_alpha属性を同時に設定できます。

from bokeh.transform import factor_mark, factor_cmap
from bokeh.palettes import Category10_10
from random import sample

# Load the iris table and collect the distinct species labels.
df = pd.read_csv("data/iris.csv")
species_types = df["species"].unique().tolist()
source = ColumnDataSource(df)

# Map every species to its own marker shape.
marker_shapes = ["diamond", "circle", "triangle"]
marker = factor_mark("species", marker_shapes, species_types)  # ❶

# Map every species to a colour picked at random from Category10_10.
palette = sample(Category10_10, k=len(species_types))
color = factor_cmap("species", palette, species_types)  # ❷

fig = figure(width=500, height=350)
scatter = fig.scatter(
    source=source,
    x="sepal_length",
    y="sepal_width",
    marker=marker,
    color=color,
    alpha=0.6,
    size=12,
    legend_field="species",
)  # ❸
show(fig)

円形Circle#

円形を表す図元Circleは、データ空間の長さ単位で円形の半径を指定できます。これにより、描画された円形はチャートビューのズームに合わせて拡大縮小されます。次のテーブルは円形の半径に関連する3つの属性です。radius_unitsが’data’で、チャートのmatch_aspect属性がTrueの場合、チャートのX軸とY軸の単位長さは同じになり、円形の半径はデータ空間の長さ単位になります。X軸とY軸の単位長さが異なる場合、radius_dimension属性を使用して半径の単位長さを指定します。’x’はX軸の単位長さを使用し、’y’はY軸の単位長さを使用し、’max’はX軸とY軸の単位長さの大きい方を使用し、’min’は単位長さの小さい方を使用します。

属性名	属性類型	説明
`radius`	`DistanceSpec`	円形の半径
`radius_dimension`	`Enum`	半径長さに対応する次元、候補値は’x’, ‘y’, ‘max’, ‘min’
`radius_units`	`Enum`	半径の単位、候補値は’screen’, ‘data’、デフォルト値は’data’

以下の例では、異なるサイズと塗りつぶし色の円形を使用して絵を構成し、その効果は次のグラフのようになります。ファイル”venus-face.csv”の各行は1つの円形に対応し、xとy列は円の中心座標、radius列は円の半径、r、g、b列は塗りつぶし色の赤、緑、青の3つの成分です。pandas.read_csv()を使用してこのファイルを読み込んだ後、DataFrameオブジェクトのassign()メソッドを呼び出して新しいDataFrameオブジェクトを作成します。そのcolor列はr、g、b列を16進数に変換した後の色値です。

df = pd.read_csv("data/venus-face.csv")
# Pack r, g, b into one integer per row, then format it as a "#rrggbb" string.
packed_rgb = df.eval("r * 2**16 + g * 2**8 + b")
hex_colors = packed_rgb.apply("#{:06x}".format)
df2 = df.assign(color=hex_colors).drop(columns=["r", "g", "b"])
%C df.head();df2.head()

             df.head()                         df2.head()         
-----------------------------------  -----------------------------
     x    y   radius    r    g    b       x    y   radius    color
0    0  500  71.5124  197  212  213  0    0  500  71.5124  #c5d4d5
1    0  363  63.9851  191  209  211  1    0  363  63.9851  #bfd1d3
2    0  139  59.2101  201  215  213  2    0  139  59.2101  #c9d7d5
3  159  500  48.4660  193  209  204  3  159  500  48.4660  #c1d1cc
4    0  249  43.8037  184  203  203  4    0  249  43.8037  #b8cbcb

ColumnDataSourceはDataFrameオブジェクトを直接データソースのデータに変換することをサポートしています。circle()を呼び出して円形図元を描画する際、円形の半径radius属性はデータソースのradius列に対応し、塗りつぶし色属性fill_colorはcolor列に対応します。画像全体の比率を正しく保つために、match_aspect属性をTrueに、aspect_ratio属性を1に設定する必要があります。

# One circle per row: centre from x/y, radius and fill colour from their columns.
source = ColumnDataSource(data=df2)
fig = figure(
    x_range=(0, 500),
    y_range=(0, 500),
    match_aspect=True,
    aspect_ratio=1,
)
fig.circle(
    x="x",
    y="y",
    radius="radius",
    fill_color="color",
    line_color=None,
    source=source,
)
show(fig)

線分Segment#

図元Segmentを使用して、複数の色と太さが異なる線分を描画できます。そのx0, x1, y0, y1, line_color, line_alpha, line_widthなどの属性はデータソースに接続できます。以下の例では、Segment図元を使用して電子ピアノから録音したMIDIデータを表示し、その効果は次のグラフのようになります。

ファイル”for_elise.csv”には電子ピアノのキー情報が保存されています。各行は1つのキーイベントを表し、note列はキーの音符、velocity列はキーの強度、start列とend列はキーが押された時間と離された時間です。DataFrameオブジェクトとして読み込んだ後、音符をテキスト形式に変換します。例えば76は”E5”に変換されます。キーの強度は0から127の整数であるため、それを2倍してインデックスとしてカラーテーブルViridis256から対応する色を取得できます。

from bokeh.palettes import Viridis256

# The twelve pitch names of one octave, in semitone order.
notes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
# Note names for 15 consecutive octaves numbered from -1, so position 76 is "E5".
all_notes_name = pd.Series(
    [f"{pitch}{octave}" for octave in range(-1, 14) for pitch in notes]
)
df = pd.read_csv("data/for_elise.csv")
# Look up the textual name of every note number.
df["note_name"] = all_notes_name[df["note"]].to_numpy()
# Doubling the velocity gives the index into the 256-entry Viridis palette.
df["color"] = [Viridis256[2 * velocity] for velocity in df["velocity"]]
print(df.head(3))

   note  velocity  start    end note_name    color
  76        65  0.391  0.506        E5  #1F928C
  75        74  0.495  0.610       D#5  #1FA386
  76        71  0.581  0.713        E5  #1E9D88

以下は描画部分です。❶figure()のパラメータy_rangeがリストの場合、Y軸はそのリストを目盛りラベルとして使用します。❷線分図元Segmentを使用してMIDI音符を表します。その属性x0、y0、x1、y1は線分の始点座標と終点座標に対応します。❸マウスホバーツールを追加します。このツールのtooltips属性は、ホバー時に表示するヒント情報をカスタマイズできます。その各要素はタプルで、第0要素はヒントラベル、第1要素が@で始まる場合、データソースの対応する列から情報を取得して表示します。

from bokeh.models import HoverTool

source = ColumnDataSource(data=df)

# The Y axis uses the note names between the lowest and highest note played.
lowest_note = df.note.min()
highest_note = df.note.max()
fig = figure(
    width=800,
    height=400,
    y_range=all_notes_name.loc[lowest_note:highest_note],
)  # ❶

# One horizontal segment per key press, running from its start time to its end time.
fig.segment(
    source=source,
    x0="start",
    y0="note_name",
    x1="end",
    y1="note_name",
    color="color",
    line_width=6,
)  # ❷

# Each tooltip row is a (label, "@column") pair.
tooltip_rows = [("note", "@note_name"), ("velocity", "@velocity")]
hover = HoverTool(tooltips=tooltip_rows)  # ❸
fig.tools.append(hover)
show(fig)

多線MultiLine#

Line図元は単一の折れ線しか表せません。複数の折れ線を描画するには、Figure.line()をループで呼び出し、各折れ線に個別のデータソースオブジェクトを指定する必要があります。大量の折れ線を描画する場合、この方法は時間がかかります。そのため、Bokehは複数の折れ線を描画できるMultiLine図元を提供しています。そのxs, ys, line_color, line_width, line_alphaなどの属性はデータソースに接続できます。xsとysはネストされたリストで、その各ペアは1つの折れ線の座標リストです。

以下のプログラムは正方形と直角三角形を描画します。2つの形状の線幅と線の色はデータソースで指定されます。その結果は次のグラフ(左)のようになります。

# Two separate polylines (a square and a triangle), each with its own colour and width.
source1 = ColumnDataSource(
    data={
        "xs": [[0, 1, 1, 0, 0], [0.1, 0.9, 0.5, 0.1]],
        "ys": [[0, 0, 1, 1, 0], [0.1, 0.1, 0.5, 0.1]],
        "color": ["red", "green"],
        "width": [2, 4],
    }
)
fig1 = figure(toolbar_location=None, frame_width=300, frame_height=300)
fig1.multi_line(
    xs="xs", ys="ys", line_color="color", line_width="width", source=source1
);

折れ線の座標リストにNaNが含まれている場合、折れ線はNaNを区切り点として複数の折れ線に分割されます。以下のsource2では、xsとysリストには1つの要素しかありませんが、折れ線は2つの折れ線に分割されます。これらの折れ線は論理的には1つの折れ線であるため、colorとwidthリストには1つの要素しかありません。その結果は次のグラフ(右)のようになります。

# A single logical polyline; the NaN entry splits it into two drawn pieces.
source2 = ColumnDataSource(
    data={
        "xs": [[0, 1, 1, 0, 0, np.nan, 0.1, 0.9, 0.5, 0.1]],
        "ys": [[0, 0, 1, 1, 0, np.nan, 0.1, 0.1, 0.5, 0.1]],
        "color": ["red"],
        "width": [2],
    }
)
fig2 = figure(toolbar_location=None, frame_width=300, frame_height=300)
fig2.multi_line(
    xs="xs", ys="ys", line_color="color", line_width="width", source=source2
)

# Put both figures side by side.
layout = row(fig1, fig2)
show(layout)

以下では、Bokeh の MultiLine 図元を使用して等値線図を描画する方法を示します。Bokeh 自体には等値線を計算する機能がないため、contourpy を利用して等高線を生成します。

等値線をプロットする際、同じレベルの等値線が複数の独立した曲線として表現されることがあります。これらの曲線を適切に処理するために、np.vstack() を使用して座標データを結合し、曲線の区切りとして NaN を挿入することで、Bokeh の MultiLine で適切に描画できるようにします。

データソース (ColumnDataSource) には以下の 3 つのキーを含めます。xs と ys は配列のリストで、それぞれの配列が 1 つのレベルに対応します。各配列内では、NaN を挿入することで複数の曲線を区切ります。

"xs"：等値線の X 座標のリスト
"ys"：等値線の Y 座標のリスト
"levels"：各等値線のレベル値（Z 値）

等値線の色は、linear_cmap() を使用して levels の数値をカラーマップ viridis にマッピングし、各等値線に対応する色を設定します。

from bokeh.transform import linear_cmap
from bokeh.palettes import viridis
import contourpy

# Sample the surface on a 100 x 150 grid.
Y, X = np.mgrid[-6:6:100j, -8:8:150j]
Z = np.sin(X) + np.cos(Y)
z_low, z_high = Z.min(), Z.max()

contour_gen = contourpy.contour_generator(X, Y, Z, name="serial")

# Eight interior levels: the two extreme values of the ten-point range are dropped.
level_values = np.linspace(z_low, z_high, 10)[1:-1]
levels_lines = contour_gen.multi_lines(level_values)

# For every level, stack all of its curves into one array with a NaN row after
# each curve, so that a single MultiLine entry can hold several separate curves.
nan_row = [[np.nan, np.nan]]
xs = []
ys = []
for curves in levels_lines:
    pieces = []
    for curve in curves:
        pieces += [curve, nan_row]
    stacked = np.vstack(pieces)
    xs.append(stacked[:, 0])
    ys.append(stacked[:, 1])

source = ColumnDataSource(data={"xs": xs, "ys": ys, "levels": level_values})

fig = figure(
    x_range=(X.min(), X.max()),
    y_range=(Y.min(), Y.max()),
    match_aspect=True,
    aspect_ratio=8 / 6,
)
# Colour each contour by its level value.
cmap = linear_cmap("levels", viridis(256), low=z_low, high=z_high)
lines = fig.multi_line("xs", "ys", source=source, line_color=cmap, line_width=2)
show(fig)

多角形ブロックPatchとPatches#

LineとMultiLineと同様に、Patchは多角形の塗りつぶし領域を描画し、Patchesは複数の多角形の塗りつぶし領域を描画するために使用されます。Patchesの属性はMultiLineと似ていますが、塗りつぶしの色やパターンに関連するいくつかの属性が追加されています。例えば、fill_alpha、fill_color、hatch_alpha、hatch_colorなどです。以下の例では、Patchesを使用して中国地図の各省級行政区を描画し、その結果を次のグラフに示します。

import json
from bokeh.models import ColumnDataSource, CustomJS, HoverTool, Div
from bokeh.plotting import figure
from bokeh.transform import linear_cmap
from bokeh.palettes import viridis

# Parse the GeoJSON file into plain Python dicts and lists.
with open("data/china_simple.json", encoding="utf-8") as f:
    geo = json.load(f)  # ❶

xs = []
ys = []
population = []

for feature in geo["features"]:
    geometry = feature["geometry"]
    geo_type = geometry["type"]
    coordinates = geometry["coordinates"]

    if geo_type == "Polygon":
        # Only the first ring of the polygon is used.
        x, y = zip(*coordinates[0])
    elif geo_type == "MultiPolygon":
        # Chain the first ring of every member polygon, with a NaN pair after
        # each one so the pieces are drawn as separate areas.
        xys = []
        for item in coordinates:
            xys.extend(item[0])
            xys.append([np.nan, np.nan])
        x, y = zip(*xys)
    else:
        # Any other geometry type has no outline to draw. Skip the feature
        # entirely instead of reusing the previous feature's x/y.
        continue

    # Append the three columns together so they always stay the same length.
    xs.append(x)
    ys.append(y)
    population.append(feature["properties"]["population"])  # ❷

fig = figure(match_aspect=True, aspect_scale=1, frame_width=500, frame_height=350)
source = ColumnDataSource(data=dict(xs=xs, ys=ys, population=population))
# Colour every region by its population, scaled between the observed extremes.
cmap = linear_cmap(
    "population", viridis(256), low=np.min(population), high=np.max(population)
)
patches = fig.patches("xs", "ys", source=source, fill_color=cmap, line_color="black")
show(fig)

Bokehは、ブラウザでGeoJSONを解析できるデータソースクラスGeoJSONDataSourceも提供しています。そのgeojson属性はGeoJSONデータを表す文字列で、ブラウザで実行されるJavaScriptプログラムがこの文字列をデータソースの列に変換します。xsとys列はそれぞれ多角形領域のX-Y軸座標を保存し、各領域の属性もデータソースに保存されます。例えば、population列にはすべての領域のpopulation属性が保存されます。以下では、GeoJSONDataSourceを使用して上のグラフと同じ地図を描画します。

from bokeh.models import GeoJSONDataSource

# Read the raw GeoJSON text; it is handed to the data source unparsed.
with open("data/china_simple.json", encoding="utf-8") as f:
    geojson_string = f.read()

fig = figure(
    frame_width=500,
    frame_height=350,
    match_aspect=True,
    aspect_scale=1,
)
source = GeoJSONDataSource(geojson=geojson_string)
# No explicit bounds are given for the colour mapping (low and high are None).
cmap = linear_cmap("population", viridis(256), low=None, high=None)
patches = fig.patches(
    "xs",
    "ys",
    source=source,
    fill_color=cmap,
    line_color="black",
)
show(fig)

GeoJSONDataSourceデータソースのデータ列はブラウザで生成されるため、Pythonで直接GeoJSONデータの解析結果を確認することはできません。

Patchesの他に、Bokehは穴のある多角形を描画するためのMultiPolygons図形も提供しています。これは、多層にネストされたリストを使用して多角形上の各点の座標値を表します。具体的な使用方法については、Bokehのドキュメントを参照してください。

六角形グリッドHexTile#

HexTileは、蜂の巣状の正六角形グリッドを描画するために使用されます。そのfill_alpha, fill_color, line_alpha, line_color, line_width, q, r, scaleなどの属性はデータソースに接続できます。属性q, rは六角形グリッド座標系の座標です。六角形グリッドには、尖頂(pointytop)と平頂(flattop)の2つの配置方法があります。次のグラフに示すように、青いブロックはQ軸、オレンジのブロックはR軸で、ブロック内の数字はQ-R座標系での座標です。R軸の正方向は下向きであることに注意してください。

from bokeh.palettes import Category10_10
from bokeh.util.hex import axial_to_cartesian
from bokeh.layouts import row

# Axial coordinates of a 5 x 5 block of tiles centred on the origin.
q_grid, r_grid = np.mgrid[-2:3, -2:3]
q, r = q_grid.ravel(), r_grid.ravel()

# Base colour everywhere, then highlight the r == 0 row, the q == 0 row and the origin.
on_q_axis = r == 0
on_r_axis = q == 0
colors = np.full(len(q), Category10_10[7])
colors[on_q_axis] = Category10_10[0]
colors[on_r_axis] = Category10_10[1]
colors[on_q_axis & on_r_axis] = Category10_10[2]

# The coordinate labels are the same for both orientations.
labels = ["(%d, %d)" % (qi, ri) for qi, ri in zip(q, r)]

figs = []
for orientation in ["pointytop", "flattop"]:
    fig = figure(match_aspect=True, toolbar_location=None, title=orientation)
    fig.grid.visible = False
    fig.hex_tile(
        q=q,
        r=r,
        size=1,
        orientation=orientation,
        fill_color=colors,
        line_color="white",
        alpha=0.5,
    )
    # Write each tile's (q, r) pair at the tile centre.
    x, y = axial_to_cartesian(q, r, 1, orientation)
    fig.text(
        x,
        y,
        text=labels,
        text_align="center",
        text_baseline="middle",
        font_size="10pt",
    )
    figs.append(fig)

layout_hex_tile = row(figs)
show(layout_hex_tile)

六角形グリッドの座標を計算するために、Bokehはいくつかのヘルパー関数を提供しています：cartesian_to_axial()、axial_to_cartesian()、hexbin()。cartesian_to_axial()はデカルト座標系の座標を六角形グリッドの座標に変換し、hexbin()は散在する点が六角形グリッドに落ちる回数をカウントします。

以下の例では、画像を六角形モザイクに変換し、その効果を次のグラフに示します。❶cartesian_to_axial()を使用して画像のピクセル座標点を六角形グリッド座標系のグリッド座標に変換します。そのパラメータsizeは六角形の外接円の半径を指定します。❷画像をDataFrameオブジェクトに変換します。これには5列のデータがあり、q列とr列は各ピクセルに対応する六角形グリッド座標で、red、green、blue列は各ピクセルの3つの色成分です。❸groupby()メソッドを使用して、各グリッド内のピクセルの3つの色成分の平均値を計算します。❹最後に、平均色を16進数の文字列に変換します。❺hex_tile()を呼び出して六角形グリッドを描画する際には、cartesian_to_axial()と同じsizeとorientationパラメータ値を使用する必要があります。

from imageio.v3 import imread
from bokeh.util.hex import cartesian_to_axial

img = imread("data/vinci_target.png")
h, w, _ = img.shape

# Tile size passed to both cartesian_to_axial() and hex_tile().
size = 8
Y, X = np.mgrid[:h, :w]
# Hex-grid coordinates of the tile that each pixel position falls into.
Q, R = cartesian_to_axial(X, Y, size, orientation="pointytop")  # ❶

# One row per pixel: its tile coordinates followed by its colour components.
# The rows are reversed so that the last image row is paired with Y == 0.
# Only the first three channels are kept, so an image with an extra (alpha)
# channel still yields exactly five columns instead of breaking the reshape.
df_img = pd.DataFrame(
    np.dstack([Q, R, img[::-1, :, :3]]).reshape(-1, 5),
    columns=["q", "r", "red", "green", "blue"],
)  # ❷
# Average colour of all pixels that share a tile.
df_img2 = df_img.groupby(["q", "r"]).mean().astype(np.uint32).reset_index()  # ❸

# Pack the averaged components into "#rrggbb" strings.
tile_colors = (
    df_img2.eval("red*2**16 + green*2**8 + blue").map("#{:06x}".format).tolist()
)  # ❹

source = ColumnDataSource(
    data=dict(q=df_img2.q.values, r=df_img2.r.values, color=tile_colors)
)
# The frame size equals the axis ranges, so one data unit is one screen pixel.
fig = figure(
    match_aspect=True,
    frame_width=w,
    frame_height=h,
    x_range=(0, w),
    y_range=(0, h),
    toolbar_location=None,
)
fig.xaxis.visible = fig.yaxis.visible = False
# size and orientation must match the values given to cartesian_to_axial() above.
renderer = fig.hex_tile(
    "q",
    "r",
    fill_color="color",
    line_color=None,
    size=size,
    orientation="pointytop",
    source=source,
)  # ❺
show(fig);

テキストText#

テキスト図形Textは、通常、チャートに注釈情報を追加するために使用されます。そのangle, text, text_alpha, text_color, text_font_size, x, y, x_offset, y_offsetなどの属性はデータソースに接続できます。x, yはテキストの位置を指定し、この座標とテキスト位置の関係はtext_align属性とtext_baseline属性によって決定されます。次のグラフは、これらの属性の候補値とそれらがテキストとどのように相対的に配置されるかを示しています。

# %hide
from bokeh.core import enums
from itertools import product

# Every value offered by the two enumerations.
text_align = list(enums.TextAlign)
text_baseline = list(enums.TextBaseline)

# Categorical axes: one column per alignment, one row per baseline.
fig = figure(
    x_range=["left", "center", "right"],
    y_range=["bottom", "ideographic", "alphabetic", "middle", "hanging", "top"],
    height=400,
    toolbar_location=None,
)
fig.xaxis.axis_label = "text_align"
fig.yaxis.axis_label = "text_baseline"

# Draw the sample text once for every alignment/baseline combination.
for align in text_align:
    for baseline in text_baseline:
        fig.text(
            x=[align],
            y=[baseline],
            text=["Py³"],
            text_align=align,
            text_baseline=baseline,
            font_size="30pt",
        )

show(fig)

以下の例では、Textを使用して次のグラフに示すようなワードクラウドを表示します。ファイル”python_words.csv”は、wordcloudライブラリを使用して作成されたワードクラウドデータで、Python標準ライブラリのソースコードで頻繁に出現する単語を統計しています。そのデータは次のテーブルに示すように、x、y列はテキストの座標で、テキストの配置は'center'と'bottom'、size列はテキストのサイズで、’32px’はテキストサイズが32ピクセルであることを示します。これは単位付きの文字列で、'px'はピクセル単位であることを示します。angle列はラジアン単位のテキストの方向です。

x	y	word	size	angle	color
239	453	return	32px	0	#84adcd
201	293	header	11px	0	#3975a5
158.5	213	val	6px	0	#ffe873
285	424.5	process	6px	1.5708	#3b77a8
63	348	del	11px	0	#498abd

テキストサイズがピクセル単位であるため、チャートのデータ空間もピクセル単位である場合にのみ、ワードクラウドを正しく表示できます。figure()を作成する際に、パラメータframe_heightとframe_widthを使用して、描画領域の高さと幅をピクセル単位で指定します。また、パラメータx_rangeとy_rangeを使用してX軸とY軸の表示範囲を設定します。表示範囲が描画領域のサイズと一致するため、データ空間の1単位長さは1ピクセルに相当します。

from bokeh.core.property.vectorization import Value

df = pd.read_csv("data/python_words.csv")
source = ColumnDataSource(data=df)

# A 500 x 500 pixel frame showing a 500 x 500 data range.
fig = figure(
    x_range=(0, 500),
    y_range=(0, 500),
    frame_width=500,
    frame_height=500,
    toolbar_location=None,
)
fig.background_fill_color = "black"
fig.grid.visible = False
fig.xaxis.visible = False
fig.yaxis.visible = False

# Position, word, size, angle and colour all come from data source columns.
# The font name is wrapped in Value() and passed as text_font.
fig.text(
    x="x",
    y="y",
    text="word",
    angle="angle",
    text_color="color",
    text_font_size="size",
    text_font=Value("times"),
    text_font_style="normal",
    text_align="center",
    text_baseline="bottom",
    source=source,
)
show(fig)

画像プリミティブ#

Bokehでは、以下の3つのプリミティブを使用して画像を表示することができます。ImageとImageRGBAのdh, dw, image, x, yなどの属性はデータソースに接続することができ、ImageURLのangle, h, url, w, x, yなどの属性もデータソースに接続することができます。属性x, yは画像の左下隅の座標を指定し、dh, dwはデータ空間における画像の高さと幅です。

Image: パレットまたはカラーマッパーを使用して2次元配列を画像に変換し、通常は2次元関数を表示するために使用されます。
ImageRGBA: 形状が(height, width, 4)の配列を4チャンネル（赤、緑、青、α）の画像として表示します。
ImageURL: 画像のデータはURLによって指定されます。

以下では、これらの3つのプリミティブを使用して2次元関数の画像を表示します。まず、NumPy配列のブロードキャスト機能を使用して2次元配列Zを計算します。Imageプリミティブを使用する場合、パラメータpaletteを使用して数字を色に変換するパレットを指定できます。データ量を減らすために、ここではまず画像を32ビットの単精度浮動小数点数配列に変換します。

from bokeh.palettes import viridis

# Open grid: Y is a column vector and X a row vector, so the sum broadcasts to 2-D.
Y, X = np.ogrid[-3:3:100j, -3:3:100j]
Z = np.add(np.sin(X), np.sin(Y))

# Lower-left corner of the sampled region and its extent along each axis.
x_min = X.min()
y_min = Y.min()
x_ptp = np.ptp(X)
y_ptp = np.ptp(Y)
# The array is stored as float32 before being placed in the data source.
source1 = ColumnDataSource(
    data={
        "image": [Z.astype(np.float32)],
        "x": [x_min],
        "y": [y_min],
        "dw": [x_ptp],
        "dh": [y_ptp],
    }
)
fig1 = figure(
    title="Image",
    frame_width=300,
    match_aspect=True,
    aspect_ratio=x_ptp / y_ptp,
)
fig1.image(
    image="image",
    x="x",
    y="y",
    dw="dw",
    dh="dh",
    palette=viridis(256),
    source=source1,
);

ImageRGBAの画像データは、要素タイプが32ビット整数の2次元配列です。以下では、まずmatplotlib.cmを使用して正規化された画像ZnをRGBA画像を表す配列Z_imgに変換します。この配列の形状は(100, 100, 4)です。次に、そのview()を呼び出して、要素タイプがuint32のビュー配列Z_img_showを作成します。その形状は(100, 100)です。

from matplotlib import cm

# Normalise Z, then let the matplotlib colormap turn it into byte-valued colours.
Zn = (Z - Z.min()) / np.ptp(Z)
Z_img = cm.viridis(Zn, bytes=True)
# Reinterpret the bytes as uint32 values and drop the last axis.
Z_img_show = Z_img.view(np.uint32)[..., 0]
source2 = ColumnDataSource(
    data={
        "image": [Z_img_show],
        "x": [x_min],
        "y": [y_min],
        "dw": [x_ptp],
        "dh": [y_ptp],
    }
)
fig2 = figure(
    title="ImageRGBA",
    frame_width=300,
    match_aspect=True,
    aspect_ratio=x_ptp / y_ptp,
)
fig2.image_rgba(image="image", x="x", y="y", dw="dw", dh="dh", source=source2);

ImageとImageRGBAはどちらもPythonの配列データをブラウザに渡して表示する必要がありますが、このデータは圧縮されていないため、大きな配列を表示する場合、転送されるデータ量が大きくなります。この状況を避けるために、Pythonで画像データを埋め込み画像データのURL文字列に変換し、ImageURLを使用してその画像を表示することができます。本書で提供されているhelper.bokeh.encode_image()は、この変換を実行します。これはOpenCVライブラリを使用しています。画像の原点が左上にあるため、ここではまずZ_imgを垂直方向に反転してからJPEG画像に変換します。ImageURLの属性anchorは、画像のアンカーポイント、つまり原点座標と画像の相対位置を指定できます。3つの方法で描画された画像は次のグラフに示されています。

from helper.bokeh import encode_image

# Reverse the row order of the image before handing it to the encoder.
Z_jpeg = encode_image(Z_img[::-1])
source3 = ColumnDataSource(
    data={
        "image": [Z_jpeg],
        "x": [x_min],
        "y": [y_min],
        "dw": [x_ptp],
        "dh": [y_ptp],
    }
)
fig3 = figure(
    title="ImageURL",
    frame_width=300,
    match_aspect=True,
    aspect_ratio=x_ptp / y_ptp,
)
fig3.image_url(
    "image",
    "x",
    "y",
    "dw",
    "dh",
    anchor="bottom_left",
    source=source3,
);

from bokeh.layouts import row

# Place the three figures in one row and display them.
layout = row(fig1, fig2, fig3)
show(layout)

以下では、これら3つのチャートをそれぞれHTMLファイルとして保存した場合のサイズを確認します。Imageの画像データは32ビット浮動小数点数配列であり、ImageRGBAの画像データは32ビット整数配列であるため、それらが出力するHTMLファイルのサイズはほぼ同じです。一方、ImageURLはJPEG圧縮画像を使用するため、出力するHTMLファイルははるかに小さくなります。

from bokeh import embed

# Render each figure to a standalone HTML string and report its length.
charts = {"image": fig1, "image_rgba": fig2, "image_url": fig3}
for name, fig in charts.items():
    html = embed.file_html(fig, resources="cdn")
    size = len(html)
    print(f"{name:10s} = {size:>6d}")

image      =  67495
image_rgba =  59530
image_url  =   8561