{ "cells": [ { "cell_type": "markdown", "id": "01a71042-e4e0-48cc-bb4e-ab233bc60155", "metadata": {}, "source": [ "# Intro to data structures" ] }, { "cell_type": "code", "execution_count": 333, "id": "bac3f2b0-b22a-4f89-a3f3-3f659bc6b49d", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import polars as pl\n", "from helper.jupyter import row" ] }, { "cell_type": "markdown", "id": "3576ca1a-6060-42e3-9f6c-7782a559166d", "metadata": {}, "source": [ "## Series" ] }, { "cell_type": "markdown", "id": "31340fc5-e7c0-4d47-8439-475d47f2eb86", "metadata": {}, "source": [ "A Polars Series has no index, so this notebook uses a two-column DataFrame (an `index` column plus a `value` column) to emulate the behavior of a pandas Series." ] }, { "cell_type": "code", "execution_count": 334, "id": "2ca4467d-021b-4325-8e9d-053625d97423", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"-1.163544
"b"-0.900663
"c"1.657318
"d"-1.240945
"e"0.964103
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════════╡\n", "│ a ┆ -1.163544 │\n", "│ b ┆ -0.900663 │\n", "│ c ┆ 1.657318 │\n", "│ d ┆ -1.240945 │\n", "│ e ┆ 0.964103 │\n", "└───────┴───────────┘" ] }, "execution_count": 334, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = pl.DataFrame(dict(\n", " index=[\"a\", \"b\", \"c\", \"d\", \"e\"],\n", " value=np.random.randn(5)\n", "))\n", "s" ] }, { "cell_type": "code", "execution_count": 335, "id": "66f6f8dd-7bcd-4220-9c60-e7851ffe94f8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5,)
index
str
"a"
"b"
"c"
"d"
"e"
" ], "text/plain": [ "shape: (5,)\n", "Series: 'index' [str]\n", "[\n", "\t\"a\"\n", "\t\"b\"\n", "\t\"c\"\n", "\t\"d\"\n", "\t\"e\"\n", "]" ] }, "execution_count": 335, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s['index']" ] }, { "cell_type": "code", "execution_count": 336, "id": "9ae388d4-93aa-47f0-871e-3c1d4795b521", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5,)
f64
0.083327
-1.077013
0.120624
-0.480749
-1.196976
" ], "text/plain": [ "shape: (5,)\n", "Series: '' [f64]\n", "[\n", "\t0.083327\n", "\t-1.077013\n", "\t0.120624\n", "\t-0.480749\n", "\t-1.196976\n", "]" ] }, "execution_count": 336, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.Series(np.random.randn(5))" ] }, { "cell_type": "code", "execution_count": 337, "id": "77027cce-70cb-4928-9d3d-c36fd6f13ef1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
indexvalue
stri64
"b"1
"a"0
"c"2
" ], "text/plain": [ "shape: (3, 2)\n", "┌───────┬───────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ i64 │\n", "╞═══════╪═══════╡\n", "│ b ┆ 1 │\n", "│ a ┆ 0 │\n", "│ c ┆ 2 │\n", "└───────┴───────┘" ] }, "execution_count": 337, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pd.Series(d)\n", "d = {\"b\": 1, \"a\": 0, \"c\": 2}\n", "pl.DataFrame(list(d.items()), schema=['index', 'value'], orient='row')" ] }, { "cell_type": "code", "execution_count": 338, "id": "944c723f-5087-4ab0-be05-a66da0ed8588", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"5.0
"b"5.0
"c"5.0
"d"5.0
"e"5.0
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬───────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════╡\n", "│ a ┆ 5.0 │\n", "│ b ┆ 5.0 │\n", "│ c ┆ 5.0 │\n", "│ d ┆ 5.0 │\n", "│ e ┆ 5.0 │\n", "└───────┴───────┘" ] }, "execution_count": 338, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.select(\n", " index=pl.Series([\"a\", \"b\", \"c\", \"d\", \"e\"]),\n", " value=5.0\n", ")" ] }, { "cell_type": "markdown", "id": "aabd1673-8419-4232-86f6-966413daf342", "metadata": {}, "source": [ "### Series is ndarray-like " ] }, { "cell_type": "code", "execution_count": 339, "id": "a1c37ace-405d-4405-86f7-d0db0c283c61", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-1.163544306576043" ] }, "execution_count": 339, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s['value'][0]" ] }, { "cell_type": "code", "execution_count": 340, "id": "ae6a4a67-9385-4fa3-8ad4-dd38b1ba8eac", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3,)
value
f64
-1.163544
-0.900663
1.657318
" ], "text/plain": [ "shape: (3,)\n", "Series: 'value' [f64]\n", "[\n", "\t-1.163544\n", "\t-0.900663\n", "\t1.657318\n", "]" ] }, "execution_count": 340, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s['value'][:3]" ] }, { "cell_type": "code", "execution_count": 341, "id": "b61f4afc-99a2-424e-80cb-7b91e6a642b2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 2)
indexvalue
strf64
"c"1.657318
"e"0.964103
" ], "text/plain": [ "shape: (2, 2)\n", "┌───────┬──────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪══════════╡\n", "│ c ┆ 1.657318 │\n", "│ e ┆ 0.964103 │\n", "└───────┴──────────┘" ] }, "execution_count": 341, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s[s > s.median()]\n", "s.filter(pl.col('value') > pl.col('value').median())" ] }, { "cell_type": "code", "execution_count": 342, "id": "d9e0872d-5a3b-43ce-b936-afd052ca8b50", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
indexvalue
strf64
"e"0.964103
"d"-1.240945
"b"-0.900663
" ], "text/plain": [ "shape: (3, 2)\n", "┌───────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════════╡\n", "│ e ┆ 0.964103 │\n", "│ d ┆ -1.240945 │\n", "│ b ┆ -0.900663 │\n", "└───────┴───────────┘" ] }, "execution_count": 342, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s.iloc[[4, 3, 1]]\n", "s[[4, 3, 1]]" ] }, { "cell_type": "code", "execution_count": 343, "id": "3e41e238-f421-476f-b7ff-ff5815c11e21", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"0.312377
"b"0.4063
"c"5.245225
"d"0.289111
"e"2.622434
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬──────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪══════════╡\n", "│ a ┆ 0.312377 │\n", "│ b ┆ 0.4063 │\n", "│ c ┆ 5.245225 │\n", "│ d ┆ 0.289111 │\n", "│ e ┆ 2.622434 │\n", "└───────┴──────────┘" ] }, "execution_count": 343, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s.with_columns(\n", " pl.col('value').exp()\n", ")" ] }, { "cell_type": "code", "execution_count": 344, "id": "ac8a0b86-fe2a-4e4d-97b8-61853df4b52c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Float64" ] }, "execution_count": 344, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s['value'].dtype" ] }, { "cell_type": "code", "execution_count": 345, "id": "1511c955-d697-4da2-bb75-f18181b91762", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-1.16354431, -0.90066273, 1.65731812, -1.24094468, 0.96410308])" ] }, "execution_count": 345, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s.array\n", "s['value'].to_numpy()" ] }, { "cell_type": "markdown", "id": "215944e9-4f6e-4516-9c31-361429b8cbae", "metadata": {}, "source": [ "### Series is dict-like" ] }, { "cell_type": "code", "execution_count": 346, "id": "ebad929e-8eb0-43bc-aa00-da766f1bedec", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-1.163544306576043" ] }, "execution_count": 346, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s[\"a\"]\n", "s.select(pl.col('value').filter(pl.col('index') == 'a')).item()" ] }, { "cell_type": "code", "execution_count": 347, "id": "b1d78aa2-7529-476e-a89a-0f978a61e79d", "metadata": {}, "outputs": [], "source": [ "# s[\"e\"] = 12.0\n", "s = s.with_columns(\n", " pl.when(pl.col('index') == 'e')\n", " .then(12.0)\n", " .otherwise(pl.col('value'))\n", " .name.keep()\n", ")" ] }, { "cell_type": "code", "execution_count": 348, "id": "e19d5441-29de-46d6-8d8a-cb0b335bf5c1", "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "True" ] }, "execution_count": 348, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# \"e\" in s\n", "\"e\" in s['index']" ] }, { "cell_type": "code", "execution_count": 349, "id": "87a5c4a5-6a8d-4d44-a58e-1b18d7471de7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 349, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"f\" in s['index']" ] }, { "cell_type": "markdown", "id": "eb0bfd4b-3784-4776-beb9-fb506c733d1e", "metadata": {}, "source": [ "### Vectorized operations and label alignment with Series" ] }, { "cell_type": "code", "execution_count": 350, "id": "394752c6-714b-44b0-8566-448a8c961394", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"-2.327089
"b"-1.801325
"c"3.314636
"d"-2.481889
"e"24.0
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════════╡\n", "│ a ┆ -2.327089 │\n", "│ b ┆ -1.801325 │\n", "│ c ┆ 3.314636 │\n", "│ d ┆ -2.481889 │\n", "│ e ┆ 24.0 │\n", "└───────┴───────────┘" ] }, "execution_count": 350, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s + s\n", "from helper.polars import align_op\n", "align_op(s, s, op=pl.Expr.add)" ] }, { "cell_type": "code", "execution_count": 351, "id": "31c9f84a-9b0c-420f-a7de-f7618bc220b9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"-2.327089
"b"-1.801325
"c"3.314636
"d"-2.481889
"e"24.0
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════════╡\n", "│ a ┆ -2.327089 │\n", "│ b ┆ -1.801325 │\n", "│ c ┆ 3.314636 │\n", "│ d ┆ -2.481889 │\n", "│ e ┆ 24.0 │\n", "└───────┴───────────┘" ] }, "execution_count": 351, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s * 2\n", "s.select(\n", " 'index',\n", " pl.col('value') * 2\n", ")" ] }, { "cell_type": "code", "execution_count": 352, "id": "ee7ad564-aa27-4c71-8ad3-62f41c292123", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"0.312377
"b"0.4063
"c"5.245225
"d"0.289111
"e"162754.791419
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬───────────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════════════╡\n", "│ a ┆ 0.312377 │\n", "│ b ┆ 0.4063 │\n", "│ c ┆ 5.245225 │\n", "│ d ┆ 0.289111 │\n", "│ e ┆ 162754.791419 │\n", "└───────┴───────────────┘" ] }, "execution_count": 352, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# np.exp(s)\n", "s.select(\n", " \"index\",\n", " pl.col(\"value\").exp()\n", ")" ] }, { "cell_type": "code", "execution_count": 353, "id": "fefbc23f-f33a-444e-9b7d-2a60d166f84f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
strf64
"a"null
"b"-1.801325
"c"3.314636
"d"-2.481889
"e"null
" ], "text/plain": [ "shape: (5, 2)\n", "┌───────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ str ┆ f64 │\n", "╞═══════╪═══════════╡\n", "│ a ┆ null │\n", "│ b ┆ -1.801325 │\n", "│ c ┆ 3.314636 │\n", "│ d ┆ -2.481889 │\n", "│ e ┆ null │\n", "└───────┴───────────┘" ] }, "execution_count": 353, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s.iloc[1:] + s.iloc[:-1]\n", "align_op(\n", " s.slice(1), \n", " s.slice(0, len(s) - 1), \n", " op=pl.Expr.add, \n", " fill_value=None, \n", " how=\"full\")" ] }, { "cell_type": "markdown", "id": "bcb43e01-b857-435f-b761-5c04353c994d", "metadata": {}, "source": [ "### Name attribute" ] }, { "cell_type": "code", "execution_count": 354, "id": "0be666d2-02a6-428a-9811-db799a3c0fae", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5,)
something
f64
1.879844
1.553987
-1.190783
-1.452195
-0.553582
" ], "text/plain": [ "shape: (5,)\n", "Series: 'something' [f64]\n", "[\n", "\t1.879844\n", "\t1.553987\n", "\t-1.190783\n", "\t-1.452195\n", "\t-0.553582\n", "]" ] }, "execution_count": 354, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = pl.Series(\"something\", np.random.randn(5))\n", "s" ] }, { "cell_type": "code", "execution_count": 355, "id": "6f8ecbb0-ca48-4712-a6af-1c89cf0c954e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'something'" ] }, "execution_count": 355, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s.name" ] }, { "cell_type": "code", "execution_count": 356, "id": "29398f27-6770-4d07-ad27-0ae1d4d3d887", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'different'" ] }, "execution_count": 356, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2 = s.rename('different')\n", "s2.name" ] }, { "cell_type": "markdown", "id": "06a08088-3a8c-443b-8852-d3c91d32e9b6", "metadata": {}, "source": [ "## DataFrame" ] }, { "cell_type": "code", "execution_count": 357, "id": "c0893511-65d5-4662-83b5-5fac76c00462", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 3)
indexonetwo
strf64f64
"a"1.01.0
"b"2.02.0
"c"3.03.0
"d"null4.0
" ], "text/plain": [ "shape: (4, 3)\n", "┌───────┬──────┬─────┐\n", "│ index ┆ one ┆ two │\n", "│ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 │\n", "╞═══════╪══════╪═════╡\n", "│ a ┆ 1.0 ┆ 1.0 │\n", "│ b ┆ 2.0 ┆ 2.0 │\n", "│ c ┆ 3.0 ┆ 3.0 │\n", "│ d ┆ null ┆ 4.0 │\n", "└───────┴──────┴─────┘" ] }, "execution_count": 357, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = pl.DataFrame(dict(index=[\"a\", \"b\", \"c\"], one=[1.0, 2.0, 3.0]))\n", "s2 = pl.DataFrame(dict(index=[\"a\", \"b\", \"c\", \"d\"], two=[1.0, 2.0, 3.0, 4.0]))\n", "df = s1.join(s2, on='index', how='full', coalesce=True)\n", "df" ] }, { "cell_type": "code", "execution_count": 358, "id": "37ee0b5f-48b8-4961-8a92-407ed512eaac", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4,)
index
str
"a"
"b"
"c"
"d"
" ], "text/plain": [ "shape: (4,)\n", "Series: 'index' [str]\n", "[\n", "\t\"a\"\n", "\t\"b\"\n", "\t\"c\"\n", "\t\"d\"\n", "]" ] }, "execution_count": 358, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['index']" ] }, { "cell_type": "code", "execution_count": 359, "id": "20b9d7f7-3556-4494-9221-d204ef48fffc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['index', 'one', 'two']" ] }, "execution_count": 359, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 360, "id": "6453b26e-2972-49e9-8673-1c49f5f144b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['one', 'two']" ] }, "execution_count": 360, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.drop('index').columns" ] }, { "cell_type": "markdown", "id": "7260ead1-9795-4d4e-be66-ec509d4b2252", "metadata": {}, "source": [ "### From dict of ndarrays / lists" ] }, { "cell_type": "code", "execution_count": 361, "id": "92a4a508-4d26-4323-8594-ab61ba3ece12", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 2)
onetwo
f64f64
1.04.0
2.03.0
3.02.0
4.01.0
" ], "text/plain": [ "shape: (4, 2)\n", "┌─────┬─────┐\n", "│ one ┆ two │\n", "│ --- ┆ --- │\n", "│ f64 ┆ f64 │\n", "╞═════╪═════╡\n", "│ 1.0 ┆ 4.0 │\n", "│ 2.0 ┆ 3.0 │\n", "│ 3.0 ┆ 2.0 │\n", "│ 4.0 ┆ 1.0 │\n", "└─────┴─────┘" ] }, "execution_count": 361, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = {\"one\": [1.0, 2.0, 3.0, 4.0], \"two\": [4.0, 3.0, 2.0, 1.0]}\n", "pl.DataFrame(d)" ] }, { "cell_type": "code", "execution_count": 362, "id": "aef1a07a-ba62-4a0f-8a9d-4fa4e97ca984", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 3)
indexonetwo
strf64f64
"a"1.04.0
"b"2.03.0
"c"3.02.0
"d"4.01.0
" ], "text/plain": [ "shape: (4, 3)\n", "┌───────┬─────┬─────┐\n", "│ index ┆ one ┆ two │\n", "│ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 │\n", "╞═══════╪═════╪═════╡\n", "│ a ┆ 1.0 ┆ 4.0 │\n", "│ b ┆ 2.0 ┆ 3.0 │\n", "│ c ┆ 3.0 ┆ 2.0 │\n", "│ d ┆ 4.0 ┆ 1.0 │\n", "└───────┴─────┴─────┘" ] }, "execution_count": 362, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.DataFrame(d).insert_column(0, pl.Series('index', [\"a\", \"b\", \"c\", \"d\"]))" ] }, { "cell_type": "markdown", "id": "45d357db-45ad-4c90-b05f-149cc479a3bd", "metadata": {}, "source": [ "### From structured or record array" ] }, { "cell_type": "code", "execution_count": 363, "id": "a0746ca5-4543-4ee3-9c1d-281332d93481", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
ABC
i32f32binary
12.0b"Hello"
23.0b"World"
" ], "text/plain": [ "shape: (2, 3)\n", "┌─────┬─────┬──────────┐\n", "│ A ┆ B ┆ C │\n", "│ --- ┆ --- ┆ --- │\n", "│ i32 ┆ f32 ┆ binary │\n", "╞═════╪═════╪══════════╡\n", "│ 1 ┆ 2.0 ┆ b\"Hello\" │\n", "│ 2 ┆ 3.0 ┆ b\"World\" │\n", "└─────┴─────┴──────────┘" ] }, "execution_count": 363, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# \"S10\" is a fixed-width 10-byte string field; the equivalent \"a10\" alias is deprecated since NumPy 2.0\n", "data = np.zeros((2,), dtype=[(\"A\", \"i4\"), (\"B\", \"f4\"), (\"C\", \"S10\")])\n", "data[:] = [(1, 2.0, \"Hello\"), (2, 3.0, \"World\")]\n", "pl.DataFrame(data)" ] }, { "cell_type": "code", "execution_count": 364, "id": "0111e1c6-cd1d-477e-824a-d6f5705db324", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 4)
indexABC
stri32f32binary
"first"12.0b"Hello"
"second"23.0b"World"
" ], "text/plain": [ "shape: (2, 4)\n", "┌────────┬─────┬─────┬──────────┐\n", "│ index ┆ A ┆ B ┆ C │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ i32 ┆ f32 ┆ binary │\n", "╞════════╪═════╪═════╪══════════╡\n", "│ first ┆ 1 ┆ 2.0 ┆ b\"Hello\" │\n", "│ second ┆ 2 ┆ 3.0 ┆ b\"World\" │\n", "└────────┴─────┴─────┴──────────┘" ] }, "execution_count": 364, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.DataFrame(data).insert_column(0, pl.Series('index', ['first', 'second']))" ] }, { "cell_type": "code", "execution_count": 365, "id": "797697c8-a0f6-4f82-bf27-8b0688246c74", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
CAB
binaryi32f32
b"Hello"12.0
b"World"23.0
" ], "text/plain": [ "shape: (2, 3)\n", "┌──────────┬─────┬─────┐\n", "│ C ┆ A ┆ B │\n", "│ --- ┆ --- ┆ --- │\n", "│ binary ┆ i32 ┆ f32 │\n", "╞══════════╪═════╪═════╡\n", "│ b\"Hello\" ┆ 1 ┆ 2.0 │\n", "│ b\"World\" ┆ 2 ┆ 3.0 │\n", "└──────────┴─────┴─────┘" ] }, "execution_count": 365, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.DataFrame(data).select(\"C\", \"A\", \"B\")" ] }, { "cell_type": "markdown", "id": "ebb1baa1-83fa-4b62-823a-ab6bf92549d7", "metadata": {}, "source": [ "### From a list of dicts" ] }, { "cell_type": "code", "execution_count": 366, "id": "5399a765-4929-4b4e-be33-af1a04459bbd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
abc
i64i64i64
12null
51020
" ], "text/plain": [ "shape: (2, 3)\n", "┌─────┬─────┬──────┐\n", "│ a ┆ b ┆ c │\n", "│ --- ┆ --- ┆ --- │\n", "│ i64 ┆ i64 ┆ i64 │\n", "╞═════╪═════╪══════╡\n", "│ 1 ┆ 2 ┆ null │\n", "│ 5 ┆ 10 ┆ 20 │\n", "└─────┴─────┴──────┘" ] }, "execution_count": 366, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2 = [{\"a\": 1, \"b\": 2}, {\"a\": 5, \"b\": 10, \"c\": 20}]\n", "pl.DataFrame(data2)" ] }, { "cell_type": "code", "execution_count": 367, "id": "1c37287f-f1ad-419b-bccd-3b8cdd889b22", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 4)
indexabc
stri64i64i64
"first"12null
"second"51020
" ], "text/plain": [ "shape: (2, 4)\n", "┌────────┬─────┬─────┬──────┐\n", "│ index ┆ a ┆ b ┆ c │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ i64 ┆ i64 ┆ i64 │\n", "╞════════╪═════╪═════╪══════╡\n", "│ first ┆ 1 ┆ 2 ┆ null │\n", "│ second ┆ 5 ┆ 10 ┆ 20 │\n", "└────────┴─────┴─────┴──────┘" ] }, "execution_count": 367, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.DataFrame(data2).insert_column(0, pl.Series('index', ['first', 'second']))" ] }, { "cell_type": "code", "execution_count": 368, "id": "770967f6-b780-45dc-8f7d-c16d26f3115f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 2)
ab
i64i64
12
510
" ], "text/plain": [ "shape: (2, 2)\n", "┌─────┬─────┐\n", "│ a ┆ b │\n", "│ --- ┆ --- │\n", "│ i64 ┆ i64 │\n", "╞═════╪═════╡\n", "│ 1 ┆ 2 │\n", "│ 5 ┆ 10 │\n", "└─────┴─────┘" ] }, "execution_count": 368, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.DataFrame(data2).select('a', 'b')" ] }, { "cell_type": "markdown", "id": "5b9e0ce8-8a38-48ee-978a-361b8b34c7c1", "metadata": {}, "source": [ "### From a dict of tuples" ] }, { "cell_type": "code", "execution_count": 369, "id": "727c9f44-6027-44b7-9849-476741515a0d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 7)
index0index1a-ba-aa-cb-ab-b
strstri64i64i64i64i64
"A""B"145810
"A""C"2367null
"A""D"nullnullnullnull9
" ], "text/plain": [ "shape: (3, 7)\n", "┌────────┬────────┬──────┬──────┬──────┬──────┬──────┐\n", "│ index0 ┆ index1 ┆ a-b ┆ a-a ┆ a-c ┆ b-a ┆ b-b │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", "╞════════╪════════╪══════╪══════╪══════╪══════╪══════╡\n", "│ A ┆ B ┆ 1 ┆ 4 ┆ 5 ┆ 8 ┆ 10 │\n", "│ A ┆ C ┆ 2 ┆ 3 ┆ 6 ┆ 7 ┆ null │\n", "│ A ┆ D ┆ null ┆ null ┆ null ┆ null ┆ 9 │\n", "└────────┴────────┴──────┴──────┴──────┴──────┴──────┘" ] }, "execution_count": 369, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = {\n", " (\"a\", \"b\"): {(\"A\", \"B\"): 1, (\"A\", \"C\"): 2},\n", " (\"a\", \"a\"): {(\"A\", \"C\"): 3, (\"A\", \"B\"): 4},\n", " (\"a\", \"c\"): {(\"A\", \"B\"): 5, (\"A\", \"C\"): 6},\n", " (\"b\", \"a\"): {(\"A\", \"C\"): 7, (\"A\", \"B\"): 8},\n", " (\"b\", \"b\"): {(\"A\", \"D\"): 9, (\"A\", \"B\"): 10},\n", "}\n", "\n", "dfs = []\n", "for key, value in data.items():\n", " rows = []\n", " for key2, value2 in value.items():\n", " data_row = list(key2) + [value2]\n", " rows.append(data_row)\n", " dfs.append(\n", " pl.DataFrame(rows, orient='row', schema=['index0', 'index1', '-'.join(key)])\n", " )\n", " \n", "pl.concat(dfs, how=\"align\") " ] }, { "cell_type": "markdown", "id": "e719e7c0-d54d-4373-9f63-79263852375f", "metadata": {}, "source": [ "### From a Series" ] }, { "cell_type": "code", "execution_count": 370, "id": "3e24739c-f6ad-4053-b918-5ea374c2fe2a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
indexser
stri64
"a"0
"b"1
"c"2
" ], "text/plain": [ "shape: (3, 2)\n", "┌───────┬─────┐\n", "│ index ┆ ser │\n", "│ --- ┆ --- │\n", "│ str ┆ i64 │\n", "╞═══════╪═════╡\n", "│ a ┆ 0 │\n", "│ b ┆ 1 │\n", "│ c ┆ 2 │\n", "└───────┴─────┘" ] }, "execution_count": 370, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser = pl.Series('ser', range(3))\n", "pl.DataFrame(ser).insert_column(0, pl.Series('index', list(\"abc\")))" ] }, { "cell_type": "markdown", "id": "46e919a5-8360-4710-b492-86eb49ee24c4", "metadata": {}, "source": [ "### From a list of namedtuples" ] }, { "cell_type": "code", "execution_count": 371, "id": "b41c6f70-feff-4573-b1b5-4e1d150d08e5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
xy
i64i64
00
03
23
" ], "text/plain": [ "shape: (3, 2)\n", "┌─────┬─────┐\n", "│ x ┆ y │\n", "│ --- ┆ --- │\n", "│ i64 ┆ i64 │\n", "╞═════╪═════╡\n", "│ 0 ┆ 0 │\n", "│ 0 ┆ 3 │\n", "│ 2 ┆ 3 │\n", "└─────┴─────┘" ] }, "execution_count": 371, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from collections import namedtuple\n", "Point = namedtuple(\"Point\", \"x y\")\n", "pl.DataFrame([Point(0, 0), Point(0, 3), (2, 3)])" ] }, { "cell_type": "code", "execution_count": 372, "id": "4201b810-f48a-4bd5-a60a-77797c9c34b0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 3)
xyz
i64i64i64
000
035
23null
" ], "text/plain": [ "shape: (3, 3)\n", "┌─────┬─────┬──────┐\n", "│ x ┆ y ┆ z │\n", "│ --- ┆ --- ┆ --- │\n", "│ i64 ┆ i64 ┆ i64 │\n", "╞═════╪═════╪══════╡\n", "│ 0 ┆ 0 ┆ 0 │\n", "│ 0 ┆ 3 ┆ 5 │\n", "│ 2 ┆ 3 ┆ null │\n", "└─────┴─────┴──────┘" ] }, "execution_count": 372, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Point3D = namedtuple(\"Point3D\", \"x y z\")\n", "data = [Point3D(0, 0, 0), Point3D(0, 3, 5), Point(2, 3)]\n", "pl.DataFrame([p._asdict() for p in data])" ] }, { "cell_type": "markdown", "id": "c41a3dc8-5043-4507-87c8-4f49b06e8686", "metadata": {}, "source": [ "### From a list of dataclasses" ] }, { "cell_type": "code", "execution_count": 373, "id": "6dbfc181-384f-4896-940e-a8975a9fbb4b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
xy
i64i64
00
03
23
" ], "text/plain": [ "shape: (3, 2)\n", "┌─────┬─────┐\n", "│ x ┆ y │\n", "│ --- ┆ --- │\n", "│ i64 ┆ i64 │\n", "╞═════╪═════╡\n", "│ 0 ┆ 0 │\n", "│ 0 ┆ 3 │\n", "│ 2 ┆ 3 │\n", "└─────┴─────┘" ] }, "execution_count": 373, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from dataclasses import make_dataclass\n", "Point = make_dataclass(\"Point\", [(\"x\", int), (\"y\", int)])\n", "pl.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])" ] }, { "cell_type": "markdown", "id": "947db500-e0e4-495e-9d3c-7f4c8a9f1c93", "metadata": {}, "source": [ "### Alternate constructors" ] }, { "cell_type": "markdown", "id": "0e964439-0ec3-439e-a3ef-7100ff1f26a0", "metadata": {}, "source": [ "#### DataFrame.from_dict" ] }, { "cell_type": "code", "execution_count": 374, "id": "bf32ca20-a8f5-4582-ab8f-8cab709c3fe7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
AB
i64i64
14
25
36
" ], "text/plain": [ "shape: (3, 2)\n", "┌─────┬─────┐\n", "│ A ┆ B │\n", "│ --- ┆ --- │\n", "│ i64 ┆ i64 │\n", "╞═════╪═════╡\n", "│ 1 ┆ 4 │\n", "│ 2 ┆ 5 │\n", "│ 3 ┆ 6 │\n", "└─────┴─────┘" ] }, "execution_count": 374, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.DataFrame(dict([(\"A\", [1, 2, 3]), (\"B\", [4, 5, 6])]))" ] }, { "cell_type": "code", "execution_count": 375, "id": "a7b8f7b5-5421-43d5-908a-f6057321425a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 4)
indexonetwothree
stri64i64i64
"A"123
"B"456
" ], "text/plain": [ "shape: (2, 4)\n", "┌───────┬─────┬─────┬───────┐\n", "│ index ┆ one ┆ two ┆ three │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ i64 ┆ i64 ┆ i64 │\n", "╞═══════╪═════╪═════╪═══════╡\n", "│ A ┆ 1 ┆ 2 ┆ 3 │\n", "│ B ┆ 4 ┆ 5 ┆ 6 │\n", "└───────┴─────┴─────┴───────┘" ] }, "execution_count": 375, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = dict([(\"A\", [1, 2, 3]), (\"B\", [4, 5, 6])])\n", "pl.DataFrame(\n", " list(data.values()), schema=['one', 'two', 'three'], orient='row'\n", ").insert_column(0, pl.Series('index', data.keys()))" ] }, { "cell_type": "markdown", "id": "284afd22-b8ca-4ed0-9d2c-c4e2f61cde3c", "metadata": {}, "source": [ "#### DataFrame.from_records" ] }, { "cell_type": "code", "execution_count": 376, "id": "1a02d357-773e-402a-8ac0-50050b9b2aab", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
ABC
i32f32binary
12.0b"Hello"
23.0b"World"
" ], "text/plain": [ "shape: (2, 3)\n", "┌─────┬─────┬──────────┐\n", "│ A ┆ B ┆ C │\n", "│ --- ┆ --- ┆ --- │\n", "│ i32 ┆ f32 ┆ binary │\n", "╞═════╪═════╪══════════╡\n", "│ 1 ┆ 2.0 ┆ b\"Hello\" │\n", "│ 2 ┆ 3.0 ┆ b\"World\" │\n", "└─────┴─────┴──────────┘" ] }, "execution_count": 376, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# \"S10\" is a fixed-width 10-byte string field; the equivalent \"a10\" alias is deprecated since NumPy 2.0\n", "data = np.zeros((2,), dtype=[(\"A\", \"i4\"), (\"B\", \"f4\"), (\"C\", \"S10\")])\n", "data[:] = [(1, 2.0, \"Hello\"), (2, 3.0, \"World\")]\n", "pl.DataFrame(data)" ] }, { "cell_type": "markdown", "id": "20222785-9460-42f6-ade9-a372555885ef", "metadata": {}, "source": [ "### Column selection, addition, deletion" ] }, { "cell_type": "code", "execution_count": 377, "id": "87831144-1c70-48d7-9888-dacc495bdb11", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4,)
one
f64
1.0
2.0
3.0
null
" ], "text/plain": [ "shape: (4,)\n", "Series: 'one' [f64]\n", "[\n", "\t1.0\n", "\t2.0\n", "\t3.0\n", "\tnull\n", "]" ] }, "execution_count": 377, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['one']" ] }, { "cell_type": "code", "execution_count": 378, "id": "7b5089b5-d007-4dca-b924-78b80f25562a", "metadata": {}, "outputs": [], "source": [ "df = df.with_columns(\n", " three=pl.col('one') * pl.col('two'),\n", " flag=pl.col('one') > 2\n", ")" ] }, { "cell_type": "code", "execution_count": 379, "id": "e1832ed6-f46a-4782-be90-50887ded2f1c", "metadata": {}, "outputs": [], "source": [ "# del df[\"two\"]\n", "df = df.drop('two')" ] }, { "cell_type": "code", "execution_count": 380, "id": "b2520e50-49bd-400c-8a00-9e934132043e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 3)
indexoneflag
strf64bool
"a"1.0false
"b"2.0false
"c"3.0true
"d"nullnull
" ], "text/plain": [ "shape: (4, 3)\n", "┌───────┬──────┬───────┐\n", "│ index ┆ one ┆ flag │\n", "│ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ bool │\n", "╞═══════╪══════╪═══════╡\n", "│ a ┆ 1.0 ┆ false │\n", "│ b ┆ 2.0 ┆ false │\n", "│ c ┆ 3.0 ┆ true │\n", "│ d ┆ null ┆ null │\n", "└───────┴──────┴───────┘" ] }, "execution_count": 380, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#three = df.pop(\"three\")\n", "three = df['three']\n", "df = df.drop('three')\n", "df" ] }, { "cell_type": "code", "execution_count": 381, "id": "dbc0e570-7235-42e5-92fc-3f09761bfc25", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 4)
indexoneflagfoo
strf64boolstr
"a"1.0false"bar"
"b"2.0false"bar"
"c"3.0true"bar"
"d"nullnull"bar"
" ], "text/plain": [ "shape: (4, 4)\n", "┌───────┬──────┬───────┬─────┐\n", "│ index ┆ one ┆ flag ┆ foo │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ bool ┆ str │\n", "╞═══════╪══════╪═══════╪═════╡\n", "│ a ┆ 1.0 ┆ false ┆ bar │\n", "│ b ┆ 2.0 ┆ false ┆ bar │\n", "│ c ┆ 3.0 ┆ true ┆ bar │\n", "│ d ┆ null ┆ null ┆ bar │\n", "└───────┴──────┴───────┴─────┘" ] }, "execution_count": 381, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.with_columns(foo=pl.lit('bar'))\n", "df" ] }, { "cell_type": "code", "execution_count": 382, "id": "62f699d2-1c7a-41fb-b0ff-09b76e66c51c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 5)
indexoneflagfooone_trunc
strf64boolstrf64
"a"1.0false"bar"1.0
"b"2.0false"bar"2.0
"c"3.0true"bar"null
"d"nullnull"bar"null
" ], "text/plain": [ "shape: (4, 5)\n", "┌───────┬──────┬───────┬─────┬───────────┐\n", "│ index ┆ one ┆ flag ┆ foo ┆ one_trunc │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ bool ┆ str ┆ f64 │\n", "╞═══════╪══════╪═══════╪═════╪═══════════╡\n", "│ a ┆ 1.0 ┆ false ┆ bar ┆ 1.0 │\n", "│ b ┆ 2.0 ┆ false ┆ bar ┆ 2.0 │\n", "│ c ┆ 3.0 ┆ true ┆ bar ┆ null │\n", "│ d ┆ null ┆ null ┆ bar ┆ null │\n", "└───────┴──────┴───────┴─────┴───────────┘" ] }, "execution_count": 382, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df[\"one_trunc\"] = df[\"one\"][:2]\n", "df = df.with_columns(\n", " one_trunc=pl.col('one').slice(0, 2).append(pl.repeat(None, pl.len() - 2))\n", ")\n", "df" ] }, { "cell_type": "code", "execution_count": 383, "id": "642cd941-0eeb-439c-ae66-aeb5a1ee3f6d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 6)
indexbaroneflagfooone_trunc
strf64f64boolstrf64
"a"1.01.0false"bar"1.0
"b"2.02.0false"bar"2.0
"c"3.03.0true"bar"null
"d"nullnullnull"bar"null
" ], "text/plain": [ "shape: (4, 6)\n", "┌───────┬──────┬──────┬───────┬─────┬───────────┐\n", "│ index ┆ bar ┆ one ┆ flag ┆ foo ┆ one_trunc │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 ┆ bool ┆ str ┆ f64 │\n", "╞═══════╪══════╪══════╪═══════╪═════╪═══════════╡\n", "│ a ┆ 1.0 ┆ 1.0 ┆ false ┆ bar ┆ 1.0 │\n", "│ b ┆ 2.0 ┆ 2.0 ┆ false ┆ bar ┆ 2.0 │\n", "│ c ┆ 3.0 ┆ 3.0 ┆ true ┆ bar ┆ null │\n", "│ d ┆ null ┆ null ┆ null ┆ bar ┆ null │\n", "└───────┴──────┴──────┴───────┴─────┴───────────┘" ] }, "execution_count": 383, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.insert_column(1, df[\"one\"].rename('bar'))\n", "df" ] }, { "cell_type": "markdown", "id": "0eb600bf-9d23-461d-9ff4-4a939fc9b753", "metadata": {}, "source": [ "### Assigning new columns in method chains" ] }, { "cell_type": "code", "execution_count": 384, "id": "2daee1ec-23fd-4e8c-a52a-3941f143c0ce", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 5)
SepalLengthSepalWidthPetalLengthPetalWidthName
f64f64f64f64str
5.13.51.40.2"Iris-setosa"
4.93.01.40.2"Iris-setosa"
4.73.21.30.2"Iris-setosa"
4.63.11.50.2"Iris-setosa"
5.03.61.40.2"Iris-setosa"
" ], "text/plain": [ "shape: (5, 5)\n", "┌─────────────┬────────────┬─────────────┬────────────┬─────────────┐\n", "│ SepalLength ┆ SepalWidth ┆ PetalLength ┆ PetalWidth ┆ Name │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n", "╞═════════════╪════════════╪═════════════╪════════════╪═════════════╡\n", "│ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n", "│ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n", "│ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ Iris-setosa │\n", "│ 4.6 ┆ 3.1 ┆ 1.5 ┆ 0.2 ┆ Iris-setosa │\n", "│ 5.0 ┆ 3.6 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n", "└─────────────┴────────────┴─────────────┴────────────┴─────────────┘" ] }, "execution_count": 384, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris = pl.read_csv('data/iris.data')\n", "iris.head()" ] }, { "cell_type": "code", "execution_count": 385, "id": "3f1fb844-8c32-40f3-9082-e84a43d887ad", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 6)
SepalLengthSepalWidthPetalLengthPetalWidthNamesepal_ratio
f64f64f64f64strf64
5.13.51.40.2"Iris-setosa"0.686275
4.93.01.40.2"Iris-setosa"0.612245
4.73.21.30.2"Iris-setosa"0.680851
4.63.11.50.2"Iris-setosa"0.673913
5.03.61.40.2"Iris-setosa"0.72
" ], "text/plain": [ "shape: (5, 6)\n", "┌─────────────┬────────────┬─────────────┬────────────┬─────────────┬─────────────┐\n", "│ SepalLength ┆ SepalWidth ┆ PetalLength ┆ PetalWidth ┆ Name ┆ sepal_ratio │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ f64 │\n", "╞═════════════╪════════════╪═════════════╪════════════╪═════════════╪═════════════╡\n", "│ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa ┆ 0.686275 │\n", "│ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa ┆ 0.612245 │\n", "│ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ Iris-setosa ┆ 0.680851 │\n", "│ 4.6 ┆ 3.1 ┆ 1.5 ┆ 0.2 ┆ Iris-setosa ┆ 0.673913 │\n", "│ 5.0 ┆ 3.6 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa ┆ 0.72 │\n", "└─────────────┴────────────┴─────────────┴────────────┴─────────────┴─────────────┘" ] }, "execution_count": 385, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.with_columns(\n", " sepal_ratio=pl.col('SepalWidth') / pl.col('SepalLength')\n", ").head()" ] }, { "cell_type": "code", "execution_count": 386, "id": "10a436d9-33ab-4d4a-8dda-7eb2b9ef2f24", "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " const force = true;\n", " const py_version = '3.6.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", " const reloading = false;\n", " const Bokeh = root.Bokeh;\n", "\n", " // Set a timeout for this load but only if we are not already initializing\n", " if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) {\n", " if (callback != null)\n", " callback();\n", " });\n", " } finally {\n", " delete root._bokeh_onload_callbacks;\n", " }\n", " console.debug(\"Bokeh: all 
callbacks have finished\");\n", " }\n", "\n", " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", " if (css_urls == null) css_urls = [];\n", " if (js_urls == null) js_urls = [];\n", " if (js_modules == null) js_modules = [];\n", " if (js_exports == null) js_exports = {};\n", "\n", " root._bokeh_onload_callbacks.push(callback);\n", "\n", " if (root._bokeh_is_loading > 0) {\n", " // Don't load bokeh if it is still initializing\n", " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", " // There is nothing to load\n", " run_callbacks();\n", " return null;\n", " }\n", "\n", " function on_load() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", " run_callbacks()\n", " }\n", " }\n", " window._bokeh_on_load = on_load\n", "\n", " function on_error(e) {\n", " const src_el = e.srcElement\n", " console.error(\"failed to load \" + (src_el.href || src_el.src));\n", " }\n", "\n", " const skip = [];\n", " if (window.requirejs) {\n", " window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n", " root._bokeh_is_loading = css_urls.length + 0;\n", " } else {\n", " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", " }\n", "\n", " const existing_stylesheets = []\n", " const links = document.getElementsByTagName('link')\n", " for (let i = 0; i < links.length; i++) {\n", " const link = links[i]\n", " if (link.href != null) {\n", " existing_stylesheets.push(link.href)\n", " }\n", " }\n", " for (let i = 0; i < css_urls.length; i++) {\n", " const url = css_urls[i];\n", " const escaped = encodeURI(url)\n", " if (existing_stylesheets.indexOf(escaped) !== -1) {\n", " on_load()\n", " continue;\n", " }\n", " const element = 
document.createElement(\"link\");\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.rel = \"stylesheet\";\n", " element.type = \"text/css\";\n", " element.href = url;\n", " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", " document.body.appendChild(element);\n", " } var existing_scripts = []\n", " const scripts = document.getElementsByTagName('script')\n", " for (let i = 0; i < scripts.length; i++) {\n", " var script = scripts[i]\n", " if (script.src != null) {\n", " existing_scripts.push(script.src)\n", " }\n", " }\n", " for (let i = 0; i < js_urls.length; i++) {\n", " const url = js_urls[i];\n", " const escaped = encodeURI(url)\n", " if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n", " if (!window.requirejs) {\n", " on_load();\n", " }\n", " continue;\n", " }\n", " const element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " for (let i = 0; i < js_modules.length; i++) {\n", " const url = js_modules[i];\n", " const escaped = encodeURI(url)\n", " if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n", " if (!window.requirejs) {\n", " on_load();\n", " }\n", " continue;\n", " }\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " element.type = \"module\";\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " for (const name in js_exports) {\n", " const url = js_exports[name];\n", " const escaped = encodeURI(url)\n", " if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n", " if 
(!window.requirejs) {\n", " on_load();\n", " }\n", " continue;\n", " }\n", " var element = document.createElement('script');\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.type = \"module\";\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " element.textContent = `\n", " import ${name} from \"${url}\"\n", " window.${name} = ${name}\n", " window._bokeh_on_load()\n", " `\n", " document.head.appendChild(element);\n", " }\n", " if (!js_urls.length && !js_modules.length) {\n", " on_load()\n", " }\n", " };\n", "\n", " function inject_raw_css(css) {\n", " const element = document.createElement(\"style\");\n", " element.appendChild(document.createTextNode(css));\n", " document.body.appendChild(element);\n", " }\n", "\n", " const js_urls = [\"https://cdn.holoviz.org/panel/1.5.4/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.6.2.min.js\", \"https://cdn.holoviz.org/panel/1.5.4/dist/panel.min.js\"];\n", " const js_modules = [];\n", " const js_exports = {};\n", " const css_urls = [];\n", " const inline_js = [ function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", "function(Bokeh) {} // ensure no trailing comma for IE\n", " ];\n", "\n", " function run_inline_js() {\n", " if ((root.Bokeh !== undefined) || (force === true)) {\n", " for (let i = 0; i < inline_js.length; i++) {\n", " try {\n", " inline_js[i].call(root, root.Bokeh);\n", " } catch(e) {\n", " if (!reloading) {\n", " throw e;\n", " }\n", " }\n", " }\n", " // Cache old bokeh versions\n", " if (Bokeh != undefined && !reloading) {\n", " var NewBokeh = root.Bokeh;\n", " if (Bokeh.versions === undefined) {\n", " Bokeh.versions = new Map();\n", " }\n", " if (NewBokeh.version 
!== Bokeh.version) {\n", " Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", " }\n", " root.Bokeh = Bokeh;\n", " }\n", " } else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " }\n", " root._bokeh_is_initializing = false\n", " }\n", "\n", " function load_or_wait() {\n", " // Implement a backoff loop that tries to ensure we do not load multiple\n", " // versions of Bokeh and its dependencies at the same time.\n", " // In recent versions we use the root._bokeh_is_initializing flag\n", " // to determine whether there is an ongoing attempt to initialize\n", " // bokeh, however for backward compatibility we also try to ensure\n", " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", " // before older versions are fully initialized.\n", " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", " // If the timeout and bokeh was not successfully loaded we reset\n", " // everything and try loading again\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_is_initializing = false;\n", " root._bokeh_onload_callbacks = undefined;\n", " root._bokeh_is_loading = 0\n", " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", " load_or_wait();\n", " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", " setTimeout(load_or_wait, 100);\n", " } else {\n", " root._bokeh_is_initializing = true\n", " root._bokeh_onload_callbacks = []\n", " const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n", " if (!reloading && !bokeh_loaded) {\n", " if (root.Bokeh) {\n", " root.Bokeh = undefined;\n", " 
}\n", " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " }\n", " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", " }\n", " // Give older versions of the autoload script a head-start to ensure\n", " // they initialize before we start loading newer version.\n", " setTimeout(load_or_wait, 100)\n", "}(window));" ], "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n const py_version = '3.6.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n const reloading = false;\n const Bokeh = root.Bokeh;\n\n // Set a timeout for this load but only if we are not already initializing\n if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n // Don't load bokeh if it is still initializing\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n // There is nothing to load\n run_callbacks();\n return null;\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if 
(root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error(e) {\n const src_el = e.srcElement\n console.error(\"failed to load \" + (src_el.href || src_el.src));\n }\n\n const skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n const existing_stylesheets = []\n const links = document.getElementsByTagName('link')\n for (let i = 0; i < links.length; i++) {\n const link = links[i]\n if (link.href != null) {\n existing_stylesheets.push(link.href)\n }\n }\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const escaped = encodeURI(url)\n if (existing_stylesheets.indexOf(escaped) !== -1) {\n on_load()\n continue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n const scripts = document.getElementsByTagName('script')\n for (let i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n existing_scripts.push(script.src)\n }\n }\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for 
BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (let i = 0; i < js_modules.length; i++) {\n const url = js_modules[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n const url = js_exports[name];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.holoviz.org/panel/1.5.4/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.6.2.min.js\", \"https://cdn.holoviz.org/panel/1.5.4/dist/panel.min.js\"];\n const js_modules = [];\n const js_exports = {};\n const css_urls = [];\n const 
inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (let i = 0; i < inline_js.length; i++) {\n try {\n inline_js[i].call(root, root.Bokeh);\n } catch(e) {\n if (!reloading) {\n throw e;\n }\n }\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n var NewBokeh = root.Bokeh;\n if (Bokeh.versions === undefined) {\n Bokeh.versions = new Map();\n }\n if (NewBokeh.version !== Bokeh.version) {\n Bokeh.versions.set(NewBokeh.version, NewBokeh)\n }\n root.Bokeh = Bokeh;\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n // If the timeout and bokeh was not successfully loaded we reset\n // everything and try loading again\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n root._bokeh_is_loading = 0\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && 
root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n if (root.Bokeh) {\n root.Bokeh = undefined;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", "}\n", "\n", "\n", " function JupyterCommManager() {\n", " }\n", "\n", " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", " comm_manager.register_target(comm_id, function(comm) {\n", " comm.on_msg(msg_handler);\n", " });\n", " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", " comm.onMsg = msg_handler;\n", " });\n", " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", " var messages = 
comm.messages[Symbol.asyncIterator]();\n", " function processIteratorResult(result) {\n", " var message = result.value;\n", " console.log(message)\n", " var content = {data: message.data, comm_id};\n", " var buffers = []\n", " for (var buffer of message.buffers || []) {\n", " buffers.push(new DataView(buffer))\n", " }\n", " var metadata = message.metadata || {};\n", " var msg = {content, buffers, metadata}\n", " msg_handler(msg);\n", " return messages.next().then(processIteratorResult);\n", " }\n", " return messages.next().then(processIteratorResult);\n", " })\n", " }\n", " }\n", "\n", " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", " if (comm_id in window.PyViz.comms) {\n", " return window.PyViz.comms[comm_id];\n", " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", " if (msg_handler) {\n", " comm.on_msg(msg_handler);\n", " }\n", " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", " comm.open();\n", " if (msg_handler) {\n", " comm.onMsg = msg_handler;\n", " }\n", " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", " comm_promise.then((comm) => {\n", " window.PyViz.comms[comm_id] = comm;\n", " if (msg_handler) {\n", " var messages = comm.messages[Symbol.asyncIterator]();\n", " function processIteratorResult(result) {\n", " var message = result.value;\n", " var content = {data: message.data};\n", " var metadata = message.metadata || {comm_id};\n", " var msg = {content, metadata}\n", " msg_handler(msg);\n", " return messages.next().then(processIteratorResult);\n", " }\n", " return 
messages.next().then(processIteratorResult);\n", " }\n", " }) \n", " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n", " return comm_promise.then((comm) => {\n", " comm.send(data, metadata, buffers, disposeOnDone);\n", " });\n", " };\n", " var comm = {\n", " send: sendClosure\n", " };\n", " }\n", " window.PyViz.comms[comm_id] = comm;\n", " return comm;\n", " }\n", " window.PyViz.comm_manager = new JupyterCommManager();\n", " \n", "\n", "\n", "var JS_MIME_TYPE = 'application/javascript';\n", "var HTML_MIME_TYPE = 'text/html';\n", "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n", "var CLASS_NAME = 'output';\n", "\n", "/**\n", " * Render data to the DOM node\n", " */\n", "function render(props, node) {\n", " var div = document.createElement(\"div\");\n", " var script = document.createElement(\"script\");\n", " node.appendChild(div);\n", " node.appendChild(script);\n", "}\n", "\n", "/**\n", " * Handle when a new output is added\n", " */\n", "function handle_add_output(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", " if (id !== undefined) {\n", " var nchildren = toinsert.length;\n", " var html_node = toinsert[nchildren-1].children[0];\n", " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", " var scripts = [];\n", " var nodelist = html_node.querySelectorAll(\"script\");\n", " for (var i in nodelist) {\n", " if (nodelist.hasOwnProperty(i)) {\n", " scripts.push(nodelist[i])\n", " }\n", " }\n", "\n", " scripts.forEach( function (oldScript) {\n", " var newScript = document.createElement(\"script\");\n", " var attrs = [];\n", " var nodemap = oldScript.attributes;\n", " for (var j in nodemap) {\n", " if (nodemap.hasOwnProperty(j)) {\n", " 
attrs.push(nodemap[j])\n", " }\n", " }\n", " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", " oldScript.parentNode.replaceChild(newScript, oldScript);\n", " });\n", " if (JS_MIME_TYPE in output.data) {\n", " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", " }\n", " output_area._hv_plot_id = id;\n", " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", " window.PyViz.plot_index[id] = Bokeh.index[id];\n", " } else {\n", " window.PyViz.plot_index[id] = null;\n", " }\n", " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", "}\n", "\n", "/**\n", " * Handle when an output is cleared or removed\n", " */\n", "function handle_clear_output(event, handle) {\n", " var id = handle.cell.output_area._hv_plot_id;\n", " var server_id = handle.cell.output_area._bokeh_server_id;\n", " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", " if (server_id !== null) {\n", " comm.send({event_type: 'server_delete', 'id': server_id});\n", " return;\n", " } else if (comm !== null) {\n", " comm.send({event_type: 'delete', 'id': id});\n", " }\n", " delete PyViz.plot_index[id];\n", " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", " var doc = 
window.Bokeh.index[id].model.document\n", " doc.clear();\n", " const i = window.Bokeh.documents.indexOf(doc);\n", " if (i > -1) {\n", " window.Bokeh.documents.splice(i, 1);\n", " }\n", " }\n", "}\n", "\n", "/**\n", " * Handle kernel restart event\n", " */\n", "function handle_kernel_cleanup(event, handle) {\n", " delete PyViz.comms[\"hv-extension-comm\"];\n", " window.PyViz.plot_index = {}\n", "}\n", "\n", "/**\n", " * Handle update_display_data messages\n", " */\n", "function handle_update_output(event, handle) {\n", " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", " handle_add_output(event, handle)\n", "}\n", "\n", "function register_renderer(events, OutputArea) {\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[0]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " events.on('output_added.OutputArea', handle_add_output);\n", " events.on('output_updated.OutputArea', handle_update_output);\n", " events.on('clear_output.CodeCell', handle_clear_output);\n", " events.on('delete.Cell', handle_clear_output);\n", " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", "\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " safe: true,\n", " index: 0\n", " });\n", "}\n", "\n", "if (window.Jupyter !== undefined) {\n", " try {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " } catch(err) {\n", " }\n", "}\n" ], "application/vnd.holoviews_load.v0+json": "\nif 
((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && 
(window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = 
toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () 
{});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" }, 
"metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ] }, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "aa2ce072-9ef1-4947-a226-77b5bf9f211e" } }, "output_type": "display_data" } ], "source": [ "import hvplot.polars" ] }, { "cell_type": "code", "execution_count": 387, "id": "3404064a-8ed3-47ff-9042-7a6d95b75378", "metadata": {}, "outputs": [ { "data": {}, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ], "text/plain": [ ":Scatter [SepalRatio] (PetalRatio)" ] }, "execution_count": 387, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "2b09b27e-d22d-460b-a686-160c9c624267" } }, "output_type": "execute_result" } ], "source": [ "(\n", "iris\n", ".filter(pl.col('SepalLength') > 5)\n", ".with_columns(\n", " SepalRatio=pl.col('SepalWidth') / pl.col('SepalLength'),\n", " PetalRatio=pl.col('PetalWidth') / pl.col('PetalLength')\n", ")\n", ".hvplot.scatter(x='SepalRatio', y='PetalRatio')\n", ")" ] }, { "cell_type": "code", "execution_count": 388, "id": "81cefd81-fa8c-431d-a2a6-a1adbf6bbd2b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 4)
ABCD
i64i64i64i64
1456
2579
36912
" ], "text/plain": [ "shape: (3, 4)\n", "┌─────┬─────┬─────┬─────┐\n", "│ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ i64 ┆ i64 ┆ i64 │\n", "╞═════╪═════╪═════╪═════╡\n", "│ 1 ┆ 4 ┆ 5 ┆ 6 │\n", "│ 2 ┆ 5 ┆ 7 ┆ 9 │\n", "│ 3 ┆ 6 ┆ 9 ┆ 12 │\n", "└─────┴─────┴─────┴─────┘" ] }, "execution_count": 388, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfa = pl.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n", "(\n", "dfa\n", ".with_columns(C = pl.col('A') + pl.col('B'))\n", ".with_columns(D = pl.col('A') + pl.col('C'))\n", ")" ] }, { "cell_type": "markdown", "id": "54bc640b-d56f-4990-8ecd-3d5d5ff0a6f8", "metadata": {}, "source": [ "### Indexing / selection" ] }, { "cell_type": "code", "execution_count": 389, "id": "8cf234e3-84bb-4423-bfde-e54ab7b78792", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 6)
indexbaroneflagfooone_trunc
strf64f64boolstrf64
"b"2.02.0false"bar"2.0
" ], "text/plain": [ "shape: (1, 6)\n", "┌───────┬─────┬─────┬───────┬─────┬───────────┐\n", "│ index ┆ bar ┆ one ┆ flag ┆ foo ┆ one_trunc │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 ┆ bool ┆ str ┆ f64 │\n", "╞═══════╪═════╪═════╪═══════╪═════╪═══════════╡\n", "│ b ┆ 2.0 ┆ 2.0 ┆ false ┆ bar ┆ 2.0 │\n", "└───────┴─────┴─────┴───────┴─────┴───────────┘" ] }, "execution_count": 389, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.loc[\"b\"]\n", "df.filter(pl.col('index') == 'b')" ] }, { "cell_type": "code", "execution_count": 390, "id": "17f2b709-93d3-4bee-a496-1a9665ca1194", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 6)
indexbaroneflagfooone_trunc
strf64f64boolstrf64
"c"3.03.0true"bar"null
" ], "text/plain": [ "shape: (1, 6)\n", "┌───────┬─────┬─────┬──────┬─────┬───────────┐\n", "│ index ┆ bar ┆ one ┆ flag ┆ foo ┆ one_trunc │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 ┆ bool ┆ str ┆ f64 │\n", "╞═══════╪═════╪═════╪══════╪═════╪═══════════╡\n", "│ c ┆ 3.0 ┆ 3.0 ┆ true ┆ bar ┆ null │\n", "└───────┴─────┴─────┴──────┴─────┴───────────┘" ] }, "execution_count": 390, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.iloc[2]\n", "df.slice(2, 1)" ] }, { "cell_type": "markdown", "id": "00546c12-931a-4f0a-8d18-80e8e45fc646", "metadata": {}, "source": [ "### Data alignment and arithmetic" ] }, { "cell_type": "code", "execution_count": 391, "id": "b977f5d6-962a-4cac-9234-1f331a8b88c0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 5)
indexABCD
u32f64f64f64f64
00.7792980.9018110.0911261.41107
1-0.936034-2.6214840.2378251.146001
2-1.192351-3.3607481.5635030.1362
30.933616-0.613897-1.3189110.193137
4-0.195308-0.56240.1188950.222491
50.876072-0.8137321.3678140.554935
60.737510.9945051.1042280.4458
7nullnullnull0.959163
8nullnullnull1.144944
9nullnullnull0.158198
" ], "text/plain": [ "shape: (10, 5)\n", "┌───────┬───────────┬───────────┬───────────┬──────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ u32 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════╪═══════════╪═══════════╪═══════════╪══════════╡\n", "│ 0 ┆ 0.779298 ┆ 0.901811 ┆ 0.091126 ┆ 1.41107 │\n", "│ 1 ┆ -0.936034 ┆ -2.621484 ┆ 0.237825 ┆ 1.146001 │\n", "│ 2 ┆ -1.192351 ┆ -3.360748 ┆ 1.563503 ┆ 0.1362 │\n", "│ 3 ┆ 0.933616 ┆ -0.613897 ┆ -1.318911 ┆ 0.193137 │\n", "│ 4 ┆ -0.195308 ┆ -0.5624 ┆ 0.118895 ┆ 0.222491 │\n", "│ 5 ┆ 0.876072 ┆ -0.813732 ┆ 1.367814 ┆ 0.554935 │\n", "│ 6 ┆ 0.73751 ┆ 0.994505 ┆ 1.104228 ┆ 0.4458 │\n", "│ 7 ┆ null ┆ null ┆ null ┆ 0.959163 │\n", "│ 8 ┆ null ┆ null ┆ null ┆ 1.144944 │\n", "│ 9 ┆ null ┆ null ┆ null ┆ 0.158198 │\n", "└───────┴───────────┴───────────┴───────────┴──────────┘" ] }, "execution_count": 391, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df + df2\n", "df = pl.DataFrame(np.random.randn(10, 4), schema=[\"A\", \"B\", \"C\", \"D\"])\n", "df2 = pl.DataFrame(np.random.randn(7, 3), schema=[\"A\", \"B\", \"C\"])\n", "align_op(df.with_row_index(), df2.with_row_index(), pl.Expr.add, fill_value=None)" ] }, { "cell_type": "code", "execution_count": 392, "id": "7bc4f733-f1c9-4970-9379-c7c87ab01c58", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 4)
ABCD
f64f64f64f64
0.00.00.00.0
-0.755136-2.4810020.449509-0.265068
-0.660488-3.6968911.680688-1.27487
1.186271-2.438415-0.240415-1.217932
-0.508953-1.890291.344603-1.188579
-0.631357-2.7167060.657985-0.856135
0.117999-1.3908570.727739-0.96527
0.549259-1.1762272.202544-0.451907
0.772859-1.2504992.459803-0.266126
-0.366154-0.0298161.599533-1.252872
" ], "text/plain": [ "shape: (10, 4)\n", "┌───────────┬───────────┬───────────┬───────────┐\n", "│ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ -0.755136 ┆ -2.481002 ┆ 0.449509 ┆ -0.265068 │\n", "│ -0.660488 ┆ -3.696891 ┆ 1.680688 ┆ -1.27487 │\n", "│ 1.186271 ┆ -2.438415 ┆ -0.240415 ┆ -1.217932 │\n", "│ -0.508953 ┆ -1.89029 ┆ 1.344603 ┆ -1.188579 │\n", "│ -0.631357 ┆ -2.716706 ┆ 0.657985 ┆ -0.856135 │\n", "│ 0.117999 ┆ -1.390857 ┆ 0.727739 ┆ -0.96527 │\n", "│ 0.549259 ┆ -1.176227 ┆ 2.202544 ┆ -0.451907 │\n", "│ 0.772859 ┆ -1.250499 ┆ 2.459803 ┆ -0.266126 │\n", "│ -0.366154 ┆ -0.029816 ┆ 1.599533 ┆ -1.252872 │\n", "└───────────┴───────────┴───────────┴───────────┘" ] }, "execution_count": 392, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df - df.iloc[0]\n", "df.select(pl.all() - pl.all().first())" ] }, { "cell_type": "code", "execution_count": 393, "id": "d0899303-ad74-4436-ad20-b6bf64278269", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 4)
ABCD
f64f64f64f64
0.64673110.257392-3.4792339.055349
-3.128947-2.147616-1.2316887.730007
-2.65571-8.2270614.9242062.681001
6.578087-1.934685-4.6813062.965687
-1.8980360.805943.2437823.112453
-2.510055-3.32614-0.1893094.774675
1.2367253.3031070.1594634.228999
3.3930254.3762577.5334886.795814
4.5110274.0048998.8197797.724719
-1.18403910.1083114.5184312.790988
" ], "text/plain": [ "shape: (10, 4)\n", "┌───────────┬───────────┬───────────┬──────────┐\n", "│ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪═══════════╪═══════════╪══════════╡\n", "│ 0.646731 ┆ 10.257392 ┆ -3.479233 ┆ 9.055349 │\n", "│ -3.128947 ┆ -2.147616 ┆ -1.231688 ┆ 7.730007 │\n", "│ -2.65571 ┆ -8.227061 ┆ 4.924206 ┆ 2.681001 │\n", "│ 6.578087 ┆ -1.934685 ┆ -4.681306 ┆ 2.965687 │\n", "│ -1.898036 ┆ 0.80594 ┆ 3.243782 ┆ 3.112453 │\n", "│ -2.510055 ┆ -3.32614 ┆ -0.189309 ┆ 4.774675 │\n", "│ 1.236725 ┆ 3.303107 ┆ 0.159463 ┆ 4.228999 │\n", "│ 3.393025 ┆ 4.376257 ┆ 7.533488 ┆ 6.795814 │\n", "│ 4.511027 ┆ 4.004899 ┆ 8.819779 ┆ 7.724719 │\n", "│ -1.184039 ┆ 10.108311 ┆ 4.518431 ┆ 2.790988 │\n", "└───────────┴───────────┴───────────┴──────────┘" ] }, "execution_count": 393, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df * 5 + 2\n", "df.select(pl.all() * 5 + 2) # or this" ] }, { "cell_type": "code", "execution_count": 394, "id": "26b483f5-a539-4ab4-bd45-5591f24f514c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 4)
ABCD
f64f64f64f64
-3.6947570.605518-0.9125360.708682
-0.974859-1.205512-1.5471790.872599
-1.07395-0.4888991.7098667.342137
1.092159-1.27075-0.7483575.177661
-1.282697-4.1873954.0199984.494571
-1.108634-0.938766-2.2838261.802013
-6.5507213.836984-2.7165982.243159
3.589312.1041490.9035891.042576
1.9912172.4938910.7331620.873405
-1.5703320.6166511.9853636.321207
" ], "text/plain": [ "shape: (10, 4)\n", "┌───────────┬───────────┬───────────┬──────────┐\n", "│ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪═══════════╪═══════════╪══════════╡\n", "│ -3.694757 ┆ 0.605518 ┆ -0.912536 ┆ 0.708682 │\n", "│ -0.974859 ┆ -1.205512 ┆ -1.547179 ┆ 0.872599 │\n", "│ -1.07395 ┆ -0.488899 ┆ 1.709866 ┆ 7.342137 │\n", "│ 1.092159 ┆ -1.27075 ┆ -0.748357 ┆ 5.177661 │\n", "│ -1.282697 ┆ -4.187395 ┆ 4.019998 ┆ 4.494571 │\n", "│ -1.108634 ┆ -0.938766 ┆ -2.283826 ┆ 1.802013 │\n", "│ -6.550721 ┆ 3.836984 ┆ -2.716598 ┆ 2.243159 │\n", "│ 3.58931 ┆ 2.104149 ┆ 0.903589 ┆ 1.042576 │\n", "│ 1.991217 ┆ 2.493891 ┆ 0.733162 ┆ 0.873405 │\n", "│ -1.570332 ┆ 0.616651 ┆ 1.985363 ┆ 6.321207 │\n", "└───────────┴───────────┴───────────┴──────────┘" ] }, "execution_count": 394, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select((1 / pl.all()).name.keep())" ] }, { "cell_type": "code", "execution_count": 395, "id": "84dbebb7-0775-4838-904d-d3c740224a6c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 4)
ABCD
f64f64f64f64
0.0053667.4386081.4421133.964551
1.1072180.4734940.1745171.724807
0.75173317.5034380.1169910.000344
0.7028390.3834953.1883470.001391
0.3694060.0032530.0038290.00245
0.6619841.2875680.0367580.094835
0.0005430.0046140.0183610.039497
0.0060250.0510151.5000850.846387
0.063610.0258523.4609971.718449
0.164456.9157850.0643640.000626
" ], "text/plain": [ "shape: (10, 4)\n", "┌──────────┬───────────┬──────────┬──────────┐\n", "│ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞══════════╪═══════════╪══════════╪══════════╡\n", "│ 0.005366 ┆ 7.438608 ┆ 1.442113 ┆ 3.964551 │\n", "│ 1.107218 ┆ 0.473494 ┆ 0.174517 ┆ 1.724807 │\n", "│ 0.751733 ┆ 17.503438 ┆ 0.116991 ┆ 0.000344 │\n", "│ 0.702839 ┆ 0.383495 ┆ 3.188347 ┆ 0.001391 │\n", "│ 0.369406 ┆ 0.003253 ┆ 0.003829 ┆ 0.00245 │\n", "│ 0.661984 ┆ 1.287568 ┆ 0.036758 ┆ 0.094835 │\n", "│ 0.000543 ┆ 0.004614 ┆ 0.018361 ┆ 0.039497 │\n", "│ 0.006025 ┆ 0.051015 ┆ 1.500085 ┆ 0.846387 │\n", "│ 0.06361 ┆ 0.025852 ┆ 3.460997 ┆ 1.718449 │\n", "│ 0.16445 ┆ 6.915785 ┆ 0.064364 ┆ 0.000626 │\n", "└──────────┴───────────┴──────────┴──────────┘" ] }, "execution_count": 395, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(pl.all() ** 4)" ] }, { "cell_type": "code", "execution_count": 396, "id": "c878bcc2-478d-41ef-bae7-14e755469f2e", "metadata": {}, "outputs": [], "source": [ "df1 = pl.DataFrame({\"a\": [1, 0, 1], \"b\": [0, 1, 1]}).cast(pl.Boolean)\n", "df2 = pl.DataFrame({\"a\": [0, 1, 1], \"b\": [1, 1, 0]}).cast(pl.Boolean)" ] }, { "cell_type": "code", "execution_count": 397, "id": "4386501a-83ab-441d-88a8-26ee08bd376b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 3)
indexab
u32boolbool
0falsefalse
1falsetrue
2truefalse
" ], "text/plain": [ "shape: (3, 3)\n", "┌───────┬───────┬───────┐\n", "│ index ┆ a ┆ b │\n", "│ --- ┆ --- ┆ --- │\n", "│ u32 ┆ bool ┆ bool │\n", "╞═══════╪═══════╪═══════╡\n", "│ 0 ┆ false ┆ false │\n", "│ 1 ┆ false ┆ true │\n", "│ 2 ┆ true ┆ false │\n", "└───────┴───────┴───────┘" ] }, "execution_count": 397, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df1 & df2\n", "align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.and_)" ] }, { "cell_type": "code", "execution_count": 398, "id": "c5003633-2de5-4a26-8c20-64f85589e402", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 3)
indexab
u32boolbool
0truetrue
1truetrue
2truetrue
" ], "text/plain": [ "shape: (3, 3)\n", "┌───────┬──────┬──────┐\n", "│ index ┆ a ┆ b │\n", "│ --- ┆ --- ┆ --- │\n", "│ u32 ┆ bool ┆ bool │\n", "╞═══════╪══════╪══════╡\n", "│ 0 ┆ true ┆ true │\n", "│ 1 ┆ true ┆ true │\n", "│ 2 ┆ true ┆ true │\n", "└───────┴──────┴──────┘" ] }, "execution_count": 398, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df1 | df2\n", "align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.or_)" ] }, { "cell_type": "code", "execution_count": 399, "id": "9dcb3e5f-ca32-4625-ab87-fb0a6cd9e30e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 3)
indexab
u32boolbool
0truetrue
1truefalse
2falsetrue
" ], "text/plain": [ "shape: (3, 3)\n", "┌───────┬───────┬───────┐\n", "│ index ┆ a ┆ b │\n", "│ --- ┆ --- ┆ --- │\n", "│ u32 ┆ bool ┆ bool │\n", "╞═══════╪═══════╪═══════╡\n", "│ 0 ┆ true ┆ true │\n", "│ 1 ┆ true ┆ false │\n", "│ 2 ┆ false ┆ true │\n", "└───────┴───────┴───────┘" ] }, "execution_count": 399, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df1 ^ df2\n", "align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.xor)" ] }, { "cell_type": "code", "execution_count": 400, "id": "5e7e2f60-6c03-4f93-ab54-be1e1c573774", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
ab
boolbool
falsetrue
truefalse
falsefalse
" ], "text/plain": [ "shape: (3, 2)\n", "┌───────┬───────┐\n", "│ a ┆ b │\n", "│ --- ┆ --- │\n", "│ bool ┆ bool │\n", "╞═══════╪═══════╡\n", "│ false ┆ true │\n", "│ true ┆ false │\n", "│ false ┆ false │\n", "└───────┴───────┘" ] }, "execution_count": 400, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# -df1\n", "df1.select(pl.all().not_())" ] }, { "cell_type": "markdown", "id": "a6e74e35-aaa9-447c-b1d6-e4563c505408", "metadata": {}, "source": [ "### Transposing" ] }, { "cell_type": "code", "execution_count": 401, "id": "4f752c20-fa32-42ad-a63c-5cd7f347f895", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 6)
indexcolumn_0column_1column_2column_3column_4
strf64f64f64f64f64
"A"-0.270654-1.025789-0.9311420.915617-0.779607
"B"1.651478-0.829523-2.045412-0.786937-0.238812
"C"-1.095847-0.6463380.584841-1.3362610.248756
"D"1.411071.1460010.13620.1931370.222491
" ], "text/plain": [ "shape: (4, 6)\n", "┌───────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ column_4 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ A ┆ -0.270654 ┆ -1.025789 ┆ -0.931142 ┆ 0.915617 ┆ -0.779607 │\n", "│ B ┆ 1.651478 ┆ -0.829523 ┆ -2.045412 ┆ -0.786937 ┆ -0.238812 │\n", "│ C ┆ -1.095847 ┆ -0.646338 ┆ 0.584841 ┆ -1.336261 ┆ 0.248756 │\n", "│ D ┆ 1.41107 ┆ 1.146001 ┆ 0.1362 ┆ 0.193137 ┆ 0.222491 │\n", "└───────┴───────────┴───────────┴───────────┴───────────┴───────────┘" ] }, "execution_count": 401, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.slice(0, 5).transpose(include_header=True, header_name='index')" ] }, { "cell_type": "markdown", "id": "fde7671d-70d7-4afd-abb8-8590458e0b56", "metadata": {}, "source": [ "### DataFrame interoperability with NumPy functions" ] }, { "cell_type": "code", "execution_count": 402, "id": "a34197b6-6b7a-406f-ac47-257079465cbb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.76288056, 5.2146839 , 0.33425647, 4.10033964],\n", " [0.35851332, 0.43625722, 0.52396123, 3.14558954],\n", " [0.39410336, 0.12932687, 1.79470589, 1.14591119],\n", " [2.49831716, 0.45523708, 0.26282647, 1.21304945],\n", " [0.4585861 , 0.78756298, 1.28242953, 1.24918415],\n", " [0.4057529 , 0.34464929, 0.64541498, 1.74182763],\n", " [0.85842587, 1.29773619, 0.69204278, 1.56173886],\n", " [1.32128542, 1.60841852, 3.02435428, 2.60951072],\n", " [1.65236152, 1.49328708, 3.91163667, 3.14226478],\n", " [0.52897833, 5.06149624, 1.65480988, 1.17139769]])" ] }, "execution_count": 402, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.exp(df)" ] }, { "cell_type": "code", "execution_count": 403, "id": "ccd80ad9-a9d1-4b89-ac4c-1daa895c7a2c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"array([[-0.27065379, 1.65147847, -1.0958467 , 1.41106981],\n", " [-1.02578947, -0.82952326, -0.64633759, 1.14600133],\n", " [-0.93114207, -2.04541218, 0.58484116, 0.13620012],\n", " [ 0.91561737, -0.78693694, -1.33626128, 0.19313739],\n", " [-0.77960722, -0.23881194, 0.24875635, 0.22249066],\n", " [-0.90201093, -1.06522792, -0.4378618 , 0.55493493],\n", " [-0.15265495, 0.26062136, -0.3681075 , 0.44579985],\n", " [ 0.27860506, 0.47525141, 1.10669761, 0.95916274],\n", " [ 0.50220549, 0.40097979, 1.36395587, 1.14494381],\n", " [-0.63680781, 1.62166214, 0.50368613, 0.15819764]])" ] }, "execution_count": 403, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.asarray(df)" ] }, { "cell_type": "code", "execution_count": 404, "id": "d7152ec2-b29b-4bbd-8c76-47eced83c852", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4,)
f64
2.718282
7.389056
20.085537
54.59815
" ], "text/plain": [ "shape: (4,)\n", "Series: '' [f64]\n", "[\n", "\t2.718282\n", "\t7.389056\n", "\t20.085537\n", "\t54.59815\n", "]" ] }, "execution_count": 404, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ser = pl.Series([1, 2, 3, 4])\n", "np.exp(ser)" ] }, { "cell_type": "code", "execution_count": 405, "id": "c13aff6c-114b-4994-89bf-53e5cd6cc3d8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
valueindex
i64str
1"a"
2"b"
3"c"
\n", "shape: (3, 2)
valueindex
i64str
1"b"
3"a"
5"c"
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "ser1 = pl.DataFrame(dict(value=[1, 2, 3], index=[\"a\", \"b\", \"c\"]))\n", "ser2 = pl.DataFrame(dict(value=[1, 3, 5], index=[\"b\", \"a\", \"c\"]))\n", "row(ser1, ser2)" ] }, { "cell_type": "code", "execution_count": 411, "id": "1fb0d0d4-e3d5-46ec-b55c-8cfef4e0777e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 2)
valueindex
i64str
1"a"
2"b"
3"c"
\n", "shape: (3, 2)
valueindex
i64str
3"a"
1"b"
5"c"
\n", "shape: (3, 2)
indexvalue
stri64
"a"1
"b"0
"c"3
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# np.remainder(ser1, ser2)\n", "ser1_a, ser2_a = pl.align_frames(ser1, ser2, on='index')\n", "row(ser1_a, ser2_a, pl.select(\n", " index=ser1_a['index'],\n", " value=np.remainder(ser1_a['value'], ser2_a['value'])) \n", ")" ] }, { "cell_type": "markdown", "id": "a825998e-fb2f-4ce0-8c6a-eb3f88572d4e", "metadata": {}, "source": [ "### Console display" ] }, { "cell_type": "code", "execution_count": 412, "id": "107465f3-4340-4bb4-9f4b-b2cbb92d364b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shape: (100, 23)\n", "┌───────┬───────────┬──────┬───────┬───┬─────┬─────┬─────┬──────┐\n", "│ id ┆ player ┆ year ┆ stint ┆ … ┆ hbp ┆ sh ┆ sf ┆ gidp │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ str ┆ i64 ┆ i64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════╪═══════════╪══════╪═══════╪═══╪═════╪═════╪═════╪══════╡\n", "│ 88641 ┆ womacto01 ┆ 2006 ┆ 2 ┆ … ┆ 0.0 ┆ 3.0 ┆ 0.0 ┆ 0.0 │\n", "│ 88643 ┆ schilcu01 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ 88645 ┆ myersmi01 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ 88649 ┆ helliri01 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ 88650 ┆ johnsra05 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 89525 ┆ benitar01 ┆ 2007 ┆ 2 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ 89526 ┆ benitar01 ┆ 2007 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ 89530 ┆ ausmubr01 ┆ 2007 ┆ 1 ┆ … ┆ 6.0 ┆ 4.0 ┆ 1.0 ┆ 11.0 │\n", "│ 89533 ┆ aloumo01 ┆ 2007 ┆ 1 ┆ … ┆ 2.0 ┆ 0.0 ┆ 3.0 ┆ 13.0 │\n", "│ 89534 ┆ alomasa02 ┆ 2007 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "└───────┴───────────┴──────┴───────┴───┴─────┴─────┴─────┴──────┘\n" ] } ], "source": [ "baseball = pl.read_csv('data/baseball.csv')\n", "print(baseball)" ] }, { "cell_type": "code", "execution_count": 414, "id": "2a4375d0-9853-4702-a4ba-7b69c118e1f0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Rows: 
100\n", "Columns: 23\n", "$ id 88641, 88643, 88645, 88649, 88650, 88652, 88653, 88662, 89177, 89178\n", "$ player 'womacto01', 'schilcu01', 'myersmi01', 'helliri01', 'johnsra05', 'finlest01', 'gonzalu01', 'seleaa01', 'francju01', 'francju01'\n", "$ year 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007\n", "$ stint 2, 1, 1, 1, 1, 1, 1, 1, 2, 1\n", "$ team 'CHN', 'BOS', 'NYA', 'MIL', 'NYA', 'SFN', 'ARI', 'LAN', 'ATL', 'NYN'\n", "$ lg 'NL', 'AL', 'AL', 'NL', 'AL', 'NL', 'NL', 'NL', 'NL', 'NL'\n", "$ g 19, 31, 62, 20, 33, 139, 153, 28, 15, 40\n", "$ ab 50, 2, 0, 3, 6, 426, 586, 26, 40, 50\n", "$ r 6, 0, 0, 0, 0, 66, 93, 2, 1, 7\n", "$ h 14, 1, 0, 0, 1, 105, 159, 5, 10, 10\n", "$ X2b 1, 0, 0, 0, 0, 21, 52, 1, 3, 0\n", "$ X3b 0, 0, 0, 0, 0, 12, 2, 0, 0, 0\n", "$ hr 1, 0, 0, 0, 0, 6, 15, 0, 0, 1\n", "$ rbi 2.0, 0.0, 0.0, 0.0, 0.0, 40.0, 73.0, 0.0, 8.0, 8.0\n", "$ sb 1.0, 0.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 2.0\n", "$ cs 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0\n", "$ bb 4, 0, 0, 0, 0, 46, 69, 1, 4, 10\n", "$ so 4.0, 1.0, 0.0, 2.0, 4.0, 55.0, 58.0, 7.0, 10.0, 13.0\n", "$ ibb 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 10.0, 0.0, 1.0, 0.0\n", "$ hbp 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 0.0, 0.0, 0.0\n", "$ sh 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 6.0, 0.0, 0.0\n", "$ sf 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 0.0, 1.0, 1.0\n", "$ gidp 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 14.0, 1.0, 1.0, 1.0\n", "\n" ] } ], "source": [ "baseball.glimpse()" ] }, { "cell_type": "code", "execution_count": 419, "id": "fdcdae5f-88e6-410f-bba6-2304dedc7824", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shape: (20, 12)\n", "┌───────┬───────────┬──────┬───────┬───┬─────┬─────┬─────┬─────┐\n", "│ id ┆ player ┆ year ┆ stint ┆ … ┆ r ┆ h ┆ X2b ┆ X3b │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ str ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", "╞═══════╪═══════════╪══════╪═══════╪═══╪═════╪═════╪═════╪═════╡\n", "│ 89474 ┆ finlest01 ┆ 2007 ┆ 1 ┆ … ┆ 
9 ┆ 17 ┆ 3 ┆ 0 │\n", "│ 89480 ┆ embreal01 ┆ 2007 ┆ 1 ┆ … ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n", "│ 89481 ┆ edmonji01 ┆ 2007 ┆ 1 ┆ … ┆ 39 ┆ 92 ┆ 15 ┆ 2 │\n", "│ 89482 ┆ easleda01 ┆ 2007 ┆ 1 ┆ … ┆ 24 ┆ 54 ┆ 6 ┆ 0 │\n", "│ 89489 ┆ delgaca01 ┆ 2007 ┆ 1 ┆ … ┆ 71 ┆ 139 ┆ 30 ┆ 0 │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 89525 ┆ benitar01 ┆ 2007 ┆ 2 ┆ … ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n", "│ 89526 ┆ benitar01 ┆ 2007 ┆ 1 ┆ … ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n", "│ 89530 ┆ ausmubr01 ┆ 2007 ┆ 1 ┆ … ┆ 38 ┆ 82 ┆ 16 ┆ 3 │\n", "│ 89533 ┆ aloumo01 ┆ 2007 ┆ 1 ┆ … ┆ 51 ┆ 112 ┆ 19 ┆ 1 │\n", "│ 89534 ┆ alomasa02 ┆ 2007 ┆ 1 ┆ … ┆ 1 ┆ 3 ┆ 1 ┆ 0 │\n", "└───────┴───────────┴──────┴───────┴───┴─────┴─────┴─────┴─────┘\n" ] } ], "source": [ "print(\n", " baseball.select(pl.nth(range(0, 12)).tail(20))\n", ")" ] }, { "cell_type": "code", "execution_count": 422, "id": "ceacf4e9-2259-4bcc-b9ee-a343286c36f6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shape: (3, 12)\n", "┌──────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n", "│ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ … ┆ column_8 ┆ column_9 ┆ column_10 ┆ column_11 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 0.545788 ┆ -0.786944 ┆ 0.511419 ┆ -1.465452 ┆ … ┆ 0.103465 ┆ 1.25846 ┆ -1.961481 ┆ -0.892518 │\n", "│ 0.828004 ┆ 0.291711 ┆ -0.666151 ┆ -0.056065 ┆ … ┆ -1.487244 ┆ -0.325992 ┆ -0.25713 ┆ -0.381324 │\n", "│ 0.44002 ┆ 0.21451 ┆ 0.285626 ┆ -0.689011 ┆ … ┆ -0.06698 ┆ 1.482008 ┆ 1.288873 ┆ -1.12619 │\n", "└──────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘\n" ] } ], "source": [ "print(pl.DataFrame(np.random.randn(3, 12)))" ] }, { "cell_type": "code", "execution_count": 423, "id": "57ee1cf9-b23a-41a4-865d-db99607a77e0", "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "shape: (3, 12)\n", "┌─────┬─────┬─────┬─────┬───┬─────┬─────┬─────┬─────┐\n", "│ col ┆ col ┆ col ┆ col ┆ … ┆ col ┆ col ┆ col ┆ col │\n", "│ umn ┆ umn ┆ umn ┆ umn ┆ ┆ umn ┆ umn ┆ umn ┆ umn │\n", "│ _0 ┆ _1 ┆ _2 ┆ _3 ┆ ┆ _8 ┆ _9 ┆ _10 ┆ _11 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═════╪═════╪═════╪═════╪═══╪═════╪═════╪═════╪═════╡\n", "│ -1. ┆ 0.0 ┆ -0. ┆ 1.7 ┆ … ┆ 0.8 ┆ 0.3 ┆ -1. ┆ -1. │\n", "│ 593 ┆ 321 ┆ 382 ┆ 495 ┆ ┆ 431 ┆ 008 ┆ 529 ┆ 861 │\n", "│ 329 ┆ 95 ┆ 463 ┆ 8 ┆ ┆ 73 ┆ 16 ┆ 093 ┆ 916 │\n", "│ 0.5 ┆ -0. ┆ 0.5 ┆ -0. ┆ … ┆ -0. ┆ -0. ┆ -1. ┆ 0.6 │\n", "│ 488 ┆ 337 ┆ 436 ┆ 509 ┆ ┆ 462 ┆ 292 ┆ 385 ┆ 538 │\n", "│ 96 ┆ 439 ┆ 99 ┆ 815 ┆ ┆ 243 ┆ 441 ┆ 327 ┆ 87 │\n", "│ -1. ┆ -0. ┆ 0.7 ┆ -0. ┆ … ┆ 0.1 ┆ -0. ┆ -0. ┆ 0.5 │\n", "│ 150 ┆ 856 ┆ 171 ┆ 110 ┆ ┆ 902 ┆ 484 ┆ 245 ┆ 015 │\n", "│ 248 ┆ 766 ┆ 96 ┆ 738 ┆ ┆ 8 ┆ 427 ┆ 482 ┆ 93 │\n", "└─────┴─────┴─────┴─────┴───┴─────┴─────┴─────┴─────┘\n" ] } ], "source": [ "with pl.Config() as cfg:\n", " cfg.set_tbl_width_chars(40)\n", " print(pl.DataFrame(np.random.randn(3, 12)))" ] }, { "cell_type": "code", "execution_count": 424, "id": "e1c51e82-1ed2-4b53-9315-f5465710219b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shape: (2, 2)\n", "┌─────────────┬──────────────┐\n", "│ filename ┆ path │\n", "│ --- ┆ --- │\n", "│ str ┆ str │\n", "╞═════════════╪══════════════╡\n", "│ filename_01 ┆ media/user_n │\n", "│ ┆ ame/storage/ │\n", "│ ┆ folder… │\n", "│ filename_02 ┆ media/user_n │\n", "│ ┆ ame/storage/ │\n", "│ ┆ folder… │\n", "└─────────────┴──────────────┘\n" ] } ], "source": [ "datafile = {\n", " \"filename\": [\"filename_01\", \"filename_02\"],\n", " \"path\": [\n", " \"media/user_name/storage/folder_01/filename_01\",\n", " \"media/user_name/storage/folder_02/filename_02\",\n", " ],\n", "}\n", "with pl.Config() as cfg:\n", " cfg.set_tbl_width_chars(30)\n", " 
print(pl.DataFrame(datafile))" ] }, { "cell_type": "code", "execution_count": 425, "id": "01846bec-7998-491c-84b9-df7dfea964d5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shape: (2, 2)\n", "┌─────────────┬─────────────────────────────────┐\n", "│ filename ┆ path │\n", "│ --- ┆ --- │\n", "│ str ┆ str │\n", "╞═════════════╪═════════════════════════════════╡\n", "│ filename_01 ┆ media/user_name/storage/folder… │\n", "│ filename_02 ┆ media/user_name/storage/folder… │\n", "└─────────────┴─────────────────────────────────┘\n" ] } ], "source": [ "with pl.Config() as cfg:\n", " cfg.set_tbl_width_chars(100)\n", " print(pl.DataFrame(datafile))" ] }, { "cell_type": "markdown", "id": "8d5da449-149c-47eb-a5cd-5ddea1147ff3", "metadata": {}, "source": [ "### DataFrame column attribute access and IPython completion" ] }, { "cell_type": "markdown", "id": "484e82d1-745f-4943-8855-59e068956312", "metadata": {}, "source": [ "Polars has no equivalent of this feature: DataFrame columns cannot be accessed as attributes (e.g. `df.foo`). Use `df['foo']` or `pl.col('foo')` instead." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }