{
"cells": [
{
"cell_type": "markdown",
"id": "01a71042-e4e0-48cc-bb4e-ab233bc60155",
"metadata": {},
"source": [
"# Intro to data structures"
]
},
{
"cell_type": "code",
"execution_count": 333,
"id": "bac3f2b0-b22a-4f89-a3f3-3f659bc6b49d",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import polars as pl\n",
"from helper.jupyter import row"
]
},
{
"cell_type": "markdown",
"id": "3576ca1a-6060-42e3-9f6c-7782a559166d",
"metadata": {},
"source": [
"## Series"
]
},
{
"cell_type": "markdown",
"id": "31340fc5-e7c0-4d47-8439-475d47f2eb86",
"metadata": {},
"source": [
"In Polars, the Series object has no index, so we need to use a two-column DataFrame to simulate the functions of a Series in Pandas."
]
},
{
"cell_type": "code",
"execution_count": 334,
"id": "2ca4467d-021b-4325-8e9d-053625d97423",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | -1.163544 |
"b" | -0.900663 |
"c" | 1.657318 |
"d" | -1.240945 |
"e" | 0.964103 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬───────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════════╡\n",
"│ a ┆ -1.163544 │\n",
"│ b ┆ -0.900663 │\n",
"│ c ┆ 1.657318 │\n",
"│ d ┆ -1.240945 │\n",
"│ e ┆ 0.964103 │\n",
"└───────┴───────────┘"
]
},
"execution_count": 334,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pl.DataFrame(dict(\n",
" index=[\"a\", \"b\", \"c\", \"d\", \"e\"],\n",
" value=np.random.randn(5)\n",
"))\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 335,
"id": "66f6f8dd-7bcd-4220-9c60-e7851ffe94f8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (5,)\n",
"Series: 'index' [str]\n",
"[\n",
"\t\"a\"\n",
"\t\"b\"\n",
"\t\"c\"\n",
"\t\"d\"\n",
"\t\"e\"\n",
"]"
]
},
"execution_count": 335,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s['index']"
]
},
{
"cell_type": "code",
"execution_count": 336,
"id": "9ae388d4-93aa-47f0-871e-3c1d4795b521",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5,) |
---|
f64 |
0.083327 |
-1.077013 |
0.120624 |
-0.480749 |
-1.196976 |
"
],
"text/plain": [
"shape: (5,)\n",
"Series: '' [f64]\n",
"[\n",
"\t0.083327\n",
"\t-1.077013\n",
"\t0.120624\n",
"\t-0.480749\n",
"\t-1.196976\n",
"]"
]
},
"execution_count": 336,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.Series(np.random.randn(5))"
]
},
{
"cell_type": "code",
"execution_count": 337,
"id": "77027cce-70cb-4928-9d3d-c36fd6f13ef1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 2)index | value |
---|
str | i64 |
"b" | 1 |
"a" | 0 |
"c" | 2 |
"
],
"text/plain": [
"shape: (3, 2)\n",
"┌───────┬───────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ i64 │\n",
"╞═══════╪═══════╡\n",
"│ b ┆ 1 │\n",
"│ a ┆ 0 │\n",
"│ c ┆ 2 │\n",
"└───────┴───────┘"
]
},
"execution_count": 337,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pd.Series(d)\n",
"d = {\"b\": 1, \"a\": 0, \"c\": 2}\n",
"pl.DataFrame(list(d.items()), schema=['index', 'value'], orient='row')"
]
},
{
"cell_type": "code",
"execution_count": 338,
"id": "944c723f-5087-4ab0-be05-a66da0ed8588",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | 5.0 |
"b" | 5.0 |
"c" | 5.0 |
"d" | 5.0 |
"e" | 5.0 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬───────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════╡\n",
"│ a ┆ 5.0 │\n",
"│ b ┆ 5.0 │\n",
"│ c ┆ 5.0 │\n",
"│ d ┆ 5.0 │\n",
"│ e ┆ 5.0 │\n",
"└───────┴───────┘"
]
},
"execution_count": 338,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.select(\n",
" index=pl.Series([\"a\", \"b\", \"c\", \"d\", \"e\"]),\n",
" value=5.0\n",
")"
]
},
{
"cell_type": "markdown",
"id": "aabd1673-8419-4232-86f6-966413daf342",
"metadata": {},
"source": [
"### Series is ndarray-like "
]
},
{
"cell_type": "code",
"execution_count": 339,
"id": "a1c37ace-405d-4405-86f7-d0db0c283c61",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1.163544306576043"
]
},
"execution_count": 339,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s['value'][0]"
]
},
{
"cell_type": "code",
"execution_count": 340,
"id": "ae6a4a67-9385-4fa3-8ad4-dd38b1ba8eac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3,)value |
---|
f64 |
-1.163544 |
-0.900663 |
1.657318 |
"
],
"text/plain": [
"shape: (3,)\n",
"Series: 'value' [f64]\n",
"[\n",
"\t-1.163544\n",
"\t-0.900663\n",
"\t1.657318\n",
"]"
]
},
"execution_count": 340,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s['value'][:3]"
]
},
{
"cell_type": "code",
"execution_count": 341,
"id": "b61f4afc-99a2-424e-80cb-7b91e6a642b2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 2)index | value |
---|
str | f64 |
"c" | 1.657318 |
"e" | 0.964103 |
"
],
"text/plain": [
"shape: (2, 2)\n",
"┌───────┬──────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪══════════╡\n",
"│ c ┆ 1.657318 │\n",
"│ e ┆ 0.964103 │\n",
"└───────┴──────────┘"
]
},
"execution_count": 341,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s[s > s.median()]\n",
"s.filter(pl.col('value') > pl.col('value').median())"
]
},
{
"cell_type": "code",
"execution_count": 342,
"id": "d9e0872d-5a3b-43ce-b936-afd052ca8b50",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 2)index | value |
---|
str | f64 |
"e" | 0.964103 |
"d" | -1.240945 |
"b" | -0.900663 |
"
],
"text/plain": [
"shape: (3, 2)\n",
"┌───────┬───────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════════╡\n",
"│ e ┆ 0.964103 │\n",
"│ d ┆ -1.240945 │\n",
"│ b ┆ -0.900663 │\n",
"└───────┴───────────┘"
]
},
"execution_count": 342,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s.iloc[[4, 3, 1]]\n",
"s[[4, 3, 1]]"
]
},
{
"cell_type": "code",
"execution_count": 343,
"id": "3e41e238-f421-476f-b7ff-ff5815c11e21",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | 0.312377 |
"b" | 0.4063 |
"c" | 5.245225 |
"d" | 0.289111 |
"e" | 2.622434 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬──────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪══════════╡\n",
"│ a ┆ 0.312377 │\n",
"│ b ┆ 0.4063 │\n",
"│ c ┆ 5.245225 │\n",
"│ d ┆ 0.289111 │\n",
"│ e ┆ 2.622434 │\n",
"└───────┴──────────┘"
]
},
"execution_count": 343,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.with_columns(\n",
" pl.col('value').exp()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 344,
"id": "ac8a0b86-fe2a-4e4d-97b8-61853df4b52c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Float64"
]
},
"execution_count": 344,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s['value'].dtype"
]
},
{
"cell_type": "code",
"execution_count": 345,
"id": "1511c955-d697-4da2-bb75-f18181b91762",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-1.16354431, -0.90066273, 1.65731812, -1.24094468, 0.96410308])"
]
},
"execution_count": 345,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s.array\n",
"s['value'].to_numpy()"
]
},
{
"cell_type": "markdown",
"id": "215944e9-4f6e-4516-9c31-361429b8cbae",
"metadata": {},
"source": [
"### Series is dict-like"
]
},
{
"cell_type": "code",
"execution_count": 346,
"id": "ebad929e-8eb0-43bc-aa00-da766f1bedec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1.163544306576043"
]
},
"execution_count": 346,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s[\"a\"]\n",
"s.select(pl.col('value').filter(pl.col('index') == 'a')).item()"
]
},
{
"cell_type": "code",
"execution_count": 347,
"id": "b1d78aa2-7529-476e-a89a-0f978a61e79d",
"metadata": {},
"outputs": [],
"source": [
"# s[\"e\"] = 12.0\n",
"s = s.with_columns(\n",
" pl.when(pl.col('index') == 'e')\n",
" .then(12.0)\n",
" .otherwise(pl.col('value'))\n",
" .name.keep()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 348,
"id": "e19d5441-29de-46d6-8d8a-cb0b335bf5c1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 348,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# \"e\" in s\n",
"\"e\" in s['index']"
]
},
{
"cell_type": "code",
"execution_count": 349,
"id": "87a5c4a5-6a8d-4d44-a58e-1b18d7471de7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 349,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"f\" in s['index']"
]
},
{
"cell_type": "markdown",
"id": "eb0bfd4b-3784-4776-beb9-fb506c733d1e",
"metadata": {},
"source": [
"### Vectorized operations and label alignment with Series"
]
},
{
"cell_type": "code",
"execution_count": 350,
"id": "394752c6-714b-44b0-8566-448a8c961394",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | -2.327089 |
"b" | -1.801325 |
"c" | 3.314636 |
"d" | -2.481889 |
"e" | 24.0 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬───────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════════╡\n",
"│ a ┆ -2.327089 │\n",
"│ b ┆ -1.801325 │\n",
"│ c ┆ 3.314636 │\n",
"│ d ┆ -2.481889 │\n",
"│ e ┆ 24.0 │\n",
"└───────┴───────────┘"
]
},
"execution_count": 350,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s + s\n",
"from helper.polars import align_op\n",
"align_op(s, s, op=pl.Expr.add)"
]
},
{
"cell_type": "code",
"execution_count": 351,
"id": "31c9f84a-9b0c-420f-a7de-f7618bc220b9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | -2.327089 |
"b" | -1.801325 |
"c" | 3.314636 |
"d" | -2.481889 |
"e" | 24.0 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬───────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════════╡\n",
"│ a ┆ -2.327089 │\n",
"│ b ┆ -1.801325 │\n",
"│ c ┆ 3.314636 │\n",
"│ d ┆ -2.481889 │\n",
"│ e ┆ 24.0 │\n",
"└───────┴───────────┘"
]
},
"execution_count": 351,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s * 2\n",
"s.select(\n",
" 'index',\n",
" pl.col('value') * 2\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 352,
"id": "ee7ad564-aa27-4c71-8ad3-62f41c292123",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | 0.312377 |
"b" | 0.4063 |
"c" | 5.245225 |
"d" | 0.289111 |
"e" | 162754.791419 |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬───────────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════════════╡\n",
"│ a ┆ 0.312377 │\n",
"│ b ┆ 0.4063 │\n",
"│ c ┆ 5.245225 │\n",
"│ d ┆ 0.289111 │\n",
"│ e ┆ 162754.791419 │\n",
"└───────┴───────────────┘"
]
},
"execution_count": 352,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# np.exp(s)\n",
"s.select(\n",
" \"index\",\n",
" pl.col(\"value\").exp()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 353,
"id": "fefbc23f-f33a-444e-9b7d-2a60d166f84f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 2)index | value |
---|
str | f64 |
"a" | null |
"b" | -1.801325 |
"c" | 3.314636 |
"d" | -2.481889 |
"e" | null |
"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────┬───────────┐\n",
"│ index ┆ value │\n",
"│ --- ┆ --- │\n",
"│ str ┆ f64 │\n",
"╞═══════╪═══════════╡\n",
"│ a ┆ null │\n",
"│ b ┆ -1.801325 │\n",
"│ c ┆ 3.314636 │\n",
"│ d ┆ -2.481889 │\n",
"│ e ┆ null │\n",
"└───────┴───────────┘"
]
},
"execution_count": 353,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# s.iloc[1:] + s.iloc[:-1]\n",
"align_op(\n",
" s.slice(1), \n",
" s.slice(0, len(s) - 1), \n",
" op=pl.Expr.add, \n",
" fill_value=None, \n",
" how=\"full\")"
]
},
{
"cell_type": "markdown",
"id": "bcb43e01-b857-435f-b761-5c04353c994d",
"metadata": {},
"source": [
"### Name attribute"
]
},
{
"cell_type": "code",
"execution_count": 354,
"id": "0be666d2-02a6-428a-9811-db799a3c0fae",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5,)something |
---|
f64 |
1.879844 |
1.553987 |
-1.190783 |
-1.452195 |
-0.553582 |
"
],
"text/plain": [
"shape: (5,)\n",
"Series: 'something' [f64]\n",
"[\n",
"\t1.879844\n",
"\t1.553987\n",
"\t-1.190783\n",
"\t-1.452195\n",
"\t-0.553582\n",
"]"
]
},
"execution_count": 354,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = pl.Series(\"something\", np.random.randn(5))\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 355,
"id": "6f8ecbb0-ca48-4712-a6af-1c89cf0c954e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'something'"
]
},
"execution_count": 355,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.name"
]
},
{
"cell_type": "code",
"execution_count": 356,
"id": "29398f27-6770-4d07-ad27-0ae1d4d3d887",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'different'"
]
},
"execution_count": 356,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s2 = s.rename('different')\n",
"s2.name"
]
},
{
"cell_type": "markdown",
"id": "06a08088-3a8c-443b-8852-d3c91d32e9b6",
"metadata": {},
"source": [
"## DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 357,
"id": "c0893511-65d5-4662-83b5-5fac76c00462",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 3)index | one | two |
---|
str | f64 | f64 |
"a" | 1.0 | 1.0 |
"b" | 2.0 | 2.0 |
"c" | 3.0 | 3.0 |
"d" | null | 4.0 |
"
],
"text/plain": [
"shape: (4, 3)\n",
"┌───────┬──────┬─────┐\n",
"│ index ┆ one ┆ two │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ f64 │\n",
"╞═══════╪══════╪═════╡\n",
"│ a ┆ 1.0 ┆ 1.0 │\n",
"│ b ┆ 2.0 ┆ 2.0 │\n",
"│ c ┆ 3.0 ┆ 3.0 │\n",
"│ d ┆ null ┆ 4.0 │\n",
"└───────┴──────┴─────┘"
]
},
"execution_count": 357,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1 = pl.DataFrame(dict(index=[\"a\", \"b\", \"c\"], one=[1.0, 2.0, 3.0]))\n",
"s2 = pl.DataFrame(dict(index=[\"a\", \"b\", \"c\", \"d\"], two=[1.0, 2.0, 3.0, 4.0]))\n",
"df = s1.join(s2, on='index', how='full', coalesce=True)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 358,
"id": "37ee0b5f-48b8-4961-8a92-407ed512eaac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (4,)\n",
"Series: 'index' [str]\n",
"[\n",
"\t\"a\"\n",
"\t\"b\"\n",
"\t\"c\"\n",
"\t\"d\"\n",
"]"
]
},
"execution_count": 358,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['index']"
]
},
{
"cell_type": "code",
"execution_count": 359,
"id": "20b9d7f7-3556-4494-9221-d204ef48fffc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['index', 'one', 'two']"
]
},
"execution_count": 359,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 360,
"id": "6453b26e-2972-49e9-8673-1c49f5f144b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['one', 'two']"
]
},
"execution_count": 360,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop('index').columns"
]
},
{
"cell_type": "markdown",
"id": "7260ead1-9795-4d4e-be66-ec509d4b2252",
"metadata": {},
"source": [
"### From dict of ndarrays / lists"
]
},
{
"cell_type": "code",
"execution_count": 361,
"id": "92a4a508-4d26-4323-8594-ab61ba3ece12",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 2)one | two |
---|
f64 | f64 |
1.0 | 4.0 |
2.0 | 3.0 |
3.0 | 2.0 |
4.0 | 1.0 |
"
],
"text/plain": [
"shape: (4, 2)\n",
"┌─────┬─────┐\n",
"│ one ┆ two │\n",
"│ --- ┆ --- │\n",
"│ f64 ┆ f64 │\n",
"╞═════╪═════╡\n",
"│ 1.0 ┆ 4.0 │\n",
"│ 2.0 ┆ 3.0 │\n",
"│ 3.0 ┆ 2.0 │\n",
"│ 4.0 ┆ 1.0 │\n",
"└─────┴─────┘"
]
},
"execution_count": 361,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d = {\"one\": [1.0, 2.0, 3.0, 4.0], \"two\": [4.0, 3.0, 2.0, 1.0]}\n",
"pl.DataFrame(d)"
]
},
{
"cell_type": "code",
"execution_count": 362,
"id": "aef1a07a-ba62-4a0f-8a9d-4fa4e97ca984",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 3)index | one | two |
---|
str | f64 | f64 |
"a" | 1.0 | 4.0 |
"b" | 2.0 | 3.0 |
"c" | 3.0 | 2.0 |
"d" | 4.0 | 1.0 |
"
],
"text/plain": [
"shape: (4, 3)\n",
"┌───────┬─────┬─────┐\n",
"│ index ┆ one ┆ two │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ f64 │\n",
"╞═══════╪═════╪═════╡\n",
"│ a ┆ 1.0 ┆ 4.0 │\n",
"│ b ┆ 2.0 ┆ 3.0 │\n",
"│ c ┆ 3.0 ┆ 2.0 │\n",
"│ d ┆ 4.0 ┆ 1.0 │\n",
"└───────┴─────┴─────┘"
]
},
"execution_count": 362,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.DataFrame(d).insert_column(0, pl.Series('index', [\"a\", \"b\", \"c\", \"d\"]))"
]
},
{
"cell_type": "markdown",
"id": "45d357db-45ad-4c90-b05f-149cc479a3bd",
"metadata": {},
"source": [
"### From structured or record array"
]
},
{
"cell_type": "code",
"execution_count": 363,
"id": "a0746ca5-4543-4ee3-9c1d-281332d93481",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 3)A | B | C |
---|
i32 | f32 | binary |
1 | 2.0 | b"Hello" |
2 | 3.0 | b"World" |
"
],
"text/plain": [
"shape: (2, 3)\n",
"┌─────┬─────┬──────────┐\n",
"│ A ┆ B ┆ C │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ i32 ┆ f32 ┆ binary │\n",
"╞═════╪═════╪══════════╡\n",
"│ 1 ┆ 2.0 ┆ b\"Hello\" │\n",
"│ 2 ┆ 3.0 ┆ b\"World\" │\n",
"└─────┴─────┴──────────┘"
]
},
"execution_count": 363,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = np.zeros((2,), dtype=[(\"A\", \"i4\"), (\"B\", \"f4\"), (\"C\", \"a10\")])\n",
"data[:] = [(1, 2.0, \"Hello\"), (2, 3.0, \"World\")]\n",
"pl.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 364,
"id": "0111e1c6-cd1d-477e-824a-d6f5705db324",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 4)index | A | B | C |
---|
str | i32 | f32 | binary |
"first" | 1 | 2.0 | b"Hello" |
"second" | 2 | 3.0 | b"World" |
"
],
"text/plain": [
"shape: (2, 4)\n",
"┌────────┬─────┬─────┬──────────┐\n",
"│ index ┆ A ┆ B ┆ C │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ i32 ┆ f32 ┆ binary │\n",
"╞════════╪═════╪═════╪══════════╡\n",
"│ first ┆ 1 ┆ 2.0 ┆ b\"Hello\" │\n",
"│ second ┆ 2 ┆ 3.0 ┆ b\"World\" │\n",
"└────────┴─────┴─────┴──────────┘"
]
},
"execution_count": 364,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.DataFrame(data).insert_column(0, pl.Series('index', ['first', 'second']))"
]
},
{
"cell_type": "code",
"execution_count": 365,
"id": "797697c8-a0f6-4f82-bf27-8b0688246c74",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 3)C | A | B |
---|
binary | i32 | f32 |
b"Hello" | 1 | 2.0 |
b"World" | 2 | 3.0 |
"
],
"text/plain": [
"shape: (2, 3)\n",
"┌──────────┬─────┬─────┐\n",
"│ C ┆ A ┆ B │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ binary ┆ i32 ┆ f32 │\n",
"╞══════════╪═════╪═════╡\n",
"│ b\"Hello\" ┆ 1 ┆ 2.0 │\n",
"│ b\"World\" ┆ 2 ┆ 3.0 │\n",
"└──────────┴─────┴─────┘"
]
},
"execution_count": 365,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.DataFrame(data).select(\"C\", \"A\", \"B\")"
]
},
{
"cell_type": "markdown",
"id": "ebb1baa1-83fa-4b62-823a-ab6bf92549d7",
"metadata": {},
"source": [
"### From a list of dicts"
]
},
{
"cell_type": "code",
"execution_count": 366,
"id": "5399a765-4929-4b4e-be33-af1a04459bbd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (2, 3)\n",
"┌─────┬─────┬──────┐\n",
"│ a ┆ b ┆ c │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ i64 │\n",
"╞═════╪═════╪══════╡\n",
"│ 1 ┆ 2 ┆ null │\n",
"│ 5 ┆ 10 ┆ 20 │\n",
"└─────┴─────┴──────┘"
]
},
"execution_count": 366,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data2 = [{\"a\": 1, \"b\": 2}, {\"a\": 5, \"b\": 10, \"c\": 20}]\n",
"pl.DataFrame(data2)"
]
},
{
"cell_type": "code",
"execution_count": 367,
"id": "1c37287f-f1ad-419b-bccd-3b8cdd889b22",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 4)index | a | b | c |
---|
str | i64 | i64 | i64 |
"first" | 1 | 2 | null |
"second" | 5 | 10 | 20 |
"
],
"text/plain": [
"shape: (2, 4)\n",
"┌────────┬─────┬─────┬──────┐\n",
"│ index ┆ a ┆ b ┆ c │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ i64 ┆ i64 ┆ i64 │\n",
"╞════════╪═════╪═════╪══════╡\n",
"│ first ┆ 1 ┆ 2 ┆ null │\n",
"│ second ┆ 5 ┆ 10 ┆ 20 │\n",
"└────────┴─────┴─────┴──────┘"
]
},
"execution_count": 367,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.DataFrame(data2).insert_column(0, pl.Series('index', ['first', 'second']))"
]
},
{
"cell_type": "code",
"execution_count": 368,
"id": "770967f6-b780-45dc-8f7d-c16d26f3115f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (2, 2)\n",
"┌─────┬─────┐\n",
"│ a ┆ b │\n",
"│ --- ┆ --- │\n",
"│ i64 ┆ i64 │\n",
"╞═════╪═════╡\n",
"│ 1 ┆ 2 │\n",
"│ 5 ┆ 10 │\n",
"└─────┴─────┘"
]
},
"execution_count": 368,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.DataFrame(data2).select('a', 'b')"
]
},
{
"cell_type": "markdown",
"id": "5b9e0ce8-8a38-48ee-978a-361b8b34c7c1",
"metadata": {},
"source": [
"### From a dict of tuples"
]
},
{
"cell_type": "code",
"execution_count": 369,
"id": "727c9f44-6027-44b7-9849-476741515a0d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 7)index0 | index1 | a-b | a-a | a-c | b-a | b-b |
---|
str | str | i64 | i64 | i64 | i64 | i64 |
"A" | "B" | 1 | 4 | 5 | 8 | 10 |
"A" | "C" | 2 | 3 | 6 | 7 | null |
"A" | "D" | null | null | null | null | 9 |
"
],
"text/plain": [
"shape: (3, 7)\n",
"┌────────┬────────┬──────┬──────┬──────┬──────┬──────┐\n",
"│ index0 ┆ index1 ┆ a-b ┆ a-a ┆ a-c ┆ b-a ┆ b-b │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n",
"╞════════╪════════╪══════╪══════╪══════╪══════╪══════╡\n",
"│ A ┆ B ┆ 1 ┆ 4 ┆ 5 ┆ 8 ┆ 10 │\n",
"│ A ┆ C ┆ 2 ┆ 3 ┆ 6 ┆ 7 ┆ null │\n",
"│ A ┆ D ┆ null ┆ null ┆ null ┆ null ┆ 9 │\n",
"└────────┴────────┴──────┴──────┴──────┴──────┴──────┘"
]
},
"execution_count": 369,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = {\n",
" (\"a\", \"b\"): {(\"A\", \"B\"): 1, (\"A\", \"C\"): 2},\n",
" (\"a\", \"a\"): {(\"A\", \"C\"): 3, (\"A\", \"B\"): 4},\n",
" (\"a\", \"c\"): {(\"A\", \"B\"): 5, (\"A\", \"C\"): 6},\n",
" (\"b\", \"a\"): {(\"A\", \"C\"): 7, (\"A\", \"B\"): 8},\n",
" (\"b\", \"b\"): {(\"A\", \"D\"): 9, (\"A\", \"B\"): 10},\n",
"}\n",
"\n",
"dfs = []\n",
"for key, value in data.items():\n",
" rows = []\n",
" for key2, value2 in value.items():\n",
" data_row = list(key2) + [value2]\n",
" rows.append(data_row)\n",
" dfs.append(\n",
" pl.DataFrame(rows, orient='row', schema=['index0', 'index1', '-'.join(key)])\n",
" )\n",
" \n",
"pl.concat(dfs, how=\"align\") "
]
},
{
"cell_type": "markdown",
"id": "e719e7c0-d54d-4373-9f63-79263852375f",
"metadata": {},
"source": [
"### From a Series"
]
},
{
"cell_type": "code",
"execution_count": 370,
"id": "3e24739c-f6ad-4053-b918-5ea374c2fe2a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 2)index | ser |
---|
str | i64 |
"a" | 0 |
"b" | 1 |
"c" | 2 |
"
],
"text/plain": [
"shape: (3, 2)\n",
"┌───────┬─────┐\n",
"│ index ┆ ser │\n",
"│ --- ┆ --- │\n",
"│ str ┆ i64 │\n",
"╞═══════╪═════╡\n",
"│ a ┆ 0 │\n",
"│ b ┆ 1 │\n",
"│ c ┆ 2 │\n",
"└───────┴─────┘"
]
},
"execution_count": 370,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser = pl.Series('ser', range(3))\n",
"pl.DataFrame(ser).insert_column(0, pl.Series('index', list(\"abc\")))"
]
},
{
"cell_type": "markdown",
"id": "46e919a5-8360-4710-b492-86eb49ee24c4",
"metadata": {},
"source": [
"### From a list of namedtuples"
]
},
{
"cell_type": "code",
"execution_count": 371,
"id": "b41c6f70-feff-4573-b1b5-4e1d150d08e5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (3, 2)\n",
"┌─────┬─────┐\n",
"│ x ┆ y │\n",
"│ --- ┆ --- │\n",
"│ i64 ┆ i64 │\n",
"╞═════╪═════╡\n",
"│ 0 ┆ 0 │\n",
"│ 0 ┆ 3 │\n",
"│ 2 ┆ 3 │\n",
"└─────┴─────┘"
]
},
"execution_count": 371,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from collections import namedtuple\n",
"Point = namedtuple(\"Point\", \"x y\")\n",
"pl.DataFrame([Point(0, 0), Point(0, 3), (2, 3)])"
]
},
{
"cell_type": "code",
"execution_count": 372,
"id": "4201b810-f48a-4bd5-a60a-77797c9c34b0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (3, 3)\n",
"┌─────┬─────┬──────┐\n",
"│ x ┆ y ┆ z │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ i64 │\n",
"╞═════╪═════╪══════╡\n",
"│ 0 ┆ 0 ┆ 0 │\n",
"│ 0 ┆ 3 ┆ 5 │\n",
"│ 2 ┆ 3 ┆ null │\n",
"└─────┴─────┴──────┘"
]
},
"execution_count": 372,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Point3D = namedtuple(\"Point3D\", \"x y z\")\n",
"data = [Point3D(0, 0, 0), Point3D(0, 3, 5), Point(2, 3)]\n",
"pl.DataFrame([p._asdict() for p in data])"
]
},
{
"cell_type": "markdown",
"id": "c41a3dc8-5043-4507-87c8-4f49b06e8686",
"metadata": {},
"source": [
"### From a list of dataclasses"
]
},
{
"cell_type": "code",
"execution_count": 373,
"id": "6dbfc181-384f-4896-940e-a8975a9fbb4b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (3, 2)\n",
"┌─────┬─────┐\n",
"│ x ┆ y │\n",
"│ --- ┆ --- │\n",
"│ i64 ┆ i64 │\n",
"╞═════╪═════╡\n",
"│ 0 ┆ 0 │\n",
"│ 0 ┆ 3 │\n",
"│ 2 ┆ 3 │\n",
"└─────┴─────┘"
]
},
"execution_count": 373,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dataclasses import make_dataclass\n",
"Point = make_dataclass(\"Point\", [(\"x\", int), (\"y\", int)])\n",
"pl.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])"
]
},
{
"cell_type": "markdown",
"id": "947db500-e0e4-495e-9d3c-7f4c8a9f1c93",
"metadata": {},
"source": [
"### Alternate constructors"
]
},
{
"cell_type": "markdown",
"id": "0e964439-0ec3-439e-a3ef-7100ff1f26a0",
"metadata": {},
"source": [
"#### DataFrame.from_dict"
]
},
{
"cell_type": "code",
"execution_count": 374,
"id": "bf32ca20-a8f5-4582-ab8f-8cab709c3fe7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (3, 2)\n",
"┌─────┬─────┐\n",
"│ A ┆ B │\n",
"│ --- ┆ --- │\n",
"│ i64 ┆ i64 │\n",
"╞═════╪═════╡\n",
"│ 1 ┆ 4 │\n",
"│ 2 ┆ 5 │\n",
"│ 3 ┆ 6 │\n",
"└─────┴─────┘"
]
},
"execution_count": 374,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pl.DataFrame(dict([(\"A\", [1, 2, 3]), (\"B\", [4, 5, 6])]))"
]
},
{
"cell_type": "code",
"execution_count": 375,
"id": "a7b8f7b5-5421-43d5-908a-f6057321425a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 4)index | one | two | three |
---|
str | i64 | i64 | i64 |
"A" | 1 | 2 | 3 |
"B" | 4 | 5 | 6 |
"
],
"text/plain": [
"shape: (2, 4)\n",
"┌───────┬─────┬─────┬───────┐\n",
"│ index ┆ one ┆ two ┆ three │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ i64 ┆ i64 ┆ i64 │\n",
"╞═══════╪═════╪═════╪═══════╡\n",
"│ A ┆ 1 ┆ 2 ┆ 3 │\n",
"│ B ┆ 4 ┆ 5 ┆ 6 │\n",
"└───────┴─────┴─────┴───────┘"
]
},
"execution_count": 375,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = dict([(\"A\", [1, 2, 3]), (\"B\", [4, 5, 6])])\n",
"pl.DataFrame(\n",
" list(data.values()), schema=['one', 'two', 'three'], orient='row'\n",
").insert_column(0, pl.Series('index', data.keys()))"
]
},
{
"cell_type": "markdown",
"id": "284afd22-b8ca-4ed0-9d2c-c4e2f61cde3c",
"metadata": {},
"source": [
"#### DataFrame.from_records"
]
},
{
"cell_type": "code",
"execution_count": 376,
"id": "1a02d357-773e-402a-8ac0-50050b9b2aab",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (2, 3)A | B | C |
---|
i32 | f32 | binary |
1 | 2.0 | b"Hello" |
2 | 3.0 | b"World" |
"
],
"text/plain": [
"shape: (2, 3)\n",
"┌─────┬─────┬──────────┐\n",
"│ A ┆ B ┆ C │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ i32 ┆ f32 ┆ binary │\n",
"╞═════╪═════╪══════════╡\n",
"│ 1 ┆ 2.0 ┆ b\"Hello\" │\n",
"│ 2 ┆ 3.0 ┆ b\"World\" │\n",
"└─────┴─────┴──────────┘"
]
},
"execution_count": 376,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = np.zeros((2,), dtype=[(\"A\", \"i4\"), (\"B\", \"f4\"), (\"C\", \"a10\")])\n",
"data[:] = [(1, 2.0, \"Hello\"), (2, 3.0, \"World\")]\n",
"pl.DataFrame(data)"
]
},
{
"cell_type": "markdown",
"id": "20222785-9460-42f6-ade9-a372555885ef",
"metadata": {},
"source": [
"### Column selection, addition, deletion"
]
},
{
"cell_type": "code",
"execution_count": 377,
"id": "87831144-1c70-48d7-9888-dacc495bdb11",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
"shape: (4,)\n",
"Series: 'one' [f64]\n",
"[\n",
"\t1.0\n",
"\t2.0\n",
"\t3.0\n",
"\tnull\n",
"]"
]
},
"execution_count": 377,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['one']"
]
},
{
"cell_type": "code",
"execution_count": 378,
"id": "7b5089b5-d007-4dca-b924-78b80f25562a",
"metadata": {},
"outputs": [],
"source": [
"df = df.with_columns(\n",
" three=pl.col('one') * pl.col('two'),\n",
" flag=pl.col('one') > 2\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 379,
"id": "e1832ed6-f46a-4782-be90-50887ded2f1c",
"metadata": {},
"outputs": [],
"source": [
"# del df[\"two\"]\n",
"df = df.drop('two')"
]
},
{
"cell_type": "code",
"execution_count": 380,
"id": "b2520e50-49bd-400c-8a00-9e934132043e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 3)index | one | flag |
---|
str | f64 | bool |
"a" | 1.0 | false |
"b" | 2.0 | false |
"c" | 3.0 | true |
"d" | null | null |
"
],
"text/plain": [
"shape: (4, 3)\n",
"┌───────┬──────┬───────┐\n",
"│ index ┆ one ┆ flag │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ bool │\n",
"╞═══════╪══════╪═══════╡\n",
"│ a ┆ 1.0 ┆ false │\n",
"│ b ┆ 2.0 ┆ false │\n",
"│ c ┆ 3.0 ┆ true │\n",
"│ d ┆ null ┆ null │\n",
"└───────┴──────┴───────┘"
]
},
"execution_count": 380,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#three = df.pop(\"three\")\n",
"three = df['three']\n",
"df = df.drop('three')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 381,
"id": "dbc0e570-7235-42e5-92fc-3f09761bfc25",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 4)index | one | flag | foo |
---|
str | f64 | bool | str |
"a" | 1.0 | false | "bar" |
"b" | 2.0 | false | "bar" |
"c" | 3.0 | true | "bar" |
"d" | null | null | "bar" |
"
],
"text/plain": [
"shape: (4, 4)\n",
"┌───────┬──────┬───────┬─────┐\n",
"│ index ┆ one ┆ flag ┆ foo │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ bool ┆ str │\n",
"╞═══════╪══════╪═══════╪═════╡\n",
"│ a ┆ 1.0 ┆ false ┆ bar │\n",
"│ b ┆ 2.0 ┆ false ┆ bar │\n",
"│ c ┆ 3.0 ┆ true ┆ bar │\n",
"│ d ┆ null ┆ null ┆ bar │\n",
"└───────┴──────┴───────┴─────┘"
]
},
"execution_count": 381,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df.with_columns(foo=pl.lit('bar'))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 382,
"id": "62f699d2-1c7a-41fb-b0ff-09b76e66c51c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 5)index | one | flag | foo | one_trunc |
---|
str | f64 | bool | str | f64 |
"a" | 1.0 | false | "bar" | 1.0 |
"b" | 2.0 | false | "bar" | 2.0 |
"c" | 3.0 | true | "bar" | null |
"d" | null | null | "bar" | null |
"
],
"text/plain": [
"shape: (4, 5)\n",
"┌───────┬──────┬───────┬─────┬───────────┐\n",
"│ index ┆ one ┆ flag ┆ foo ┆ one_trunc │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ bool ┆ str ┆ f64 │\n",
"╞═══════╪══════╪═══════╪═════╪═══════════╡\n",
"│ a ┆ 1.0 ┆ false ┆ bar ┆ 1.0 │\n",
"│ b ┆ 2.0 ┆ false ┆ bar ┆ 2.0 │\n",
"│ c ┆ 3.0 ┆ true ┆ bar ┆ null │\n",
"│ d ┆ null ┆ null ┆ bar ┆ null │\n",
"└───────┴──────┴───────┴─────┴───────────┘"
]
},
"execution_count": 382,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df[\"one_trunc\"] = df[\"one\"][:2]\n",
"df = df.with_columns(\n",
" one_trunc=pl.col('one').slice(0, 2).append(pl.repeat(None, pl.len() - 2))\n",
")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 383,
"id": "642cd941-0eeb-439c-ae66-aeb5a1ee3f6d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 6)index | bar | one | flag | foo | one_trunc |
---|
str | f64 | f64 | bool | str | f64 |
"a" | 1.0 | 1.0 | false | "bar" | 1.0 |
"b" | 2.0 | 2.0 | false | "bar" | 2.0 |
"c" | 3.0 | 3.0 | true | "bar" | null |
"d" | null | null | null | "bar" | null |
"
],
"text/plain": [
"shape: (4, 6)\n",
"┌───────┬──────┬──────┬───────┬─────┬───────────┐\n",
"│ index ┆ bar ┆ one ┆ flag ┆ foo ┆ one_trunc │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ f64 ┆ bool ┆ str ┆ f64 │\n",
"╞═══════╪══════╪══════╪═══════╪═════╪═══════════╡\n",
"│ a ┆ 1.0 ┆ 1.0 ┆ false ┆ bar ┆ 1.0 │\n",
"│ b ┆ 2.0 ┆ 2.0 ┆ false ┆ bar ┆ 2.0 │\n",
"│ c ┆ 3.0 ┆ 3.0 ┆ true ┆ bar ┆ null │\n",
"│ d ┆ null ┆ null ┆ null ┆ bar ┆ null │\n",
"└───────┴──────┴──────┴───────┴─────┴───────────┘"
]
},
"execution_count": 383,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.insert_column(1, df[\"one\"].rename('bar'))\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "0eb600bf-9d23-461d-9ff4-4a939fc9b753",
"metadata": {},
"source": [
"### Assigning new columns in method chains"
]
},
{
"cell_type": "code",
"execution_count": 384,
"id": "2daee1ec-23fd-4e8c-a52a-3941f143c0ce",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 5)SepalLength | SepalWidth | PetalLength | PetalWidth | Name |
---|
f64 | f64 | f64 | f64 | str |
5.1 | 3.5 | 1.4 | 0.2 | "Iris-setosa" |
4.9 | 3.0 | 1.4 | 0.2 | "Iris-setosa" |
4.7 | 3.2 | 1.3 | 0.2 | "Iris-setosa" |
4.6 | 3.1 | 1.5 | 0.2 | "Iris-setosa" |
5.0 | 3.6 | 1.4 | 0.2 | "Iris-setosa" |
"
],
"text/plain": [
"shape: (5, 5)\n",
"┌─────────────┬────────────┬─────────────┬────────────┬─────────────┐\n",
"│ SepalLength ┆ SepalWidth ┆ PetalLength ┆ PetalWidth ┆ Name │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n",
"╞═════════════╪════════════╪═════════════╪════════════╪═════════════╡\n",
"│ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 4.6 ┆ 3.1 ┆ 1.5 ┆ 0.2 ┆ Iris-setosa │\n",
"│ 5.0 ┆ 3.6 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa │\n",
"└─────────────┴────────────┴─────────────┴────────────┴─────────────┘"
]
},
"execution_count": 384,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris = pl.read_csv('data/iris.data')\n",
"iris.head()"
]
},
{
"cell_type": "code",
"execution_count": 385,
"id": "3f1fb844-8c32-40f3-9082-e84a43d887ad",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (5, 6)SepalLength | SepalWidth | PetalLength | PetalWidth | Name | sepal_ratio |
---|
f64 | f64 | f64 | f64 | str | f64 |
5.1 | 3.5 | 1.4 | 0.2 | "Iris-setosa" | 0.686275 |
4.9 | 3.0 | 1.4 | 0.2 | "Iris-setosa" | 0.612245 |
4.7 | 3.2 | 1.3 | 0.2 | "Iris-setosa" | 0.680851 |
4.6 | 3.1 | 1.5 | 0.2 | "Iris-setosa" | 0.673913 |
5.0 | 3.6 | 1.4 | 0.2 | "Iris-setosa" | 0.72 |
"
],
"text/plain": [
"shape: (5, 6)\n",
"┌─────────────┬────────────┬─────────────┬────────────┬─────────────┬─────────────┐\n",
"│ SepalLength ┆ SepalWidth ┆ PetalLength ┆ PetalWidth ┆ Name ┆ sepal_ratio │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str ┆ f64 │\n",
"╞═════════════╪════════════╪═════════════╪════════════╪═════════════╪═════════════╡\n",
"│ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa ┆ 0.686275 │\n",
"│ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa ┆ 0.612245 │\n",
"│ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ Iris-setosa ┆ 0.680851 │\n",
"│ 4.6 ┆ 3.1 ┆ 1.5 ┆ 0.2 ┆ Iris-setosa ┆ 0.673913 │\n",
"│ 5.0 ┆ 3.6 ┆ 1.4 ┆ 0.2 ┆ Iris-setosa ┆ 0.72 │\n",
"└─────────────┴────────────┴─────────────┴────────────┴─────────────┴─────────────┘"
]
},
"execution_count": 385,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris.with_columns(\n",
" sepal_ratio=pl.col('SepalWidth') / pl.col('SepalLength')\n",
").head()"
]
},
{
"cell_type": "code",
"execution_count": 386,
"id": "10a436d9-33ab-4d4a-8dda-7eb2b9ef2f24",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"(function(root) {\n",
" function now() {\n",
" return new Date();\n",
" }\n",
"\n",
" const force = true;\n",
" const py_version = '3.6.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n",
" const reloading = false;\n",
" const Bokeh = root.Bokeh;\n",
"\n",
" // Set a timeout for this load but only if we are not already initializing\n",
" if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n",
" root._bokeh_timeout = Date.now() + 5000;\n",
" root._bokeh_failed_load = false;\n",
" }\n",
"\n",
" function run_callbacks() {\n",
" try {\n",
" root._bokeh_onload_callbacks.forEach(function(callback) {\n",
" if (callback != null)\n",
" callback();\n",
" });\n",
" } finally {\n",
" delete root._bokeh_onload_callbacks;\n",
" }\n",
" console.debug(\"Bokeh: all callbacks have finished\");\n",
" }\n",
"\n",
" function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n",
" if (css_urls == null) css_urls = [];\n",
" if (js_urls == null) js_urls = [];\n",
" if (js_modules == null) js_modules = [];\n",
" if (js_exports == null) js_exports = {};\n",
"\n",
" root._bokeh_onload_callbacks.push(callback);\n",
"\n",
" if (root._bokeh_is_loading > 0) {\n",
" // Don't load bokeh if it is still initializing\n",
" console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
" return null;\n",
" } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n",
" // There is nothing to load\n",
" run_callbacks();\n",
" return null;\n",
" }\n",
"\n",
" function on_load() {\n",
" root._bokeh_is_loading--;\n",
" if (root._bokeh_is_loading === 0) {\n",
" console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
" run_callbacks()\n",
" }\n",
" }\n",
" window._bokeh_on_load = on_load\n",
"\n",
" function on_error(e) {\n",
" const src_el = e.srcElement\n",
" console.error(\"failed to load \" + (src_el.href || src_el.src));\n",
" }\n",
"\n",
" const skip = [];\n",
" if (window.requirejs) {\n",
" window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n",
" root._bokeh_is_loading = css_urls.length + 0;\n",
" } else {\n",
" root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n",
" }\n",
"\n",
" const existing_stylesheets = []\n",
" const links = document.getElementsByTagName('link')\n",
" for (let i = 0; i < links.length; i++) {\n",
" const link = links[i]\n",
" if (link.href != null) {\n",
" existing_stylesheets.push(link.href)\n",
" }\n",
" }\n",
" for (let i = 0; i < css_urls.length; i++) {\n",
" const url = css_urls[i];\n",
" const escaped = encodeURI(url)\n",
" if (existing_stylesheets.indexOf(escaped) !== -1) {\n",
" on_load()\n",
" continue;\n",
" }\n",
" const element = document.createElement(\"link\");\n",
" element.onload = on_load;\n",
" element.onerror = on_error;\n",
" element.rel = \"stylesheet\";\n",
" element.type = \"text/css\";\n",
" element.href = url;\n",
" console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
" document.body.appendChild(element);\n",
" } var existing_scripts = []\n",
" const scripts = document.getElementsByTagName('script')\n",
" for (let i = 0; i < scripts.length; i++) {\n",
" var script = scripts[i]\n",
" if (script.src != null) {\n",
" existing_scripts.push(script.src)\n",
" }\n",
" }\n",
" for (let i = 0; i < js_urls.length; i++) {\n",
" const url = js_urls[i];\n",
" const escaped = encodeURI(url)\n",
" if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n",
" if (!window.requirejs) {\n",
" on_load();\n",
" }\n",
" continue;\n",
" }\n",
" const element = document.createElement('script');\n",
" element.onload = on_load;\n",
" element.onerror = on_error;\n",
" element.async = false;\n",
" element.src = url;\n",
" console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
" document.head.appendChild(element);\n",
" }\n",
" for (let i = 0; i < js_modules.length; i++) {\n",
" const url = js_modules[i];\n",
" const escaped = encodeURI(url)\n",
" if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n",
" if (!window.requirejs) {\n",
" on_load();\n",
" }\n",
" continue;\n",
" }\n",
" var element = document.createElement('script');\n",
" element.onload = on_load;\n",
" element.onerror = on_error;\n",
" element.async = false;\n",
" element.src = url;\n",
" element.type = \"module\";\n",
" console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
" document.head.appendChild(element);\n",
" }\n",
" for (const name in js_exports) {\n",
" const url = js_exports[name];\n",
" const escaped = encodeURI(url)\n",
" if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n",
" if (!window.requirejs) {\n",
" on_load();\n",
" }\n",
" continue;\n",
" }\n",
" var element = document.createElement('script');\n",
" element.onerror = on_error;\n",
" element.async = false;\n",
" element.type = \"module\";\n",
" console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
" element.textContent = `\n",
" import ${name} from \"${url}\"\n",
" window.${name} = ${name}\n",
" window._bokeh_on_load()\n",
" `\n",
" document.head.appendChild(element);\n",
" }\n",
" if (!js_urls.length && !js_modules.length) {\n",
" on_load()\n",
" }\n",
" };\n",
"\n",
" function inject_raw_css(css) {\n",
" const element = document.createElement(\"style\");\n",
" element.appendChild(document.createTextNode(css));\n",
" document.body.appendChild(element);\n",
" }\n",
"\n",
" const js_urls = [\"https://cdn.holoviz.org/panel/1.5.4/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.6.2.min.js\", \"https://cdn.holoviz.org/panel/1.5.4/dist/panel.min.js\"];\n",
" const js_modules = [];\n",
" const js_exports = {};\n",
" const css_urls = [];\n",
" const inline_js = [ function(Bokeh) {\n",
" Bokeh.set_log_level(\"info\");\n",
" },\n",
"function(Bokeh) {} // ensure no trailing comma for IE\n",
" ];\n",
"\n",
" function run_inline_js() {\n",
" if ((root.Bokeh !== undefined) || (force === true)) {\n",
" for (let i = 0; i < inline_js.length; i++) {\n",
" try {\n",
" inline_js[i].call(root, root.Bokeh);\n",
" } catch(e) {\n",
" if (!reloading) {\n",
" throw e;\n",
" }\n",
" }\n",
" }\n",
" // Cache old bokeh versions\n",
" if (Bokeh != undefined && !reloading) {\n",
" var NewBokeh = root.Bokeh;\n",
" if (Bokeh.versions === undefined) {\n",
" Bokeh.versions = new Map();\n",
" }\n",
" if (NewBokeh.version !== Bokeh.version) {\n",
" Bokeh.versions.set(NewBokeh.version, NewBokeh)\n",
" }\n",
" root.Bokeh = Bokeh;\n",
" }\n",
" } else if (Date.now() < root._bokeh_timeout) {\n",
" setTimeout(run_inline_js, 100);\n",
" } else if (!root._bokeh_failed_load) {\n",
" console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
" root._bokeh_failed_load = true;\n",
" }\n",
" root._bokeh_is_initializing = false\n",
" }\n",
"\n",
" function load_or_wait() {\n",
" // Implement a backoff loop that tries to ensure we do not load multiple\n",
" // versions of Bokeh and its dependencies at the same time.\n",
" // In recent versions we use the root._bokeh_is_initializing flag\n",
" // to determine whether there is an ongoing attempt to initialize\n",
" // bokeh, however for backward compatibility we also try to ensure\n",
" // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n",
" // before older versions are fully initialized.\n",
" if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n",
" // If the timeout and bokeh was not successfully loaded we reset\n",
" // everything and try loading again\n",
" root._bokeh_timeout = Date.now() + 5000;\n",
" root._bokeh_is_initializing = false;\n",
" root._bokeh_onload_callbacks = undefined;\n",
" root._bokeh_is_loading = 0\n",
" console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n",
" load_or_wait();\n",
" } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n",
" setTimeout(load_or_wait, 100);\n",
" } else {\n",
" root._bokeh_is_initializing = true\n",
" root._bokeh_onload_callbacks = []\n",
" const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n",
" if (!reloading && !bokeh_loaded) {\n",
" if (root.Bokeh) {\n",
" root.Bokeh = undefined;\n",
" }\n",
" console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
" }\n",
" load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n",
" console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
" run_inline_js();\n",
" });\n",
" }\n",
" }\n",
" // Give older versions of the autoload script a head-start to ensure\n",
" // they initialize before we start loading newer version.\n",
" setTimeout(load_or_wait, 100)\n",
"}(window));"
],
"application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n const py_version = '3.6.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n const reloading = false;\n const Bokeh = root.Bokeh;\n\n // Set a timeout for this load but only if we are not already initializing\n if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n // Don't load bokeh if it is still initializing\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n // There is nothing to load\n run_callbacks();\n return null;\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error(e) {\n const src_el = e.srcElement\n console.error(\"failed to load \" + (src_el.href || src_el.src));\n }\n\n const skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length 
+ js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n const existing_stylesheets = []\n const links = document.getElementsByTagName('link')\n for (let i = 0; i < links.length; i++) {\n const link = links[i]\n if (link.href != null) {\n existing_stylesheets.push(link.href)\n }\n }\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const escaped = encodeURI(url)\n if (existing_stylesheets.indexOf(escaped) !== -1) {\n on_load()\n continue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n const scripts = document.getElementsByTagName('script')\n for (let i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n existing_scripts.push(script.src)\n }\n }\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (let i = 0; i < js_modules.length; i++) {\n const url = js_modules[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n 
console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n const url = js_exports[name];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) >= 0 || root[name] != null) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.holoviz.org/panel/1.5.4/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.6.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.6.2.min.js\", \"https://cdn.holoviz.org/panel/1.5.4/dist/panel.min.js\"];\n const js_modules = [];\n const js_exports = {};\n const css_urls = [];\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (let i = 0; i < inline_js.length; i++) {\n try {\n inline_js[i].call(root, root.Bokeh);\n } catch(e) {\n if (!reloading) {\n throw e;\n }\n }\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n var NewBokeh = root.Bokeh;\n if (Bokeh.versions === undefined) 
{\n Bokeh.versions = new Map();\n }\n if (NewBokeh.version !== Bokeh.version) {\n Bokeh.versions.set(NewBokeh.version, NewBokeh)\n }\n root.Bokeh = Bokeh;\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n // If the timeout and bokeh was not successfully loaded we reset\n // everything and try loading again\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n root._bokeh_is_loading = 0\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && root.Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n if (root.Bokeh) {\n root.Bokeh = undefined;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n 
load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));"
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
"if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n",
" window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n",
"}\n",
"\n",
"\n",
" function JupyterCommManager() {\n",
" }\n",
"\n",
" JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n",
" if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n",
" var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n",
" comm_manager.register_target(comm_id, function(comm) {\n",
" comm.on_msg(msg_handler);\n",
" });\n",
" } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n",
" window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n",
" comm.onMsg = msg_handler;\n",
" });\n",
" } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n",
" google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n",
" var messages = comm.messages[Symbol.asyncIterator]();\n",
" function processIteratorResult(result) {\n",
" var message = result.value;\n",
" console.log(message)\n",
" var content = {data: message.data, comm_id};\n",
" var buffers = []\n",
" for (var buffer of message.buffers || []) {\n",
" buffers.push(new DataView(buffer))\n",
" }\n",
" var metadata = message.metadata || {};\n",
" var msg = {content, buffers, metadata}\n",
" msg_handler(msg);\n",
" return messages.next().then(processIteratorResult);\n",
" }\n",
" return messages.next().then(processIteratorResult);\n",
" })\n",
" }\n",
" }\n",
"\n",
" JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n",
" if (comm_id in window.PyViz.comms) {\n",
" return window.PyViz.comms[comm_id];\n",
" } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n",
" var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n",
" var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n",
" if (msg_handler) {\n",
" comm.on_msg(msg_handler);\n",
" }\n",
" } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n",
" var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n",
" comm.open();\n",
" if (msg_handler) {\n",
" comm.onMsg = msg_handler;\n",
" }\n",
" } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n",
" var comm_promise = google.colab.kernel.comms.open(comm_id)\n",
" comm_promise.then((comm) => {\n",
" window.PyViz.comms[comm_id] = comm;\n",
" if (msg_handler) {\n",
" var messages = comm.messages[Symbol.asyncIterator]();\n",
" function processIteratorResult(result) {\n",
" var message = result.value;\n",
" var content = {data: message.data};\n",
" var metadata = message.metadata || {comm_id};\n",
" var msg = {content, metadata}\n",
" msg_handler(msg);\n",
" return messages.next().then(processIteratorResult);\n",
" }\n",
" return messages.next().then(processIteratorResult);\n",
" }\n",
" }) \n",
" var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n",
" return comm_promise.then((comm) => {\n",
" comm.send(data, metadata, buffers, disposeOnDone);\n",
" });\n",
" };\n",
" var comm = {\n",
" send: sendClosure\n",
" };\n",
" }\n",
" window.PyViz.comms[comm_id] = comm;\n",
" return comm;\n",
" }\n",
" window.PyViz.comm_manager = new JupyterCommManager();\n",
" \n",
"\n",
"\n",
"var JS_MIME_TYPE = 'application/javascript';\n",
"var HTML_MIME_TYPE = 'text/html';\n",
"var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n",
"var CLASS_NAME = 'output';\n",
"\n",
"/**\n",
" * Render data to the DOM node\n",
" */\n",
"function render(props, node) {\n",
" var div = document.createElement(\"div\");\n",
" var script = document.createElement(\"script\");\n",
" node.appendChild(div);\n",
" node.appendChild(script);\n",
"}\n",
"\n",
"/**\n",
" * Handle when a new output is added\n",
" */\n",
"function handle_add_output(event, handle) {\n",
" var output_area = handle.output_area;\n",
" var output = handle.output;\n",
" if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
" return\n",
" }\n",
" var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
" var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
" if (id !== undefined) {\n",
" var nchildren = toinsert.length;\n",
" var html_node = toinsert[nchildren-1].children[0];\n",
" html_node.innerHTML = output.data[HTML_MIME_TYPE];\n",
" var scripts = [];\n",
" var nodelist = html_node.querySelectorAll(\"script\");\n",
" for (var i in nodelist) {\n",
" if (nodelist.hasOwnProperty(i)) {\n",
" scripts.push(nodelist[i])\n",
" }\n",
" }\n",
"\n",
" scripts.forEach( function (oldScript) {\n",
" var newScript = document.createElement(\"script\");\n",
" var attrs = [];\n",
" var nodemap = oldScript.attributes;\n",
" for (var j in nodemap) {\n",
" if (nodemap.hasOwnProperty(j)) {\n",
" attrs.push(nodemap[j])\n",
" }\n",
" }\n",
" attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n",
" newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n",
" oldScript.parentNode.replaceChild(newScript, oldScript);\n",
" });\n",
" if (JS_MIME_TYPE in output.data) {\n",
" toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n",
" }\n",
" output_area._hv_plot_id = id;\n",
" if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n",
" window.PyViz.plot_index[id] = Bokeh.index[id];\n",
" } else {\n",
" window.PyViz.plot_index[id] = null;\n",
" }\n",
" } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
" var bk_div = document.createElement(\"div\");\n",
" bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
" var script_attrs = bk_div.children[0].attributes;\n",
" for (var i = 0; i < script_attrs.length; i++) {\n",
" toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
" }\n",
" // store reference to server id on output_area\n",
" output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
" }\n",
"}\n",
"\n",
"/**\n",
" * Handle when an output is cleared or removed\n",
" */\n",
"function handle_clear_output(event, handle) {\n",
" var id = handle.cell.output_area._hv_plot_id;\n",
" var server_id = handle.cell.output_area._bokeh_server_id;\n",
" if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n",
" var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n",
" if (server_id !== null) {\n",
" comm.send({event_type: 'server_delete', 'id': server_id});\n",
" return;\n",
" } else if (comm !== null) {\n",
" comm.send({event_type: 'delete', 'id': id});\n",
" }\n",
" delete PyViz.plot_index[id];\n",
" if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n",
" var doc = window.Bokeh.index[id].model.document\n",
" doc.clear();\n",
" const i = window.Bokeh.documents.indexOf(doc);\n",
" if (i > -1) {\n",
" window.Bokeh.documents.splice(i, 1);\n",
" }\n",
" }\n",
"}\n",
"\n",
"/**\n",
" * Handle kernel restart event\n",
" */\n",
"function handle_kernel_cleanup(event, handle) {\n",
" delete PyViz.comms[\"hv-extension-comm\"];\n",
" window.PyViz.plot_index = {}\n",
"}\n",
"\n",
"/**\n",
" * Handle update_display_data messages\n",
" */\n",
"function handle_update_output(event, handle) {\n",
" handle_clear_output(event, {cell: {output_area: handle.output_area}})\n",
" handle_add_output(event, handle)\n",
"}\n",
"\n",
"function register_renderer(events, OutputArea) {\n",
" function append_mime(data, metadata, element) {\n",
" // create a DOM node to render to\n",
" var toinsert = this.create_output_subarea(\n",
" metadata,\n",
" CLASS_NAME,\n",
" EXEC_MIME_TYPE\n",
" );\n",
" this.keyboard_manager.register_events(toinsert);\n",
" // Render to node\n",
" var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
" render(props, toinsert[0]);\n",
" element.append(toinsert);\n",
" return toinsert\n",
" }\n",
"\n",
" events.on('output_added.OutputArea', handle_add_output);\n",
" events.on('output_updated.OutputArea', handle_update_output);\n",
" events.on('clear_output.CodeCell', handle_clear_output);\n",
" events.on('delete.Cell', handle_clear_output);\n",
" events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n",
"\n",
" OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
" safe: true,\n",
" index: 0\n",
" });\n",
"}\n",
"\n",
"if (window.Jupyter !== undefined) {\n",
" try {\n",
" var events = require('base/js/events');\n",
" var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
" if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
" register_renderer(events, OutputArea);\n",
" }\n",
" } catch(err) {\n",
" }\n",
"}\n"
],
"application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in 
window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = 
toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", 
\"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n 
}\n } catch(err) {\n }\n}\n"
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.holoviews_exec.v0+json": "",
"text/html": [
"\n",
""
]
},
"metadata": {
"application/vnd.holoviews_exec.v0+json": {
"id": "aa2ce072-9ef1-4947-a226-77b5bf9f211e"
}
},
"output_type": "display_data"
}
],
"source": [
"import hvplot.polars"
]
},
{
"cell_type": "code",
"execution_count": 387,
"id": "3404064a-8ed3-47ff-9042-7a6d95b75378",
"metadata": {},
"outputs": [
{
"data": {},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.holoviews_exec.v0+json": "",
"text/html": [
"\n",
""
],
"text/plain": [
":Scatter [SepalRatio] (PetalRatio)"
]
},
"execution_count": 387,
"metadata": {
"application/vnd.holoviews_exec.v0+json": {
"id": "2b09b27e-d22d-460b-a686-160c9c624267"
}
},
"output_type": "execute_result"
}
],
"source": [
"(\n",
"iris\n",
".filter(pl.col('SepalLength') > 5)\n",
".with_columns(\n",
" SepalRatio=pl.col('SepalWidth') / pl.col('SepalLength'),\n",
" PetalRatio=pl.col('PetalWidth') / pl.col('PetalLength')\n",
")\n",
".hvplot.scatter(x='SepalRatio', y='PetalRatio')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 388,
"id": "81cefd81-fa8c-431d-a2a6-a1adbf6bbd2b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 4)A | B | C | D |
---|
i64 | i64 | i64 | i64 |
1 | 4 | 5 | 6 |
2 | 5 | 7 | 9 |
3 | 6 | 9 | 12 |
"
],
"text/plain": [
"shape: (3, 4)\n",
"┌─────┬─────┬─────┬─────┐\n",
"│ A ┆ B ┆ C ┆ D │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ i64 ┆ i64 │\n",
"╞═════╪═════╪═════╪═════╡\n",
"│ 1 ┆ 4 ┆ 5 ┆ 6 │\n",
"│ 2 ┆ 5 ┆ 7 ┆ 9 │\n",
"│ 3 ┆ 6 ┆ 9 ┆ 12 │\n",
"└─────┴─────┴─────┴─────┘"
]
},
"execution_count": 388,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfa = pl.DataFrame({\"A\": [1, 2, 3], \"B\": [4, 5, 6]})\n",
"(\n",
"dfa\n",
".with_columns(C = pl.col('A') + pl.col('B'))\n",
".with_columns(D = pl.col('A') + pl.col('C'))\n",
")"
]
},
{
"cell_type": "markdown",
"id": "54bc640b-d56f-4990-8ecd-3d5d5ff0a6f8",
"metadata": {},
"source": [
"### Indexing / selection"
]
},
{
"cell_type": "code",
"execution_count": 389,
"id": "8cf234e3-84bb-4423-bfde-e54ab7b78792",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (1, 6)index | bar | one | flag | foo | one_trunc |
---|
str | f64 | f64 | bool | str | f64 |
"b" | 2.0 | 2.0 | false | "bar" | 2.0 |
"
],
"text/plain": [
"shape: (1, 6)\n",
"┌───────┬─────┬─────┬───────┬─────┬───────────┐\n",
"│ index ┆ bar ┆ one ┆ flag ┆ foo ┆ one_trunc │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ f64 ┆ bool ┆ str ┆ f64 │\n",
"╞═══════╪═════╪═════╪═══════╪═════╪═══════════╡\n",
"│ b ┆ 2.0 ┆ 2.0 ┆ false ┆ bar ┆ 2.0 │\n",
"└───────┴─────┴─────┴───────┴─────┴───────────┘"
]
},
"execution_count": 389,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df.loc[\"b\"]\n",
"df.filter(pl.col('index') == 'b')"
]
},
{
"cell_type": "code",
"execution_count": 390,
"id": "17f2b709-93d3-4bee-a496-1a9665ca1194",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (1, 6)index | bar | one | flag | foo | one_trunc |
---|
str | f64 | f64 | bool | str | f64 |
"c" | 3.0 | 3.0 | true | "bar" | null |
"
],
"text/plain": [
"shape: (1, 6)\n",
"┌───────┬─────┬─────┬──────┬─────┬───────────┐\n",
"│ index ┆ bar ┆ one ┆ flag ┆ foo ┆ one_trunc │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ f64 ┆ bool ┆ str ┆ f64 │\n",
"╞═══════╪═════╪═════╪══════╪═════╪═══════════╡\n",
"│ c ┆ 3.0 ┆ 3.0 ┆ true ┆ bar ┆ null │\n",
"└───────┴─────┴─────┴──────┴─────┴───────────┘"
]
},
"execution_count": 390,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df.iloc[2]\n",
"df.slice(2, 1)"
]
},
{
"cell_type": "markdown",
"id": "00546c12-931a-4f0a-8d18-80e8e45fc646",
"metadata": {},
"source": [
"### Data alignment and arithmetic"
]
},
{
"cell_type": "code",
"execution_count": 391,
"id": "b977f5d6-962a-4cac-9234-1f331a8b88c0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (10, 5)index | A | B | C | D |
---|
u32 | f64 | f64 | f64 | f64 |
0 | 0.779298 | 0.901811 | 0.091126 | 1.41107 |
1 | -0.936034 | -2.621484 | 0.237825 | 1.146001 |
2 | -1.192351 | -3.360748 | 1.563503 | 0.1362 |
3 | 0.933616 | -0.613897 | -1.318911 | 0.193137 |
4 | -0.195308 | -0.5624 | 0.118895 | 0.222491 |
5 | 0.876072 | -0.813732 | 1.367814 | 0.554935 |
6 | 0.73751 | 0.994505 | 1.104228 | 0.4458 |
7 | null | null | null | 0.959163 |
8 | null | null | null | 1.144944 |
9 | null | null | null | 0.158198 |
"
],
"text/plain": [
"shape: (10, 5)\n",
"┌───────┬───────────┬───────────┬───────────┬──────────┐\n",
"│ index ┆ A ┆ B ┆ C ┆ D │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═══════════╪═══════════╪═══════════╪══════════╡\n",
"│ 0 ┆ 0.779298 ┆ 0.901811 ┆ 0.091126 ┆ 1.41107 │\n",
"│ 1 ┆ -0.936034 ┆ -2.621484 ┆ 0.237825 ┆ 1.146001 │\n",
"│ 2 ┆ -1.192351 ┆ -3.360748 ┆ 1.563503 ┆ 0.1362 │\n",
"│ 3 ┆ 0.933616 ┆ -0.613897 ┆ -1.318911 ┆ 0.193137 │\n",
"│ 4 ┆ -0.195308 ┆ -0.5624 ┆ 0.118895 ┆ 0.222491 │\n",
"│ 5 ┆ 0.876072 ┆ -0.813732 ┆ 1.367814 ┆ 0.554935 │\n",
"│ 6 ┆ 0.73751 ┆ 0.994505 ┆ 1.104228 ┆ 0.4458 │\n",
"│ 7 ┆ null ┆ null ┆ null ┆ 0.959163 │\n",
"│ 8 ┆ null ┆ null ┆ null ┆ 1.144944 │\n",
"│ 9 ┆ null ┆ null ┆ null ┆ 0.158198 │\n",
"└───────┴───────────┴───────────┴───────────┴──────────┘"
]
},
"execution_count": 391,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df + df2\n",
"df = pl.DataFrame(np.random.randn(10, 4), schema=[\"A\", \"B\", \"C\", \"D\"])\n",
"df2 = pl.DataFrame(np.random.randn(7, 3), schema=[\"A\", \"B\", \"C\"])\n",
"align_op(df.with_row_index(), df2.with_row_index(), pl.Expr.add, fill_value=None)"
]
},
{
"cell_type": "code",
"execution_count": 392,
"id": "7bc4f733-f1c9-4970-9379-c7c87ab01c58",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (10, 4)A | B | C | D |
---|
f64 | f64 | f64 | f64 |
0.0 | 0.0 | 0.0 | 0.0 |
-0.755136 | -2.481002 | 0.449509 | -0.265068 |
-0.660488 | -3.696891 | 1.680688 | -1.27487 |
1.186271 | -2.438415 | -0.240415 | -1.217932 |
-0.508953 | -1.89029 | 1.344603 | -1.188579 |
-0.631357 | -2.716706 | 0.657985 | -0.856135 |
0.117999 | -1.390857 | 0.727739 | -0.96527 |
0.549259 | -1.176227 | 2.202544 | -0.451907 |
0.772859 | -1.250499 | 2.459803 | -0.266126 |
-0.366154 | -0.029816 | 1.599533 | -1.252872 |
"
],
"text/plain": [
"shape: (10, 4)\n",
"┌───────────┬───────────┬───────────┬───────────┐\n",
"│ A ┆ B ┆ C ┆ D │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════════╪═══════════╪═══════════╪═══════════╡\n",
"│ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ -0.755136 ┆ -2.481002 ┆ 0.449509 ┆ -0.265068 │\n",
"│ -0.660488 ┆ -3.696891 ┆ 1.680688 ┆ -1.27487 │\n",
"│ 1.186271 ┆ -2.438415 ┆ -0.240415 ┆ -1.217932 │\n",
"│ -0.508953 ┆ -1.89029 ┆ 1.344603 ┆ -1.188579 │\n",
"│ -0.631357 ┆ -2.716706 ┆ 0.657985 ┆ -0.856135 │\n",
"│ 0.117999 ┆ -1.390857 ┆ 0.727739 ┆ -0.96527 │\n",
"│ 0.549259 ┆ -1.176227 ┆ 2.202544 ┆ -0.451907 │\n",
"│ 0.772859 ┆ -1.250499 ┆ 2.459803 ┆ -0.266126 │\n",
"│ -0.366154 ┆ -0.029816 ┆ 1.599533 ┆ -1.252872 │\n",
"└───────────┴───────────┴───────────┴───────────┘"
]
},
"execution_count": 392,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df - df.iloc[0]\n",
"df.select(pl.all() - pl.all().first())"
]
},
{
"cell_type": "code",
"execution_count": 393,
"id": "d0899303-ad74-4436-ad20-b6bf64278269",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (10, 4)A | B | C | D |
---|
f64 | f64 | f64 | f64 |
0.646731 | 10.257392 | -3.479233 | 9.055349 |
-3.128947 | -2.147616 | -1.231688 | 7.730007 |
-2.65571 | -8.227061 | 4.924206 | 2.681001 |
6.578087 | -1.934685 | -4.681306 | 2.965687 |
-1.898036 | 0.80594 | 3.243782 | 3.112453 |
-2.510055 | -3.32614 | -0.189309 | 4.774675 |
1.236725 | 3.303107 | 0.159463 | 4.228999 |
3.393025 | 4.376257 | 7.533488 | 6.795814 |
4.511027 | 4.004899 | 8.819779 | 7.724719 |
-1.184039 | 10.108311 | 4.518431 | 2.790988 |
"
],
"text/plain": [
"shape: (10, 4)\n",
"┌───────────┬───────────┬───────────┬──────────┐\n",
"│ A ┆ B ┆ C ┆ D │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════════╪═══════════╪═══════════╪══════════╡\n",
"│ 0.646731 ┆ 10.257392 ┆ -3.479233 ┆ 9.055349 │\n",
"│ -3.128947 ┆ -2.147616 ┆ -1.231688 ┆ 7.730007 │\n",
"│ -2.65571 ┆ -8.227061 ┆ 4.924206 ┆ 2.681001 │\n",
"│ 6.578087 ┆ -1.934685 ┆ -4.681306 ┆ 2.965687 │\n",
"│ -1.898036 ┆ 0.80594 ┆ 3.243782 ┆ 3.112453 │\n",
"│ -2.510055 ┆ -3.32614 ┆ -0.189309 ┆ 4.774675 │\n",
"│ 1.236725 ┆ 3.303107 ┆ 0.159463 ┆ 4.228999 │\n",
"│ 3.393025 ┆ 4.376257 ┆ 7.533488 ┆ 6.795814 │\n",
"│ 4.511027 ┆ 4.004899 ┆ 8.819779 ┆ 7.724719 │\n",
"│ -1.184039 ┆ 10.108311 ┆ 4.518431 ┆ 2.790988 │\n",
"└───────────┴───────────┴───────────┴──────────┘"
]
},
"execution_count": 393,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df * 5 + 2\n",
"df.select(pl.all() * 5 + 2)  # equivalent expression-API form; only this last result is displayed"
]
},
{
"cell_type": "code",
"execution_count": 394,
"id": "26b483f5-a539-4ab4-bd45-5591f24f514c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (10, 4)A | B | C | D |
---|
f64 | f64 | f64 | f64 |
-3.694757 | 0.605518 | -0.912536 | 0.708682 |
-0.974859 | -1.205512 | -1.547179 | 0.872599 |
-1.07395 | -0.488899 | 1.709866 | 7.342137 |
1.092159 | -1.27075 | -0.748357 | 5.177661 |
-1.282697 | -4.187395 | 4.019998 | 4.494571 |
-1.108634 | -0.938766 | -2.283826 | 1.802013 |
-6.550721 | 3.836984 | -2.716598 | 2.243159 |
3.58931 | 2.104149 | 0.903589 | 1.042576 |
1.991217 | 2.493891 | 0.733162 | 0.873405 |
-1.570332 | 0.616651 | 1.985363 | 6.321207 |
"
],
"text/plain": [
"shape: (10, 4)\n",
"┌───────────┬───────────┬───────────┬──────────┐\n",
"│ A ┆ B ┆ C ┆ D │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════════╪═══════════╪═══════════╪══════════╡\n",
"│ -3.694757 ┆ 0.605518 ┆ -0.912536 ┆ 0.708682 │\n",
"│ -0.974859 ┆ -1.205512 ┆ -1.547179 ┆ 0.872599 │\n",
"│ -1.07395 ┆ -0.488899 ┆ 1.709866 ┆ 7.342137 │\n",
"│ 1.092159 ┆ -1.27075 ┆ -0.748357 ┆ 5.177661 │\n",
"│ -1.282697 ┆ -4.187395 ┆ 4.019998 ┆ 4.494571 │\n",
"│ -1.108634 ┆ -0.938766 ┆ -2.283826 ┆ 1.802013 │\n",
"│ -6.550721 ┆ 3.836984 ┆ -2.716598 ┆ 2.243159 │\n",
"│ 3.58931 ┆ 2.104149 ┆ 0.903589 ┆ 1.042576 │\n",
"│ 1.991217 ┆ 2.493891 ┆ 0.733162 ┆ 0.873405 │\n",
"│ -1.570332 ┆ 0.616651 ┆ 1.985363 ┆ 6.321207 │\n",
"└───────────┴───────────┴───────────┴──────────┘"
]
},
"execution_count": 394,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select((1 / pl.all()).name.keep())"
]
},
{
"cell_type": "code",
"execution_count": 395,
"id": "84dbebb7-0775-4838-904d-d3c740224a6c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (10, 4)A | B | C | D |
---|
f64 | f64 | f64 | f64 |
0.005366 | 7.438608 | 1.442113 | 3.964551 |
1.107218 | 0.473494 | 0.174517 | 1.724807 |
0.751733 | 17.503438 | 0.116991 | 0.000344 |
0.702839 | 0.383495 | 3.188347 | 0.001391 |
0.369406 | 0.003253 | 0.003829 | 0.00245 |
0.661984 | 1.287568 | 0.036758 | 0.094835 |
0.000543 | 0.004614 | 0.018361 | 0.039497 |
0.006025 | 0.051015 | 1.500085 | 0.846387 |
0.06361 | 0.025852 | 3.460997 | 1.718449 |
0.16445 | 6.915785 | 0.064364 | 0.000626 |
"
],
"text/plain": [
"shape: (10, 4)\n",
"┌──────────┬───────────┬──────────┬──────────┐\n",
"│ A ┆ B ┆ C ┆ D │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞══════════╪═══════════╪══════════╪══════════╡\n",
"│ 0.005366 ┆ 7.438608 ┆ 1.442113 ┆ 3.964551 │\n",
"│ 1.107218 ┆ 0.473494 ┆ 0.174517 ┆ 1.724807 │\n",
"│ 0.751733 ┆ 17.503438 ┆ 0.116991 ┆ 0.000344 │\n",
"│ 0.702839 ┆ 0.383495 ┆ 3.188347 ┆ 0.001391 │\n",
"│ 0.369406 ┆ 0.003253 ┆ 0.003829 ┆ 0.00245 │\n",
"│ 0.661984 ┆ 1.287568 ┆ 0.036758 ┆ 0.094835 │\n",
"│ 0.000543 ┆ 0.004614 ┆ 0.018361 ┆ 0.039497 │\n",
"│ 0.006025 ┆ 0.051015 ┆ 1.500085 ┆ 0.846387 │\n",
"│ 0.06361 ┆ 0.025852 ┆ 3.460997 ┆ 1.718449 │\n",
"│ 0.16445 ┆ 6.915785 ┆ 0.064364 ┆ 0.000626 │\n",
"└──────────┴───────────┴──────────┴──────────┘"
]
},
"execution_count": 395,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select(pl.all() ** 4)"
]
},
{
"cell_type": "code",
"execution_count": 396,
"id": "c878bcc2-478d-41ef-bae7-14e755469f2e",
"metadata": {},
"outputs": [],
"source": [
"df1 = pl.DataFrame({\"a\": [1, 0, 1], \"b\": [0, 1, 1]}).cast(pl.Boolean)\n",
"df2 = pl.DataFrame({\"a\": [0, 1, 1], \"b\": [1, 1, 0]}).cast(pl.Boolean)"
]
},
{
"cell_type": "code",
"execution_count": 397,
"id": "4386501a-83ab-441d-88a8-26ee08bd376b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 3)index | a | b |
---|
u32 | bool | bool |
0 | false | false |
1 | false | true |
2 | true | false |
"
],
"text/plain": [
"shape: (3, 3)\n",
"┌───────┬───────┬───────┐\n",
"│ index ┆ a ┆ b │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ bool ┆ bool │\n",
"╞═══════╪═══════╪═══════╡\n",
"│ 0 ┆ false ┆ false │\n",
"│ 1 ┆ false ┆ true │\n",
"│ 2 ┆ true ┆ false │\n",
"└───────┴───────┴───────┘"
]
},
"execution_count": 397,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df1 & df2\n",
"align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.and_)"
]
},
{
"cell_type": "code",
"execution_count": 398,
"id": "c5003633-2de5-4a26-8c20-64f85589e402",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 3)index | a | b |
---|
u32 | bool | bool |
0 | true | true |
1 | true | true |
2 | true | true |
"
],
"text/plain": [
"shape: (3, 3)\n",
"┌───────┬──────┬──────┐\n",
"│ index ┆ a ┆ b │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ bool ┆ bool │\n",
"╞═══════╪══════╪══════╡\n",
"│ 0 ┆ true ┆ true │\n",
"│ 1 ┆ true ┆ true │\n",
"│ 2 ┆ true ┆ true │\n",
"└───────┴──────┴──────┘"
]
},
"execution_count": 398,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df1 | df2\n",
"align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.or_)"
]
},
{
"cell_type": "code",
"execution_count": 399,
"id": "9dcb3e5f-ca32-4625-ab87-fb0a6cd9e30e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 3)index | a | b |
---|
u32 | bool | bool |
0 | true | true |
1 | true | false |
2 | false | true |
"
],
"text/plain": [
"shape: (3, 3)\n",
"┌───────┬───────┬───────┐\n",
"│ index ┆ a ┆ b │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ u32 ┆ bool ┆ bool │\n",
"╞═══════╪═══════╪═══════╡\n",
"│ 0 ┆ true ┆ true │\n",
"│ 1 ┆ true ┆ false │\n",
"│ 2 ┆ false ┆ true │\n",
"└───────┴───────┴───────┘"
]
},
"execution_count": 399,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# df1 ^ df2\n",
"align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.xor)"
]
},
{
"cell_type": "code",
"execution_count": 400,
"id": "5e7e2f60-6c03-4f93-ab54-be1e1c573774",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (3, 2)a | b |
---|
bool | bool |
false | true |
true | false |
false | false |
"
],
"text/plain": [
"shape: (3, 2)\n",
"┌───────┬───────┐\n",
"│ a ┆ b │\n",
"│ --- ┆ --- │\n",
"│ bool ┆ bool │\n",
"╞═══════╪═══════╡\n",
"│ false ┆ true │\n",
"│ true ┆ false │\n",
"│ false ┆ false │\n",
"└───────┴───────┘"
]
},
"execution_count": 400,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# -df1  (on a boolean frame pandas treats unary minus as logical NOT, i.e. ~df1)\n",
"df1.select(pl.all().not_())"
]
},
{
"cell_type": "markdown",
"id": "a6e74e35-aaa9-447c-b1d6-e4563c505408",
"metadata": {},
"source": [
"### Transposing"
]
},
{
"cell_type": "code",
"execution_count": 401,
"id": "4f752c20-fa32-42ad-a63c-5cd7f347f895",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4, 6)index | column_0 | column_1 | column_2 | column_3 | column_4 |
---|
str | f64 | f64 | f64 | f64 | f64 |
"A" | -0.270654 | -1.025789 | -0.931142 | 0.915617 | -0.779607 |
"B" | 1.651478 | -0.829523 | -2.045412 | -0.786937 | -0.238812 |
"C" | -1.095847 | -0.646338 | 0.584841 | -1.336261 | 0.248756 |
"D" | 1.41107 | 1.146001 | 0.1362 | 0.193137 | 0.222491 |
"
],
"text/plain": [
"shape: (4, 6)\n",
"┌───────┬───────────┬───────────┬───────────┬───────────┬───────────┐\n",
"│ index ┆ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ column_4 │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
"│ A ┆ -0.270654 ┆ -1.025789 ┆ -0.931142 ┆ 0.915617 ┆ -0.779607 │\n",
"│ B ┆ 1.651478 ┆ -0.829523 ┆ -2.045412 ┆ -0.786937 ┆ -0.238812 │\n",
"│ C ┆ -1.095847 ┆ -0.646338 ┆ 0.584841 ┆ -1.336261 ┆ 0.248756 │\n",
"│ D ┆ 1.41107 ┆ 1.146001 ┆ 0.1362 ┆ 0.193137 ┆ 0.222491 │\n",
"└───────┴───────────┴───────────┴───────────┴───────────┴───────────┘"
]
},
"execution_count": 401,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.slice(0, 5).transpose(include_header=True, header_name='index')"
]
},
{
"cell_type": "markdown",
"id": "fde7671d-70d7-4afd-abb8-8590458e0b56",
"metadata": {},
"source": [
"### DataFrame interoperability with NumPy functions"
]
},
{
"cell_type": "code",
"execution_count": 402,
"id": "a34197b6-6b7a-406f-ac47-257079465cbb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.76288056, 5.2146839 , 0.33425647, 4.10033964],\n",
" [0.35851332, 0.43625722, 0.52396123, 3.14558954],\n",
" [0.39410336, 0.12932687, 1.79470589, 1.14591119],\n",
" [2.49831716, 0.45523708, 0.26282647, 1.21304945],\n",
" [0.4585861 , 0.78756298, 1.28242953, 1.24918415],\n",
" [0.4057529 , 0.34464929, 0.64541498, 1.74182763],\n",
" [0.85842587, 1.29773619, 0.69204278, 1.56173886],\n",
" [1.32128542, 1.60841852, 3.02435428, 2.60951072],\n",
" [1.65236152, 1.49328708, 3.91163667, 3.14226478],\n",
" [0.52897833, 5.06149624, 1.65480988, 1.17139769]])"
]
},
"execution_count": 402,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(df)"
]
},
{
"cell_type": "code",
"execution_count": 403,
"id": "ccd80ad9-a9d1-4b89-ac4c-1daa895c7a2c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.27065379, 1.65147847, -1.0958467 , 1.41106981],\n",
" [-1.02578947, -0.82952326, -0.64633759, 1.14600133],\n",
" [-0.93114207, -2.04541218, 0.58484116, 0.13620012],\n",
" [ 0.91561737, -0.78693694, -1.33626128, 0.19313739],\n",
" [-0.77960722, -0.23881194, 0.24875635, 0.22249066],\n",
" [-0.90201093, -1.06522792, -0.4378618 , 0.55493493],\n",
" [-0.15265495, 0.26062136, -0.3681075 , 0.44579985],\n",
" [ 0.27860506, 0.47525141, 1.10669761, 0.95916274],\n",
" [ 0.50220549, 0.40097979, 1.36395587, 1.14494381],\n",
" [-0.63680781, 1.62166214, 0.50368613, 0.15819764]])"
]
},
"execution_count": 403,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.asarray(df)"
]
},
{
"cell_type": "code",
"execution_count": 404,
"id": "d7152ec2-b29b-4bbd-8c76-47eced83c852",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
shape: (4,) |
---|
f64 |
2.718282 |
7.389056 |
20.085537 |
54.59815 |
"
],
"text/plain": [
"shape: (4,)\n",
"Series: '' [f64]\n",
"[\n",
"\t2.718282\n",
"\t7.389056\n",
"\t20.085537\n",
"\t54.59815\n",
"]"
]
},
"execution_count": 404,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser = pl.Series([1, 2, 3, 4])\n",
"np.exp(ser)"
]
},
{
"cell_type": "code",
"execution_count": 405,
"id": "c13aff6c-114b-4994-89bf-53e5cd6cc3d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" shape: (3, 2)value | index |
---|
i64 | str | 1 | "a" | 2 | "b" | 3 | "c" |
| \n",
" shape: (3, 2)value | index |
---|
i64 | str | 1 | "b" | 3 | "a" | 5 | "c" |
|
"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ser1 = pl.DataFrame(dict(value=[1, 2, 3], index=[\"a\", \"b\", \"c\"]))\n",
"ser2 = pl.DataFrame(dict(value=[1, 3, 5], index=[\"b\", \"a\", \"c\"]))\n",
"row(ser1, ser2)"
]
},
{
"cell_type": "code",
"execution_count": 411,
"id": "1fb0d0d4-e3d5-46ec-b55c-8cfef4e0777e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" shape: (3, 2)value | index |
---|
i64 | str | 1 | "a" | 2 | "b" | 3 | "c" |
| \n",
" shape: (3, 2)value | index |
---|
i64 | str | 3 | "a" | 1 | "b" | 5 | "c" |
| \n",
" shape: (3, 2)index | value |
---|
str | i64 | "a" | 1 | "b" | 0 | "c" | 3 |
|
"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# np.remainder(ser1, ser2)\n",
"ser1_a, ser2_a = pl.align_frames(ser1, ser2, on='index')\n",
"row(ser1_a, ser2_a, pl.select(\n",
" index=ser1_a['index'],\n",
" value=np.remainder(ser1_a['value'], ser2_a['value'])) \n",
")"
]
},
{
"cell_type": "markdown",
"id": "a825998e-fb2f-4ce0-8c6a-eb3f88572d4e",
"metadata": {},
"source": [
"### Console display"
]
},
{
"cell_type": "code",
"execution_count": 412,
"id": "107465f3-4340-4bb4-9f4b-b2cbb92d364b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape: (100, 23)\n",
"┌───────┬───────────┬──────┬───────┬───┬─────┬─────┬─────┬──────┐\n",
"│ id ┆ player ┆ year ┆ stint ┆ … ┆ hbp ┆ sh ┆ sf ┆ gidp │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ str ┆ i64 ┆ i64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═══════════╪══════╪═══════╪═══╪═════╪═════╪═════╪══════╡\n",
"│ 88641 ┆ womacto01 ┆ 2006 ┆ 2 ┆ … ┆ 0.0 ┆ 3.0 ┆ 0.0 ┆ 0.0 │\n",
"│ 88643 ┆ schilcu01 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ 88645 ┆ myersmi01 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ 88649 ┆ helliri01 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ 88650 ┆ johnsra05 ┆ 2006 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 89525 ┆ benitar01 ┆ 2007 ┆ 2 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ 89526 ┆ benitar01 ┆ 2007 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"│ 89530 ┆ ausmubr01 ┆ 2007 ┆ 1 ┆ … ┆ 6.0 ┆ 4.0 ┆ 1.0 ┆ 11.0 │\n",
"│ 89533 ┆ aloumo01 ┆ 2007 ┆ 1 ┆ … ┆ 2.0 ┆ 0.0 ┆ 3.0 ┆ 13.0 │\n",
"│ 89534 ┆ alomasa02 ┆ 2007 ┆ 1 ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n",
"└───────┴───────────┴──────┴───────┴───┴─────┴─────┴─────┴──────┘\n"
]
}
],
"source": [
"baseball = pl.read_csv('data/baseball.csv')\n",
"print(baseball)"
]
},
{
"cell_type": "code",
"execution_count": 414,
"id": "2a4375d0-9853-4702-a4ba-7b69c118e1f0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rows: 100\n",
"Columns: 23\n",
"$ id 88641, 88643, 88645, 88649, 88650, 88652, 88653, 88662, 89177, 89178\n",
"$ player 'womacto01', 'schilcu01', 'myersmi01', 'helliri01', 'johnsra05', 'finlest01', 'gonzalu01', 'seleaa01', 'francju01', 'francju01'\n",
"$ year 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007\n",
"$ stint 2, 1, 1, 1, 1, 1, 1, 1, 2, 1\n",
"$ team 'CHN', 'BOS', 'NYA', 'MIL', 'NYA', 'SFN', 'ARI', 'LAN', 'ATL', 'NYN'\n",
"$ lg 'NL', 'AL', 'AL', 'NL', 'AL', 'NL', 'NL', 'NL', 'NL', 'NL'\n",
"$ g 19, 31, 62, 20, 33, 139, 153, 28, 15, 40\n",
"$ ab 50, 2, 0, 3, 6, 426, 586, 26, 40, 50\n",
"$ r 6, 0, 0, 0, 0, 66, 93, 2, 1, 7\n",
"$ h 14, 1, 0, 0, 1, 105, 159, 5, 10, 10\n",
"$ X2b 1, 0, 0, 0, 0, 21, 52, 1, 3, 0\n",
"$ X3b 0, 0, 0, 0, 0, 12, 2, 0, 0, 0\n",
"$ hr 1, 0, 0, 0, 0, 6, 15, 0, 0, 1\n",
"$ rbi 2.0, 0.0, 0.0, 0.0, 0.0, 40.0, 73.0, 0.0, 8.0, 8.0\n",
"$ sb 1.0, 0.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 2.0\n",
"$ cs 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0\n",
"$ bb 4, 0, 0, 0, 0, 46, 69, 1, 4, 10\n",
"$ so 4.0, 1.0, 0.0, 2.0, 4.0, 55.0, 58.0, 7.0, 10.0, 13.0\n",
"$ ibb 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 10.0, 0.0, 1.0, 0.0\n",
"$ hbp 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 0.0, 0.0, 0.0\n",
"$ sh 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 6.0, 0.0, 0.0\n",
"$ sf 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 0.0, 1.0, 1.0\n",
"$ gidp 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 14.0, 1.0, 1.0, 1.0\n",
"\n"
]
}
],
"source": [
"baseball.glimpse()"
]
},
{
"cell_type": "code",
"execution_count": 419,
"id": "fdcdae5f-88e6-410f-bba6-2304dedc7824",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape: (20, 12)\n",
"┌───────┬───────────┬──────┬───────┬───┬─────┬─────┬─────┬─────┐\n",
"│ id ┆ player ┆ year ┆ stint ┆ … ┆ r ┆ h ┆ X2b ┆ X3b │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ str ┆ i64 ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n",
"╞═══════╪═══════════╪══════╪═══════╪═══╪═════╪═════╪═════╪═════╡\n",
"│ 89474 ┆ finlest01 ┆ 2007 ┆ 1 ┆ … ┆ 9 ┆ 17 ┆ 3 ┆ 0 │\n",
"│ 89480 ┆ embreal01 ┆ 2007 ┆ 1 ┆ … ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n",
"│ 89481 ┆ edmonji01 ┆ 2007 ┆ 1 ┆ … ┆ 39 ┆ 92 ┆ 15 ┆ 2 │\n",
"│ 89482 ┆ easleda01 ┆ 2007 ┆ 1 ┆ … ┆ 24 ┆ 54 ┆ 6 ┆ 0 │\n",
"│ 89489 ┆ delgaca01 ┆ 2007 ┆ 1 ┆ … ┆ 71 ┆ 139 ┆ 30 ┆ 0 │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 89525 ┆ benitar01 ┆ 2007 ┆ 2 ┆ … ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n",
"│ 89526 ┆ benitar01 ┆ 2007 ┆ 1 ┆ … ┆ 0 ┆ 0 ┆ 0 ┆ 0 │\n",
"│ 89530 ┆ ausmubr01 ┆ 2007 ┆ 1 ┆ … ┆ 38 ┆ 82 ┆ 16 ┆ 3 │\n",
"│ 89533 ┆ aloumo01 ┆ 2007 ┆ 1 ┆ … ┆ 51 ┆ 112 ┆ 19 ┆ 1 │\n",
"│ 89534 ┆ alomasa02 ┆ 2007 ┆ 1 ┆ … ┆ 1 ┆ 3 ┆ 1 ┆ 0 │\n",
"└───────┴───────────┴──────┴───────┴───┴─────┴─────┴─────┴─────┘\n"
]
}
],
"source": [
"print(\n",
" baseball.select(pl.nth(range(0, 12)).tail(20))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 422,
"id": "ceacf4e9-2259-4bcc-b9ee-a343286c36f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape: (3, 12)\n",
"┌──────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐\n",
"│ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ … ┆ column_8 ┆ column_9 ┆ column_10 ┆ column_11 │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡\n",
"│ 0.545788 ┆ -0.786944 ┆ 0.511419 ┆ -1.465452 ┆ … ┆ 0.103465 ┆ 1.25846 ┆ -1.961481 ┆ -0.892518 │\n",
"│ 0.828004 ┆ 0.291711 ┆ -0.666151 ┆ -0.056065 ┆ … ┆ -1.487244 ┆ -0.325992 ┆ -0.25713 ┆ -0.381324 │\n",
"│ 0.44002 ┆ 0.21451 ┆ 0.285626 ┆ -0.689011 ┆ … ┆ -0.06698 ┆ 1.482008 ┆ 1.288873 ┆ -1.12619 │\n",
"└──────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘\n"
]
}
],
"source": [
"print(pl.DataFrame(np.random.randn(3, 12)))"
]
},
{
"cell_type": "code",
"execution_count": 423,
"id": "57ee1cf9-b23a-41a4-865d-db99607a77e0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape: (3, 12)\n",
"┌─────┬─────┬─────┬─────┬───┬─────┬─────┬─────┬─────┐\n",
"│ col ┆ col ┆ col ┆ col ┆ … ┆ col ┆ col ┆ col ┆ col │\n",
"│ umn ┆ umn ┆ umn ┆ umn ┆ ┆ umn ┆ umn ┆ umn ┆ umn │\n",
"│ _0 ┆ _1 ┆ _2 ┆ _3 ┆ ┆ _8 ┆ _9 ┆ _10 ┆ _11 │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═════╪═════╪═════╪═════╪═══╪═════╪═════╪═════╪═════╡\n",
"│ -1. ┆ 0.0 ┆ -0. ┆ 1.7 ┆ … ┆ 0.8 ┆ 0.3 ┆ -1. ┆ -1. │\n",
"│ 593 ┆ 321 ┆ 382 ┆ 495 ┆ ┆ 431 ┆ 008 ┆ 529 ┆ 861 │\n",
"│ 329 ┆ 95 ┆ 463 ┆ 8 ┆ ┆ 73 ┆ 16 ┆ 093 ┆ 916 │\n",
"│ 0.5 ┆ -0. ┆ 0.5 ┆ -0. ┆ … ┆ -0. ┆ -0. ┆ -1. ┆ 0.6 │\n",
"│ 488 ┆ 337 ┆ 436 ┆ 509 ┆ ┆ 462 ┆ 292 ┆ 385 ┆ 538 │\n",
"│ 96 ┆ 439 ┆ 99 ┆ 815 ┆ ┆ 243 ┆ 441 ┆ 327 ┆ 87 │\n",
"│ -1. ┆ -0. ┆ 0.7 ┆ -0. ┆ … ┆ 0.1 ┆ -0. ┆ -0. ┆ 0.5 │\n",
"│ 150 ┆ 856 ┆ 171 ┆ 110 ┆ ┆ 902 ┆ 484 ┆ 245 ┆ 015 │\n",
"│ 248 ┆ 766 ┆ 96 ┆ 738 ┆ ┆ 8 ┆ 427 ┆ 482 ┆ 93 │\n",
"└─────┴─────┴─────┴─────┴───┴─────┴─────┴─────┴─────┘\n"
]
}
],
"source": [
"with pl.Config() as cfg:\n",
" cfg.set_tbl_width_chars(40)\n",
" print(pl.DataFrame(np.random.randn(3, 12)))"
]
},
{
"cell_type": "code",
"execution_count": 424,
"id": "e1c51e82-1ed2-4b53-9315-f5465710219b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape: (2, 2)\n",
"┌─────────────┬──────────────┐\n",
"│ filename ┆ path │\n",
"│ --- ┆ --- │\n",
"│ str ┆ str │\n",
"╞═════════════╪══════════════╡\n",
"│ filename_01 ┆ media/user_n │\n",
"│ ┆ ame/storage/ │\n",
"│ ┆ folder… │\n",
"│ filename_02 ┆ media/user_n │\n",
"│ ┆ ame/storage/ │\n",
"│ ┆ folder… │\n",
"└─────────────┴──────────────┘\n"
]
}
],
"source": [
"datafile = {\n",
" \"filename\": [\"filename_01\", \"filename_02\"],\n",
" \"path\": [\n",
" \"media/user_name/storage/folder_01/filename_01\",\n",
" \"media/user_name/storage/folder_02/filename_02\",\n",
" ],\n",
"}\n",
"with pl.Config() as cfg:\n",
" cfg.set_tbl_width_chars(30)\n",
" print(pl.DataFrame(datafile))"
]
},
{
"cell_type": "code",
"execution_count": 425,
"id": "01846bec-7998-491c-84b9-df7dfea964d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"shape: (2, 2)\n",
"┌─────────────┬─────────────────────────────────┐\n",
"│ filename ┆ path │\n",
"│ --- ┆ --- │\n",
"│ str ┆ str │\n",
"╞═════════════╪═════════════════════════════════╡\n",
"│ filename_01 ┆ media/user_name/storage/folder… │\n",
"│ filename_02 ┆ media/user_name/storage/folder… │\n",
"└─────────────┴─────────────────────────────────┘\n"
]
}
],
"source": [
"with pl.Config() as cfg:\n",
" cfg.set_tbl_width_chars(100)\n",
" print(pl.DataFrame(datafile))"
]
},
{
"cell_type": "markdown",
"id": "8d5da449-149c-47eb-a5cd-5ddea1147ff3",
"metadata": {},
"source": [
"### DataFrame column attribute access and IPython completion"
]
},
{
"cell_type": "markdown",
"id": "484e82d1-745f-4943-8855-59e068956312",
"metadata": {},
"source": [
"There is no such function in polars."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}