{ "cells": [ { "cell_type": "markdown", "id": "ed9490e5-0e99-42eb-8b33-4a602ec28673", "metadata": {}, "source": [ "# 10 minutes to pandas" ] }, { "cell_type": "code", "execution_count": 1, "id": "54d11b1c-d290-4364-bdbe-1f8d11e53c97", "metadata": {}, "outputs": [], "source": [ "from io import StringIO\n", "\n", "import numpy as np\n", "import polars as pl\n", "from helper.jupyter import row" ] }, { "cell_type": "code", "execution_count": 131, "id": "22ee56ae-4767-447f-b46c-641c0f321c96", "metadata": {}, "outputs": [], "source": [ "def to_dataframe(data):\n", "    \"\"\"Parse a whitespace-delimited text table into a polars DataFrame.\n", "\n", "    Every line of ``data`` is split on runs of whitespace and re-joined with\n", "    commas, then the result is read with ``pl.read_csv``. Splitting line by\n", "    line (instead of substituting over the whole string) keeps indentation or\n", "    trailing blanks on a row from turning into empty leading/trailing fields.\n", "    \"\"\"\n", "    rows = (','.join(line.split()) for line in data.strip().splitlines())\n", "    return pl.read_csv(StringIO('\\n'.join(rows)))" ] }, { "cell_type": "markdown", "id": "a353bf66-d7ec-4f09-8351-34cf5e470e1a", "metadata": {}, "source": [ "## Object creation" ] }, { "cell_type": "code", "execution_count": 2, "id": "9b2c0c9b-acd9-4c34-b43f-e8f9e5e18db0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6,)
f64
1.0
3.0
5.0
NaN
6.0
8.0
\n", "shape: (6,)
i64
1
3
5
null
6
8
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "s = pl.Series([1, 3, 5, np.nan, 6, 8], strict=False)\n", "s2 = pl.Series([1, 3, 5, None, 6, 8])\n", "row(s, s2)" ] }, { "cell_type": "code", "execution_count": 3, "id": "94d83c8a-180d-4af3-a485-02faced33d18", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6,)
index
date
2013-01-01
2013-01-02
2013-01-03
2013-01-04
2013-01-05
2013-01-06
" ], "text/plain": [ "shape: (6,)\n", "Series: 'index' [date]\n", "[\n", "\t2013-01-01\n", "\t2013-01-02\n", "\t2013-01-03\n", "\t2013-01-04\n", "\t2013-01-05\n", "\t2013-01-06\n", "]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dates = pl.date_range(\n", " pl.date(2013, 1, 1), \n", " pl.date(2013, 1, 6), \n", " interval='1d', eager=True).alias('index')\n", "dates" ] }, { "cell_type": "code", "execution_count": 4, "id": "67635ded-bf92-471a-a567-a62e51d308f2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 5)
indexABCD
datef64f64f64f64
2013-01-01-1.281556-0.976005-0.449651.508403
2013-01-022.2995780.450922-0.758744-0.762308
2013-01-030.489373-0.1240090.597175-0.365254
2013-01-04-0.6892020.0405630.8834050.400285
2013-01-050.9721641.5965521.191523-1.907236
2013-01-060.8309180.2114870.910451.73558
" ], "text/plain": [ "shape: (6, 5)\n", "┌────────────┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 2013-01-01 ┆ -1.281556 ┆ -0.976005 ┆ -0.44965 ┆ 1.508403 │\n", "│ 2013-01-02 ┆ 2.299578 ┆ 0.450922 ┆ -0.758744 ┆ -0.762308 │\n", "│ 2013-01-03 ┆ 0.489373 ┆ -0.124009 ┆ 0.597175 ┆ -0.365254 │\n", "│ 2013-01-04 ┆ -0.689202 ┆ 0.040563 ┆ 0.883405 ┆ 0.400285 │\n", "│ 2013-01-05 ┆ 0.972164 ┆ 1.596552 ┆ 1.191523 ┆ -1.907236 │\n", "│ 2013-01-06 ┆ 0.830918 ┆ 0.211487 ┆ 0.91045 ┆ 1.73558 │\n", "└────────────┴───────────┴───────────┴───────────┴───────────┘" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pl.DataFrame(np.random.randn(6, 4), schema=list(\"ABCD\"))\n", "df.insert_column(0, dates)\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "id": "e6513760-b0b2-4876-9185-b5d47524fc2f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 5)
indexABCD
datef64f64f64f64
2013-01-010.4691-0.2829-1.5091-1.1356
2013-01-021.2121-0.17320.1192-1.0442
2013-01-03-0.8618-2.1046-0.49491.0718
2013-01-040.7216-0.7068-1.03960.2719
2013-01-05-0.4250.5670.2762-1.0874
2013-01-06-0.67370.1136-1.47840.525
" ], "text/plain": [ "shape: (6, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ -1.0874 │\n", "│ 2013-01-06 ┆ -0.6737 ┆ 0.1136 ┆ -1.4784 ┆ 0.525 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr = np.array([[ 0.4691, -0.2829, -1.5091, -1.1356],\n", " [ 1.2121, -0.1732, 0.1192, -1.0442],\n", " [-0.8618, -2.1046, -0.4949, 1.0718],\n", " [ 0.7216, -0.7068, -1.0396, 0.2719],\n", " [-0.425 , 0.567 , 0.2762, -1.0874],\n", " [-0.6737, 0.1136, -1.4784, 0.525 ]])\n", "df = pl.DataFrame(arr, schema=list(\"ABCD\"))\n", "df.insert_column(0, dates)" ] }, { "cell_type": "code", "execution_count": 6, "id": "929d7c44-1256-4bb8-b32a-5be6d31ee574", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 5)
ABDEF
f64datei32catstr
1.02013-01-023"test""foo"
1.02013-01-023"train""foo"
1.02013-01-023"test""foo"
1.02013-01-023"train""foo"
" ], "text/plain": [ "shape: (4, 5)\n", "┌─────┬────────────┬─────┬───────┬─────┐\n", "│ A ┆ B ┆ D ┆ E ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ date ┆ i32 ┆ cat ┆ str │\n", "╞═════╪════════════╪═════╪═══════╪═════╡\n", "│ 1.0 ┆ 2013-01-02 ┆ 3 ┆ test ┆ foo │\n", "│ 1.0 ┆ 2013-01-02 ┆ 3 ┆ train ┆ foo │\n", "│ 1.0 ┆ 2013-01-02 ┆ 3 ┆ test ┆ foo │\n", "│ 1.0 ┆ 2013-01-02 ┆ 3 ┆ train ┆ foo │\n", "└─────┴────────────┴─────┴───────┴─────┘" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pl.select(\n", " A=1.0,\n", " B=pl.date(2013, 1, 2),\n", " D=np.array([3] * 4, dtype=\"int32\"),\n", " E=pl.lit(pl.Series(['test', 'train', 'test', 'train'], dtype=pl.Categorical)),\n", " F=pl.lit(\"foo\"),\n", ")\n", "df2" ] }, { "cell_type": "code", "execution_count": 7, "id": "e52df712-08d4-460f-b55e-a1bf4762b24f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Float64, Date, Int32, Categorical(ordering='physical'), String]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.dtypes" ] }, { "cell_type": "code", "execution_count": 8, "id": "0437ea69-342f-447d-b1cb-63f9426bcb6f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Schema([('A', Float64),\n", " ('B', Date),\n", " ('D', Int32),\n", " ('E', Categorical(ordering='physical')),\n", " ('F', String)])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.schema" ] }, { "cell_type": "markdown", "id": "b6f5274d-b19b-46f1-b5d3-4b0da9276e36", "metadata": {}, "source": [ "## Viewing data" ] }, { "cell_type": "code", "execution_count": 9, "id": "a526539d-a6ba-437c-a33b-1244d12cac29", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 5)
indexABCD
datef64f64f64f64
2013-01-010.4691-0.2829-1.5091-1.1356
2013-01-021.2121-0.17320.1192-1.0442
2013-01-03-0.8618-2.1046-0.49491.0718
2013-01-040.7216-0.7068-1.03960.2719
2013-01-05-0.4250.5670.2762-1.0874
" ], "text/plain": [ "shape: (5, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ -1.0874 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(5)" ] }, { "cell_type": "code", "execution_count": 10, "id": "f589cece-7220-4c6d-b7f2-33aad5fad693", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 5)
indexABCD
datef64f64f64f64
2013-01-040.7216-0.7068-1.03960.2719
2013-01-05-0.4250.5670.2762-1.0874
2013-01-06-0.67370.1136-1.47840.525
" ], "text/plain": [ "shape: (3, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ -1.0874 │\n", "│ 2013-01-06 ┆ -0.6737 ┆ 0.1136 ┆ -1.4784 ┆ 0.525 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail(3)" ] }, { "cell_type": "code", "execution_count": 11, "id": "abb533e6-f0db-4ec0-9cee-f99878777eca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6,)
index
date
2013-01-01
2013-01-02
2013-01-03
2013-01-04
2013-01-05
2013-01-06
" ], "text/plain": [ "shape: (6,)\n", "Series: 'index' [date]\n", "[\n", "\t2013-01-01\n", "\t2013-01-02\n", "\t2013-01-03\n", "\t2013-01-04\n", "\t2013-01-05\n", "\t2013-01-06\n", "]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.get_column('index')" ] }, { "cell_type": "code", "execution_count": 12, "id": "65271b5e-e0a2-4e76-a88a-8f840151f4d8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['index', 'A', 'B', 'C', 'D']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 13, "id": "f2c6f01c-ba4c-46c8-99a3-32f818852e06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0.4691, -0.2829, -1.5091, -1.1356],\n", " [ 1.2121, -0.1732, 0.1192, -1.0442],\n", " [-0.8618, -2.1046, -0.4949, 1.0718],\n", " [ 0.7216, -0.7068, -1.0396, 0.2719],\n", " [-0.425 , 0.567 , 0.2762, -1.0874],\n", " [-0.6737, 0.1136, -1.4784, 0.525 ]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.drop('index').to_numpy()" ] }, { "cell_type": "code", "execution_count": 14, "id": "6b86da6c-0f6a-470d-a244-cc49d1762ca7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Float64, Date, Int32, Categorical(ordering='physical'), String]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.dtypes" ] }, { "cell_type": "code", "execution_count": 15, "id": "7302b3b0-f20e-4f10-83a0-332127c79485", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Schema([('A', Float64),\n", " ('B', Date),\n", " ('D', Int32),\n", " ('E', Categorical(ordering='physical')),\n", " ('F', String)])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.schema" ] }, { "cell_type": "code", "execution_count": 16, "id": "d4947218-5d35-48ff-8107-e08af5fcc4c0", "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "array([[1.0, datetime.date(2013, 1, 2), 3, 'test', 'foo'],\n", " [1.0, datetime.date(2013, 1, 2), 3, 'train', 'foo'],\n", " [1.0, datetime.date(2013, 1, 2), 3, 'test', 'foo'],\n", " [1.0, datetime.date(2013, 1, 2), 3, 'train', 'foo']], dtype=object)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.to_numpy()" ] }, { "cell_type": "code", "execution_count": 17, "id": "96018407-870c-483c-ad9b-7e1a311a68a6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([(1., '2013-01-02', 3, 'test', 'foo'),\n", " (1., '2013-01-02', 3, 'train', 'foo'),\n", " (1., '2013-01-02', 3, 'test', 'foo'),\n", " (1., '2013-01-02', 3, 'train', 'foo')],\n", " dtype=[('A', '\n", "shape: (9, 6)
statisticindexABCD
strstrf64f64f64f64
"count""6"6.06.06.06.0
"null_count""0"0.00.00.00.0
"mean""2013-01-03 12:00:00"0.073717-0.43115-0.687767-0.233083
"std"null0.8431530.9228210.7798850.973111
"min""2013-01-01"-0.8618-2.1046-1.5091-1.1356
"25%""2013-01-02"-0.6737-0.7068-1.4784-1.0874
"50%""2013-01-04"0.4691-0.1732-0.49490.2719
"75%""2013-01-05"0.72160.11360.11920.525
"max""2013-01-06"1.21210.5670.27621.0718
" ], "text/plain": [ "shape: (9, 6)\n", "┌────────────┬─────────────────────┬──────────┬──────────┬───────────┬───────────┐\n", "│ statistic ┆ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════════════════╪══════════╪══════════╪═══════════╪═══════════╡\n", "│ count ┆ 6 ┆ 6.0 ┆ 6.0 ┆ 6.0 ┆ 6.0 │\n", "│ null_count ┆ 0 ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0 │\n", "│ mean ┆ 2013-01-03 12:00:00 ┆ 0.073717 ┆ -0.43115 ┆ -0.687767 ┆ -0.233083 │\n", "│ std ┆ null ┆ 0.843153 ┆ 0.922821 ┆ 0.779885 ┆ 0.973111 │\n", "│ min ┆ 2013-01-01 ┆ -0.8618 ┆ -2.1046 ┆ -1.5091 ┆ -1.1356 │\n", "│ 25% ┆ 2013-01-02 ┆ -0.6737 ┆ -0.7068 ┆ -1.4784 ┆ -1.0874 │\n", "│ 50% ┆ 2013-01-04 ┆ 0.4691 ┆ -0.1732 ┆ -0.4949 ┆ 0.2719 │\n", "│ 75% ┆ 2013-01-05 ┆ 0.7216 ┆ 0.1136 ┆ 0.1192 ┆ 0.525 │\n", "│ max ┆ 2013-01-06 ┆ 1.2121 ┆ 0.567 ┆ 0.2762 ┆ 1.0718 │\n", "└────────────┴─────────────────────┴──────────┴──────────┴───────────┴───────────┘" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 19, "id": "f937e415-fe03-4f9c-93ae-dc82d7de11d3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 7)
column2013-01-012013-01-022013-01-032013-01-042013-01-052013-01-06
strf64f64f64f64f64f64
"A"0.46911.2121-0.86180.7216-0.425-0.6737
"B"-0.2829-0.1732-2.1046-0.70680.5670.1136
"C"-1.50910.1192-0.4949-1.03960.2762-1.4784
"D"-1.1356-1.04421.07180.2719-1.08740.525
" ], "text/plain": [ "shape: (4, 7)\n", "┌────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┐\n", "│ column ┆ 2013-01-01 ┆ 2013-01-02 ┆ 2013-01-03 ┆ 2013-01-04 ┆ 2013-01-05 ┆ 2013-01-06 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════╪════════════╪════════════╪════════════╪════════════╪════════════╪════════════╡\n", "│ A ┆ 0.4691 ┆ 1.2121 ┆ -0.8618 ┆ 0.7216 ┆ -0.425 ┆ -0.6737 │\n", "│ B ┆ -0.2829 ┆ -0.1732 ┆ -2.1046 ┆ -0.7068 ┆ 0.567 ┆ 0.1136 │\n", "│ C ┆ -1.5091 ┆ 0.1192 ┆ -0.4949 ┆ -1.0396 ┆ 0.2762 ┆ -1.4784 │\n", "│ D ┆ -1.1356 ┆ -1.0442 ┆ 1.0718 ┆ 0.2719 ┆ -1.0874 ┆ 0.525 │\n", "└────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┘" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.drop('index').transpose(\n", " include_header=True, \n", " column_names=df.get_column('index').cast(pl.String))" ] }, { "cell_type": "code", "execution_count": 20, "id": "4f4ba7e5-cfb0-45d6-8c1b-33265c8589a8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 5)
indexDCBA
datef64f64f64f64
2013-01-01-1.1356-1.5091-0.28290.4691
2013-01-02-1.04420.1192-0.17321.2121
2013-01-031.0718-0.4949-2.1046-0.8618
2013-01-040.2719-1.0396-0.70680.7216
2013-01-05-1.08740.27620.567-0.425
2013-01-060.525-1.47840.1136-0.6737
" ], "text/plain": [ "shape: (6, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ D ┆ C ┆ B ┆ A │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ -1.1356 ┆ -1.5091 ┆ -0.2829 ┆ 0.4691 │\n", "│ 2013-01-02 ┆ -1.0442 ┆ 0.1192 ┆ -0.1732 ┆ 1.2121 │\n", "│ 2013-01-03 ┆ 1.0718 ┆ -0.4949 ┆ -2.1046 ┆ -0.8618 │\n", "│ 2013-01-04 ┆ 0.2719 ┆ -1.0396 ┆ -0.7068 ┆ 0.7216 │\n", "│ 2013-01-05 ┆ -1.0874 ┆ 0.2762 ┆ 0.567 ┆ -0.425 │\n", "│ 2013-01-06 ┆ 0.525 ┆ -1.4784 ┆ 0.1136 ┆ -0.6737 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select('index', *sorted(df.columns[1:], reverse=True))" ] }, { "cell_type": "code", "execution_count": 21, "id": "a30e2966-dffa-4c5d-9801-df3bdec44342", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 5)
indexABCD
datef64f64f64f64
2013-01-03-0.8618-2.1046-0.49491.0718
2013-01-040.7216-0.7068-1.03960.2719
2013-01-010.4691-0.2829-1.5091-1.1356
2013-01-021.2121-0.17320.1192-1.0442
2013-01-06-0.67370.1136-1.47840.525
2013-01-05-0.4250.5670.2762-1.0874
" ], "text/plain": [ "shape: (6, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-06 ┆ -0.6737 ┆ 0.1136 ┆ -1.4784 ┆ 0.525 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ -1.0874 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort('B')" ] }, { "cell_type": "markdown", "id": "eac5045a-acba-4423-94bd-fc77203373e5", "metadata": {}, "source": [ "## Selection" ] }, { "cell_type": "markdown", "id": "687c9d00-c6b5-47bc-bfef-a96ecd9f1a10", "metadata": {}, "source": [ "### Getitem ([])" ] }, { "cell_type": "code", "execution_count": 22, "id": "1202feaf-b222-4c19-b318-76dd9e37945a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6,)
A
f64
0.4691
1.2121
-0.8618
0.7216
-0.425
-0.6737
" ], "text/plain": [ "shape: (6,)\n", "Series: 'A' [f64]\n", "[\n", "\t0.4691\n", "\t1.2121\n", "\t-0.8618\n", "\t0.7216\n", "\t-0.425\n", "\t-0.6737\n", "]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['A']" ] }, { "cell_type": "code", "execution_count": 23, "id": "a2ae2b7b-70d1-4b5c-8e81-d5caaeeed526", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 5)
indexABCD
datef64f64f64f64
2013-01-010.4691-0.2829-1.5091-1.1356
2013-01-021.2121-0.17320.1192-1.0442
2013-01-03-0.8618-2.1046-0.49491.0718
" ], "text/plain": [ "shape: (3, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.slice(0, 3)" ] }, { "cell_type": "code", "execution_count": 24, "id": "885792d5-ad76-4007-b045-4b151f95146f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 5)
indexABCD
datef64f64f64f64
2013-01-021.2121-0.17320.1192-1.0442
2013-01-03-0.8618-2.1046-0.49491.0718
2013-01-040.7216-0.7068-1.03960.2719
" ], "text/plain": [ "shape: (3, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.filter(\n", " pl.col('index').is_between(\n", " pl.date(2013, 1, 2), \n", " pl.date(2013, 1, 4)\n", " )\n", ")" ] }, { "cell_type": "markdown", "id": "e6cf3012-cd49-4ae2-84ac-883a3806b420", "metadata": {}, "source": [ "### Selection by label" ] }, { "cell_type": "code", "execution_count": 25, "id": "b15c6a0c-edf9-45ca-aa18-34259d5dad3d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 5)
indexABCD
datef64f64f64f64
2013-01-010.4691-0.2829-1.5091-1.1356
" ], "text/plain": [ "shape: (1, 5)\n", "┌────────────┬────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 │\n", "└────────────┴────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.filter(pl.col('index') == pl.col('index').first())" ] }, { "cell_type": "code", "execution_count": 26, "id": "259035b4-6743-41a6-876e-c3f9b1120ceb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 3)
indexAB
datef64f64
2013-01-010.4691-0.2829
2013-01-021.2121-0.1732
2013-01-03-0.8618-2.1046
2013-01-040.7216-0.7068
2013-01-05-0.4250.567
2013-01-06-0.67370.1136
" ], "text/plain": [ "shape: (6, 3)\n", "┌────────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B │\n", "│ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 │\n", "│ 2013-01-06 ┆ -0.6737 ┆ 0.1136 │\n", "└────────────┴─────────┴─────────┘" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select('index', 'A', 'B')" ] }, { "cell_type": "code", "execution_count": 27, "id": "f7bfe1a4-4148-4d1d-b3ea-673438c63f04", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 3)
indexAB
datef64f64
2013-01-021.2121-0.1732
2013-01-03-0.8618-2.1046
2013-01-040.7216-0.7068
" ], "text/plain": [ "shape: (3, 3)\n", "┌────────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B │\n", "│ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╡\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 │\n", "└────────────┴─────────┴─────────┘" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(df\n", ".filter(\n", " pl.col('index').is_between(\n", " pl.date(2013, 1, 2), \n", " pl.date(2013, 1, 4)\n", " )\n", ")\n", ".select('index', 'A', 'B')\n", ")" ] }, { "cell_type": "code", "execution_count": 28, "id": "d7aea6fb-4aca-49e1-99cb-f67f8e682f7e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4691" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", "df\n", ".filter(pl.col('index') == pl.col('index').first())\n", ".select('A')\n", ".item()\n", ")" ] }, { "cell_type": "code", "execution_count": 29, "id": "a9e0ebd8-2a01-4bb0-ac2d-946d0e0d8888", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4691" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.row(0, named=True)['A']" ] }, { "cell_type": "markdown", "id": "1a144d91-c6ec-459b-976c-55442b25e001", "metadata": {}, "source": [ "### Selection by position" ] }, { "cell_type": "code", "execution_count": 30, "id": "c12fdcd6-161a-4135-8496-3bdc34a621dd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 5)
indexABCD
datef64f64f64f64
2013-01-040.7216-0.7068-1.03960.2719
" ], "text/plain": [ "shape: (1, 5)\n", "┌────────────┬────────┬─────────┬─────────┬────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪════════╪═════════╪═════════╪════════╡\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "└────────────┴────────┴─────────┴─────────┴────────┘" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.slice(3, 1)" ] }, { "cell_type": "code", "execution_count": 31, "id": "ab1a5828-556f-4efc-aea7-c67b231a13a2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
indexAB
datef64f64
2013-01-040.7216-0.7068
2013-01-05-0.4250.567
" ], "text/plain": [ "shape: (2, 3)\n", "┌────────────┬────────┬─────────┐\n", "│ index ┆ A ┆ B │\n", "│ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 │\n", "╞════════════╪════════╪═════════╡\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 │\n", "└────────────┴────────┴─────────┘" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(pl.nth(range(3)).slice(3, 2))" ] }, { "cell_type": "code", "execution_count": 32, "id": "16ffa980-4ce1-4bb4-968c-a84107eb498b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 3)
indexAC
datef64f64
2013-01-021.21210.1192
2013-01-03-0.8618-0.4949
2013-01-05-0.4250.2762
" ], "text/plain": [ "shape: (3, 3)\n", "┌────────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ C │\n", "│ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╡\n", "│ 2013-01-02 ┆ 1.2121 ┆ 0.1192 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -0.4949 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.2762 │\n", "└────────────┴─────────┴─────────┘" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(pl.nth([0, 1, 3]).gather([1, 2, 4]))" ] }, { "cell_type": "code", "execution_count": 33, "id": "4fda0470-8b97-4eb7-88f7-01b3f7bde19a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 5)
indexABCD
datef64f64f64f64
2013-01-021.2121-0.17320.1192-1.0442
2013-01-03-0.8618-2.1046-0.49491.0718
" ], "text/plain": [ "shape: (2, 5)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.slice(1, 2)" ] }, { "cell_type": "code", "execution_count": 34, "id": "79b3dccf-3e52-4b1a-bfe2-c9ba1cf5d56a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 3)
indexBC
datef64f64
2013-01-01-0.2829-1.5091
2013-01-02-0.17320.1192
2013-01-03-2.1046-0.4949
2013-01-04-0.7068-1.0396
2013-01-050.5670.2762
2013-01-060.1136-1.4784
" ], "text/plain": [ "shape: (6, 3)\n", "┌────────────┬─────────┬─────────┐\n", "│ index ┆ B ┆ C │\n", "│ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ -0.2829 ┆ -1.5091 │\n", "│ 2013-01-02 ┆ -0.1732 ┆ 0.1192 │\n", "│ 2013-01-03 ┆ -2.1046 ┆ -0.4949 │\n", "│ 2013-01-04 ┆ -0.7068 ┆ -1.0396 │\n", "│ 2013-01-05 ┆ 0.567 ┆ 0.2762 │\n", "│ 2013-01-06 ┆ 0.1136 ┆ -1.4784 │\n", "└────────────┴─────────┴─────────┘" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(pl.nth(0), pl.nth(range(2, 4)))" ] }, { "cell_type": "code", "execution_count": 35, "id": "060e27a9-802a-4889-86b0-88556adb14ec", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-0.1732" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.row(1)[2]" ] }, { "cell_type": "markdown", "id": "754097a1-ff7b-4c89-80a9-ea3fa27bac47", "metadata": {}, "source": [ "### Boolean indexing" ] }, { "cell_type": "code", "execution_count": 36, "id": "5d6eac7c-fd35-49bb-b7c3-e84d8b571ec9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 5)
indexABCD
datef64f64f64f64
2013-01-010.4691-0.2829-1.5091-1.1356
2013-01-021.2121-0.17320.1192-1.0442
2013-01-040.7216-0.7068-1.03960.2719
" ], "text/plain": [ "shape: (3, 5)\n", "┌────────────┬────────┬─────────┬─────────┬─────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪════════╪═════════╪═════════╪═════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 │\n", "└────────────┴────────┴─────────┴─────────┴─────────┘" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.filter(pl.col('A') > 0)" ] }, { "cell_type": "code", "execution_count": 37, "id": "02dc9fc0-8da1-492e-b3e5-ecc1470ca63b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 5)
indexABCD
datef64f64f64f64
2013-01-010.4691nullnullnull
2013-01-021.2121null0.1192null
2013-01-03nullnullnull1.0718
2013-01-040.7216nullnull0.2719
2013-01-05null0.5670.2762null
2013-01-06null0.1136null0.525
" ], "text/plain": [ "shape: (6, 5)\n", "┌────────────┬────────┬────────┬────────┬────────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞════════════╪════════╪════════╪════════╪════════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ null ┆ null ┆ null │\n", "│ 2013-01-02 ┆ 1.2121 ┆ null ┆ 0.1192 ┆ null │\n", "│ 2013-01-03 ┆ null ┆ null ┆ null ┆ 1.0718 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ null ┆ null ┆ 0.2719 │\n", "│ 2013-01-05 ┆ null ┆ 0.567 ┆ 0.2762 ┆ null │\n", "│ 2013-01-06 ┆ null ┆ 0.1136 ┆ null ┆ 0.525 │\n", "└────────────┴────────┴────────┴────────┴────────┘" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(\n", " pl.col('index'),\n", " pl.when(pl.exclude('index') > 0)\n", " .then(pl.exclude('index'))\n", " .otherwise(None)\n", ")" ] }, { "cell_type": "code", "execution_count": 38, "id": "66fbbe1d-e460-40cc-8514-824915a3ca1f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 6)
indexABCDE
datef64f64f64f64str
2013-01-010.4691-0.2829-1.5091-1.1356"one"
2013-01-021.2121-0.17320.1192-1.0442"one"
2013-01-03-0.8618-2.1046-0.49491.0718"two"
2013-01-040.7216-0.7068-1.03960.2719"three"
2013-01-05-0.4250.5670.2762-1.0874"four"
2013-01-06-0.67370.1136-1.47840.525"three"
" ], "text/plain": [ "shape: (6, 6)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┬───────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ E │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╪═══════╡\n", "│ 2013-01-01 ┆ 0.4691 ┆ -0.2829 ┆ -1.5091 ┆ -1.1356 ┆ one │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ -1.0442 ┆ one │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 ┆ two │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 0.2719 ┆ three │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ -1.0874 ┆ four │\n", "│ 2013-01-06 ┆ -0.6737 ┆ 0.1136 ┆ -1.4784 ┆ 0.525 ┆ three │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┴───────┘" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df.with_columns(\n", " E=pl.Series(['one', 'one', 'two', 'three', 'four', 'three'])\n", ")\n", "df2" ] }, { "cell_type": "code", "execution_count": 39, "id": "454a8fed-36a7-4811-990f-76739bb06605", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 6)
indexABCDE
datef64f64f64f64str
2013-01-03-0.8618-2.1046-0.49491.0718"two"
2013-01-05-0.4250.5670.2762-1.0874"four"
" ], "text/plain": [ "shape: (2, 6)\n", "┌────────────┬─────────┬─────────┬─────────┬─────────┬──────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ E │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │\n", "╞════════════╪═════════╪═════════╪═════════╪═════════╪══════╡\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 1.0718 ┆ two │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ -1.0874 ┆ four │\n", "└────────────┴─────────┴─────────┴─────────┴─────────┴──────┘" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.filter(pl.col('E').is_in(['two', 'four']))" ] }, { "cell_type": "markdown", "id": "0bc4f05c-b38d-4356-8b3e-5a20cae3e55c", "metadata": {}, "source": [ "### Setting" ] }, { "cell_type": "code", "execution_count": 40, "id": "56a90504-e14d-4087-9a18-0a87eb05a6a9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 2)
indexvalue
datei64
2013-01-021
2013-01-032
2013-01-043
2013-01-054
2013-01-065
2013-01-076
" ], "text/plain": [ "shape: (6, 2)\n", "┌────────────┬───────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ date ┆ i64 │\n", "╞════════════╪═══════╡\n", "│ 2013-01-02 ┆ 1 │\n", "│ 2013-01-03 ┆ 2 │\n", "│ 2013-01-04 ┆ 3 │\n", "│ 2013-01-05 ┆ 4 │\n", "│ 2013-01-06 ┆ 5 │\n", "│ 2013-01-07 ┆ 6 │\n", "└────────────┴───────┘" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range(\"20130102\", periods=6))\n", "s1 = pl.select(\n", " index=pl.date_range(pl.date(2013, 1, 2), pl.date(2013, 1, 7)),\n", " value=pl.Series([1, 2, 3, 4, 5, 6])\n", ")\n", "s1" ] }, { "cell_type": "code", "execution_count": 41, "id": "f54a636a-5d05-47eb-9fff-80285b9fef4d", "metadata": {}, "outputs": [], "source": [ "# df[\"F\"] = s1\n", "df = df.join(s1.select('index', F='value'), on='index', how='left')" ] }, { "cell_type": "code", "execution_count": 42, "id": "56d5770d-9b88-41b0-b7a6-a7695d345559", "metadata": {}, "outputs": [], "source": [ "# df.at[dates[0], \"A\"] = 0\n", "df = df.with_columns(\n", " pl.when(pl.col('index') == dates[0])\n", " .then(0)\n", " .otherwise('A')\n", " .alias('A')\n", ")" ] }, { "cell_type": "code", "execution_count": 43, "id": "cf7e216a-95bc-4205-9961-77e61b03ce3d", "metadata": {}, "outputs": [], "source": [ "# df.iat[0, 1] = 0\n", "df = df.with_columns(\n", " pl.when(pl.int_range(pl.len()) == 0)\n", " .then(0)\n", " .otherwise(pl.nth(2))\n", " .name.keep()\n", ")" ] }, { "cell_type": "code", "execution_count": 44, "id": "80734813-c1d1-4fae-8921-a9423cbe45af", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 6)
indexABCDF
datef64f64f64i32i64
2013-01-010.00.0-1.50915null
2013-01-021.2121-0.17320.119251
2013-01-03-0.8618-2.1046-0.494952
2013-01-040.7216-0.7068-1.039653
2013-01-05-0.4250.5670.276254
2013-01-06-0.67370.1136-1.478455
" ], "text/plain": [ "shape: (6, 6)\n", "┌────────────┬─────────┬─────────┬─────────┬─────┬──────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ i64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════╪══════╡\n", "│ 2013-01-01 ┆ 0.0 ┆ 0.0 ┆ -1.5091 ┆ 5 ┆ null │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ 5 ┆ 1 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 5 ┆ 2 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 5 ┆ 3 │\n", "│ 2013-01-05 ┆ -0.425 ┆ 0.567 ┆ 0.2762 ┆ 5 ┆ 4 │\n", "│ 2013-01-06 ┆ -0.6737 ┆ 0.1136 ┆ -1.4784 ┆ 5 ┆ 5 │\n", "└────────────┴─────────┴─────────┴─────────┴─────┴──────┘" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.loc[:, \"D\"] = np.array([5] * len(df))\n", "df = df.with_columns(D=5)\n", "df" ] }, { "cell_type": "code", "execution_count": 45, "id": "5f093610-59ae-4b99-b8ec-f02d7e223495", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 6)
indexABCDF
datef64f64f64i32i64
2013-01-010.00.0-1.5091-5null
2013-01-02-1.2121-0.1732-0.1192-5-1
2013-01-03-0.8618-2.1046-0.4949-5-2
2013-01-04-0.7216-0.7068-1.0396-5-3
2013-01-05-0.425-0.567-0.2762-5-4
2013-01-06-0.6737-0.1136-1.4784-5-5
" ], "text/plain": [ "shape: (6, 6)\n", "┌────────────┬─────────┬─────────┬─────────┬─────┬──────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ i64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════╪══════╡\n", "│ 2013-01-01 ┆ 0.0 ┆ 0.0 ┆ -1.5091 ┆ -5 ┆ null │\n", "│ 2013-01-02 ┆ -1.2121 ┆ -0.1732 ┆ -0.1192 ┆ -5 ┆ -1 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ -5 ┆ -2 │\n", "│ 2013-01-04 ┆ -0.7216 ┆ -0.7068 ┆ -1.0396 ┆ -5 ┆ -3 │\n", "│ 2013-01-05 ┆ -0.425 ┆ -0.567 ┆ -0.2762 ┆ -5 ┆ -4 │\n", "│ 2013-01-06 ┆ -0.6737 ┆ -0.1136 ┆ -1.4784 ┆ -5 ┆ -5 │\n", "└────────────┴─────────┴─────────┴─────────┴─────┴──────┘" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df2[df2 > 0] = -df2\n", "df2 = df.select(pl.all())\n", "\n", "others = pl.exclude('index')\n", "df2 = df2.select(\n", " 'index',\n", " pl.when(others > 0)\n", " .then(-others)\n", " .otherwise(others)\n", ")\n", "df2" ] }, { "cell_type": "markdown", "id": "03ecf958-84e9-4999-9ee8-a1072b92f49d", "metadata": {}, "source": [ "## Missing data" ] }, { "cell_type": "code", "execution_count": 46, "id": "f14c3fce-deea-42c2-a31f-57c79903a803", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 7)
indexABCDFE
datef64f64f64i32i64f64
2013-01-010.00.0-1.50915null1.0
2013-01-021.2121-0.17320.1192511.0
2013-01-03-0.8618-2.1046-0.494952null
2013-01-040.7216-0.7068-1.039653null
" ], "text/plain": [ "shape: (4, 7)\n", "┌────────────┬─────────┬─────────┬─────────┬─────┬──────┬──────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F ┆ E │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ i64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════╪══════╪══════╡\n", "│ 2013-01-01 ┆ 0.0 ┆ 0.0 ┆ -1.5091 ┆ 5 ┆ null ┆ 1.0 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ 5 ┆ 1 ┆ 1.0 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 5 ┆ 2 ┆ null │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 5 ┆ 3 ┆ null │\n", "└────────────┴─────────┴─────────┴─────────┴─────┴──────┴──────┘" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = df.select(\n", " pl.all().slice(0, 4),\n", " E=pl.Series([1.0, 1.0, None, None])\n", ")\n", "df1" ] }, { "cell_type": "code", "execution_count": 47, "id": "24f0b02e-104f-49a3-8bb9-9b760ce8219b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 7)
indexABCDFE
datef64f64f64i32i64f64
2013-01-021.2121-0.17320.1192511.0
" ], "text/plain": [ "shape: (1, 7)\n", "┌────────────┬────────┬─────────┬────────┬─────┬─────┬─────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F ┆ E │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ i64 ┆ f64 │\n", "╞════════════╪════════╪═════════╪════════╪═════╪═════╪═════╡\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ 5 ┆ 1 ┆ 1.0 │\n", "└────────────┴────────┴─────────┴────────┴─────┴─────┴─────┘" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.drop_nulls()" ] }, { "cell_type": "code", "execution_count": 48, "id": "cdcdf594-3d63-4590-a8de-4406df3bf7eb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 7)
indexABCDFE
datef64f64f64i32i64f64
2013-01-010.00.0-1.5091551.0
2013-01-021.2121-0.17320.1192511.0
2013-01-03-0.8618-2.1046-0.4949525.0
2013-01-040.7216-0.7068-1.0396535.0
" ], "text/plain": [ "shape: (4, 7)\n", "┌────────────┬─────────┬─────────┬─────────┬─────┬─────┬─────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F ┆ E │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ i32 ┆ i64 ┆ f64 │\n", "╞════════════╪═════════╪═════════╪═════════╪═════╪═════╪═════╡\n", "│ 2013-01-01 ┆ 0.0 ┆ 0.0 ┆ -1.5091 ┆ 5 ┆ 5 ┆ 1.0 │\n", "│ 2013-01-02 ┆ 1.2121 ┆ -0.1732 ┆ 0.1192 ┆ 5 ┆ 1 ┆ 1.0 │\n", "│ 2013-01-03 ┆ -0.8618 ┆ -2.1046 ┆ -0.4949 ┆ 5 ┆ 2 ┆ 5.0 │\n", "│ 2013-01-04 ┆ 0.7216 ┆ -0.7068 ┆ -1.0396 ┆ 5 ┆ 3 ┆ 5.0 │\n", "└────────────┴─────────┴─────────┴─────────┴─────┴─────┴─────┘" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.fill_null(5)" ] }, { "cell_type": "code", "execution_count": 49, "id": "50658e53-9665-4e47-9e08-56509f250f15", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 7)
indexABCDFE
dateboolboolboolboolboolbool
2013-01-01falsefalsefalsefalsetruefalse
2013-01-02falsefalsefalsefalsefalsefalse
2013-01-03falsefalsefalsefalsefalsetrue
2013-01-04falsefalsefalsefalsefalsetrue
" ], "text/plain": [ "shape: (4, 7)\n", "┌────────────┬───────┬───────┬───────┬───────┬───────┬───────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F ┆ E │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ bool ┆ bool ┆ bool ┆ bool ┆ bool ┆ bool │\n", "╞════════════╪═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡\n", "│ 2013-01-01 ┆ false ┆ false ┆ false ┆ false ┆ true ┆ false │\n", "│ 2013-01-02 ┆ false ┆ false ┆ false ┆ false ┆ false ┆ false │\n", "│ 2013-01-03 ┆ false ┆ false ┆ false ┆ false ┆ false ┆ true │\n", "│ 2013-01-04 ┆ false ┆ false ┆ false ┆ false ┆ false ┆ true │\n", "└────────────┴───────┴───────┴───────┴───────┴───────┴───────┘" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.select(\n", " 'index',\n", " pl.exclude('index').is_null()\n", ")" ] }, { "cell_type": "markdown", "id": "38ae812f-f6b8-44d6-8fb8-398a5431464d", "metadata": {}, "source": [ "## Operations" ] }, { "cell_type": "markdown", "id": "92b9393d-cf8f-4403-8cad-0a499e8efd17", "metadata": {}, "source": [ "### Stats" ] }, { "cell_type": "code", "execution_count": 50, "id": "21fbcf7c-2fda-409a-b58a-b4b043fca57c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 5)
ABCDF
f64f64f64f64f64
-0.004467-0.384-0.6877675.03.0
" ], "text/plain": [ "shape: (1, 5)\n", "┌───────────┬────────┬───────────┬─────┬─────┐\n", "│ A ┆ B ┆ C ┆ D ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪════════╪═══════════╪═════╪═════╡\n", "│ -0.004467 ┆ -0.384 ┆ -0.687767 ┆ 5.0 ┆ 3.0 │\n", "└───────────┴────────┴───────────┴─────┴─────┘" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(pl.exclude('index').mean())" ] }, { "cell_type": "code", "execution_count": 51, "id": "ee82dde4-5947-4fbe-8f1c-0f2a88d33dbb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 2)
indexA
datef64
2013-01-010.872725
2013-01-021.43162
2013-01-030.70774
2013-01-041.39504
2013-01-051.88364
2013-01-061.5923
" ], "text/plain": [ "shape: (6, 2)\n", "┌────────────┬──────────┐\n", "│ index ┆ A │\n", "│ --- ┆ --- │\n", "│ date ┆ f64 │\n", "╞════════════╪══════════╡\n", "│ 2013-01-01 ┆ 0.872725 │\n", "│ 2013-01-02 ┆ 1.43162 │\n", "│ 2013-01-03 ┆ 0.70774 │\n", "│ 2013-01-04 ┆ 1.39504 │\n", "│ 2013-01-05 ┆ 1.88364 │\n", "│ 2013-01-06 ┆ 1.5923 │\n", "└────────────┴──────────┘" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select(\n", " 'index',\n", " pl.mean_horizontal(pl.exclude('index'))\n", ")" ] }, { "cell_type": "code", "execution_count": 52, "id": "c32f2057-cfcb-440d-a877-e5658344e5eb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 2)
indexvalue
datei64
2013-01-01null
2013-01-02null
2013-01-031
2013-01-043
2013-01-055
2013-01-06null
" ], "text/plain": [ "shape: (6, 2)\n", "┌────────────┬───────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ date ┆ i64 │\n", "╞════════════╪═══════╡\n", "│ 2013-01-01 ┆ null │\n", "│ 2013-01-02 ┆ null │\n", "│ 2013-01-03 ┆ 1 │\n", "│ 2013-01-04 ┆ 3 │\n", "│ 2013-01-05 ┆ 5 │\n", "│ 2013-01-06 ┆ null │\n", "└────────────┴───────┘" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = pl.select(\n", " index=df.get_column('index'),\n", " value=pl.Series([1, 3, 5, None, 6, 8]).shift(2)\n", ")\n", "s" ] }, { "cell_type": "code", "execution_count": 53, "id": "fec33dde-22aa-411f-9c9c-335dbbb740ab", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 6)
indexABCDF
datef64f64f64i64i64
2013-01-01nullnullnullnullnull
2013-01-02nullnullnullnullnull
2013-01-03-1.8618-3.1046-1.494941
2013-01-04-2.2784-3.7068-4.039620
2013-01-05-5.425-4.433-4.72380-1
2013-01-06nullnullnullnullnull
" ], "text/plain": [ "shape: (6, 6)\n", "┌────────────┬─────────┬─────────┬─────────┬──────┬──────┐\n", "│ index ┆ A ┆ B ┆ C ┆ D ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ date ┆ f64 ┆ f64 ┆ f64 ┆ i64 ┆ i64 │\n", "╞════════════╪═════════╪═════════╪═════════╪══════╪══════╡\n", "│ 2013-01-01 ┆ null ┆ null ┆ null ┆ null ┆ null │\n", "│ 2013-01-02 ┆ null ┆ null ┆ null ┆ null ┆ null │\n", "│ 2013-01-03 ┆ -1.8618 ┆ -3.1046 ┆ -1.4949 ┆ 4 ┆ 1 │\n", "│ 2013-01-04 ┆ -2.2784 ┆ -3.7068 ┆ -4.0396 ┆ 2 ┆ 0 │\n", "│ 2013-01-05 ┆ -5.425 ┆ -4.433 ┆ -4.7238 ┆ 0 ┆ -1 │\n", "│ 2013-01-06 ┆ null ┆ null ┆ null ┆ null ┆ null │\n", "└────────────┴─────────┴─────────┴─────────┴──────┴──────┘" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_l, df_r = pl.align_frames(df, s, on='index')\n", "df_l.select(\n", " 'index',\n", " pl.exclude('index') - df_r.get_column('value')\n", ")" ] }, { "cell_type": "markdown", "id": "406f9279-9e5a-4d5c-ac82-64a5a87a13d0", "metadata": {}, "source": [ "### User defined functions" ] }, { "cell_type": "code", "execution_count": 54, "id": "4c55607a-7ccd-4901-91a4-b9ec0c0860d4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1, 5)
ABCDF
f64f64f64f64f64
-0.025013-2.1504-3.85149328.016.8
" ], "text/plain": [ "shape: (1, 5)\n", "┌───────────┬─────────┬───────────┬──────┬──────┐\n", "│ A ┆ B ┆ C ┆ D ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪═════════╪═══════════╪══════╪══════╡\n", "│ -0.025013 ┆ -2.1504 ┆ -3.851493 ┆ 28.0 ┆ 16.8 │\n", "└───────────┴─────────┴───────────┴──────┴──────┘" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.agg(lambda x: np.mean(x) * 5.6)\n", "df.select(\n", " pl.exclude('index').mean() * 5.6\n", ")" ] }, { "cell_type": "code", "execution_count": 56, "id": "a224c75d-33e3-40c2-978c-98df7b6dc04a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 5)
ABCDF
f64f64f64f64f64
0.00.0-152.72092506.0null
122.66452-17.5278412.06304506.0101.2
-87.21416-212.98552-50.08388506.0202.4
73.02592-71.52816-105.20752506.0303.6
-43.0157.380427.95144506.0404.8
-68.1784411.49632-149.61408506.0506.0
" ], "text/plain": [ "shape: (6, 5)\n", "┌───────────┬────────────┬────────────┬───────┬───────┐\n", "│ A ┆ B ┆ C ┆ D ┆ F │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════════╪════════════╪════════════╪═══════╪═══════╡\n", "│ 0.0 ┆ 0.0 ┆ -152.72092 ┆ 506.0 ┆ null │\n", "│ 122.66452 ┆ -17.52784 ┆ 12.06304 ┆ 506.0 ┆ 101.2 │\n", "│ -87.21416 ┆ -212.98552 ┆ -50.08388 ┆ 506.0 ┆ 202.4 │\n", "│ 73.02592 ┆ -71.52816 ┆ -105.20752 ┆ 506.0 ┆ 303.6 │\n", "│ -43.01 ┆ 57.3804 ┆ 27.95144 ┆ 506.0 ┆ 404.8 │\n", "│ -68.17844 ┆ 11.49632 ┆ -149.61408 ┆ 506.0 ┆ 506.0 │\n", "└───────────┴────────────┴────────────┴───────┴───────┘" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.transform(lambda x: x * 101.2)\n", "df.select(\n", " pl.exclude('index') * 101.2\n", ")" ] }, { "cell_type": "markdown", "id": "c2cd2f26-ca5d-4110-8f1a-7adf15a38a50", "metadata": {}, "source": [ "### Value Counts" ] }, { "cell_type": "code", "execution_count": 58, "id": "ca4c3f90-6b7c-4241-b22c-ec99d0aa8a1a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
count
i32u32
62
41
01
53
23
" ], "text/plain": [ "shape: (5, 2)\n", "┌─────┬───────┐\n", "│ ┆ count │\n", "│ --- ┆ --- │\n", "│ i32 ┆ u32 │\n", "╞═════╪═══════╡\n", "│ 6 ┆ 2 │\n", "│ 4 ┆ 1 │\n", "│ 0 ┆ 1 │\n", "│ 5 ┆ 3 │\n", "│ 2 ┆ 3 │\n", "└─────┴───────┘" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = pl.Series(np.random.randint(0, 7, size=10))\n", "s.value_counts()" ] }, { "cell_type": "markdown", "id": "85d2b979-547a-41b2-902e-2777602ee956", "metadata": {}, "source": [ "### String Methods" ] }, { "cell_type": "code", "execution_count": 59, "id": "9f34ddd7-cc1c-4c17-8071-a3769a9cafc7", "metadata": {}, "outputs": [], "source": [ "s = pl.Series([\"A\", \"B\", \"C\", \"Aaba\", \"Baca\", None, \"CABA\", \"dog\", \"cat\"])" ] }, { "cell_type": "code", "execution_count": 60, "id": "75c9e2cc-bba8-469e-8655-96c7b567d01d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (9,)
str
"a"
"b"
"c"
"aaba"
"baca"
null
"caba"
"dog"
"cat"
" ], "text/plain": [ "shape: (9,)\n", "Series: '' [str]\n", "[\n", "\t\"a\"\n", "\t\"b\"\n", "\t\"c\"\n", "\t\"aaba\"\n", "\t\"baca\"\n", "\tnull\n", "\t\"caba\"\n", "\t\"dog\"\n", "\t\"cat\"\n", "]" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s.str.to_lowercase()" ] }, { "cell_type": "markdown", "id": "9fdd74ec-6466-47e0-a0ce-00ea1e906397", "metadata": {}, "source": [ "## Merge" ] }, { "cell_type": "markdown", "id": "7431721f-98e5-4637-90ad-c91d31a6cb83", "metadata": {}, "source": [ "### Concat" ] }, { "cell_type": "code", "execution_count": 133, "id": "ba4a917f-9477-4a2e-b900-071ac56607e0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 5)
index0123
i64f64f64f64f64
0-0.5487021.467327-1.015962-0.483075
11.63755-1.217659-0.291519-1.745505
2-0.2639520.99146-0.9190690.266046
3-0.7096611.6690521.037882-1.705775
4-0.919854-0.0423791.247642-0.00992
50.2902130.4957670.3629491.548106
6-1.131345-0.0893290.337863-0.945867
7-0.9321321.956030.017587-0.016692
8-0.5752470.254161-1.1437040.215897
91.193555-0.077118-0.40853-0.862495
" ], "text/plain": [ "shape: (10, 5)\n", "┌───────┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ 0 ┆ 1 ┆ 2 ┆ 3 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 0 ┆ -0.548702 ┆ 1.467327 ┆ -1.015962 ┆ -0.483075 │\n", "│ 1 ┆ 1.63755 ┆ -1.217659 ┆ -0.291519 ┆ -1.745505 │\n", "│ 2 ┆ -0.263952 ┆ 0.99146 ┆ -0.919069 ┆ 0.266046 │\n", "│ 3 ┆ -0.709661 ┆ 1.669052 ┆ 1.037882 ┆ -1.705775 │\n", "│ 4 ┆ -0.919854 ┆ -0.042379 ┆ 1.247642 ┆ -0.00992 │\n", "│ 5 ┆ 0.290213 ┆ 0.495767 ┆ 0.362949 ┆ 1.548106 │\n", "│ 6 ┆ -1.131345 ┆ -0.089329 ┆ 0.337863 ┆ -0.945867 │\n", "│ 7 ┆ -0.932132 ┆ 1.95603 ┆ 0.017587 ┆ -0.016692 │\n", "│ 8 ┆ -0.575247 ┆ 0.254161 ┆ -1.143704 ┆ 0.215897 │\n", "│ 9 ┆ 1.193555 ┆ -0.077118 ┆ -0.40853 ┆ -0.862495 │\n", "└───────┴───────────┴───────────┴───────────┴───────────┘" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = \"\"\"\n", "index 0 1 2 3\n", "0 -0.548702 1.467327 -1.015962 -0.483075\n", "1 1.637550 -1.217659 -0.291519 -1.745505\n", "2 -0.263952 0.991460 -0.919069 0.266046\n", "3 -0.709661 1.669052 1.037882 -1.705775\n", "4 -0.919854 -0.042379 1.247642 -0.009920\n", "5 0.290213 0.495767 0.362949 1.548106\n", "6 -1.131345 -0.089329 0.337863 -0.945867\n", "7 -0.932132 1.956030 0.017587 -0.016692\n", "8 -0.575247 0.254161 -1.143704 0.215897\n", "9 1.193555 -0.077118 -0.408530 -0.862495\n", "\"\"\"\n", "df = to_dataframe(data)\n", "df" ] }, { "cell_type": "code", "execution_count": 134, "id": "045af9b0-54f6-4785-9840-e331072e2c77", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 5)
index0123
i64f64f64f64f64
0-0.5487021.467327-1.015962-0.483075
11.63755-1.217659-0.291519-1.745505
2-0.2639520.99146-0.9190690.266046
3-0.7096611.6690521.037882-1.705775
4-0.919854-0.0423791.247642-0.00992
50.2902130.4957670.3629491.548106
6-1.131345-0.0893290.337863-0.945867
7-0.9321321.956030.017587-0.016692
8-0.5752470.254161-1.1437040.215897
91.193555-0.077118-0.40853-0.862495
" ], "text/plain": [ "shape: (10, 5)\n", "┌───────┬───────────┬───────────┬───────────┬───────────┐\n", "│ index ┆ 0 ┆ 1 ┆ 2 ┆ 3 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ f64 │\n", "╞═══════╪═══════════╪═══════════╪═══════════╪═══════════╡\n", "│ 0 ┆ -0.548702 ┆ 1.467327 ┆ -1.015962 ┆ -0.483075 │\n", "│ 1 ┆ 1.63755 ┆ -1.217659 ┆ -0.291519 ┆ -1.745505 │\n", "│ 2 ┆ -0.263952 ┆ 0.99146 ┆ -0.919069 ┆ 0.266046 │\n", "│ 3 ┆ -0.709661 ┆ 1.669052 ┆ 1.037882 ┆ -1.705775 │\n", "│ 4 ┆ -0.919854 ┆ -0.042379 ┆ 1.247642 ┆ -0.00992 │\n", "│ 5 ┆ 0.290213 ┆ 0.495767 ┆ 0.362949 ┆ 1.548106 │\n", "│ 6 ┆ -1.131345 ┆ -0.089329 ┆ 0.337863 ┆ -0.945867 │\n", "│ 7 ┆ -0.932132 ┆ 1.95603 ┆ 0.017587 ┆ -0.016692 │\n", "│ 8 ┆ -0.575247 ┆ 0.254161 ┆ -1.143704 ┆ 0.215897 │\n", "│ 9 ┆ 1.193555 ┆ -0.077118 ┆ -0.40853 ┆ -0.862495 │\n", "└───────┴───────────┴───────────┴───────────┴───────────┘" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.concat([df.slice(0, 3), df.slice(3, 4), df.slice(7)])" ] }, { "cell_type": "markdown", "id": "608ede70-c9e6-4443-b7c5-b67540957838", "metadata": {}, "source": [ "### join" ] }, { "cell_type": "code", "execution_count": 74, "id": "7a974e32-8c60-4180-a46a-92502925751b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 2)
keylval
stri64
"foo"1
"foo"2
\n", "shape: (2, 2)
keyrval
stri64
"foo"4
"foo"5
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "left = pl.DataFrame({\"key\": [\"foo\", \"foo\"], \"lval\": [1, 2]})\n", "right = pl.DataFrame({\"key\": [\"foo\", \"foo\"], \"rval\": [4, 5]})\n", "row(left, right)" ] }, { "cell_type": "code", "execution_count": 76, "id": "9f0feacf-67db-4ac4-bfe8-6e14f06ef970", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 3)
keylvalrval
stri64i64
"foo"14
"foo"24
"foo"15
"foo"25
" ], "text/plain": [ "shape: (4, 3)\n", "┌─────┬──────┬──────┐\n", "│ key ┆ lval ┆ rval │\n", "│ --- ┆ --- ┆ --- │\n", "│ str ┆ i64 ┆ i64 │\n", "╞═════╪══════╪══════╡\n", "│ foo ┆ 1 ┆ 4 │\n", "│ foo ┆ 2 ┆ 4 │\n", "│ foo ┆ 1 ┆ 5 │\n", "│ foo ┆ 2 ┆ 5 │\n", "└─────┴──────┴──────┘" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left.join(right, on='key')" ] }, { "cell_type": "code", "execution_count": 78, "id": "aa0dfa1f-7dc1-4454-aff9-78e4002dc85a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 2)
keylval
stri64
"foo"1
"bar"2
\n", "shape: (2, 2)
keyrval
stri64
"foo"4
"bar"5
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "left = pl.DataFrame({\"key\": [\"foo\", \"bar\"], \"lval\": [1, 2]})\n", "right = pl.DataFrame({\"key\": [\"foo\", \"bar\"], \"rval\": [4, 5]})\n", "row(left, right)" ] }, { "cell_type": "code", "execution_count": 79, "id": "e0f7891d-873e-4a61-9487-9cb2e8df416f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
keylvalrval
stri64i64
"foo"14
"bar"25
" ], "text/plain": [ "shape: (2, 3)\n", "┌─────┬──────┬──────┐\n", "│ key ┆ lval ┆ rval │\n", "│ --- ┆ --- ┆ --- │\n", "│ str ┆ i64 ┆ i64 │\n", "╞═════╪══════╪══════╡\n", "│ foo ┆ 1 ┆ 4 │\n", "│ bar ┆ 2 ┆ 5 │\n", "└─────┴──────┴──────┘" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left.join(right, on='key')" ] }, { "cell_type": "markdown", "id": "b919d32b-7228-4f3e-b0e8-7ae39e6b2b07", "metadata": {}, "source": [ "## Grouping" ] }, { "cell_type": "code", "execution_count": 120, "id": "b4a4f528-a272-4e30-81ba-3a0046d2edb4", "metadata": {}, "outputs": [], "source": [ "data = \"\"\"\n", "A B C D\n", "foo one 1.346061 -1.577585\n", "bar one 1.511763 0.396823\n", "foo two 1.627081 -0.105381\n", "bar three -0.990582 -0.532532\n", "foo two -0.441652 1.453749\n", "bar two 1.211526 1.208843\n", "foo one 0.268520 -0.080952\n", "foo three 0.024580 -0.264610\n", "\"\"\"\n", "df = to_dataframe(data)" ] }, { "cell_type": "code", "execution_count": 121, "id": "99927dee-5cf9-48ab-8169-4c8f50ac88c1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (2, 3)
ACD
strf64f64
"foo"2.82459-0.574779
"bar"1.7327071.073134
" ], "text/plain": [ "shape: (2, 3)\n", "┌─────┬──────────┬───────────┐\n", "│ A ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- │\n", "│ str ┆ f64 ┆ f64 │\n", "╞═════╪══════════╪═══════════╡\n", "│ foo ┆ 2.82459 ┆ -0.574779 │\n", "│ bar ┆ 1.732707 ┆ 1.073134 │\n", "└─────┴──────────┴───────────┘" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.groupby(\"A\")[[\"C\", \"D\"]].sum()\n", "df.group_by('A').agg(\n", " pl.col('C', 'D').sum()\n", ")" ] }, { "cell_type": "code", "execution_count": 91, "id": "9e3cf0ce-90f0-4a07-b9fe-fb15be820f61", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 4)
ABCD
strstrf64f64
"bar""one"1.5117630.396823
"bar""three"-0.990582-0.532532
"bar""two"1.2115261.208843
"foo""one"1.614581-1.658537
"foo""three"0.02458-0.26461
"foo""two"1.1854291.348368
" ], "text/plain": [ "shape: (6, 4)\n", "┌─────┬───────┬───────────┬───────────┐\n", "│ A ┆ B ┆ C ┆ D │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 │\n", "╞═════╪═══════╪═══════════╪═══════════╡\n", "│ bar ┆ one ┆ 1.511763 ┆ 0.396823 │\n", "│ bar ┆ three ┆ -0.990582 ┆ -0.532532 │\n", "│ bar ┆ two ┆ 1.211526 ┆ 1.208843 │\n", "│ foo ┆ one ┆ 1.614581 ┆ -1.658537 │\n", "│ foo ┆ three ┆ 0.02458 ┆ -0.26461 │\n", "│ foo ┆ two ┆ 1.185429 ┆ 1.348368 │\n", "└─────┴───────┴───────────┴───────────┘" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.groupby([\"A\", \"B\"]).sum()\n", "(\n", "df\n", ".group_by('A', 'B')\n", ".agg(pl.all().sum())\n", ".sort('A', 'B')\n", ")" ] }, { "cell_type": "markdown", "id": "55706dd6-4e40-4780-9130-245ec6ee6421", "metadata": {}, "source": [ "## Reshaping" ] }, { "cell_type": "markdown", "id": "5dc506c2-9cb0-4c36-9c33-f941b2f6073d", "metadata": {}, "source": [ "### Stack" ] }, { "cell_type": "code", "execution_count": 113, "id": "e3623c85-6bd4-4d57-acc0-7f4bdb15c38e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 4)
firstsecondAB
strstrf64f64
"bar""one"-0.727965-0.589346
"bar""two"0.339969-0.693205
"baz""one"-0.3393550.593616
"baz""two"0.8843451.591431
" ], "text/plain": [ "shape: (4, 4)\n", "┌───────┬────────┬───────────┬───────────┐\n", "│ first ┆ second ┆ A ┆ B │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 │\n", "╞═══════╪════════╪═══════════╪═══════════╡\n", "│ bar ┆ one ┆ -0.727965 ┆ -0.589346 │\n", "│ bar ┆ two ┆ 0.339969 ┆ -0.693205 │\n", "│ baz ┆ one ┆ -0.339355 ┆ 0.593616 │\n", "│ baz ┆ two ┆ 0.884345 ┆ 1.591431 │\n", "└───────┴────────┴───────────┴───────────┘" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = '''\n", "first second A B\n", "bar one -0.727965 -0.589346\n", "bar two 0.339969 -0.693205\n", "baz one -0.339355 0.593616\n", "baz two 0.884345 1.591431\n", "'''\n", "df2 = to_dataframe(data)\n", "df2" ] }, { "cell_type": "code", "execution_count": 114, "id": "abd21832-d190-44e5-a006-f4a8404ae149", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (8, 4)
firstsecondvariablevalue
strstrstrf64
"bar""one""A"-0.727965
"bar""two""A"0.339969
"baz""one""A"-0.339355
"baz""two""A"0.884345
"bar""one""B"-0.589346
"bar""two""B"-0.693205
"baz""one""B"0.593616
"baz""two""B"1.591431
" ], "text/plain": [ "shape: (8, 4)\n", "┌───────┬────────┬──────────┬───────────┐\n", "│ first ┆ second ┆ variable ┆ value │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ str ┆ f64 │\n", "╞═══════╪════════╪══════════╪═══════════╡\n", "│ bar ┆ one ┆ A ┆ -0.727965 │\n", "│ bar ┆ two ┆ A ┆ 0.339969 │\n", "│ baz ┆ one ┆ A ┆ -0.339355 │\n", "│ baz ┆ two ┆ A ┆ 0.884345 │\n", "│ bar ┆ one ┆ B ┆ -0.589346 │\n", "│ bar ┆ two ┆ B ┆ -0.693205 │\n", "│ baz ┆ one ┆ B ┆ 0.593616 │\n", "│ baz ┆ two ┆ B ┆ 1.591431 │\n", "└───────┴────────┴──────────┴───────────┘" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked = df2.unpivot(['A', 'B'], index=['first', 'second'])\n", "stacked" ] }, { "cell_type": "code", "execution_count": 115, "id": "b6444976-0ed0-4f44-a85f-573f2880c80b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 4)
firstsecondAB
strstrf64f64
"bar""one"-0.727965-0.589346
"bar""two"0.339969-0.693205
"baz""one"-0.3393550.593616
"baz""two"0.8843451.591431
" ], "text/plain": [ "shape: (4, 4)\n", "┌───────┬────────┬───────────┬───────────┐\n", "│ first ┆ second ┆ A ┆ B │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 │\n", "╞═══════╪════════╪═══════════╪═══════════╡\n", "│ bar ┆ one ┆ -0.727965 ┆ -0.589346 │\n", "│ bar ┆ two ┆ 0.339969 ┆ -0.693205 │\n", "│ baz ┆ one ┆ -0.339355 ┆ 0.593616 │\n", "│ baz ┆ two ┆ 0.884345 ┆ 1.591431 │\n", "└───────┴────────┴───────────┴───────────┘" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked.pivot('variable', index=['first', 'second'])" ] }, { "cell_type": "code", "execution_count": 116, "id": "2b1f9eff-65fc-470c-8b69-ecd83c35f77b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 4)
firstvariableonetwo
strstrf64f64
"bar""A"-0.7279650.339969
"baz""A"-0.3393550.884345
"bar""B"-0.589346-0.693205
"baz""B"0.5936161.591431
" ], "text/plain": [ "shape: (4, 4)\n", "┌───────┬──────────┬───────────┬───────────┐\n", "│ first ┆ variable ┆ one ┆ two │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 │\n", "╞═══════╪══════════╪═══════════╪═══════════╡\n", "│ bar ┆ A ┆ -0.727965 ┆ 0.339969 │\n", "│ baz ┆ A ┆ -0.339355 ┆ 0.884345 │\n", "│ bar ┆ B ┆ -0.589346 ┆ -0.693205 │\n", "│ baz ┆ B ┆ 0.593616 ┆ 1.591431 │\n", "└───────┴──────────┴───────────┴───────────┘" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked.pivot('second', index=['first', 'variable'])" ] }, { "cell_type": "code", "execution_count": 117, "id": "73ad5854-8b09-434c-ae29-c5481c23f32a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 4)
secondvariablebarbaz
strstrf64f64
"one""A"-0.727965-0.339355
"two""A"0.3399690.884345
"one""B"-0.5893460.593616
"two""B"-0.6932051.591431
" ], "text/plain": [ "shape: (4, 4)\n", "┌────────┬──────────┬───────────┬───────────┐\n", "│ second ┆ variable ┆ bar ┆ baz │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 │\n", "╞════════╪══════════╪═══════════╪═══════════╡\n", "│ one ┆ A ┆ -0.727965 ┆ -0.339355 │\n", "│ two ┆ A ┆ 0.339969 ┆ 0.884345 │\n", "│ one ┆ B ┆ -0.589346 ┆ 0.593616 │\n", "│ two ┆ B ┆ -0.693205 ┆ 1.591431 │\n", "└────────┴──────────┴───────────┴───────────┘" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked.pivot('first', index=['second', 'variable'])" ] }, { "cell_type": "markdown", "id": "98cc1cdb-5d1d-4b08-912b-8d282d49258e", "metadata": {}, "source": [ "### Pivot tables" ] }, { "cell_type": "code", "execution_count": 118, "id": "6a932bb2-7d42-4036-bb0a-b6b0fd524070", "metadata": {}, "outputs": [], "source": [ "data = '''\n", "index A B C D E\n", "0 one A foo -1.202872 0.047609\n", "1 one B foo -1.814470 -0.136473\n", "2 two C foo 1.018601 -0.561757\n", "3 three A bar -0.595447 -1.623033\n", "4 one B bar 1.395433 0.029399\n", "5 one C bar -0.392670 -0.542108\n", "6 two A foo 0.007207 0.282696\n", "7 three B foo 1.928123 -0.087302\n", "8 one C foo -0.055224 -1.575170\n", "9 one A bar 2.395985 1.771208\n", "10 two B bar 1.552825 0.816482\n", "11 three C bar 0.166599 1.100230\n", "'''\n", "df = to_dataframe(data)" ] }, { "cell_type": "code", "execution_count": 119, "id": "728f4113-5710-477f-94be-eaa46ead1e78", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (9, 4)
ABfoobar
strstrf64f64
"one""A"-1.2028722.395985
"one""B"-1.814471.395433
"one""C"-0.055224-0.39267
"three""A"null-0.595447
"three""B"1.928123null
"three""C"null0.166599
"two""A"0.007207null
"two""B"null1.552825
"two""C"1.018601null
" ], "text/plain": [ "shape: (9, 4)\n", "┌───────┬─────┬───────────┬───────────┐\n", "│ A ┆ B ┆ foo ┆ bar │\n", "│ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ f64 ┆ f64 │\n", "╞═══════╪═════╪═══════════╪═══════════╡\n", "│ one ┆ A ┆ -1.202872 ┆ 2.395985 │\n", "│ one ┆ B ┆ -1.81447 ┆ 1.395433 │\n", "│ one ┆ C ┆ -0.055224 ┆ -0.39267 │\n", "│ three ┆ A ┆ null ┆ -0.595447 │\n", "│ three ┆ B ┆ 1.928123 ┆ null │\n", "│ three ┆ C ┆ null ┆ 0.166599 │\n", "│ two ┆ A ┆ 0.007207 ┆ null │\n", "│ two ┆ B ┆ null ┆ 1.552825 │\n", "│ two ┆ C ┆ 1.018601 ┆ null │\n", "└───────┴─────┴───────────┴───────────┘" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.pivot('C', index=['A', 'B'], values=\"D\").sort('A', 'B')" ] }, { "cell_type": "markdown", "id": "abfced00-ee04-4078-bc7d-d1f0e8ea66dc", "metadata": {}, "source": [ "## Time series" ] }, { "cell_type": "code", "execution_count": null, "id": "aec5afe0-ddc9-4527-b73c-0484eca28375", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 144, "id": "eb1c61e1-03b9-419d-808f-de57cd6c45c1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (4, 2)
indexvalue
datetime[μs]i32
2012-01-01 00:00:0072130
2012-01-01 00:05:0076640
2012-01-01 00:10:0071218
2012-01-01 00:15:0025874
" ], "text/plain": [ "shape: (4, 2)\n", "┌─────────────────────┬───────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ datetime[μs] ┆ i32 │\n", "╞═════════════════════╪═══════╡\n", "│ 2012-01-01 00:00:00 ┆ 72130 │\n", "│ 2012-01-01 00:05:00 ┆ 76640 │\n", "│ 2012-01-01 00:10:00 ┆ 71218 │\n", "│ 2012-01-01 00:15:00 ┆ 25874 │\n", "└─────────────────────┴───────┘" ] }, "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ "n = 1000\n", "rng = pl.datetime(2012, 1, 1) + pl.duration(seconds=pl.int_range(0, n))\n", "ts = pl.select(\n", " index=rng,\n", " value=np.random.randint(0, 500, n)\n", ")\n", "(\n", "ts\n", ".group_by(pl.col('index').dt.truncate('5m'), maintain_order=True)\n", ".agg(pl.col('value').sum())\n", ")" ] }, { "cell_type": "code", "execution_count": 145, "id": "f590b159-854f-463a-bdf0-ff105fdb3403", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
datetime[μs]f64
2012-03-06 00:00:00-0.037589
2012-03-07 00:00:00-0.422765
2012-03-08 00:00:000.148907
2012-03-09 00:00:00-0.02686
2012-03-10 00:00:000.813454
" ], "text/plain": [ "shape: (5, 2)\n", "┌─────────────────────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ datetime[μs] ┆ f64 │\n", "╞═════════════════════╪═══════════╡\n", "│ 2012-03-06 00:00:00 ┆ -0.037589 │\n", "│ 2012-03-07 00:00:00 ┆ -0.422765 │\n", "│ 2012-03-08 00:00:00 ┆ 0.148907 │\n", "│ 2012-03-09 00:00:00 ┆ -0.02686 │\n", "│ 2012-03-10 00:00:00 ┆ 0.813454 │\n", "└─────────────────────┴───────────┘" ] }, "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rng = pl.datetime(2012, 3, 6) + pl.duration(days=pl.int_range(0, 5))\n", "ts = pl.select(\n", " index=rng,\n", " value=np.random.randn(5)\n", ")\n", "ts" ] }, { "cell_type": "code", "execution_count": 148, "id": "a18370c2-3896-45e1-aa9b-6a3018374edd", "metadata": {}, "outputs": [], "source": [ "ts_utc = ts.with_columns(\n", " pl.col('index').dt.replace_time_zone(\"UTC\")\n", ")" ] }, { "cell_type": "code", "execution_count": 149, "id": "49863e28-4aa6-4e6b-99e5-017dc5d2005d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
datetime[μs, US/Eastern]f64
2012-03-05 19:00:00 EST-0.037589
2012-03-06 19:00:00 EST-0.422765
2012-03-07 19:00:00 EST0.148907
2012-03-08 19:00:00 EST-0.02686
2012-03-09 19:00:00 EST0.813454
" ], "text/plain": [ "shape: (5, 2)\n", "┌──────────────────────────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ datetime[μs, US/Eastern] ┆ f64 │\n", "╞══════════════════════════╪═══════════╡\n", "│ 2012-03-05 19:00:00 EST ┆ -0.037589 │\n", "│ 2012-03-06 19:00:00 EST ┆ -0.422765 │\n", "│ 2012-03-07 19:00:00 EST ┆ 0.148907 │\n", "│ 2012-03-08 19:00:00 EST ┆ -0.02686 │\n", "│ 2012-03-09 19:00:00 EST ┆ 0.813454 │\n", "└──────────────────────────┴───────────┘" ] }, "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts_utc.with_columns(\n", " pl.col('index').dt.convert_time_zone('US/Eastern')\n", ")" ] }, { "cell_type": "code", "execution_count": 152, "id": "807d69d7-65a2-4d41-9172-0ef71fd86f74", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
indexvalue
datetime[μs]f64
2012-03-13 00:00:00-0.037589
2012-03-14 00:00:00-0.422765
2012-03-15 00:00:000.148907
2012-03-16 00:00:00-0.02686
2012-03-16 00:00:000.813454
" ], "text/plain": [ "shape: (5, 2)\n", "┌─────────────────────┬───────────┐\n", "│ index ┆ value │\n", "│ --- ┆ --- │\n", "│ datetime[μs] ┆ f64 │\n", "╞═════════════════════╪═══════════╡\n", "│ 2012-03-13 00:00:00 ┆ -0.037589 │\n", "│ 2012-03-14 00:00:00 ┆ -0.422765 │\n", "│ 2012-03-15 00:00:00 ┆ 0.148907 │\n", "│ 2012-03-16 00:00:00 ┆ -0.02686 │\n", "│ 2012-03-16 00:00:00 ┆ 0.813454 │\n", "└─────────────────────┴───────────┘" ] }, "execution_count": 152, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts.with_columns(\n", " pl.col('index').dt.add_business_days(5, roll=\"backward\")\n", ")" ] }, { "cell_type": "markdown", "id": "97cfaf35-917c-4fca-a03a-ea9c59166da5", "metadata": {}, "source": [ "## Categoricals" ] }, { "cell_type": "code", "execution_count": 153, "id": "4fdd08ef-e254-4dbe-8d40-21dc23a583da", "metadata": {}, "outputs": [], "source": [ "df = pl.DataFrame({\n", " \"id\": [1, 2, 3, 4, 5, 6], \n", " \"raw_grade\": [\"a\", \"b\", \"b\", \"a\", \"a\", \"e\"]\n", "})" ] }, { "cell_type": "code", "execution_count": 167, "id": "962ad60b-b560-4f72-9a60-9818d64374f5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 3)
idraw_gradegrade
i64strcat
1"a""a"
2"b""b"
3"b""b"
4"a""a"
5"a""a"
6"e""e"
" ], "text/plain": [ "shape: (6, 3)\n", "┌─────┬───────────┬───────┐\n", "│ id ┆ raw_grade ┆ grade │\n", "│ --- ┆ --- ┆ --- │\n", "│ i64 ┆ str ┆ cat │\n", "╞═════╪═══════════╪═══════╡\n", "│ 1 ┆ a ┆ a │\n", "│ 2 ┆ b ┆ b │\n", "│ 3 ┆ b ┆ b │\n", "│ 4 ┆ a ┆ a │\n", "│ 5 ┆ a ┆ a │\n", "│ 6 ┆ e ┆ e │\n", "└─────┴───────────┴───────┘" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.with_columns(\n", " grade=pl.col('raw_grade').cast(pl.Categorical)\n", ")\n", "df" ] }, { "cell_type": "code", "execution_count": 168, "id": "b68df977-1d76-456f-a377-f6ca7d96cab4", "metadata": {}, "outputs": [], "source": [ "df = df.with_columns(\n", " pl.col('grade')\n", " .cast(pl.String)\n", " .replace(dict(a='very good', b='good', e=\"very bad\"))\n", " .cast(pl.Categorical)\n", ")" ] }, { "cell_type": "code", "execution_count": 170, "id": "e2677f69-3b0c-4389-8932-370939b917df", "metadata": {}, "outputs": [], "source": [ "grades = pl.Enum([\"very bad\", \"bad\", \"medium\", \"good\", \"very good\"])\n", "df = df.with_columns(\n", " pl.col('grade').cast(grades)\n", ")" ] }, { "cell_type": "code", "execution_count": 171, "id": "fd6d4b16-2af0-4006-9081-3dc8a251e830", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (6, 3)
idraw_gradegrade
i64strenum
6"e""very bad"
2"b""good"
3"b""good"
1"a""very good"
4"a""very good"
5"a""very good"
" ], "text/plain": [ "shape: (6, 3)\n", "┌─────┬───────────┬───────────┐\n", "│ id ┆ raw_grade ┆ grade │\n", "│ --- ┆ --- ┆ --- │\n", "│ i64 ┆ str ┆ enum │\n", "╞═════╪═══════════╪═══════════╡\n", "│ 6 ┆ e ┆ very bad │\n", "│ 2 ┆ b ┆ good │\n", "│ 3 ┆ b ┆ good │\n", "│ 1 ┆ a ┆ very good │\n", "│ 4 ┆ a ┆ very good │\n", "│ 5 ┆ a ┆ very good │\n", "└─────┴───────────┴───────────┘" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort('grade')" ] }, { "cell_type": "code", "execution_count": 182, "id": "eef95d13-ffff-462c-95ff-ea2724d3c771", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 2)
lengrade
u32enum
1"very bad"
0"bad"
0"medium"
2"good"
3"very good"
" ], "text/plain": [ "shape: (5, 2)\n", "┌─────┬───────────┐\n", "│ len ┆ grade │\n", "│ --- ┆ --- │\n", "│ u32 ┆ enum │\n", "╞═════╪═══════════╡\n", "│ 1 ┆ very bad │\n", "│ 0 ┆ bad │\n", "│ 0 ┆ medium │\n", "│ 2 ┆ good │\n", "│ 3 ┆ very good │\n", "└─────┴───────────┘" ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df.groupby(\"grade\", observed=False).size()\n", "(\n", "df\n", ".group_by('grade')\n", ".agg(pl.len())\n", ".join(pl.select(grade=grades.categories.cast(grades)), on='grade', how='right')\n", ".with_columns(pl.col('len').fill_null(0))\n", ")" ] }, { "cell_type": "markdown", "id": "f39be3e0-0926-48f3-850d-adf3cf4b9937", "metadata": {}, "source": [ "## Plotting" ] }, { "cell_type": "code", "execution_count": 185, "id": "7d859427-ca0e-4433-b9ec-2fb7ec5fcff7", "metadata": {}, "outputs": [ { "data": { "application/javascript": [ "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", " var py_version = '3.4.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", " var reloading = false;\n", " var Bokeh = root.Bokeh;\n", "\n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) {\n", " if (callback != null)\n", " callback();\n", " });\n", " } finally {\n", " delete root._bokeh_onload_callbacks;\n", " }\n", " console.debug(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", " if (css_urls == null) css_urls = [];\n", " if (js_urls == null) js_urls = [];\n", " if (js_modules == null) js_modules = [];\n", " if (js_exports == null) js_exports = {};\n", "\n", " root._bokeh_onload_callbacks.push(callback);\n", "\n", " if (root._bokeh_is_loading > 0) {\n", " 
console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " if (!reloading) {\n", " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " }\n", "\n", " function on_load() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", " run_callbacks()\n", " }\n", " }\n", " window._bokeh_on_load = on_load\n", "\n", " function on_error() {\n", " console.error(\"failed to load \" + url);\n", " }\n", "\n", " var skip = [];\n", " if (window.requirejs) {\n", " window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n", " root._bokeh_is_loading = css_urls.length + 0;\n", " } else {\n", " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", " }\n", "\n", " var existing_stylesheets = []\n", " var links = document.getElementsByTagName('link')\n", " for (var i = 0; i < links.length; i++) {\n", " var link = links[i]\n", " if (link.href != null) {\n", "\texisting_stylesheets.push(link.href)\n", " }\n", " }\n", " for (var i = 0; i < css_urls.length; i++) {\n", " var url = css_urls[i];\n", " if (existing_stylesheets.indexOf(url) !== -1) {\n", "\ton_load()\n", "\tcontinue;\n", " }\n", " const element = document.createElement(\"link\");\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.rel = \"stylesheet\";\n", " element.type = \"text/css\";\n", " element.href = url;\n", " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", " document.body.appendChild(element);\n", " } var existing_scripts = []\n", " var scripts = document.getElementsByTagName('script')\n", " for (var i = 0; i < scripts.length; i++) {\n", " var script = 
scripts[i]\n", " if (script.src != null) {\n", "\texisting_scripts.push(script.src)\n", " }\n", " }\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", "\tif (!window.requirejs) {\n", "\t on_load();\n", "\t}\n", "\tcontinue;\n", " }\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " for (var i = 0; i < js_modules.length; i++) {\n", " var url = js_modules[i];\n", " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", "\tif (!window.requirejs) {\n", "\t on_load();\n", "\t}\n", "\tcontinue;\n", " }\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " element.type = \"module\";\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " for (const name in js_exports) {\n", " var url = js_exports[name];\n", " if (skip.indexOf(url) >= 0 || root[name] != null) {\n", "\tif (!window.requirejs) {\n", "\t on_load();\n", "\t}\n", "\tcontinue;\n", " }\n", " var element = document.createElement('script');\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.type = \"module\";\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " element.textContent = `\n", " import ${name} from \"${url}\"\n", " window.${name} = ${name}\n", " window._bokeh_on_load()\n", " `\n", " document.head.appendChild(element);\n", " }\n", " if (!js_urls.length && !js_modules.length) {\n", " on_load()\n", " }\n", " };\n", "\n", " function inject_raw_css(css) {\n", " 
const element = document.createElement(\"style\");\n", " element.appendChild(document.createTextNode(css));\n", " document.body.appendChild(element);\n", " }\n", "\n", " var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.2.min.js\", \"https://cdn.holoviz.org/panel/1.4.4/dist/panel.min.js\"];\n", " var js_modules = [];\n", " var js_exports = {};\n", " var css_urls = [];\n", " var inline_js = [ function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", "function(Bokeh) {} // ensure no trailing comma for IE\n", " ];\n", "\n", " function run_inline_js() {\n", " if ((root.Bokeh !== undefined) || (force === true)) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", "\ttry {\n", " inline_js[i].call(root, root.Bokeh);\n", "\t} catch(e) {\n", "\t if (!reloading) {\n", "\t throw e;\n", "\t }\n", "\t}\n", " }\n", " // Cache old bokeh versions\n", " if (Bokeh != undefined && !reloading) {\n", "\tvar NewBokeh = root.Bokeh;\n", "\tif (Bokeh.versions === undefined) {\n", "\t Bokeh.versions = new Map();\n", "\t}\n", "\tif (NewBokeh.version !== Bokeh.version) {\n", "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", "\t}\n", "\troot.Bokeh = Bokeh;\n", " }} else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " }\n", " root._bokeh_is_initializing = false\n", " }\n", "\n", " function load_or_wait() {\n", " // Implement a backoff loop that tries to ensure we do not load multiple\n", " // versions of Bokeh and its dependencies at the same time.\n", " // In recent versions we use the root._bokeh_is_initializing flag\n", " // to determine whether there is an ongoing 
attempt to initialize\n", " // bokeh, however for backward compatibility we also try to ensure\n", " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", " // before older versions are fully initialized.\n", " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", " root._bokeh_is_initializing = false;\n", " root._bokeh_onload_callbacks = undefined;\n", " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", " load_or_wait();\n", " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", " setTimeout(load_or_wait, 100);\n", " } else {\n", " root._bokeh_is_initializing = true\n", " root._bokeh_onload_callbacks = []\n", " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", " if (!reloading && !bokeh_loaded) {\n", "\troot.Bokeh = undefined;\n", " }\n", " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", "\trun_inline_js();\n", " });\n", " }\n", " }\n", " // Give older versions of the autoload script a head-start to ensure\n", " // they initialize before we start loading newer version.\n", " setTimeout(load_or_wait, 100)\n", "}(window));" ], "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.4.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = false;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete 
root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: 
\", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n 
element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.2.min.js\", \"https://cdn.holoviz.org/panel/1.4.4/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && 
Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", "}\n", "\n", "\n", " function JupyterCommManager() {\n", " }\n", "\n", " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", " comm_manager.register_target(comm_id, function(comm) {\n", " comm.on_msg(msg_handler);\n", " });\n", " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", " comm.onMsg = msg_handler;\n", " 
});\n", " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", " var messages = comm.messages[Symbol.asyncIterator]();\n", " function processIteratorResult(result) {\n", " var message = result.value;\n", " console.log(message)\n", " var content = {data: message.data, comm_id};\n", " var buffers = []\n", " for (var buffer of message.buffers || []) {\n", " buffers.push(new DataView(buffer))\n", " }\n", " var metadata = message.metadata || {};\n", " var msg = {content, buffers, metadata}\n", " msg_handler(msg);\n", " return messages.next().then(processIteratorResult);\n", " }\n", " return messages.next().then(processIteratorResult);\n", " })\n", " }\n", " }\n", "\n", " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", " if (comm_id in window.PyViz.comms) {\n", " return window.PyViz.comms[comm_id];\n", " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", " if (msg_handler) {\n", " comm.on_msg(msg_handler);\n", " }\n", " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", " comm.open();\n", " if (msg_handler) {\n", " comm.onMsg = msg_handler;\n", " }\n", " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", " comm_promise.then((comm) => {\n", " window.PyViz.comms[comm_id] = comm;\n", " if (msg_handler) {\n", " var messages = comm.messages[Symbol.asyncIterator]();\n", " function processIteratorResult(result) {\n", " var message = result.value;\n", " var content = {data: message.data};\n", " var metadata = message.metadata || 
{comm_id};\n", " var msg = {content, metadata}\n", " msg_handler(msg);\n", " return messages.next().then(processIteratorResult);\n", " }\n", " return messages.next().then(processIteratorResult);\n", " }\n", " }) \n", " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n", " return comm_promise.then((comm) => {\n", " comm.send(data, metadata, buffers, disposeOnDone);\n", " });\n", " };\n", " var comm = {\n", " send: sendClosure\n", " };\n", " }\n", " window.PyViz.comms[comm_id] = comm;\n", " return comm;\n", " }\n", " window.PyViz.comm_manager = new JupyterCommManager();\n", " \n", "\n", "\n", "var JS_MIME_TYPE = 'application/javascript';\n", "var HTML_MIME_TYPE = 'text/html';\n", "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n", "var CLASS_NAME = 'output';\n", "\n", "/**\n", " * Render data to the DOM node\n", " */\n", "function render(props, node) {\n", " var div = document.createElement(\"div\");\n", " var script = document.createElement(\"script\");\n", " node.appendChild(div);\n", " node.appendChild(script);\n", "}\n", "\n", "/**\n", " * Handle when a new output is added\n", " */\n", "function handle_add_output(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", " if (id !== undefined) {\n", " var nchildren = toinsert.length;\n", " var html_node = toinsert[nchildren-1].children[0];\n", " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", " var scripts = [];\n", " var nodelist = html_node.querySelectorAll(\"script\");\n", " for (var i in nodelist) {\n", " if (nodelist.hasOwnProperty(i)) {\n", " scripts.push(nodelist[i])\n", " }\n", " }\n", "\n", " scripts.forEach( function (oldScript) {\n", " var newScript = 
document.createElement(\"script\");\n", " var attrs = [];\n", " var nodemap = oldScript.attributes;\n", " for (var j in nodemap) {\n", " if (nodemap.hasOwnProperty(j)) {\n", " attrs.push(nodemap[j])\n", " }\n", " }\n", " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", " oldScript.parentNode.replaceChild(newScript, oldScript);\n", " });\n", " if (JS_MIME_TYPE in output.data) {\n", " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", " }\n", " output_area._hv_plot_id = id;\n", " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", " window.PyViz.plot_index[id] = Bokeh.index[id];\n", " } else {\n", " window.PyViz.plot_index[id] = null;\n", " }\n", " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", "}\n", "\n", "/**\n", " * Handle when an output is cleared or removed\n", " */\n", "function handle_clear_output(event, handle) {\n", " var id = handle.cell.output_area._hv_plot_id;\n", " var server_id = handle.cell.output_area._bokeh_server_id;\n", " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", " if (server_id !== null) {\n", " comm.send({event_type: 'server_delete', 'id': server_id});\n", " return;\n", " } else if (comm !== null) {\n", " 
comm.send({event_type: 'delete', 'id': id});\n", " }\n", " delete PyViz.plot_index[id];\n", " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", " var doc = window.Bokeh.index[id].model.document\n", " doc.clear();\n", " const i = window.Bokeh.documents.indexOf(doc);\n", " if (i > -1) {\n", " window.Bokeh.documents.splice(i, 1);\n", " }\n", " }\n", "}\n", "\n", "/**\n", " * Handle kernel restart event\n", " */\n", "function handle_kernel_cleanup(event, handle) {\n", " delete PyViz.comms[\"hv-extension-comm\"];\n", " window.PyViz.plot_index = {}\n", "}\n", "\n", "/**\n", " * Handle update_display_data messages\n", " */\n", "function handle_update_output(event, handle) {\n", " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", " handle_add_output(event, handle)\n", "}\n", "\n", "function register_renderer(events, OutputArea) {\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[0]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " events.on('output_added.OutputArea', handle_add_output);\n", " events.on('output_updated.OutputArea', handle_update_output);\n", " events.on('clear_output.CodeCell', handle_clear_output);\n", " events.on('delete.Cell', handle_clear_output);\n", " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", "\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " safe: true,\n", " index: 0\n", " });\n", "}\n", "\n", "if (window.Jupyter !== undefined) {\n", " try {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", " if 
(OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " } catch(err) {\n", " }\n", "}\n" ], "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || 
Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id 
= output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === 
undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = 
require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ] }, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "p1002" } }, "output_type": "display_data" } ], "source": [ "import hvplot.polars" ] }, { "cell_type": "code", "execution_count": 184, "id": "25df41f9-cb76-4002-a65c-b5439b1c0ecd", "metadata": {}, "outputs": [], "source": [ "ts = pl.select(\n", " index=pl.datetime(2000, 1, 1) + pl.duration(days=pl.int_range(1000)),\n", " value=np.random.randn(1000)\n", ")\n", "ts = ts.with_columns(pl.col('value').cum_sum())" ] }, { "cell_type": "code", "execution_count": 186, "id": "181b931f-441b-493b-ac3d-bad0b470f0cd", "metadata": {}, "outputs": [ { "data": {}, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ], "text/plain": [ ":Curve [index] (value)" ] }, "execution_count": 186, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "p1004" } }, "output_type": "execute_result" } ], "source": [ "ts.hvplot.line(x='index')" ] }, { "cell_type": "code", "execution_count": 188, "id": "a430b004-3961-4246-9949-26dd93a6f2e3", "metadata": {}, "outputs": [ { "data": {}, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ "
\n", "
\n", "
\n", "" ], "text/plain": [ ":NdOverlay [Variable]\n", " :Curve [index] (value)" ] }, "execution_count": 188, "metadata": { "application/vnd.holoviews_exec.v0+json": { "id": "p1079" } }, "output_type": "execute_result" } ], "source": [ "df = pl.DataFrame(\n", " np.random.randn(1000, 4), schema=['A', 'B', 'C', 'D']\n", ")\n", "df = df.with_columns(\n", " ts['index'],\n", " pl.all().cum_sum()\n", ")\n", "df.hvplot.line(x='index')" ] }, { "cell_type": "markdown", "id": "b35f5ab5-3660-4fc7-a0c9-a1b599a7a2cf", "metadata": {}, "source": [ "## Importing and exporting data" ] }, { "cell_type": "markdown", "id": "253febe9-b988-4999-94aa-0777caf1d6a1", "metadata": {}, "source": [ "### CSV" ] }, { "cell_type": "code", "execution_count": 191, "id": "0ffeab03-9631-4ef6-b3fc-aab665ac9952", "metadata": {}, "outputs": [], "source": [ "df = pl.DataFrame(np.random.randint(0, 5, (10, 5)))\n", "df.write_csv('data/pandas_foo.csv')" ] }, { "cell_type": "code", "execution_count": 192, "id": "3f781026-2b16-4ecf-bb43-11cbce70b4dc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 5)
column_0column_1column_2column_3column_4
i64i64i64i64i64
12001
31041
31012
21100
03044
44430
20224
02133
40324
21411
" ], "text/plain": [ "shape: (10, 5)\n", "┌──────────┬──────────┬──────────┬──────────┬──────────┐\n", "│ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ column_4 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", "╞══════════╪══════════╪══════════╪══════════╪══════════╡\n", "│ 1 ┆ 2 ┆ 0 ┆ 0 ┆ 1 │\n", "│ 3 ┆ 1 ┆ 0 ┆ 4 ┆ 1 │\n", "│ 3 ┆ 1 ┆ 0 ┆ 1 ┆ 2 │\n", "│ 2 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │\n", "│ 0 ┆ 3 ┆ 0 ┆ 4 ┆ 4 │\n", "│ 4 ┆ 4 ┆ 4 ┆ 3 ┆ 0 │\n", "│ 2 ┆ 0 ┆ 2 ┆ 2 ┆ 4 │\n", "│ 0 ┆ 2 ┆ 1 ┆ 3 ┆ 3 │\n", "│ 4 ┆ 0 ┆ 3 ┆ 2 ┆ 4 │\n", "│ 2 ┆ 1 ┆ 4 ┆ 1 ┆ 1 │\n", "└──────────┴──────────┴──────────┴──────────┴──────────┘" ] }, "execution_count": 192, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.read_csv('data/pandas_foo.csv')" ] }, { "cell_type": "markdown", "id": "d8d1668a-63c8-4670-b7f3-a1f516d5b7c1", "metadata": {}, "source": [ "### Parquet" ] }, { "cell_type": "code", "execution_count": 193, "id": "4cb89697-e557-419a-828f-4c02e35a666d", "metadata": {}, "outputs": [], "source": [ "df.write_parquet('data/pandas_foo.parquet')" ] }, { "cell_type": "code", "execution_count": 194, "id": "92a784d2-1d63-45bd-a4b1-6c2957c8e665", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 5)
column_0column_1column_2column_3column_4
i32i32i32i32i32
12001
31041
31012
21100
03044
44430
20224
02133
40324
21411
" ], "text/plain": [ "shape: (10, 5)\n", "┌──────────┬──────────┬──────────┬──────────┬──────────┐\n", "│ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ column_4 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i32 ┆ i32 ┆ i32 ┆ i32 ┆ i32 │\n", "╞══════════╪══════════╪══════════╪══════════╪══════════╡\n", "│ 1 ┆ 2 ┆ 0 ┆ 0 ┆ 1 │\n", "│ 3 ┆ 1 ┆ 0 ┆ 4 ┆ 1 │\n", "│ 3 ┆ 1 ┆ 0 ┆ 1 ┆ 2 │\n", "│ 2 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │\n", "│ 0 ┆ 3 ┆ 0 ┆ 4 ┆ 4 │\n", "│ 4 ┆ 4 ┆ 4 ┆ 3 ┆ 0 │\n", "│ 2 ┆ 0 ┆ 2 ┆ 2 ┆ 4 │\n", "│ 0 ┆ 2 ┆ 1 ┆ 3 ┆ 3 │\n", "│ 4 ┆ 0 ┆ 3 ┆ 2 ┆ 4 │\n", "│ 2 ┆ 1 ┆ 4 ┆ 1 ┆ 1 │\n", "└──────────┴──────────┴──────────┴──────────┴──────────┘" ] }, "execution_count": 194, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.read_parquet('data/pandas_foo.parquet')" ] }, { "cell_type": "markdown", "id": "dde3275e-17c4-4c80-8ae5-a32d2a054d8f", "metadata": {}, "source": [ "### Excel" ] }, { "cell_type": "code", "execution_count": 195, "id": "c736010e-1988-4477-9fc6-82d2c0c6a58b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.write_excel('data/pandas_foo.xlsx', 'Sheet1')" ] }, { "cell_type": "code", "execution_count": 198, "id": "d9132530-0ec8-40e3-aeec-dc949d0196fc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (10, 5)
column_0column_1column_2column_3column_4
i64i64i64i64i64
12001
31041
31012
21100
03044
44430
20224
02133
40324
21411
" ], "text/plain": [ "shape: (10, 5)\n", "┌──────────┬──────────┬──────────┬──────────┬──────────┐\n", "│ column_0 ┆ column_1 ┆ column_2 ┆ column_3 ┆ column_4 │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", "╞══════════╪══════════╪══════════╪══════════╪══════════╡\n", "│ 1 ┆ 2 ┆ 0 ┆ 0 ┆ 1 │\n", "│ 3 ┆ 1 ┆ 0 ┆ 4 ┆ 1 │\n", "│ 3 ┆ 1 ┆ 0 ┆ 1 ┆ 2 │\n", "│ 2 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │\n", "│ 0 ┆ 3 ┆ 0 ┆ 4 ┆ 4 │\n", "│ 4 ┆ 4 ┆ 4 ┆ 3 ┆ 0 │\n", "│ 2 ┆ 0 ┆ 2 ┆ 2 ┆ 4 │\n", "│ 0 ┆ 2 ┆ 1 ┆ 3 ┆ 3 │\n", "│ 4 ┆ 0 ┆ 3 ┆ 2 ┆ 4 │\n", "│ 2 ┆ 1 ┆ 4 ┆ 1 ┆ 1 │\n", "└──────────┴──────────┴──────────┴──────────┴──────────┘" ] }, "execution_count": 198, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.read_excel('data/pandas_foo.xlsx', sheet_name='Sheet1')" ] }, { "cell_type": "markdown", "id": "f28d6f45-6887-4fb0-bd66-0561ee055ba1", "metadata": {}, "source": [ "## Gotchas" ] }, { "cell_type": "code", "execution_count": 199, "id": "9c47f09c-c53a-40c1-aca4-d1bbc2412c51", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "the truth value of a Series is ambiguous\n\nHere are some things you might want to try:\n- instead of `if s`, use `if not s.is_empty()`\n- instead of `s1 and s2`, use `s1 & s2`\n- instead of `s1 or s2`, use `s1 | s2`\n- instead of `s in [y, z]`, use `s.is_in([y, z])`\n", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[199], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pl\u001b[38;5;241m.\u001b[39mSeries([\u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;28;01mFalse\u001b[39;00m]):\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mI was true\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File 
\u001b[1;32mC:\\mamba\\envs\\py312\\Lib\\site-packages\\polars\\series\\series.py:636\u001b[0m, in \u001b[0;36mSeries.__bool__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 626\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__bool__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn:\n\u001b[0;32m 627\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 628\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthe truth value of a Series is ambiguous\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 629\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 634\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m- instead of `s in [y, z]`, use `s.is_in([y, z])`\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 635\u001b[0m )\n\u001b[1;32m--> 636\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n", "\u001b[1;31mTypeError\u001b[0m: the truth value of a Series is ambiguous\n\nHere are some things you might want to try:\n- instead of `if s`, use `if not s.is_empty()`\n- instead of `s1 and s2`, use `s1 & s2`\n- instead of `s1 or s2`, use `s1 | s2`\n- instead of `s in [y, z]`, use `s.is_in([y, z])`\n" ] } ], "source": [ "if pl.Series([False, True, False]):\n", " print(\"I was true\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }