Intro to data structures#

import numpy as np
import polars as pl
from helper.jupyter import row

Series#

In Polars, a Series has no index. To emulate a pandas Series, this notebook uses a two-column DataFrame: an `index` column holding the labels and a `value` column holding the data.

# Emulated Series: labels live in `index`, five random draws in `value`.
s = pl.DataFrame({
    "index": list("abcde"),
    "value": np.random.randn(5),
})
s
shape: (5, 2)
indexvalue
strf64
"a"-1.163544
"b"-0.900663
"c"1.657318
"d"-1.240945
"e"0.964103
s['index']
shape: (5,)
index
str
"a"
"b"
"c"
"d"
"e"
pl.Series(np.random.randn(5))
shape: (5,)
f64
0.083327
-1.077013
0.120624
-0.480749
-1.196976
# pd.Series(d)
d = {"b": 1, "a": 0, "c": 2}
# Keys become the emulated index column, values the data column;
# insertion order of the dict is preserved.
pl.DataFrame({"index": list(d), "value": list(d.values())})
shape: (3, 2)
indexvalue
stri64
"b"1
"a"0
"c"2
pl.select(
    index=pl.Series(["a", "b", "c", "d", "e"]),
    value=5.0
)
shape: (5, 2)
indexvalue
strf64
"a"5.0
"b"5.0
"c"5.0
"d"5.0
"e"5.0

Series is ndarray-like#

s['value'][0]
-1.163544306576043
s['value'][:3]
shape: (3,)
value
f64
-1.163544
-0.900663
1.657318
# s[s > s.median()]
s.filter(pl.col('value') > pl.col('value').median())
shape: (2, 2)
indexvalue
strf64
"c"1.657318
"e"0.964103
# s.iloc[[4, 3, 1]]
s[[4, 3, 1]]
shape: (3, 2)
indexvalue
strf64
"e"0.964103
"d"-1.240945
"b"-0.900663
s.with_columns(
    pl.col('value').exp()
)
shape: (5, 2)
indexvalue
strf64
"a"0.312377
"b"0.4063
"c"5.245225
"d"0.289111
"e"2.622434
s['value'].dtype
Float64
# s.array
s['value'].to_numpy()
array([-1.16354431, -0.90066273,  1.65731812, -1.24094468,  0.96410308])

Series is dict-like#

# s["a"]
s.select(pl.col('value').filter(pl.col('index') == 'a')).item()
-1.163544306576043
# s["e"] = 12.0
# Replace the value on the row labelled "e" and leave every other row as is.
# The result is named explicitly: the expression reads both `index` and
# `value`, so the output column name should not be left to root-name
# inference.
s = s.with_columns(
    pl.when(pl.col('index') == 'e')
      .then(12.0)
      .otherwise(pl.col('value'))
      .alias('value')
)
# "e" in s
"e" in s['index']
True
"f" in s['index']
False

Vectorized operations and label alignment with Series#

# s + s
from helper.polars import align_op
align_op(s, s, op=pl.Expr.add)
shape: (5, 2)
indexvalue
strf64
"a"-2.327089
"b"-1.801325
"c"3.314636
"d"-2.481889
"e"24.0
# s * 2
s.select(
    'index',
    pl.col('value') * 2
)
shape: (5, 2)
indexvalue
strf64
"a"-2.327089
"b"-1.801325
"c"3.314636
"d"-2.481889
"e"24.0
# np.exp(s)
s.select(
    "index",
    pl.col("value").exp()
)
shape: (5, 2)
indexvalue
strf64
"a"0.312377
"b"0.4063
"c"5.245225
"d"0.289111
"e"162754.791419
# s.iloc[1:] + s.iloc[:-1]
align_op(
    s.slice(1), 
    s.slice(0, len(s) - 1), 
    op=pl.Expr.add, 
    fill_value=None, 
    how="full")
shape: (5, 2)
indexvalue
strf64
"a"null
"b"-1.801325
"c"3.314636
"d"-2.481889
"e"null

Name attribute#

s = pl.Series("something", np.random.randn(5))
s
shape: (5,)
something
f64
1.879844
1.553987
-1.190783
-1.452195
-0.553582
s.name
'something'
s2 = s.rename('different')
s2.name
'different'

DataFrame#

s1 = pl.DataFrame(dict(index=["a", "b", "c"], one=[1.0, 2.0, 3.0]))
s2 = pl.DataFrame(dict(index=["a", "b", "c", "d"], two=[1.0, 2.0, 3.0, 4.0]))
df = s1.join(s2, on='index', how='full', coalesce=True)
df
shape: (4, 3)
indexonetwo
strf64f64
"a"1.01.0
"b"2.02.0
"c"3.03.0
"d"null4.0
df['index']
shape: (4,)
index
str
"a"
"b"
"c"
"d"
df.columns
['index', 'one', 'two']
df.drop('index').columns
['one', 'two']

From dict of ndarrays / lists#

d = {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]}
pl.DataFrame(d)
shape: (4, 2)
onetwo
f64f64
1.04.0
2.03.0
3.02.0
4.01.0
pl.DataFrame(d).insert_column(0, pl.Series('index', ["a", "b", "c", "d"]))
shape: (4, 3)
indexonetwo
strf64f64
"a"1.04.0
"b"2.03.0
"c"3.02.0
"d"4.01.0

From structured or record array#

# Structured array with an int32, a float32 and a 10-byte string field.
# "S10" is the supported spelling of the fixed-width bytes dtype; the "a"
# type code is a deprecated alias for it.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
data[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
pl.DataFrame(data)
shape: (2, 3)
ABC
i32f32binary
12.0b"Hello"
23.0b"World"
pl.DataFrame(data).insert_column(0, pl.Series('index', ['first', 'second']))
shape: (2, 4)
indexABC
stri32f32binary
"first"12.0b"Hello"
"second"23.0b"World"
pl.DataFrame(data).select("C", "A", "B")
shape: (2, 3)
CAB
binaryi32f32
b"Hello"12.0
b"World"23.0

From a list of dicts#

data2 = [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}]
pl.DataFrame(data2)
shape: (2, 3)
abc
i64i64i64
12null
51020
pl.DataFrame(data2).insert_column(0, pl.Series('index', ['first', 'second']))
shape: (2, 4)
indexabc
stri64i64i64
"first"12null
"second"51020
pl.DataFrame(data2).select('a', 'b')
shape: (2, 2)
ab
i64i64
12
510

From a dict of tuples#

data = {
    ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
    ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
    ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
    ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
    ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
}

# One frame per outer key: the inner tuple keys become the two index
# columns, and the outer tuple, joined with "-", names the value column.
dfs = [
    pl.DataFrame(
        [[*inner_key, cell] for inner_key, cell in inner.items()],
        orient='row',
        schema=['index0', 'index1', '-'.join(outer_key)],
    )
    for outer_key, inner in data.items()
]
    
pl.concat(dfs, how="align")    
shape: (3, 7)
index0index1a-ba-aa-cb-ab-b
strstri64i64i64i64i64
"A""B"145810
"A""C"2367null
"A""D"nullnullnullnull9

From a Series#

ser = pl.Series('ser', range(3))
pl.DataFrame(ser).insert_column(0, pl.Series('index', list("abc")))
shape: (3, 2)
indexser
stri64
"a"0
"b"1
"c"2

From a list of namedtuples#

from collections import namedtuple
Point = namedtuple("Point", "x y")
pl.DataFrame([Point(0, 0), Point(0, 3), (2, 3)])
shape: (3, 2)
xy
i64i64
00
03
23
Point3D = namedtuple("Point3D", "x y z")
data = [Point3D(0, 0, 0), Point3D(0, 3, 5), Point(2, 3)]
pl.DataFrame([p._asdict() for p in data])
shape: (3, 3)
xyz
i64i64i64
000
035
23null

From a list of dataclasses#

from dataclasses import make_dataclass
Point = make_dataclass("Point", [("x", int), ("y", int)])
pl.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
shape: (3, 2)
xy
i64i64
00
03
23

Alternate constructors#

DataFrame.from_dict#

# A plain mapping of column name -> values is all the constructor needs.
pl.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
shape: (3, 2)
AB
i64i64
14
25
36
# pd.DataFrame.from_dict(data, orient="index", columns=["one", "two", "three"])
data = {"A": [1, 2, 3], "B": [4, 5, 6]}
# Each dict value is one row; the dict keys become the emulated index column.
labels = pl.Series('index', list(data))
by_row = pl.DataFrame(
    list(data.values()), schema=['one', 'two', 'three'], orient='row'
)
by_row.insert_column(0, labels)
shape: (2, 4)
indexonetwothree
stri64i64i64
"A"123
"B"456

DataFrame.from_records#

# Structured array with an int32, a float32 and a 10-byte string field.
# "S10" is the supported spelling of the fixed-width bytes dtype; the "a"
# type code is a deprecated alias for it.
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
data[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
pl.DataFrame(data)
shape: (2, 3)
ABC
i32f32binary
12.0b"Hello"
23.0b"World"

Column selection, addition, deletion#

df['one']
shape: (4,)
one
f64
1.0
2.0
3.0
null
df = df.with_columns(
    three=pl.col('one') * pl.col('two'),
    flag=pl.col('one') > 2
)
# del df["two"]
df = df.drop('two')
#three = df.pop("three")
three = df['three']
df = df.drop('three')
df
shape: (4, 3)
indexoneflag
strf64bool
"a"1.0false
"b"2.0false
"c"3.0true
"d"nullnull
df = df.with_columns(foo=pl.lit('bar'))
df
shape: (4, 4)
indexoneflagfoo
strf64boolstr
"a"1.0false"bar"
"b"2.0false"bar"
"c"3.0true"bar"
"d"nullnull"bar"
# df["one_trunc"] = df["one"][:2]
# Keep `one` on the first two rows and leave the rest null. A row-position
# mask needs no manual null padding, so it does not depend on
# `pl.len() - 2` being a valid repeat count.
df = df.with_columns(
    one_trunc=pl.when(pl.int_range(pl.len()) < 2).then(pl.col('one'))
)
df
shape: (4, 5)
indexoneflagfooone_trunc
strf64boolstrf64
"a"1.0false"bar"1.0
"b"2.0false"bar"2.0
"c"3.0true"bar"null
"d"nullnull"bar"null
# df.insert(1, "bar", df["one"])
# insert_column modifies df in place, so no reassignment is needed here;
# the copy of `one` is renamed to `bar` and placed at column position 1.
df.insert_column(1, df["one"].rename('bar'))
df
shape: (4, 6)
indexbaroneflagfooone_trunc
strf64f64boolstrf64
"a"1.01.0false"bar"1.0
"b"2.02.0false"bar"2.0
"c"3.03.0true"bar"null
"d"nullnullnull"bar"null

Assigning new columns in method chains#

iris = pl.read_csv('data/iris.data')
iris.head()
shape: (5, 5)
SepalLengthSepalWidthPetalLengthPetalWidthName
f64f64f64f64str
5.13.51.40.2"Iris-setosa"
4.93.01.40.2"Iris-setosa"
4.73.21.30.2"Iris-setosa"
4.63.11.50.2"Iris-setosa"
5.03.61.40.2"Iris-setosa"
iris.with_columns(
    sepal_ratio=pl.col('SepalWidth') / pl.col('SepalLength')
).head()
shape: (5, 6)
SepalLengthSepalWidthPetalLengthPetalWidthNamesepal_ratio
f64f64f64f64strf64
5.13.51.40.2"Iris-setosa"0.686275
4.93.01.40.2"Iris-setosa"0.612245
4.73.21.30.2"Iris-setosa"0.680851
4.63.11.50.2"Iris-setosa"0.673913
5.03.61.40.2"Iris-setosa"0.72
import hvplot.polars
(
iris
.filter(pl.col('SepalLength') > 5)
.with_columns(
    SepalRatio=pl.col('SepalWidth') / pl.col('SepalLength'),
    PetalRatio=pl.col('PetalWidth') / pl.col('PetalLength')
)
.hvplot.scatter(x='SepalRatio', y='PetalRatio')
)
dfa = pl.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
(
dfa
.with_columns(C = pl.col('A') + pl.col('B'))
.with_columns(D = pl.col('A') + pl.col('C'))
)
shape: (3, 4)
ABCD
i64i64i64i64
1456
2579
36912

Indexing / selection#

# df.loc["b"]
df.filter(pl.col('index') == 'b')
shape: (1, 6)
indexbaroneflagfooone_trunc
strf64f64boolstrf64
"b"2.02.0false"bar"2.0
# df.iloc[2]
df.slice(2, 1)
shape: (1, 6)
indexbaroneflagfooone_trunc
strf64f64boolstrf64
"c"3.03.0true"bar"null

Data alignment and arithmetic#

# df + df2
df = pl.DataFrame(np.random.randn(10, 4), schema=["A", "B", "C", "D"])
df2 = pl.DataFrame(np.random.randn(7, 3), schema=["A", "B", "C"])
align_op(df.with_row_index(), df2.with_row_index(), pl.Expr.add, fill_value=None)
shape: (10, 5)
indexABCD
u32f64f64f64f64
00.7792980.9018110.0911261.41107
1-0.936034-2.6214840.2378251.146001
2-1.192351-3.3607481.5635030.1362
30.933616-0.613897-1.3189110.193137
4-0.195308-0.56240.1188950.222491
50.876072-0.8137321.3678140.554935
60.737510.9945051.1042280.4458
7nullnullnull0.959163
8nullnullnull1.144944
9nullnullnull0.158198
# df - df.iloc[0]
df.select(pl.all() - pl.all().first())
shape: (10, 4)
ABCD
f64f64f64f64
0.00.00.00.0
-0.755136-2.4810020.449509-0.265068
-0.660488-3.6968911.680688-1.27487
1.186271-2.438415-0.240415-1.217932
-0.508953-1.890291.344603-1.188579
-0.631357-2.7167060.657985-0.856135
0.117999-1.3908570.727739-0.96527
0.549259-1.1762272.202544-0.451907
0.772859-1.2504992.459803-0.266126
-0.366154-0.0298161.599533-1.252872
df * 5 + 2
df.select(pl.all() * 5 + 2) # or this
shape: (10, 4)
ABCD
f64f64f64f64
0.64673110.257392-3.4792339.055349
-3.128947-2.147616-1.2316887.730007
-2.65571-8.2270614.9242062.681001
6.578087-1.934685-4.6813062.965687
-1.8980360.805943.2437823.112453
-2.510055-3.32614-0.1893094.774675
1.2367253.3031070.1594634.228999
3.3930254.3762577.5334886.795814
4.5110274.0048998.8197797.724719
-1.18403910.1083114.5184312.790988
df.select((1 / pl.all()).name.keep())
shape: (10, 4)
ABCD
f64f64f64f64
-3.6947570.605518-0.9125360.708682
-0.974859-1.205512-1.5471790.872599
-1.07395-0.4888991.7098667.342137
1.092159-1.27075-0.7483575.177661
-1.282697-4.1873954.0199984.494571
-1.108634-0.938766-2.2838261.802013
-6.5507213.836984-2.7165982.243159
3.589312.1041490.9035891.042576
1.9912172.4938910.7331620.873405
-1.5703320.6166511.9853636.321207
df.select(pl.all() ** 4)
shape: (10, 4)
ABCD
f64f64f64f64
0.0053667.4386081.4421133.964551
1.1072180.4734940.1745171.724807
0.75173317.5034380.1169910.000344
0.7028390.3834953.1883470.001391
0.3694060.0032530.0038290.00245
0.6619841.2875680.0367580.094835
0.0005430.0046140.0183610.039497
0.0060250.0510151.5000850.846387
0.063610.0258523.4609971.718449
0.164456.9157850.0643640.000626
df1 = pl.DataFrame({"a": [1, 0, 1], "b": [0, 1, 1]}).cast(pl.Boolean)
df2 = pl.DataFrame({"a": [0, 1, 1], "b": [1, 1, 0]}).cast(pl.Boolean)
# df1 & df2
align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.and_)
shape: (3, 3)
indexab
u32boolbool
0falsefalse
1falsetrue
2truefalse
# df1 | df2
align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.or_)
shape: (3, 3)
indexab
u32boolbool
0truetrue
1truetrue
2truetrue
# df1 ^ df2
align_op(df1.with_row_index(), df2.with_row_index(), pl.Expr.xor)
shape: (3, 3)
indexab
u32boolbool
0truetrue
1truefalse
2falsetrue
# -df1
df1.select(pl.all().not_())
shape: (3, 2)
ab
boolbool
falsetrue
truefalse
falsefalse

Transposing#

df.slice(0, 5).transpose(include_header=True, header_name='index')
shape: (4, 6)
indexcolumn_0column_1column_2column_3column_4
strf64f64f64f64f64
"A"-0.270654-1.025789-0.9311420.915617-0.779607
"B"1.651478-0.829523-2.045412-0.786937-0.238812
"C"-1.095847-0.6463380.584841-1.3362610.248756
"D"1.411071.1460010.13620.1931370.222491

DataFrame interoperability with NumPy functions#

np.exp(df)
array([[0.76288056, 5.2146839 , 0.33425647, 4.10033964],
       [0.35851332, 0.43625722, 0.52396123, 3.14558954],
       [0.39410336, 0.12932687, 1.79470589, 1.14591119],
       [2.49831716, 0.45523708, 0.26282647, 1.21304945],
       [0.4585861 , 0.78756298, 1.28242953, 1.24918415],
       [0.4057529 , 0.34464929, 0.64541498, 1.74182763],
       [0.85842587, 1.29773619, 0.69204278, 1.56173886],
       [1.32128542, 1.60841852, 3.02435428, 2.60951072],
       [1.65236152, 1.49328708, 3.91163667, 3.14226478],
       [0.52897833, 5.06149624, 1.65480988, 1.17139769]])
np.asarray(df)
array([[-0.27065379,  1.65147847, -1.0958467 ,  1.41106981],
       [-1.02578947, -0.82952326, -0.64633759,  1.14600133],
       [-0.93114207, -2.04541218,  0.58484116,  0.13620012],
       [ 0.91561737, -0.78693694, -1.33626128,  0.19313739],
       [-0.77960722, -0.23881194,  0.24875635,  0.22249066],
       [-0.90201093, -1.06522792, -0.4378618 ,  0.55493493],
       [-0.15265495,  0.26062136, -0.3681075 ,  0.44579985],
       [ 0.27860506,  0.47525141,  1.10669761,  0.95916274],
       [ 0.50220549,  0.40097979,  1.36395587,  1.14494381],
       [-0.63680781,  1.62166214,  0.50368613,  0.15819764]])
ser = pl.Series([1, 2, 3, 4])
np.exp(ser)
shape: (4,)
f64
2.718282
7.389056
20.085537
54.59815
ser1 = pl.DataFrame(dict(value=[1, 2, 3], index=["a", "b", "c"]))
ser2 = pl.DataFrame(dict(value=[1, 3, 5], index=["b", "a", "c"]))
row(ser1, ser2)
shape: (3, 2)
valueindex
i64str
1"a"
2"b"
3"c"
shape: (3, 2)
valueindex
i64str
1"b"
3"a"
5"c"
# np.remainder(ser1, ser2)
ser1_a, ser2_a = pl.align_frames(ser1, ser2, on='index')
row(ser1_a, ser2_a, pl.select(
    index=ser1_a['index'],
    value=np.remainder(ser1_a['value'], ser2_a['value']))   
)
shape: (3, 2)
valueindex
i64str
1"a"
2"b"
3"c"
shape: (3, 2)
valueindex
i64str
3"a"
1"b"
5"c"
shape: (3, 2)
indexvalue
stri64
"a"1
"b"0
"c"3

Console display#

baseball = pl.read_csv('data/baseball.csv')
print(baseball)
shape: (100, 23)
┌───────┬───────────┬──────┬───────┬───┬─────┬─────┬─────┬──────┐
│ id    ┆ player    ┆ year ┆ stint ┆ … ┆ hbp ┆ sh  ┆ sf  ┆ gidp │
│ ---   ┆ ---       ┆ ---  ┆ ---   ┆   ┆ --- ┆ --- ┆ --- ┆ ---  │
│ i64   ┆ str       ┆ i64  ┆ i64   ┆   ┆ f64 ┆ f64 ┆ f64 ┆ f64  │
╞═══════╪═══════════╪══════╪═══════╪═══╪═════╪═════╪═════╪══════╡
│ 88641 ┆ womacto01 ┆ 2006 ┆ 2     ┆ … ┆ 0.0 ┆ 3.0 ┆ 0.0 ┆ 0.0  │
│ 88643 ┆ schilcu01 ┆ 2006 ┆ 1     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
│ 88645 ┆ myersmi01 ┆ 2006 ┆ 1     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
│ 88649 ┆ helliri01 ┆ 2006 ┆ 1     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
│ 88650 ┆ johnsra05 ┆ 2006 ┆ 1     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
│ …     ┆ …         ┆ …    ┆ …     ┆ … ┆ …   ┆ …   ┆ …   ┆ …    │
│ 89525 ┆ benitar01 ┆ 2007 ┆ 2     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
│ 89526 ┆ benitar01 ┆ 2007 ┆ 1     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
│ 89530 ┆ ausmubr01 ┆ 2007 ┆ 1     ┆ … ┆ 6.0 ┆ 4.0 ┆ 1.0 ┆ 11.0 │
│ 89533 ┆ aloumo01  ┆ 2007 ┆ 1     ┆ … ┆ 2.0 ┆ 0.0 ┆ 3.0 ┆ 13.0 │
│ 89534 ┆ alomasa02 ┆ 2007 ┆ 1     ┆ … ┆ 0.0 ┆ 0.0 ┆ 0.0 ┆ 0.0  │
└───────┴───────────┴──────┴───────┴───┴─────┴─────┴─────┴──────┘
baseball.glimpse()
Rows: 100
Columns: 23
$ id     <i64> 88641, 88643, 88645, 88649, 88650, 88652, 88653, 88662, 89177, 89178
$ player <str> 'womacto01', 'schilcu01', 'myersmi01', 'helliri01', 'johnsra05', 'finlest01', 'gonzalu01', 'seleaa01', 'francju01', 'francju01'
$ year   <i64> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2006, 2007, 2007
$ stint  <i64> 2, 1, 1, 1, 1, 1, 1, 1, 2, 1
$ team   <str> 'CHN', 'BOS', 'NYA', 'MIL', 'NYA', 'SFN', 'ARI', 'LAN', 'ATL', 'NYN'
$ lg     <str> 'NL', 'AL', 'AL', 'NL', 'AL', 'NL', 'NL', 'NL', 'NL', 'NL'
$ g      <i64> 19, 31, 62, 20, 33, 139, 153, 28, 15, 40
$ ab     <i64> 50, 2, 0, 3, 6, 426, 586, 26, 40, 50
$ r      <i64> 6, 0, 0, 0, 0, 66, 93, 2, 1, 7
$ h      <i64> 14, 1, 0, 0, 1, 105, 159, 5, 10, 10
$ X2b    <i64> 1, 0, 0, 0, 0, 21, 52, 1, 3, 0
$ X3b    <i64> 0, 0, 0, 0, 0, 12, 2, 0, 0, 0
$ hr     <i64> 1, 0, 0, 0, 0, 6, 15, 0, 0, 1
$ rbi    <f64> 2.0, 0.0, 0.0, 0.0, 0.0, 40.0, 73.0, 0.0, 8.0, 8.0
$ sb     <f64> 1.0, 0.0, 0.0, 0.0, 0.0, 7.0, 0.0, 0.0, 0.0, 2.0
$ cs     <f64> 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0
$ bb     <i64> 4, 0, 0, 0, 0, 46, 69, 1, 4, 10
$ so     <f64> 4.0, 1.0, 0.0, 2.0, 4.0, 55.0, 58.0, 7.0, 10.0, 13.0
$ ibb    <f64> 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 10.0, 0.0, 1.0, 0.0
$ hbp    <f64> 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 7.0, 0.0, 0.0, 0.0
$ sh     <f64> 3.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 6.0, 0.0, 0.0
$ sf     <f64> 0.0, 0.0, 0.0, 0.0, 0.0, 4.0, 6.0, 0.0, 1.0, 1.0
$ gidp   <f64> 0.0, 0.0, 0.0, 0.0, 0.0, 6.0, 14.0, 1.0, 1.0, 1.0
# baseball.iloc[-20:, :12]
# Take the last 20 rows first, then keep the first 12 columns by position.
last_rows = baseball.tail(20)
print(last_rows.select(pl.nth(range(12))))
shape: (20, 12)
┌───────┬───────────┬──────┬───────┬───┬─────┬─────┬─────┬─────┐
│ id    ┆ player    ┆ year ┆ stint ┆ … ┆ r   ┆ h   ┆ X2b ┆ X3b │
│ ---   ┆ ---       ┆ ---  ┆ ---   ┆   ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64   ┆ str       ┆ i64  ┆ i64   ┆   ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
╞═══════╪═══════════╪══════╪═══════╪═══╪═════╪═════╪═════╪═════╡
│ 89474 ┆ finlest01 ┆ 2007 ┆ 1     ┆ … ┆ 9   ┆ 17  ┆ 3   ┆ 0   │
│ 89480 ┆ embreal01 ┆ 2007 ┆ 1     ┆ … ┆ 0   ┆ 0   ┆ 0   ┆ 0   │
│ 89481 ┆ edmonji01 ┆ 2007 ┆ 1     ┆ … ┆ 39  ┆ 92  ┆ 15  ┆ 2   │
│ 89482 ┆ easleda01 ┆ 2007 ┆ 1     ┆ … ┆ 24  ┆ 54  ┆ 6   ┆ 0   │
│ 89489 ┆ delgaca01 ┆ 2007 ┆ 1     ┆ … ┆ 71  ┆ 139 ┆ 30  ┆ 0   │
│ …     ┆ …         ┆ …    ┆ …     ┆ … ┆ …   ┆ …   ┆ …   ┆ …   │
│ 89525 ┆ benitar01 ┆ 2007 ┆ 2     ┆ … ┆ 0   ┆ 0   ┆ 0   ┆ 0   │
│ 89526 ┆ benitar01 ┆ 2007 ┆ 1     ┆ … ┆ 0   ┆ 0   ┆ 0   ┆ 0   │
│ 89530 ┆ ausmubr01 ┆ 2007 ┆ 1     ┆ … ┆ 38  ┆ 82  ┆ 16  ┆ 3   │
│ 89533 ┆ aloumo01  ┆ 2007 ┆ 1     ┆ … ┆ 51  ┆ 112 ┆ 19  ┆ 1   │
│ 89534 ┆ alomasa02 ┆ 2007 ┆ 1     ┆ … ┆ 1   ┆ 3   ┆ 1   ┆ 0   │
└───────┴───────────┴──────┴───────┴───┴─────┴─────┴─────┴─────┘
print(pl.DataFrame(np.random.randn(3, 12)))
shape: (3, 12)
┌──────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ column_0 ┆ column_1  ┆ column_2  ┆ column_3  ┆ … ┆ column_8  ┆ column_9  ┆ column_10 ┆ column_11 │
│ ---      ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---       │
│ f64      ┆ f64       ┆ f64       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64       │
╞══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡
│ 0.545788 ┆ -0.786944 ┆ 0.511419  ┆ -1.465452 ┆ … ┆ 0.103465  ┆ 1.25846   ┆ -1.961481 ┆ -0.892518 │
│ 0.828004 ┆ 0.291711  ┆ -0.666151 ┆ -0.056065 ┆ … ┆ -1.487244 ┆ -0.325992 ┆ -0.25713  ┆ -0.381324 │
│ 0.44002  ┆ 0.21451   ┆ 0.285626  ┆ -0.689011 ┆ … ┆ -0.06698  ┆ 1.482008  ┆ 1.288873  ┆ -1.12619  │
└──────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴───────────┘
# Cap the rendered table at 40 characters for this print only; the
# previous setting is restored when the with-block exits.
with pl.Config(tbl_width_chars=40):
    print(pl.DataFrame(np.random.randn(3, 12)))
shape: (3, 12)
┌─────┬─────┬─────┬─────┬───┬─────┬─────┬─────┬─────┐
│ col ┆ col ┆ col ┆ col ┆ … ┆ col ┆ col ┆ col ┆ col │
│ umn ┆ umn ┆ umn ┆ umn ┆   ┆ umn ┆ umn ┆ umn ┆ umn │
│ _0  ┆ _1  ┆ _2  ┆ _3  ┆   ┆ _8  ┆ _9  ┆ _10 ┆ _11 │
│ --- ┆ --- ┆ --- ┆ --- ┆   ┆ --- ┆ --- ┆ --- ┆ --- │
│ f64 ┆ f64 ┆ f64 ┆ f64 ┆   ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
╞═════╪═════╪═════╪═════╪═══╪═════╪═════╪═════╪═════╡
│ -1. ┆ 0.0 ┆ -0. ┆ 1.7 ┆ … ┆ 0.8 ┆ 0.3 ┆ -1. ┆ -1. │
│ 593 ┆ 321 ┆ 382 ┆ 495 ┆   ┆ 431 ┆ 008 ┆ 529 ┆ 861 │
│ 329 ┆ 95  ┆ 463 ┆ 8   ┆   ┆ 73  ┆ 16  ┆ 093 ┆ 916 │
│ 0.5 ┆ -0. ┆ 0.5 ┆ -0. ┆ … ┆ -0. ┆ -0. ┆ -1. ┆ 0.6 │
│ 488 ┆ 337 ┆ 436 ┆ 509 ┆   ┆ 462 ┆ 292 ┆ 385 ┆ 538 │
│ 96  ┆ 439 ┆ 99  ┆ 815 ┆   ┆ 243 ┆ 441 ┆ 327 ┆ 87  │
│ -1. ┆ -0. ┆ 0.7 ┆ -0. ┆ … ┆ 0.1 ┆ -0. ┆ -0. ┆ 0.5 │
│ 150 ┆ 856 ┆ 171 ┆ 110 ┆   ┆ 902 ┆ 484 ┆ 245 ┆ 015 │
│ 248 ┆ 766 ┆ 96  ┆ 738 ┆   ┆ 8   ┆ 427 ┆ 482 ┆ 93  │
└─────┴─────┴─────┴─────┴───┴─────┴─────┴─────┴─────┘
datafile = {
    "filename": ["filename_01", "filename_02"],
    "path": [
        "media/user_name/storage/folder_01/filename_01",
        "media/user_name/storage/folder_02/filename_02",
    ],
}
# Render within 30 characters so the long `path` strings wrap inside
# their cell; the setting only applies inside the with-block.
with pl.Config(tbl_width_chars=30):
    print(pl.DataFrame(datafile))
shape: (2, 2)
┌─────────────┬──────────────┐
│ filename    ┆ path         │
│ ---         ┆ ---          │
│ str         ┆ str          │
╞═════════════╪══════════════╡
│ filename_01 ┆ media/user_n │
│             ┆ ame/storage/ │
│             ┆ folder…      │
│ filename_02 ┆ media/user_n │
│             ┆ ame/storage/ │
│             ┆ folder…      │
└─────────────┴──────────────┘
# Allow the table to be up to 100 characters wide for this print only.
# NOTE: a wider table alone does not reveal the full path (the output below
# is still cut at "folder…"); the per-cell string length is a separate
# setting (Config.set_fmt_str_lengths).
with pl.Config() as cfg:
    cfg.set_tbl_width_chars(100)
    print(pl.DataFrame(datafile))
shape: (2, 2)
┌─────────────┬─────────────────────────────────┐
│ filename    ┆ path                            │
│ ---         ┆ ---                             │
│ str         ┆ str                             │
╞═════════════╪═════════════════════════════════╡
│ filename_01 ┆ media/user_name/storage/folder… │
│ filename_02 ┆ media/user_name/storage/folder… │
└─────────────┴─────────────────────────────────┘

DataFrame column attribute access and IPython completion#

Polars has no equivalent of this feature; columns are selected by name, for example `df['one']` or `pl.col('one')`.