using DataFrames, Random
# Build a DataFrame from a Dict; the "fixed" entry is a vector wrapped in `Ref`.
x = Dict(
    "A" => [1, 2],
    "B" => [true, false],
    "C" => ['a', 'b'],
    "fixed" => Ref([1, 1]),
)
DataFrame(x)

# Vector of vectors, with `:auto` column names.
DataFrame([rand(3) for _ in 1:3], :auto)

# Adjoint of a vector (a single row), with `:auto` column names.
DataFrame([1, 2, 3, 4]', :auto)

# Single column given as a `name => values` pair.
DataFrame(:x1 => [1, 2, 3, 4])

# 3×4 matrix with explicit column names, first as Symbols and then as Strings.
DataFrame(rand(3, 4), map(Symbol, 'a':'d'))
DataFrame(rand(3, 4), map(string, 'a':'d'))

# Zero-row data frame with typed columns.
DataFrame(A=Int[], B=Float64[], C=String[])
# Make a copy of `x` with `copy`.
x = DataFrame(a=1:2, b='a':'b')
y = copy(x)

# Compare the frames and their `a` columns by value (`==`) and by identity (`===`).
x == y
x === y
x.a == y.a
x.a === y.a

# Rebuild `x`, then construct `y` from it with `copycols=false` and repeat the
# same four comparisons in one tuple.
x = DataFrame(a=1:2, b='a':'b')
y = DataFrame(x; copycols=false)
(x === y, x == y, x.a == y.a, x.a === y.a)

# Write through `y`, then display both frames to compare them.
y.a[1] = 20
y
x
x = DataFrame(a=1, b=1.0)

# `similar` with the default row count, then with 0 rows and with 2 rows.
similar(x)
similar(x, 0)
similar(x, 2)

# Row selector `[1, 1]` picks the first row twice; `:` selects every column.
sdf = @view x[[1, 1], :]
typeof(sdf)

# Assign through column `a` of the view, then display the parent `x`.
sdf.a[2] = 2
x

# Index a single row together with all columns.
dfr = x[1, :]
x = DataFrame(x=1:2, y=["A", "B"])

# Time each conversion in both spellings: piped and as a direct constructor call.
@time x |> Matrix
@time Matrix(x)
@time x |> Array
@time Array(x)

# The same conversions for a data frame whose `y` column contains `missing`.
x = DataFrame(x=1:2, y=[missing, "B"])
Matrix(x)
Array(x)
# NamedTuple-related tabular structures
#
# The section heading used to be fused onto the first statement, which made that
# line unparsable; it is now a comment and the statement stands on its own line.
x = DataFrame(x=1:2, y=["A", "B"])

# Convert the data frame to the row-oriented and column-oriented Tables.jl forms.
rt = Tables.rowtable(x)
ct = Tables.columntable(x)

# Both forms can be passed back to the DataFrame constructor.
DataFrame(rt)
DataFrame(ct)
# Column iterator over `x`.
ec = eachcol(x)

# Check whether it is an AbstractVector, in infix and in call form.
ec isa AbstractVector
isa(ec, AbstractVector)

# Access a column by position, by String name, by Symbol name and by property.
ec[1]
ec["y"]
ec[:y]
ec.y
ec.y[1]

# Row iterator over `x`, with the same kind of checks.
er = eachrow(x)
er isa AbstractVector
er[end]

# Property access on the row iterator, then the first element of the result.
er.y
er.y[1]
# Note that data frames, as well as DataFrameColumns and DataFrameRows objects, are not
# type stable (they do not know the types of their columns). This is useful to avoid
# compilation cost if you have very wide data frames with heterogeneous column types.
# However, often (especially if a data frame is narrow) it is useful to create a lazy
# iterator that produces a NamedTuple for each row of the DataFrame. Its key benefit is
# that it is type stable, so it is useful when you want to perform some operations quickly
# on a small subset of columns of a DataFrame. This strategy is often used internally by
# the DataFrames.jl package:
# Iterator over the rows of `x` obtained from Tables.jl.
nti = Tables.namedtupleiterator(x)

# `enumerate` yields (index, element) pairs, so `row[2]` is the element itself.
for row in enumerate(nti)
    @show row
    @show row[2].y
end
# Passing the `makeunique` keyword argument allows duplicate column names
# (they get deduplicated).
df = DataFrame(:a => 1, :a => 2, :a_1 => 3; makeunique=true)

# Otherwise, duplicates are not allowed and the constructor throws. The call is
# wrapped so the error is displayed without aborting the rest of the script;
# `df` keeps the value assigned above.
try
    DataFrame(:a => 1, :a => 2, :a_1 => 3)
catch err
    # Anything other than the expected ArgumentError is propagated unchanged.
    err isa ArgumentError || rethrow()
    showerror(stdout, err)
    println()
end
# Observe that currently the value `nothing` is not printed when a DataFrame is displayed
# in a Jupyter Notebook:
# Columns mixing regular values with `nothing` and with `missing`.
df = DataFrame(
    x=[1, nothing],
    y=[nothing, "a"],
    z=[missing, "c"],
)

# Empty the data frame in place, then display it.
empty!(df)
df