my_matrix_π = Matrix{Float64}(undef,2,2)
2×2 Matrix{Float64}: 2.122e-314 5.0e-324 2.122e-314 1.29994e-315
fill!(my_matrix_π,3.14)
2×2 Matrix{Float64}: 3.14 3.14 3.14 3.14
[ones(Int,2,2) zeros(Int,2,2)]
2×4 Matrix{Int64}: 1 1 0 0 1 1 0 0
[ones(Int,2,2);zeros(Int,2,2)]
4×2 Matrix{Int64}: 1 1 1 1 0 0 0 0
[x^2 for x in 1:10 if isodd(x)]
5-element Vector{Int64}: 1 9 25 49 81
( x->
begin
println(x)
println(x)
println(x)
end
).("ddd")
ddd ddd ddd
my_example_matrix = [[1 2 3];[4 5 6];[7 8 9]]
3×3 Matrix{Int64}: 1 2 3 4 5 6 7 8 9
map(x->x+1, my_example_matrix[3,:])
3-element Vector{Int64}: 8 9 10
using BenchmarkTools
@benchmark mapslices(sum,my_example_matrix,dims=1)
BenchmarkTools.Trial: memory estimate: 2.70 KiB allocs estimate: 57 -------------- minimum time: 5.400 μs (0.00% GC) median time: 6.500 μs (0.00% GC) mean time: 7.043 μs (2.48% GC) maximum time: 598.550 μs (98.14% GC) -------------- samples: 10000 evals/sample: 6
@benchmark sum(my_example_matrix,dims=1)
BenchmarkTools.Trial: memory estimate: 112 bytes allocs estimate: 1 -------------- minimum time: 93.444 ns (0.00% GC) median time: 139.438 ns (0.00% GC) mean time: 137.157 ns (1.36% GC) maximum time: 2.402 μs (0.00% GC) -------------- samples: 10000 evals/sample: 961
@benchmark sort(data) setup=(data=rand(10))
BenchmarkTools.Trial: memory estimate: 160 bytes allocs estimate: 1 -------------- minimum time: 94.203 ns (0.00% GC) median time: 113.458 ns (0.00% GC) mean time: 118.443 ns (1.50% GC) maximum time: 760.663 ns (82.34% GC) -------------- samples: 10000 evals/sample: 966
column_major = [[1 3];[2 4]]
2×2 Matrix{Int64}: 1 3 2 4
row_major = [[1 2];[3 4]]
2×2 Matrix{Int64}: 1 2 3 4
for c in eachcol(column_major)
println(c)
end
for r in eachrow(column_major)
println(r)
end
[1, 2] [3, 4] [1, 3] [2, 4]
my_pair = "Julia"=>42
"Julia" => 42
my_pair.first
"Julia"
my_pair.second
42
first(my_pair)
"Julia"
last(my_pair)
42
name2number_map = Dict("one"=>1, "two"=>2)
Dict{String, Int64} with 2 entries: "two" => 2 "one" => 1
"two" in keys(name2number_map)
true
name2number_map["three"] = 3
3
name2number_map
Dict{String, Int64} with 3 entries: "two" => 2 "one" => 1 "three" => 3
delete!(name2number_map,"three")
Dict{String, Int64} with 2 entries: "two" => 2 "one" => 1
popped_value = pop!(name2number_map,"two")
2
A = ["one","two","three"]
B = [1,2,3]
name2number_map = Dict(zip(A,B))
Dict{String, Int64} with 3 entries: "two" => 2 "one" => 1 "three" => 3
pop!(name2number_map)
"two" => 2
add_elements(a,b,c) = a + b+ c
add_elements (generic function with 1 method)
my_collection = [1,2,3]
add_elements(my_collection...)
6
add_elements(1:3...)
6
dirname(@__FILE__)
""
using Dates
subtypes(Period)
2-element Vector{Any}: DatePeriod TimePeriod
subtypes(DatePeriod)
5-element Vector{Any}: Day Month Quarter Week Year
subtypes(TimePeriod)
6-element Vector{Any}: Hour Microsecond Millisecond Minute Nanosecond Second
methods(DateTime)
methods(Date)
Date("19870913","yyyymmdd")
1987-09-13
m_day = Date("1987-09-13")
1987-09-13
yearmonth(m_day)
(1987, 9)
monthday(m_day)
(9, 13)
yearmonthday(m_day)
(1987, 9, 13)
m_day
1987-09-13
dayofweek(m_day)
7
dayname(m_day)
"Sunday"
dayofweekofmonth(m_day)
2
dayofweekofmonth(Date("1987-08-29")+Day(1))
5
first(methodswith(Date),20)
collect(Date("2021-01-01"):Day(1):Date("2021-01-07"))
7-element Vector{Date}: 2021-01-01 2021-01-02 2021-01-03 2021-01-04 2021-01-05 2021-01-06 2021-01-07
DateTime("2021-01-01T21:10:11")
2021-01-01T21:10:11
DateTime("2021-01-01")
2021-01-01T00:00:00
methods(DateTime)
rand((42,"Julia",3.14))
42
rand(Dict(:one=>1,:two=>2))
:two => 2
using Downloads
readlines(Downloads.download("https://naver.com"))[1:2]
2-element Vector{String}: "" "<!doctype html> <html lang=\"ko\" data-dark=\"false\"> <head> <meta charset=\"utf-8\"> <title>NAVER</title> <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\"> <meta name=\"viewport\" content=\"width=1190\"> <meta name=\"apple-mobile-web-app-title\" content=\"NAVER\"/> <meta name=\"robots\" content=\"index,nofollow\"/> <meta name=\"description\" content=\"네이버 메인에서 다양한 정보와 유용한 컨텐츠를 만나 보세요\"/> <meta property=\"og:title\" content=\"네이버\"> <meta property=\"og:url\" content=\"https://www.naver.com/\"> <meta property=\"og:image\" content=\"https://s.pstatic.net/static/www/mobile/edit/2016/0705/mobile_212852414260.png\"> <meta property=\"og:description\" content=\"네이버 메인에서 다양한 정보와 유용한 컨텐츠를 만나 보세요\"/> <meta name=\"twitter:card\" content=\"summary\"> <meta name=\"twitter:title\" content=\"\"> <meta name=\"twitter:url\" content=\"https://www.naver.com/\"> <meta name=\"twitter:image\" content=\"https://s.pstatic.net/static/www/mobile/edit/2016/0705/mobile_212852414260.png\"> <meta name=\"twitter:description\" content=\"네이버 메인에서 다양한 정보와 유용한 컨텐츠를 만나 보세요\"/> <link rel=\"stylesheet\" href=\"https://pm.pstatic.net/dist/css/nmain.20210826.css\"> <link rel=\"stylesheet\" href=\"https://ssl.pstatic.net/sstatic/search/pc/css/sp_autocomplete_210318.css\"> <link rel=\"shortcut icon\" type=\"image/x-icon\" href=\"/favicon.ico?1\"/> <link rel=\"apple-touch-icon\" sizes=\"114x114\" href=\"https://s.pstatic.net/static/www/u/2014/0328/mma_204243574.png\"/> <link rel=\"apple-touch-icon\" href=\"https://s.pstatic.net/static/www/u/2014/0328/mma_20432863.png\"/> <script>document.domain=\"naver.com\",window.nmain=window.nmain||{},window.nmain.supportFlicking=!1;var nsc=\"navertop.v4\",ua=navigator.userAgent;window.nmain.isIE=navigator.appName&&navigator.appName.indexOf(\"Explorer\")>0&&ua.toLocaleLowerCase().indexOf(\"msie 10.0\")<0,document.getElementsByTagName(\"html\")[0].setAttribute(\"data-useragent\",ua),window.nmain.isIE&&(Object.create=function(n){function a(){}return a.prototype=n,new a})</script> <script>var darkmode= false;window.naver_corp_da=window.naver_corp_da||{main:{}},window.naver_corp_da.main=window.naver_corp_da.main||{},window.naver_corp_da.main.darkmode=darkmode,window.gladsdk=window.gladsdk||{cmd:[]},window.gladsdk.cmd.push((function(){window.gladsdk.setHostMeta(\"theme\",darkmode?\"dark\":\"light\")}))</script> <script> window.nmain.gv = { isLogin: false,"
using DataFrames
using XLSX:
eachtablerow,
readxlsx,
writetable
function write_xlsx(name,df::DataFrame)
path = "$name.xlsx"
data = collect(eachcol(df))
cols = names(df)
writetable(path,data,cols)
end
write_xlsx (generic function with 1 method)
function write_grades_xlsx(df::DataFrame)
path = "grades"
write_xlsx(path,df)
"$path.xlsx"
end
write_grades_xlsx (generic function with 1 method)
df = DataFrame(name=["sally","Bob","Alice","Hank"],grade_2020=[1.0,5.0,8.5,4.0])
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | sally | 1.0 |
2 | Bob | 5.0 |
3 | Alice | 8.5 |
4 | Hank | 4.0 |
path = write_grades_xlsx(df)
path
"grades.xlsx"
xf = readxlsx(path)
xf
XLSXFile("grades.xlsx") containing 1 Worksheet sheetname size range ------------------------------------------------- Sheet1 5x2 A1:B5
sheet = xf["Sheet1"]
sheet
5×2 XLSX.Worksheet: ["Sheet1"](A1:B5)
eachtablerow(sheet) |> DataFrame
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
Any | Any | |
1 | sally | 1.0 |
2 | Bob | 5.0 |
3 | Alice | 8.5 |
4 | Hank | 4.0 |
df.name
4-element Vector{String}: "sally" "Bob" "Alice" "Hank"
df[!,:name]
4-element Vector{String}: "sally" "Bob" "Alice" "Hank"
df2 = DataFrame(id=[1])
1 rows × 1 columns
id | |
---|---|
Int64 | |
1 | 1 |
@benchmark filter(:name=>x->x=="Alice",df)
BenchmarkTools.Trial: memory estimate: 1.23 KiB allocs estimate: 16 -------------- minimum time: 1.240 μs (0.00% GC) median time: 1.810 μs (0.00% GC) mean time: 2.130 μs (3.87% GC) maximum time: 417.110 μs (99.24% GC) -------------- samples: 10000 evals/sample: 10
@benchmark filter(:name=> ==("Alice"),df)
BenchmarkTools.Trial: memory estimate: 1.31 KiB allocs estimate: 19 -------------- minimum time: 1.260 μs (0.00% GC) median time: 1.570 μs (0.00% GC) mean time: 1.960 μs (6.87% GC) maximum time: 487.250 μs (99.09% GC) -------------- samples: 10000 evals/sample: 10
filter(:name => !=("Alice"),df)
3 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | sally | 1.0 |
2 | Bob | 5.0 |
3 | Hank | 4.0 |
df
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | sally | 1.0 |
2 | Bob | 5.0 |
3 | Alice | 8.5 |
4 | Hank | 4.0 |
function complex_filter(name,grade)::Bool
i_name = startswith(name,'A') || startswith(name,'B')
i_grade = 6 < grade
i_name && i_grade
end
complex_filter (generic function with 1 method)
filter([:name,:grade_2020]=>complex_filter,df)
1 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Alice | 8.5 |
subset(df,:name=>ByRow(x->x=="Alice"))
1 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Alice | 8.5 |
subset(df,:name=> ByRow(==("Alice")))
1 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Alice | 8.5 |
function salaries()
names = ["John","Hank","Karen","Zed"]
salary = [1_900,2_800,2_800,missing]
DataFrame(;names,salary)
end
salaries()
4 rows × 2 columns
names | salary | |
---|---|---|
String | Int64? | |
1 | John | 1900 |
2 | Hank | 2800 |
3 | Karen | 2800 |
4 | Zed | missing |
filter(:salary => > (2_000), salaries())
syntax: space before "(" not allowed in "> (" at In[148]:1 Stacktrace: [1] top-level scope @ In[148]:1 [2] eval @ .\boot.jl:360 [inlined] [3] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String) @ Base .\loading.jl:1094
subset(salaries(), :salary=>ByRow(>(2_000)))
ArgumentError: missing was returned in condition number 1 but only true or false are allowed; pass skipmissing=true to skip missing values Stacktrace: [1] _and(x::Missing) @ DataFrames C:\julia_env\turing\packages\DataFrames\vuMM8\src\abstractdataframe\subset.jl:11 [2] _broadcast_getindex_evalf @ .\broadcast.jl:648 [inlined] [3] _broadcast_getindex @ .\broadcast.jl:621 [inlined] [4] getindex @ .\broadcast.jl:575 [inlined] [5] macro expansion @ .\broadcast.jl:984 [inlined] [6] macro expansion @ .\simdloop.jl:77 [inlined] [7] copyto! @ .\broadcast.jl:983 [inlined] [8] copyto! @ .\broadcast.jl:994 [inlined] [9] copyto! @ .\broadcast.jl:936 [inlined] [10] copy @ .\broadcast.jl:908 [inlined] [11] materialize(bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(DataFrames._and), Tuple{Vector{Union{Missing, Bool}}}}) @ Base.Broadcast .\broadcast.jl:883 [12] _get_subset_conditions(df::DataFrame, ::Base.RefValue{Any}, skipmissing::Bool) @ DataFrames C:\julia_env\turing\packages\DataFrames\vuMM8\src\abstractdataframe\subset.jl:74 [13] subset(df::DataFrame, args::Any; skipmissing::Bool, view::Bool) @ DataFrames C:\julia_env\turing\packages\DataFrames\vuMM8\src\abstractdataframe\subset.jl:173 [14] subset(df::DataFrame, args::Any) @ DataFrames C:\julia_env\turing\packages\DataFrames\vuMM8\src\abstractdataframe\subset.jl:173 [15] top-level scope @ In[149]:1 [16] eval @ .\boot.jl:360 [inlined] [17] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String) @ Base .\loading.jl:1094
subset(salaries(),:salary=>ByRow(>(2_000));skipmissing=true)
2 rows × 2 columns
names | salary | |
---|---|---|
String | Int64? | |
1 | Hank | 2800 |
2 | Karen | 2800 |
function responses()
id = [1,2]
q1 = [28,61]
q2 = [:us,:fr]
q3 = ["F","B"]
q4 = ["B","C"]
q5 = ["A","E"]
DataFrame(;id,q1,q2,q3,q4,q5)
end
responses()
2 rows × 6 columns
id | q1 | q2 | q3 | q4 | q5 | |
---|---|---|---|---|---|---|
Int64 | Int64 | Symbol | String | String | String | |
1 | 1 | 28 | us | F | B | A |
2 | 2 | 61 | fr | B | C | E |
select(responses(),:id,:q1)
2 rows × 2 columns
id | q1 | |
---|---|---|
Int64 | Int64 | |
1 | 1 | 28 |
2 | 2 | 61 |
select(responses(), Not(:q5))
2 rows × 5 columns
id | q1 | q2 | q3 | q4 | |
---|---|---|---|---|---|
Int64 | Int64 | Symbol | String | String | |
1 | 1 | 28 | us | F | B |
2 | 2 | 61 | fr | B | C |
select(responses(),Not([:q4,:q5]))
2 rows × 4 columns
id | q1 | q2 | q3 | |
---|---|---|---|---|
Int64 | Int64 | Symbol | String | |
1 | 1 | 28 | us | F |
2 | 2 | 61 | fr | B |
select(responses(),:q5, Not(:id))
2 rows × 5 columns
q5 | q1 | q2 | q3 | q4 | |
---|---|---|---|---|---|
String | Int64 | Symbol | String | String | |
1 | A | 28 | us | F | B |
2 | E | 61 | fr | B | C |
select(responses(), Not(:id))
2 rows × 5 columns
q1 | q2 | q3 | q4 | q5 | |
---|---|---|---|---|---|
Int64 | Symbol | String | String | String | |
1 | 28 | us | F | B | A |
2 | 61 | fr | B | C | E |
select(responses(),:)
2 rows × 6 columns
id | q1 | q2 | q3 | q4 | q5 | |
---|---|---|---|---|---|---|
Int64 | Int64 | Symbol | String | String | String | |
1 | 1 | 28 | us | F | B | A |
2 | 2 | 61 | fr | B | C | E |
select(responses(),:q5,:)
2 rows × 6 columns
q5 | id | q1 | q2 | q3 | q4 | |
---|---|---|---|---|---|---|
String | Int64 | Int64 | Symbol | String | String | |
1 | A | 1 | 28 | us | F | B |
2 | E | 2 | 61 | fr | B | C |
select(responses(),1,:q5,:)
2 rows × 6 columns
id | q5 | q1 | q2 | q3 | q4 | |
---|---|---|---|---|---|---|
Int64 | String | Int64 | Symbol | String | String | |
1 | 1 | A | 28 | us | F | B |
2 | 2 | E | 61 | fr | B | C |
select(responses(),1=>"participant",:q1=>"age",:q2=>"nationality")
2 rows × 3 columns
participant | age | nationality | |
---|---|---|---|
Int64 | Int64 | Symbol | |
1 | 1 | 28 | us |
2 | 2 | 61 | fr |
renames = (1=>"participant", :q1=>"age", :q2=>"nationality")
select(responses(), renames...)
2 rows × 3 columns
participant | age | nationality | |
---|---|---|---|
Int64 | Int64 | Symbol | |
1 | 1 | 28 | us |
2 | 2 | 61 | fr |
function wrong_types()
id = 1:4
date = ["28-01-2018","03-04-2019","01-08-2018","22-11-2020"]
age = ["adolescent","adult","infant","adult"]
DataFrame(;id,date,age)
end
wrong_types()
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | String | String | |
1 | 1 | 28-01-2018 | adolescent |
2 | 2 | 03-04-2019 | adult |
3 | 3 | 01-08-2018 | infant |
4 | 4 | 22-11-2020 | adult |
sort(wrong_types(),:date)
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | String | String | |
1 | 3 | 01-08-2018 | infant |
2 | 2 | 03-04-2019 | adult |
3 | 4 | 22-11-2020 | adult |
4 | 1 | 28-01-2018 | adolescent |
function fix_date_column(df::DataFrame)
string2dates(dates::Vector) = Date.(dates,dateformat"dd-mm-yyy")
dates = string2dates(df[!,:date])
df[!,:date] = dates
df
end
fix_date_column(wrong_types())
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | Date | String | |
1 | 1 | 2018-01-28 | adolescent |
2 | 2 | 2019-04-03 | adult |
3 | 3 | 2018-08-01 | infant |
4 | 4 | 2020-11-22 | adult |
df = fix_date_column(wrong_types())
sort(df,:date)
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | Date | String | |
1 | 1 | 2018-01-28 | adolescent |
2 | 3 | 2018-08-01 | infant |
3 | 2 | 2019-04-03 | adult |
4 | 4 | 2020-11-22 | adult |
sort(wrong_types(),:age)
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | String | String | |
1 | 1 | 28-01-2018 | adolescent |
2 | 2 | 03-04-2019 | adult |
3 | 4 | 22-11-2020 | adult |
4 | 3 | 01-08-2018 | infant |
using CategoricalArrays
function fix_age_column(df)
levels = ["infant","adolescent","adult"]
ages = categorical(df[!,:age];levels,ordered=true)
df[!,:age] = ages
df
end
fix_age_column(wrong_types())
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | String | Cat… | |
1 | 1 | 28-01-2018 | adolescent |
2 | 2 | 03-04-2019 | adult |
3 | 3 | 01-08-2018 | infant |
4 | 4 | 22-11-2020 | adult |
df = fix_age_column(wrong_types())
sort(df,:age)
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | String | Cat… | |
1 | 3 | 01-08-2018 | infant |
2 | 1 | 28-01-2018 | adolescent |
3 | 2 | 03-04-2019 | adult |
4 | 4 | 22-11-2020 | adult |
function correct_types()
df = wrong_types()
df = fix_date_column(df)
df = fix_age_column(df)
end
correct_types()
4 rows × 3 columns
id | date | age | |
---|---|---|---|
Int64 | Date | Cat… | |
1 | 1 | 2018-01-28 | adolescent |
2 | 2 | 2019-04-03 | adult |
3 | 3 | 2018-08-01 | infant |
4 | 4 | 2020-11-22 | adult |
df = correct_types()
a = df[1,:age]
b = df[2,:age]
a < b
true
function grades_2020()
DataFrame(;name=["Sally","Bob","Alice","Hank"],
grade_2020=[1.0,5.0,8.5,4.0])
end
grades_2020()
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 1.0 |
2 | Bob | 5.0 |
3 | Alice | 8.5 |
4 | Hank | 4.0 |
function grades_2021()
DataFrame(;name=["Bob2","Sally","Hank"],
grade_2021=[9.5,9.5,6.0])
end
grades_2021()
3 rows × 2 columns
name | grade_2021 | |
---|---|---|
String | Float64 | |
1 | Bob2 | 9.5 |
2 | Sally | 9.5 |
3 | Hank | 6.0 |
innerjoin(grades_2020(),grades_2021(); on=:name)
2 rows × 3 columns
name | grade_2020 | grade_2021 | |
---|---|---|---|
String | Float64 | Float64 | |
1 | Sally | 1.0 | 9.5 |
2 | Hank | 4.0 | 6.0 |
outerjoin(grades_2020(),grades_2021();on=:name)
5 rows × 3 columns
name | grade_2020 | grade_2021 | |
---|---|---|---|
String | Float64? | Float64? | |
1 | Sally | 1.0 | 9.5 |
2 | Hank | 4.0 | 6.0 |
3 | Bob | 5.0 | missing |
4 | Alice | 8.5 | missing |
5 | Bob2 | missing | 9.5 |
crossjoin(grades_2020(),grades_2021();on=:id)
MethodError: no method matching crossjoin(::DataFrame, ::DataFrame; on=:id) Closest candidates are: crossjoin(::AbstractDataFrame, ::AbstractDataFrame; makeunique) at C:\julia_env\turing\packages\DataFrames\vuMM8\src\join\composer.jl:1332 got unsupported keyword argument "on" crossjoin(::AbstractDataFrame, ::AbstractDataFrame, ::AbstractDataFrame...; makeunique) at C:\julia_env\turing\packages\DataFrames\vuMM8\src\join\composer.jl:1343 got unsupported keyword argument "on" Stacktrace: [1] kwerr(::NamedTuple{(:on,), Tuple{Symbol}}, ::Function, ::DataFrame, ::DataFrame) @ Base .\error.jl:157 [2] top-level scope @ In[191]:1 [3] eval @ .\boot.jl:360 [inlined] [4] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String) @ Base .\loading.jl:1094
crossjoin(grades_2020(),grades_2021())
ArgumentError: Duplicate variable names: :name. Pass makeunique=true to make them unique using a suffix automatically. Stacktrace: [1] add_names(ind::DataFrames.Index, add_ind::DataFrames.Index; makeunique::Bool) @ DataFrames C:\julia_env\turing\packages\DataFrames\vuMM8\src\other\index.jl:323
crossjoin(grades_2020(),grades_2021();makeunique=true)
12 rows × 4 columns
name | grade_2020 | name_1 | grade_2021 | |
---|---|---|---|---|
String | Float64 | String | Float64 | |
1 | Sally | 1.0 | Bob2 | 9.5 |
2 | Sally | 1.0 | Sally | 9.5 |
3 | Sally | 1.0 | Hank | 6.0 |
4 | Bob | 5.0 | Bob2 | 9.5 |
5 | Bob | 5.0 | Sally | 9.5 |
6 | Bob | 5.0 | Hank | 6.0 |
7 | Alice | 8.5 | Bob2 | 9.5 |
8 | Alice | 8.5 | Sally | 9.5 |
9 | Alice | 8.5 | Hank | 6.0 |
10 | Hank | 4.0 | Bob2 | 9.5 |
11 | Hank | 4.0 | Sally | 9.5 |
12 | Hank | 4.0 | Hank | 6.0 |
leftjoin(grades_2020(),grades_2021();on=:name)
4 rows × 3 columns
name | grade_2020 | grade_2021 | |
---|---|---|---|
String | Float64 | Float64? | |
1 | Sally | 1.0 | 9.5 |
2 | Hank | 4.0 | 6.0 |
3 | Bob | 5.0 | missing |
4 | Alice | 8.5 | missing |
rightjoin(grades_2020(), grades_2021(); on=:name)
3 rows × 3 columns
name | grade_2020 | grade_2021 | |
---|---|---|---|
String | Float64? | Float64 | |
1 | Sally | 1.0 | 9.5 |
2 | Hank | 4.0 | 6.0 |
3 | Bob2 | missing | 9.5 |
leftjoin(grades_2021(),grades_2020(); on=:name)
3 rows × 3 columns
name | grade_2021 | grade_2020 | |
---|---|---|---|
String | Float64 | Float64? | |
1 | Sally | 9.5 | 1.0 |
2 | Hank | 6.0 | 4.0 |
3 | Bob2 | 9.5 | missing |
semijoin(grades_2020(),grades_2021();on=:name)
2 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 1.0 |
2 | Hank | 4.0 |
antijoin(grades_2020(),grades_2021();on=:name)
2 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Bob | 5.0 |
2 | Alice | 8.5 |
function grades_2020()
name = ["Sally","Bob","Alice","Hank"]
grade_2020 = [1,5,8.5,4]
DataFrame(;name,grade_2020)
end
grades_2020()
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 1.0 |
2 | Bob | 5.0 |
3 | Alice | 8.5 |
4 | Hank | 4.0 |
plus_one(grades) = grades .+ 1
transform(grades_2020(),:grade_2020=>plus_one)
4 rows × 3 columns
name | grade_2020 | grade_2020_plus_one | |
---|---|---|---|
String | Float64 | Float64 | |
1 | Sally | 1.0 | 2.0 |
2 | Bob | 5.0 | 6.0 |
3 | Alice | 8.5 | 9.5 |
4 | Hank | 4.0 | 5.0 |
transform(grades_2020(),:grade_2020=>plus_one=> :grade_2020)
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 2.0 |
2 | Bob | 6.0 |
3 | Alice | 9.5 |
4 | Hank | 5.0 |
transform(grades_2020(),:grade_2020=>plus_one; renamecols=false)
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 2.0 |
2 | Bob | 6.0 |
3 | Alice | 9.5 |
4 | Hank | 5.0 |
select(grades_2020(),:,:grade_2020=>plus_one)
4 rows × 3 columns
name | grade_2020 | grade_2020_plus_one | |
---|---|---|---|
String | Float64 | Float64 | |
1 | Sally | 1.0 | 2.0 |
2 | Bob | 5.0 | 6.0 |
3 | Alice | 8.5 | 9.5 |
4 | Hank | 4.0 | 5.0 |
select(grades_2020(),:,:grade_2020=>plus_one=>:grade_2020)
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 2.0 |
2 | Bob | 6.0 |
3 | Alice | 9.5 |
4 | Hank | 5.0 |
df = grades_2020()
df.grade_2020 = plus_one.(df.grade_2020)
df
4 rows × 2 columns
name | grade_2020 | |
---|---|---|
String | Float64 | |
1 | Sally | 2.0 |
2 | Bob | 6.0 |
3 | Alice | 9.5 |
4 | Hank | 5.0 |
leftjoined = leftjoin(grades_2020(), grades_2021(); on=:name)
4 rows × 3 columns
name | grade_2020 | grade_2021 | |
---|---|---|---|
String | Float64 | Float64? | |
1 | Sally | 1.0 | 9.5 |
2 | Hank | 4.0 | 6.0 |
3 | Bob | 5.0 | missing |
4 | Alice | 8.5 | missing |
pass(A,B) = [5.5 < a || 5.5 < b for (a,b) in zip(A,B)]
transform(leftjoined,[:grade_2020,:grade_2021]=>pass; renamecols=false)
4 rows × 4 columns
name | grade_2020 | grade_2021 | grade_2020_grade_2021 | |
---|---|---|---|---|
String | Float64 | Float64? | Bool? | |
1 | Sally | 1.0 | 9.5 | 1 |
2 | Hank | 4.0 | 6.0 | 1 |
3 | Bob | 5.0 | missing | missing |
4 | Alice | 8.5 | missing | 1 |
function only_pass()
leftjoined = leftjoin(grades_2020(), grades_2021();on=:name)
pass(A,B) = [5.5 < a || 5.5 < b for (a,b) in zip(A,B)]
leftjoined = transform(leftjoined,[:grade_2020,:grade_2021]=>pass=>:pass)
passed = subset(leftjoined,:pass; skipmissing=true)
return passed.name
end
only_pass()
3-element Vector{String}: "Sally" "Hank" "Alice"
function all_grades()
df1 = grades_2020()
df1 = select(df1,:name,:grade_2020=>:grade)
df2 = grades_2021()
df2 = select(df2,:name,:grade_2021=>:grade)
rename_bob2(data_col) = replace.(data_col,"Bob2" => "Bob")
df2 = transform(df2, :name => rename_bob2 => :name)
return vcat(df1,df2)
end
all_grades()
7 rows × 2 columns
name | grade | |
---|---|---|
String | Float64 | |
1 | Sally | 1.0 |
2 | Bob | 5.0 |
3 | Alice | 8.5 |
4 | Hank | 4.0 |
5 | Bob | 9.5 |
6 | Sally | 9.5 |
7 | Hank | 6.0 |
collect(groupby(all_grades(),:name))
4-element Vector{Any}: 2×2 SubDataFrame Row │ name grade │ String Float64 ─────┼───────────────── 1 │ Sally 1.0 2 │ Sally 9.5 2×2 SubDataFrame Row │ name grade │ String Float64 ─────┼───────────────── 1 │ Bob 5.0 2 │ Bob 9.5 1×2 SubDataFrame Row │ name grade │ String Float64 ─────┼───────────────── 1 │ Alice 8.5 2×2 SubDataFrame Row │ name grade │ String Float64 ─────┼───────────────── 1 │ Hank 4.0 2 │ Hank 6.0
using Statistics
gdf = groupby(all_grades(),:name)
combine(gdf,:grade=>mean)
4 rows × 2 columns
name | grade_mean | |
---|---|---|
String | Float64 | |
1 | Sally | 5.25 |
2 | Bob | 7.25 |
3 | Alice | 8.5 |
4 | Hank | 5.0 |
group = [:A,:A,:B,:B]
X = 1:4
Y = 5:8
df = DataFrame(;group,X,Y)
4 rows × 3 columns
group | X | Y | |
---|---|---|---|
Symbol | Int64 | Int64 | |
1 | A | 1 | 5 |
2 | A | 2 | 6 |
3 | B | 3 | 7 |
4 | B | 4 | 8 |
gdf = groupby(df,:group)
combine(gdf,[:X,:Y] .=> mean; renamecols=false)
2 rows × 3 columns
group | X | Y | |
---|---|---|---|
Symbol | Float64 | Float64 | |
1 | A | 1.5 | 5.5 |
2 | B | 3.5 | 7.5 |
gdf = groupby(df,:group)
rounded_mean(data_col) = round(Int, mean(data_col))
combine(gdf,[:X,:Y] .=> rounded_mean; renamecols=false)
2 rows × 3 columns
group | X | Y | |
---|---|---|---|
Symbol | Int64 | Int64 | |
1 | A | 2 | 6 |
2 | B | 4 | 8 |
df = responses()
@allocated select(df,:id,:q1)
7648
df = responses()
@allocated select!(df,:id,:q1)
7360
@allocated col = df[:,:id]
96
@allocated col = df[!,:id]
0