Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:Ledest:erlang:23
erlang
3521-Use-maps-from_keys-2-and-heuristics-in-new...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 3521-Use-maps-from_keys-2-and-heuristics-in-new-sets.patch of Package erlang
From 1a9df67497d2a80d463b85a6dd11ba444607982e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= <jose.valim@dashbit.co> Date: Fri, 18 Dec 2020 14:53:36 +0100 Subject: [PATCH] Use maps:from_keys/2 and heuristics in new sets maps:from_keys/2 speeds up sets creation by avoiding a list traversal and reducing memory allocation. Both intersection/2 and subtract/2 operations were also augmented with a new heuristic that chooses between creating a set from scratch or deleting some of the existing keys, in order to reduce memory allocation and speed up operations. As of this patch, I would say the guidelines for choosing between ordsets, gb_sets and sets (v2) is the following: 1. If you only want to traverse, perform unions, intersections, or subtractions of values that are quick to compare (integers, atoms, etc) and they are up to 1k-10k in size, pick ordsets; 2. If you need take_smallest and take_largest and you don't want to do external bookkeeping, use gb_sets; 3. For everything else, choose sets (v2). Benchmarks for different operations and implementations can be found at https://github.com/josevalim/sets_bench. --- lib/stdlib/src/sets.erl | 71 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/lib/stdlib/src/sets.erl b/lib/stdlib/src/sets.erl index 344976cd4f..086de0f202 100644 --- a/lib/stdlib/src/sets.erl +++ b/lib/stdlib/src/sets.erl @@ -115,7 +115,7 @@ from_list(Ls) -> List :: [Element], Set :: set(Element). from_list(Ls, [{version, 2}]) -> - maps:from_list([{K,?VALUE}||K<-Ls]); + maps:from_keys(Ls, ?VALUE); from_list(Ls, Opts) -> case proplists:get_value(version, Opts, 1) of 1 -> from_list(Ls); @@ -258,7 +258,14 @@ union1(S1, []) -> S1. Set2 :: set(Element), Set3 :: set(Element). intersection(#{}=S1, #{}=S2) -> - maps:intersect(S1, S2); + case map_size(S1) < map_size(S2) of + true -> + Next = maps:next(maps:iterator(S1)), + intersection_heuristic(Next, [], [], floor(map_size(S1) * 0.75), S1, S2); + false -> + Next = maps:next(maps:iterator(S2)), + intersection_heuristic(Next, [], [], floor(map_size(S2) * 0.75), S2, S1) + end; intersection(S1, S2) -> case size(S1) < size(S2) of true -> @@ -267,6 +274,33 @@ intersection(S1, S2) -> filter(fun (E) -> is_element(E, S1) end, S2) end. +%% If we are keeping more than 75% of the keys, then it is +%% cheaper to delete them. Stop accumulating and start deleting. +intersection_heuristic(Next, _Keep, Delete, 0, Acc, Reference) -> + intersection_decided(Next, remove_keys(Delete, Acc), Reference); +intersection_heuristic({Key, _Value, Iterator}, Keep, Delete, KeepCount, Acc, Reference) -> + Next = maps:next(Iterator), + case Reference of + #{Key := _} -> + intersection_heuristic(Next, [Key | Keep], Delete, KeepCount - 1, Acc, Reference); + _ -> + intersection_heuristic(Next, Keep, [Key | Delete], KeepCount, Acc, Reference) + end; +intersection_heuristic(none, Keep, _Delete, _Count, _Acc, _Reference) -> + maps:from_keys(Keep, ?VALUE). + +intersection_decided({Key, _Value, Iterator}, Acc0, Reference) -> + Acc1 = case Reference of + #{Key := _} -> Acc0; + #{} -> maps:remove(Key, Acc0) + end, + intersection_decided(maps:next(Iterator), Acc1, Reference); +intersection_decided(none, Acc, _Reference) -> + Acc. + +remove_keys([K | Ks], Map) -> remove_keys(Ks, maps:remove(K, Map)); +remove_keys([], Map) -> Map. + %% intersection([Set]) -> Set. %% Return the intersection of the list of sets. -spec intersection(SetList) -> Set when @@ -323,9 +357,38 @@ is_disjoint_1(Set, Iter) -> Set1 :: set(Element), Set2 :: set(Element), Set3 :: set(Element). + +subtract(#{}=S1, #{}=S2) -> + Next = maps:next(maps:iterator(S1)), + subtract_heuristic(Next, [], [], floor(map_size(S1) * 0.75), S1, S2); subtract(S1, S2) -> filter(fun (E) -> not is_element(E, S2) end, S1). +%% If we are keeping more than 75% of the keys, then it is +%% cheaper to delete them. Stop accumulating and start deleting. +subtract_heuristic(Next, _Keep, Delete, 0, Acc, Reference) -> + subtract_decided(Next, remove_keys(Delete, Acc), Reference); +subtract_heuristic({Key, _Value, Iterator}, Keep, Delete, KeepCount, Acc, Reference) -> + Next = maps:next(Iterator), + case Reference of + #{Key := _} -> + subtract_heuristic(Next, Keep, [Key | Delete], KeepCount, Acc, Reference); + _ -> + subtract_heuristic(Next, [Key | Keep], Delete, KeepCount - 1, Acc, Reference) + end; +subtract_heuristic(none, Keep, _Delete, _Count, _Acc, _Reference) -> + maps:from_keys(Keep, ?VALUE). + +subtract_decided({Key, _Value, Iterator}, Acc, Reference) -> + case Reference of + #{Key := _} -> + subtract_decided(maps:next(Iterator), maps:remove(Key, Acc), Reference); + _ -> + subtract_decided(maps:next(Iterator), Acc, Reference) + end; +subtract_decided(none, Acc, _Reference) -> + Acc. + %% is_subset(Set1, Set2) -> boolean(). %% Return 'true' when every element of Set1 is also a member of %% Set2, else 'false'. @@ -380,7 +443,7 @@ fold_1(Fun, Acc, Iter) -> Pred :: fun((Element) -> boolean()), Set1 :: set(Element), Set2 :: set(Element). -filter(F, #{}=D) -> maps:from_list(filter_1(F, maps:iterator(D))); +filter(F, #{}=D) -> maps:from_keys(filter_1(F, maps:iterator(D)), ?VALUE); filter(F, #set{}=D) -> filter_set(F, D). filter_1(Fun, Iter) -> @@ -388,7 +451,7 @@ filter_1(Fun, Iter) -> {K, _, NextIter} -> case Fun(K) of true -> - [{K,?VALUE} | filter_1(Fun, NextIter)]; + [K | filter_1(Fun, NextIter)]; false -> filter_1(Fun, NextIter) end; -- 2.26.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor