So I have implemented three linear time (\$\mathcal{O}(V + E)\$) algorithms for finding strongly connected components of a (directed) graph. A strongly connected component is just a maximal subset of nodes, such that any node in the subset can be reached from any other node in the subset.
The algorithms I am comparing are:
See below for code.
SCCFinder.java:
package net.coderodde.graph.scc;
import java.util.List;
import net.coderodde.graph.DirectedGraph;
/**
* This interface defines the API for algorithms finding strongly connected
* components.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (May 3, 2016)
*/
public interface SCCFinder {
/**
* Returns a list of strongly connected components in the input graph
* {@code digraph}.
*
* @param digraph
* @return
*/
public List<List<Integer>>
findStronglyConnectedCmponents(final DirectedGraph digraph);
}
KosarajuSCCFinder.java:
package net.coderodde.graph.scc.support;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import net.coderodde.graph.DirectedGraph;
import net.coderodde.graph.scc.SCCFinder;
/**
* This class implements the recursive
* <a href="https://en.wikipedia.org/wiki/Kosaraju%27s_algorithm">Kosarajus's
* algorithm</a>
* for finding strongly connected components in an input directed graph.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (May 3, 2016)
*/
public final class KosarajuSCCFinder implements SCCFinder {
private DirectedGraph digraph;
private List<Integer> nodeList;
private Set<Integer> visitedSet;
private Map<Integer, Integer> assignmentMap;
public KosarajuSCCFinder() {}
private KosarajuSCCFinder(final DirectedGraph digraph) {
Objects.requireNonNull(digraph, "The input directed graph is null.");
this.digraph = digraph;
this.nodeList = new ArrayList<>(digraph.size());
this.visitedSet = new HashSet<>(digraph.size());
this.assignmentMap = new HashMap<>(digraph.size());
}
@Override
public List<List<Integer>>
findStronglyConnectedCmponents(DirectedGraph digraph) {
return new KosarajuSCCFinder(digraph).compute();
}
private List<List<Integer>> compute() {
for (final Integer node : digraph.getAllNodes()) {
visit(node);
}
Collections.<Integer>reverse(nodeList);
for (final Integer node : nodeList) {
assign(node, node);
}
final Map<Integer, List<Integer>> map = new HashMap<>();
for (final Map.Entry<Integer, Integer> entry
: assignmentMap.entrySet()) {
final Integer component = entry.getValue();
if (!map.containsKey(component)) {
map.put(component, new ArrayList<>());
}
map.get(component).add(entry.getKey());
}
return new ArrayList<>(map.values());
}
private void visit(final Integer node) {
if (visitedSet.contains(node)) {
return;
}
visitedSet.add(node);
for (final Integer child : digraph.getChildrenOf(node)) {
visit(child);
}
nodeList.add(node);
}
private void assign(final Integer node, final Integer root) {
if (!assignmentMap.containsKey(node)) {
assignmentMap.put(node, root);
for (final Integer parent : digraph.getParentsOf(node)) {
assign(parent, root);
}
}
}
}
TarjanSCCFinder.java:
package net.coderodde.graph.scc.support;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import net.coderodde.graph.DirectedGraph;
import net.coderodde.graph.scc.SCCFinder;
/**
* This class implements
* <a href="https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm">Tarjan's strongly connected components algorithm</a> using
* recursive depth-first search.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (May 3, 2016)
*/
public final class TarjanSCCFinder implements SCCFinder {
private DirectedGraph digraph;
private int index;
private Deque<Integer> stack;
private Set<Integer> onStackSet;
private Map<Integer, Integer> indexMap;
private Map<Integer, Integer> lowLinkMap;
private List<List<Integer>> solution;
public TarjanSCCFinder() {}
private TarjanSCCFinder(final DirectedGraph digraph) {
Objects.requireNonNull(digraph, "The input digraph is null.");
this.digraph = digraph;
this.stack = new ArrayDeque<>();
this.indexMap = new HashMap<>();
this.lowLinkMap = new HashMap<>();
this.onStackSet = new HashSet<>();
this.solution = new ArrayList<>();
}
@Override
public List<List<Integer>>
findStronglyConnectedCmponents(final DirectedGraph digraph) {
return new TarjanSCCFinder(digraph).compute();
}
private List<List<Integer>> compute() {
Objects.requireNonNull(digraph, "The input directed graph is null.");
for (final Integer node : digraph.getAllNodes()) {
if (!indexMap.containsKey(node)) {
strongConnect(node);
}
}
return this.solution;
}
private void strongConnect(final Integer node) {
indexMap.put(node, index);
lowLinkMap.put(node, index);
++index;
stack.push(node);
onStackSet.add(node);
for (final Integer child : digraph.getChildrenOf(node)) {
if (!indexMap.containsKey(child)) {
strongConnect(child);
lowLinkMap.put(node, Math.min(lowLinkMap.get(node),
lowLinkMap.get(child)));
} else if (onStackSet.contains(child)) {
lowLinkMap.put(node, Math.min(lowLinkMap.get(node),
indexMap.get(child)));
}
}
if (lowLinkMap.get(node).equals(indexMap.get(node))) {
final List<Integer> newStronglyConnectedComponent =
new ArrayList<>();
Integer top;
do {
top = stack.pop();
onStackSet.remove(top);
newStronglyConnectedComponent.add(top);
} while (!top.equals(node));
this.solution.add(newStronglyConnectedComponent);
}
}
}
PathBasedSCCFinder.java:
package net.coderodde.graph.scc.support;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import net.coderodde.graph.DirectedGraph;
import net.coderodde.graph.scc.SCCFinder;
/**
* This class implements a <a href="">path-based strong component algorithm</a>.
*
* @author Rodion "rodde" Efremov
* @version 1.6
*/
public class PathBasedSCCFinder implements SCCFinder {
private DirectedGraph digraph;
private int counter;
private Set<Integer> assignedNodeSet;
private Map<Integer, Integer> preorderNumberMap;
private Deque<Integer> stackP;
private Deque<Integer> stackS;
private List<List<Integer>> solution;
public PathBasedSCCFinder() {}
private PathBasedSCCFinder(final DirectedGraph digraph) {
Objects.requireNonNull(digraph, "The input directed graph is null.");
this.digraph = digraph;
this.assignedNodeSet = new HashSet<>();
this.preorderNumberMap = new HashMap<>();
this.stackP = new ArrayDeque<>();
this.stackS = new ArrayDeque<>();
this.solution = new ArrayList<>();
}
@Override
public List<List<Integer>>
findStronglyConnectedCmponents(DirectedGraph digraph) {
return new PathBasedSCCFinder(digraph).compute();
}
private List<List<Integer>> compute() {
for (final Integer node : digraph.getAllNodes()) {
if (!preorderNumberMap.containsKey(node)) {
visit(node);
}
}
return this.solution;
}
private void visit(final Integer node) {
preorderNumberMap.put(node, counter++);
stackP.addLast(node);
stackS.addLast(node);
for (final Integer child : digraph.getChildrenOf(node)) {
if (!preorderNumberMap.containsKey(child)) {
visit(child);
} else if (!assignedNodeSet.contains(child)) {
while (preorderNumberMap.get(stackP.getLast()) >
preorderNumberMap.get(child)) {
stackP.removeLast();
}
}
}
if (node.equals(stackP.getLast())) {
stackP.removeLast();
Integer topOfStackS;
final List<Integer> component = new ArrayList<>();
do {
topOfStackS = stackS.removeLast();
component.add(topOfStackS);
assignedNodeSet.add(topOfStackS);
} while (!topOfStackS.equals(node));
this.solution.add(component);
}
}
}
DirectedGraph.java:
package net.coderodde.graph;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* This class implements a directed graph.
*
* @author Rodion "rodde" Efremov
* @version 1.6 (Jan 11, 2016)
*/
public class DirectedGraph {
private int edges;
private final Map<Integer, Set<Integer>> parentMap = new HashMap<>();
private final Map<Integer, Set<Integer>> childMap = new HashMap<>();
public int size() {
return parentMap.size();
}
public int getNumberOfEdges() {
return edges;
}
public boolean addNode(int nodeId) {
if (parentMap.containsKey(nodeId)) {
return false;
}
parentMap.put(nodeId, new HashSet<>());
childMap .put(nodeId, new HashSet<>());
return true;
}
public boolean hasNode(int nodeId) {
return parentMap.containsKey(nodeId);
}
public boolean clearNode(int nodeId) {
if (!hasNode(nodeId)) {
return false;
}
Set<Integer> parents = parentMap.get(nodeId);
Set<Integer> children = childMap.get(nodeId);
if (parents.isEmpty() && children.isEmpty()) {
return false;
}
for (Integer childId : children) {
parentMap.get(childId).remove(nodeId);
}
for (Integer parentId : parents) {
childMap.get(parentId).remove(nodeId);
}
edges -= parents.size() + children.size();
parents.clear();
children.clear();
return true;
}
public boolean removeNode(int nodeId) {
if (!hasNode(nodeId)) {
return false;
}
clearNode(nodeId);
parentMap.remove(nodeId);
childMap.remove(nodeId);
return true;
}
public boolean addEdge(int tailNodeId, int headNodeId) {
addNode(tailNodeId);
addNode(headNodeId);
if (childMap.get(tailNodeId).contains(headNodeId)) {
return false;
} else {
childMap .get(tailNodeId).add(headNodeId);
parentMap.get(headNodeId).add(tailNodeId);
edges++;
return true;
}
}
public boolean hasEdge(int tailNodeId, int headNodeId) {
if (!childMap.containsKey(tailNodeId)) {
return false;
}
return childMap.get(tailNodeId).contains(headNodeId);
}
public boolean removeEdge(int tailNodeId, int headNodeId) {
if (!childMap.containsKey(tailNodeId)) {
return false;
}
if (!childMap.get(tailNodeId).contains(headNodeId)) {
return false;
}
childMap .get(tailNodeId).remove(headNodeId);
parentMap.get(headNodeId).remove(tailNodeId);
edges--;
return true;
}
public Set<Integer> getChildrenOf(int nodeId) {
if (!childMap.containsKey(nodeId)) {
return Collections.<Integer>emptySet();
}
return Collections.<Integer>unmodifiableSet(childMap.get(nodeId));
}
public Set<Integer> getParentsOf(int nodeId) {
if (!parentMap.containsKey(nodeId)) {
return Collections.<Integer>emptySet();
}
return Collections.<Integer>unmodifiableSet(parentMap.get(nodeId));
}
public Set<Integer> getAllNodes() {
return Collections.<Integer>unmodifiableSet(childMap.keySet());
}
public void clear() {
childMap.clear();
parentMap.clear();
edges = 0;
}
}
SCCFinderDemo.java:
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import net.coderodde.graph.DirectedGraph;
import net.coderodde.graph.scc.SCCFinder;
import net.coderodde.graph.scc.support.KosarajuSCCFinder;
import net.coderodde.graph.scc.support.PathBasedSCCFinder;
import net.coderodde.graph.scc.support.TarjanSCCFinder;
public class SCCFinderDemo {
private static final class DEMO_DATA {
private static final int NODES = 100_000;
private static final int ARCS = 150_000;
}
private static final class WARMUP_DATA {
private static final int NODES = 5_000;
private static final int ARCS = 80_000;
private static final int ITERATIONS = 50;
}
private static final class FINDERS {
static SCCFinder KOSARAJU = new KosarajuSCCFinder();
static SCCFinder TARJAN = new TarjanSCCFinder();
static SCCFinder PATH_BASED = new PathBasedSCCFinder();
}
public static void main(String[] args) {
final long seed = System.nanoTime();
final Random random = new Random(seed);
System.out.println("Seed = " + seed);
System.out.println("[STATUS] Warming up...");
warmup(random);
System.out.println("[STATUS] Warming done.");
final DirectedGraph digraph = createRandomDigraph(random,
DEMO_DATA.NODES,
DEMO_DATA.ARCS);
long startTime = System.nanoTime();
final List<List<Integer>> scc1 =
FINDERS.KOSARAJU.findStronglyConnectedCmponents(digraph);
long endTime = System.nanoTime();
System.out.printf("Kosaraju's algorithm in %d milliseconds.\n",
(endTime - startTime) / 1_000_000);
startTime = System.nanoTime();
final List<List<Integer>> scc2 =
FINDERS.TARJAN.findStronglyConnectedCmponents(digraph);
endTime = System.nanoTime();
System.out.printf("Tarjan's algorithm in %d milliseconds.\n",
(endTime - startTime) / 1_000_000);
startTime = System.nanoTime();
final List<List<Integer>> scc3 =
FINDERS.PATH_BASED.findStronglyConnectedCmponents(digraph);
endTime = System.nanoTime();
System.out.printf("Path-based algorithm in %d milliseconds.\n",
(endTime - startTime) / 1_000_000);
// We need to sort each strongly connected component so that we can
// ask whether two are same.
for (final List<Integer> component : scc1) {
Collections.sort(component);
}
for (final List<Integer> component : scc2) {
Collections.sort(component);
}
for (final List<Integer> component : scc3) {
Collections.sort(component);
}
// Sets ignore the order so that we don't need to sort the components.
final Set<List<Integer>> scc1set = new HashSet<>(scc1);
final Set<List<Integer>> scc2set = new HashSet<>(scc2);
final Set<List<Integer>> scc3set = new HashSet<>(scc3);
System.out.println("---");
System.out.println("Algorithms agree: " + (scc1set.equals(scc2set) &&
scc2set.equals(scc3set)));
}
private static void warmup(final Random random) {
final DirectedGraph digraph =
createRandomDigraph(random,
WARMUP_DATA.NODES,
WARMUP_DATA.ARCS);
for (int i = 0; i < WARMUP_DATA.ITERATIONS; ++i) {
FINDERS.KOSARAJU .findStronglyConnectedCmponents(digraph);
FINDERS.TARJAN .findStronglyConnectedCmponents(digraph);
FINDERS.PATH_BASED.findStronglyConnectedCmponents(digraph);
}
}
private static DirectedGraph createRandomDigraph(final Random random,
final int nodes,
final int arcs) {
final DirectedGraph digraph = new DirectedGraph();
for (int i = 0; i < nodes; ++i) {
digraph.addNode(i);
}
for (int i = 0; i < arcs; ++i) {
digraph.addEdge(random.nextInt(nodes), random.nextInt(nodes));
}
return digraph;
}
}
The performance figures are as follows:
Kosaraju's algorithm in 1253 milliseconds. Tarjan's algorithm in 287 milliseconds. Path-based algorithm in 243 milliseconds. --- Algorithms agree: true
I would be glad to hear comments about API design, naming, coding conventions and warming up the JVM; however, any critique is much appreciated.
IMPORTANT Please pass the -Xss20m
option to the JVM. Otherwise you will get a stack overflow since the demo graph is large, and all the algorithms are recursive.
StackOverflowError
when I try to run the code in the question. Did you change the stack size? Edit: it appears to work with-Xss4m
. \$\endgroup\$