Browse Source

add euler path functionality too

master
Charles Reid 3 years ago
parent
commit
973e04db0b
  1. 683
      graph/euler_cycle_path.py

683
graph/euler_cycle_path.py

@ -0,0 +1,683 @@
from copy import deepcopy
class Stack(list):
    """A list with the usual Stack ADT vocabulary layered on top.

    The end of the underlying list is the top of the stack, so the
    inherited ``pop()`` already removes the top element.
    """

    def push(self, item):
        """Place ``item`` on top of the stack."""
        list.append(self, item)

    def peek(self):
        """Return the top item without removing it.

        Raises IndexError when the stack is empty.
        """
        return self[-1]

    def empty(self):
        """Return True when the stack holds no items."""
        return not self

    def to_list(self):
        """Return the contents as a plain ``list`` (bottom first)."""
        return self[:]
class NoEulerPath(Exception):
    """Raised when a graph admits neither an Euler path nor an Euler cycle."""
class AdjacencyGraph(object):
    """Directed multigraph stored as an adjacency list.

    ``self.adj`` maps each source vertex to the list of its sink
    vertices, kept sorted by ``add_edge``.  Parallel edges appear as
    repeated sinks.  A vertex that only ever appears as a sink has no
    key of its own in ``self.adj``.
    """

    def __init__(self):
        """Create an empty graph."""
        self.adj = {}
        self.dfs_started = False

    def __str__(self):
        """Return one "source -> sink" line per edge."""
        return "".join(
            "%s -> %s\n" % (source, sink)
            for source, sinks in self.adj.items()
            for sink in sinks
        )

    def __len__(self):
        """Number of *source* vertices (keys of the adjacency list).

        Vertices that only appear as sinks are not counted; use
        ``n_vertices()`` for the full vertex count.
        """
        return len(self.adj)

    #########################
    # Methods to define
    # basic functionality
    # for a directed graph

    def in_degree(self, u):
        """Return the in-degree of vertex u.

        Every edge is counted, including parallel edges, so the result
        is consistent with ``out_degree``.

        Cost: O(E)
        """
        return sum(sinks.count(u) for sinks in self.adj.values())

    def out_degree(self, u):
        """Return the out-degree of vertex u (0 for unknown vertices).

        Cost: O(1)
        """
        return len(self.adj.get(u, ()))

    def _vertex_list(self):
        """Return every vertex (sources and sinks) once, in first-seen order."""
        seen = set()
        ordered = []
        for source, sinks in self.adj.items():
            if source not in seen:
                seen.add(source)
                ordered.append(source)
            for sink in sinks:
                if sink not in seen:
                    seen.add(sink)
                    ordered.append(sink)
        return ordered

    def vertices(self):
        """Vertex generator: yields each source or sink vertex exactly once."""
        for v in self._vertex_list():
            yield v

    def euler_start_vertices(self):
        """Placeholder, not implemented: always returns None."""
        pass

    def euler_end_vertices(self):
        """Placeholder, not implemented: always returns None."""
        pass

    def n_vertices(self):
        """Return the number of distinct vertices (sources and sinks)."""
        return len(self._vertex_list())

    def n_edges(self):
        """Return the number of edges, counting parallel edges.

        A source whose sink list is None contributes no edges.
        """
        return sum(len(sinks) for sinks in self.adj.values()
                   if sinks is not None)

    def get_random_vertex(self):
        """Return an arbitrary source vertex, or None for an empty graph.

        Despite the name, this is deterministic: it is the first key
        of the adjacency dictionary.
        """
        return next(iter(self.adj), None)

    def get_neighbors(self, u):
        """Return the sinks of u (sorted), or [] if u has no out-edges.

        The returned list is the graph's own storage, not a copy.
        """
        return self.adj.get(u, [])

    def add_edge(self, u, v):
        """Add an edge from u to v, keeping u's sink list sorted."""
        sinks = self.adj.setdefault(u, [])
        sinks.append(v)
        sinks.sort()

    #########################
    # Methods related to
    # finding Euler paths
    # and Euler cycles on
    # directed graphs
    #
    # A directed graph contains an Euler cycle
    # if and only if:
    # 1. it is strongly connected
    # 2. each vertex has indegree==outdegree
    #
    # A directed graph contains an Euler path
    # from x to y if and only if:
    # 1. it is connected
    # 2. each vertex except x and y has indegree==outdegree
    #    x has indegree+1==outdegree
    #    y has indegree==outdegree+1

    def transpose(self):
        """Return a new graph in which every edge u -> v becomes v -> u."""
        reversed_graph = AdjacencyGraph()
        for source, sinks in self.adj.items():
            for sink in sinks:
                reversed_graph.add_edge(sink, source)
        return reversed_graph

    @staticmethod
    def _reachable_from(adj, start):
        """Return the set of vertices reachable from start in adjacency dict adj.

        Iterative on purpose, so deep graphs cannot exhaust the
        interpreter's recursion limit.
        """
        seen = {start}
        pending = [start]
        while pending:
            u = pending.pop()
            for v in adj.get(u, ()):
                if v not in seen:
                    seen.add(v)
                    pending.append(v)
        return seen

    def strongly_connected(self):
        """Boolean: is every vertex reachable from every other vertex?

        Picks an arbitrary start vertex u and checks that a search
        from u reaches every vertex, both on this graph and on its
        transpose (i.e. every vertex can also reach u).

        An empty graph is reported as not strongly connected.
        """
        start = self.get_random_vertex()
        if start is None:
            return False
        # Compare against *all* vertices; len(self) would ignore
        # vertices that never appear as a source.
        total = self.n_vertices()
        if len(self._reachable_from(self.adj, start)) != total:
            return False
        backward = self._reachable_from(self.transpose().adj, start)
        return len(backward) == total

    def _degree_balance(self):
        """Return {vertex: outdegree - indegree} for every vertex.

        Cost: O(V + E)
        """
        balance = dict((v, 0) for v in self._vertex_list())
        for source, sinks in self.adj.items():
            balance[source] += len(sinks)
            for sink in sinks:
                balance[sink] -= 1
        return balance

    def _euler_path_endpoints(self):
        """Return (src, sink) if the degrees allow an Euler path, else None.

        Requires exactly one vertex with outdegree == indegree + 1
        (src), exactly one with indegree == outdegree + 1 (sink), and
        every other vertex balanced.  Connectivity is not checked here.
        """
        starts, ends, others = [], [], []
        for v, diff in self._degree_balance().items():
            if diff == 1:
                starts.append(v)
            elif diff == -1:
                ends.append(v)
            elif diff != 0:
                others.append(v)
        if len(starts) == 1 and len(ends) == 1 and not others:
            return starts[0], ends[0]
        return None

    def _with_extra_edge(self, u, v):
        """Return a copy of this graph with one extra edge u -> v.

        The extra edge is appended last (not sorted in), and this
        graph itself is left untouched.
        """
        g = AdjacencyGraph()
        g.adj = dict((source, list(sinks))
                     for source, sinks in self.adj.items())
        g.adj.setdefault(u, []).append(v)
        return g

    def euler_cycle_exists(self):
        """Boolean: does an Euler cycle exist on this graph?

        Requires two conditions be met:
        1. graph is strongly connected
        2. each vertex has indegree==outdegree

        Cost: O(V + E)
        """
        # Condition 2 first: it is the cheaper test.
        if any(diff != 0 for diff in self._degree_balance().values()):
            return False
        # Condition 1.
        return self.strongly_connected()

    def euler_cycle(self):
        """Find the Euler cycle on the directed graph, if it exists.

        Returns: ordered list of vertices in the Euler cycle (first
        and last entries are the same vertex), or None if no Euler
        cycle exists.
        """
        return self._hierholzer_cycle()

    def euler_path_exists(self):
        """Boolean: does an (open) Euler path exist on this graph?

        Requires that:
        1. exactly one vertex x has indegree == outdegree - 1 (path src)
        2. exactly one vertex y has indegree - 1 == outdegree (path sink)
        3. every other vertex has indegree == outdegree
        4. the graph becomes strongly connected once an edge y -> x
           is added (the path graph itself is generally not strongly
           connected, so the check is made on the augmented graph)

        A graph that only has an Euler *cycle* returns False here.
        """
        endpoints = self._euler_path_endpoints()
        if endpoints is None:
            return False
        src, sink = endpoints
        return self._with_extra_edge(sink, src).strongly_connected()

    def euler_path(self):
        """Find the Euler path on the directed graph, if it exists.

        An artificial sink -> src edge is added to a *copy* of the
        graph, an Euler cycle is found on that copy, and the cycle is
        cut at the artificial edge.  This graph is never modified.

        Returns: ordered list of vertices that compose the Euler path,
        or None if no Euler path exists.
        """
        endpoints = self._euler_path_endpoints()
        if endpoints is None:
            return None
        src, sink = endpoints

        cycle = self._with_extra_edge(sink, src)._hierholzer_cycle()
        if cycle is None:
            # Degrees are fine but the graph is not connected.
            return None

        # Re-center the cycle on a consecutive (sink, src) pair.  The
        # sink may occur several times in the cycle, so looking for the
        # vertex alone is not enough.  Parallel sink -> src edges are
        # interchangeable, so any such pair is a valid cut.
        for i in range(len(cycle) - 1):
            if cycle[i] == sink and cycle[i + 1] == src:
                # cycle[0] == cycle[-1], hence the slice from 1.
                return cycle[i + 1:] + cycle[1:i + 1]
        return None

    def _hierholzer_cycle(self):
        """Run the Hierholzer algorithm to find the Euler cycle.

        Returns None if no Euler cycle exists, otherwise returns an
        ordered list of vertices whose first and last entries are the
        start vertex.

        Algorithm: keep a stack of vertices.  While the current vertex
        has an unvisited out-edge, push it and follow that edge.  When
        it has none, emit it and backtrack to the vertex on top of the
        stack.  The emitted sequence, reversed, is the Euler cycle.
        """
        if not self.euler_cycle_exists():
            return None

        # Unvisited edges: a private copy, consumed as edges are used.
        unvisited = dict((source, list(sinks))
                         for source, sinks in self.adj.items())

        start = self.get_random_vertex()
        # The start vertex is pushed once up front; that extra entry is
        # what lets the final backtrack emit the closing vertex.
        stack = [start]
        current = start
        emitted = []

        while stack:
            if unvisited[current]:
                stack.append(current)
                # Take the last (largest) remaining sink.
                current = unvisited[current].pop()
            else:
                emitted.append(current)
                current = stack.pop()

        emitted.reverse()
        return emitted

    def hierholzer_path(self):
        """Run the Hierholzer algorithm to find the Euler path.

        Returns None if no Euler path exists, otherwise returns an
        ordered list of vertices.  Delegates to ``euler_path``.
        """
        # https://charlesreid1.com/wiki/Graphs/Euler_Circuit#Directed_Graphs:_Hierholzer.27s_Algorithm
        return self.euler_path()

    def _get_euler_path_start_end(self):
        """Get the Euler path start and end vertices, if they exist.

        The start vertex satisfies indegree + 1 == outdegree and the
        end vertex satisfies indegree == outdegree + 1.

        Returns:
            (start, end) when there is exactly one of each and every
            other vertex is balanced; (None, None) when every vertex
            is balanced (the graph's degrees allow an Euler cycle).

        Raises:
            NoEulerPath: for any other degree configuration; to be
            handled by the caller.

        Connectivity is not checked.  Cost: O(V + E)
        """
        start_vertex = []
        end_vertex = []
        unequal_vertex = []
        # Walk all vertices, not just sources: the end vertex often
        # has no out-edges at all.
        for v, diff in self._degree_balance().items():
            if diff == 1:
                start_vertex.append(v)
            elif diff == -1:
                end_vertex.append(v)
            elif diff != 0:
                unequal_vertex.append(v)

        if len(start_vertex) > 1:
            raise NoEulerPath("Error: multiple start vertices (indegree+1==outdegree).\n%s" % (", ".join(map(str, start_vertex))))
        elif len(end_vertex) > 1:
            raise NoEulerPath("Error: multiple end vertices (indegree==outdegree+1).\n%s" % (", ".join(map(str, end_vertex))))
        elif len(unequal_vertex) > 0:
            raise NoEulerPath("Error: one or more vertices have unequal indegree and outdegree.\n%s" % (", ".join(map(str, unequal_vertex))))

        # Degree differences sum to zero, so at this point there is
        # either one start and one end, or neither.
        if start_vertex and end_vertex:
            return start_vertex[0], end_vertex[0]
        return None, None
#####################
# Depth first search
# is best defined as
# a function, not a
# method, so that it
# can be more flexible.
def default_visit_function(g, u, **kwargs):
    """Default "visit" callback for ``dfs``: does nothing.

    Any custom visit function should accept the same parameters.

    g = graph
    u = vertex being visited
    kwargs = extra params forwarded from ``dfs``
    """
    pass


def dfs(g, u, visit=default_visit_function, **kwargs):
    """Perform a depth-first search starting from vertex u.

    The search is iterative (explicit stack of neighbor iterators), so
    deep graphs do not hit the interpreter's recursion limit.  Vertices
    are visited in the same pre-order a recursive DFS would use,
    following ``g.get_neighbors`` in the order it returns them.

    g = graph; only ``g.get_neighbors(vertex)`` is used
    u = start vertex
    visit = callback invoked as visit(g, vertex, **kwargs) once per
            reached vertex, the root included
    kwargs = extra params forwarded to visit

    Returns (visited, path): the set of reached vertices, and a dict
    mapping each reached vertex to its predecessor in the search tree
    (the root maps to None).

    Note: only the component reachable from u is explored.
    """
    visited = set()
    # Root node has no predecessor
    path = {u: None}

    # visit() is called on a vertex just before the search descends
    # into it, so the root is visited before the loop starts.
    visit(g, u, **kwargs)
    visited.add(u)

    # Each frame is (vertex, iterator over its remaining neighbors).
    frames = [(u, iter(g.get_neighbors(u)))]
    while frames:
        parent, remaining = frames[-1]
        for v in remaining:
            if v not in visited:
                visit(g, v, **kwargs)
                visited.add(v)
                path[v] = parent
                # Descend; the parent's iterator resumes afterwards.
                frames.append((v, iter(g.get_neighbors(v))))
                break
        else:
            # Neighbors exhausted: backtrack.
            frames.pop()

    return visited, path
##############################
# Load a problem
def load_graph(filename):
    """Load a directed graph from a text file.

    Each non-blank line has the form ``source -> sink1,sink2,...``.
    Blank lines (such as a trailing newline at end of file) are
    skipped, and whitespace around each sink is ignored.

    Raises ValueError if a non-blank line does not contain exactly one
    " -> " separator.

    Returns the populated AdjacencyGraph.
    """
    g = AdjacencyGraph()
    with open(filename) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            source, sinks = line.split(" -> ")
            for sink in sinks.split(","):
                g.add_edge(source, sink.strip())
    return g
##############################
# Test out the graph functionality
def test_graph_functionality():
    """Smoke-test the graph: print it, then report connectivity and Euler-cycle status."""
    graph = load_graph('input2.txt')

    print("Printing directed graph:")
    print(graph)

    connectivity_msg = (
        "[+] Strongly connected"
        if graph.strongly_connected()
        else "[-] Not strongly connected"
    )
    print(connectivity_msg)

    print()

    cycle_msg = (
        "[+] Euler cycle exists"
        if graph.euler_cycle_exists()
        else "[-] No Euler cycle exists"
    )
    print(cycle_msg)
##############################
# Euler cycle problems
def euler_cycle_small():
    """Solve a small Euler cycle problem and print it next to the expected answer."""
    g = load_graph('input2.txt')
    c = g.euler_cycle()
    # euler_cycle() returns None when no cycle exists; joining None
    # would raise TypeError.
    if c is None:
        print("[-] No Euler cycle exists")
    else:
        print("Euler cycle: %s"%("->".join(c)))
    print("Gold: 6->8->7->9->6->5->4->2->1->0->3->2->6")
def euler_cycle_rosalind():
    """Solve the rosalind.info Euler cycle problem.

    Reads 'rosalind_ba3f.txt' and, when an Euler cycle exists, writes
    it to 'output_ba3f.txt'.  When none exists the output file is left
    untouched.
    """
    g = load_graph('rosalind_ba3f.txt')
    # euler_cycle() performs the existence check itself and returns
    # None on failure, so one call answers both questions.
    c = g.euler_cycle()
    if c is None:
        print("[-] No Euler cycle exists")
        return
    print("[+] Euler cycle exists")
    print("Printing euler cycle to output_ba3f.txt")
    with open('output_ba3f.txt','w') as f:
        f.write("->".join(c))
    print("\n\nRun this command:\n\ncat output_ba3f.txt | pbcopy\n\n")
##############################
# Euler path problems
def euler_path_small():
    """Solve a small Euler path problem and print it next to the expected answer."""
    g = load_graph('input4.txt')
    print(g)
    if g.euler_path_exists():
        print("[+] Euler path exists")
    else:
        print("[-] No Euler path exists")
    p = g.euler_path()
    print(p)
    # euler_path() returns None when no path exists; joining None
    # would raise TypeError.
    if p is not None:
        print("Euler path: %s"%("->".join(p)))
    print("Gold: 6->7->8->9->6->3->0->2->1->3->4")
def euler_path_rosalind():
    """Solve the rosalind.info Euler path problem.

    Placeholder: nothing is implemented yet, so this returns None.
    """
    return None
if __name__ == "__main__":
    # Script entry point: run exactly one driver.
    # Disabled alternatives, kept for quick switching:
    #   euler_cycle_small()
    #   euler_cycle_rosalind()
    #   euler_path_rosalind()
    euler_path_small()
Loading…
Cancel
Save