\documentclass[12pt,a4paper]{article}
\usepackage{graphicx}
\pagestyle{empty}
\oddsidemargin 2.1mm
\textwidth 155mm
\topmargin -15mm
\textheight 244mm
\begin{document}
%-----------------------------------------------------------------------
\noindent
{\bf Data Mining 2: Frequent Pattern Mining} \hfill Summer 2017 \\
Christian Borgelt \hfill from 18.07.2017
%-----------------------------------------------------------------------
\vskip3ex
\centerline{\bf Exercise Sheet 6}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 33 \quad\rm Canonical Code Words}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
What are rightmost path extensions?
What are maximum source extensions?
\item[b)\hfill]
Do rightmost path/maximum source extensions
always yield canonical code words?
\item[c)\hfill]
\parbox[t]{110mm}{Find the canonical code word, based on a
depth-first search spanning tree, for cyclin, that is, for
the molecule:}\hskip16mm
\lower5.3ex\hbox{\includegraphics{fpmex-6}} \\[-1ex]
Use the order
$\mbox{\tt N} \prec \mbox{\tt O} \prec \mbox{\tt C}$
for the atoms and the order
$\mbox{\tt -} \prec \mbox{\tt =}$ for the bonds!
\item[d)\hfill]
Find the canonical code word, based on a breadth-first search
spanning tree, for cyclin (see part~c)!
Use the same orders as in part~c!
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 34 \quad\rm Extendable Vertices}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Which vertices/atoms of cyclin are extendable, based on the
canonical code word from Exercise~33c),
by rightmost path extensions?
\item[b)\hfill]
Which vertices/atoms of cyclin are extendable, based on the
canonical code word from Exercise~33d),
by maximum source extensions?
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 35 \quad\rm Canonical Code Words}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Check whether the code word\hfill
{\tt
S\kern0.5ex 2\kern0.5ex -\kern0.5ex 3\kern0.5ex -\kern0.5ex
N\kern0.5ex 4\kern0.5ex -\kern0.5ex 5\kern0.5ex -\kern0.5ex
C\kern0.5ex 6\kern0.5ex -\kern0.5ex
O\kern0.5ex
C\kern0.5ex 6\kern0.5ex -\kern0.5ex 7\kern0.5ex -\kern0.5ex
C\kern0.5ex
C\kern0.5ex 8\kern0.5ex -\kern0.5ex 9\kern0.5ex =\kern0.5ex
O\kern0.5ex
O}\\
\parbox[t]{90mm}{is the canonical code word, based on an
(extended) adjacency matrix of which the upper triangle
is read row by row, of the graph/molecule:}\hskip24mm
\lower6.8ex\hbox{\includegraphics{fpmex-5}}\par
Use the order $\mbox{\tt S} \prec
\mbox{\tt N} \prec \mbox{\tt O} \prec \mbox{\tt C}$
for the atoms and the order
$\mbox{\tt -} \prec \mbox{\tt =}$ for the bonds!
\item[b)\hfill]
Find the canonical code word, based on an (extended) adjacency
matrix of which the upper triangle is read row by row,
for cyclin (see Exercise~33c).
Use the same orders of atoms and bonds as in part~a!
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 36 \quad\rm Vertex Signatures}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
What is the purpose of the method of vertex signatures?
How can we construct vertex signatures by iteratively splitting
equivalence classes?
\item[b)\hfill]
What is an automorphism of a graph?
What is the orbit of a vertex?
\item[c)\hfill]
Show that the automorphisms of a graph form a group
(algebraic structure)!
\item[d)\hfill]
Why can we form a canonical code word very efficiently
if we know that all vertex labels are pairwise different?
Why can we form a canonical code word very efficiently
if we know the orbits of all vertices?
\item[e)\hfill]
Can we always identify the orbits of the vertices of a graph
by constructing vertex signatures? If yes, why can we be sure?
If no: Are there special conditions under which we can be sure
that we have identified the orbits? Are vertex signatures still
helpful even if they do not identify all orbits?
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 37 \quad\rm Vertex Signatures}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Try to find unique labels for the vertices/atoms of cyclin
(see Exercise 33c) using the method of vertex signatures!
What does the resulting canonical code word look like
(use the same orders as above)?
\item[b)\hfill]
\parbox[t]{90mm}{Try to find unique labels for the vertices/atoms
of phenol, that is, of the graph/molecule shown on the right,
using the method of vertex signatures.}\hskip20mm
\lower2.7ex\hbox{\includegraphics{fpmex-1}} \\[.5ex]
Why is it not possible to reduce all equivalence classes
to single elements?
\item[c)\hfill]
How is the canonical code word for phenol constructed?
How can one deal with the fact that not all
equivalence classes can be reduced to single elements?
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 38 \quad\rm Orbits}
\begin{minipage}[t]{0.48\textwidth}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Determine the orbits of the vertices for the following two graphs:
\par\vskip2mm
\centerline{\includegraphics[scale=1.2]{fpmex-9}\hskip16mm
\includegraphics{fpmex-10}}
\end{enumerate}
\end{minipage}\hfill
\begin{minipage}[t]{0.48\textwidth}
\begin{enumerate}\itemsep0pt
\item[b)\hfill]
Determine the orbits of the vertices for the following two graphs:
\par\vskip2mm
\centerline{\includegraphics{fpmex-11}\hskip16mm
\includegraphics{fpmex-3}}
\end{enumerate}
\end{minipage}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 39 \quad\rm Perfect Extensions}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Perfect extensions of item sets are defined
by a very simple criterion. \\
Why is the same criterion necessary,
but not sufficient for graphs? \\
What criterion is needed instead?
\item[b)\hfill]
Why do rings cause problems to perfect extensions of graphs? \\
With what additional conditions can these problems be handled? \\
Are these conditions necessary?
\item[c)\hfill]
Why is it easy to cut the search tree branches ``to the right''
of a perfect extension, but difficult to cut those ``to the left''
of a perfect extension?
\item[d)\hfill]
Find the perfect extensions of the fragment {\tt C-C} and of the
fragment {\tt N-C} in the graph database that consists of the
following three molecules: \par\vskip2mm
\centerline{\includegraphics{fpmex-1}\hskip20mm
\includegraphics{fpmex-2}\hskip20mm
\includegraphics{fpmex-3}}
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 40 \quad\rm Mining a Single Graph}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Why is it not a good idea to use
the number of occurrences/subgraph isomorphisms
as the support measure for mining frequent subgraphs
of a single graph?
\item[b)\hfill]
How can we define support for mining frequent subgraphs
in a single graph?
\item[c)\hfill]
What is the main disadvantage of support measures based
on overlap graphs?
\item[d)\hfill]
Is there a support measure for finding frequent (sub)graphs
in a single graph that does not use an overlap graph?
How is it defined? What are its advantages/disadvantages?
\end{enumerate}
%-----------------------------------------------------------------------
\end{document}