\documentclass[12pt,a4paper]{article}
\pagestyle{empty}
\oddsidemargin 2.1mm
\textwidth 155mm
\topmargin -15mm
\textheight 240mm
\begin{document}
%-----------------------------------------------------------------------
\noindent
{\bf Data Mining 2: Frequent Pattern Mining} \hfill Summer 2017 \\
Christian Borgelt \hfill from 20.06.2017
%-----------------------------------------------------------------------
\vskip3ex
\centerline{\bf Exercise Sheet 4}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 21 \quad\rm Maximal and Closed Item Sets}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
\parbox[t]{100mm}{Find the frequent / maximal / closed item sets
for the transaction database shown on the right and a minimum
support $s_{\min} = 3$
(you may or may not use some concrete algorithm):}\hfill
\begin{tabular}[t]{@{}l@{}}
$a$ $d$ $f$ \\
$a$ $c$ $d$ $e$ \\
$b$ $d$ \\
$b$ $c$ $d$ \\
$b$ $c$
\end{tabular}\hskip10mm
\begin{tabular}[t]{@{}l@{}}
$a$ $b$ $d$ \\
$b$ $d$ $e$ \\
$b$ $c$ $e$ $g$ \\
$c$ $d$ $f$ \\
$a$ $b$ $d$
\end{tabular}
\item[b)\hfill]
Find an example of a (small) transaction database
for which the number of maximal item sets goes down
if the minimum support is reduced;
or explain in some other way why this may happen!
\item[c)\hfill]
Is it possible that the number of all frequent item sets or the
number of closed frequent item sets goes down if the minimum
support is reduced? If yes, give an example! If no, argue why
this is not possible!
\item[d)\hfill]
How are maximal and closed item sets related to each other? \\
Can the one be expressed in terms of the other? \\
What information is preserved/lost with maximal/closed item sets?
How?
\item[e)\hfill]
Define closed item sets with the help of the notion
of a perfect extension!
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 22 \quad\rm Maximal and Closed Item Sets}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Characterize the set $M_T(1)$
for an arbitrary transaction database $T$
(that is, your characterization should work
for all possible transaction databases~$T$).
\item[b)\hfill]
Suppose we have
$\forall s_{\min}:
F_T(s_{\min}) = C_T(s_{\min}) = M_T(s_{\min})$. \\
What does the transaction database $T$ look like?
\item[c)\hfill]
Suppose we have only
$\forall s_{\min}: C_T(s_{\min}) = M_T(s_{\min})$. \\
What does the transaction database $T$ look like?
\item[d)\hfill]
Suppose we have only
$\forall s_{\min}: C_T(s_{\min}) - \{ \emptyset \}
= M_T(s_{\min}) - \{ \emptyset \}$. \\
What does the transaction database $T$ look like?
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 23 \quad\rm Finding Closed Item Sets}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
How are closed item sets and closed sets of transaction
identifiers related? \\
What does it mean for a transaction index set that it is closed?
\item[b)\hfill]
How does the Carpenter algorithm exploit this relationship?
\item[c)\hfill]
What recursive relationship for closed item sets
is the IsTa algorithm based on?
\item[d)\hfill]
Suppose the divide-and-conquer scheme for finding frequent item
sets is enhanced by perfect extension pruning,
so that subproblems are described by triplets $S = (T,P,X)$.
Is $P \cup X$ always a closed item set?
Is it always a maximal item set?
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 24 \quad\rm Evaluating Item Sets}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
Find some other measure(s)
on the partial order $(2^B,\subseteq)$
that is/are anti-monotone (or downward closed)!
\item[b)\hfill]
Show that the quotient of observed and expected frequency
of an item set (expected w.r.t.\ an assumption of full
independence) is not anti-monotone!
Try to find counterexamples with $|I| \ge 1$ and with $|I| \ge 2$.
Show that the reciprocal value of this quotient is also
not anti-monotone!
\item[c)\hfill]
Show that if an item set $I$ and an item $a$ are independent,
$\varrho_{\rm fi}(I \cup \{a\}) = \varrho_{\rm fi}(I)$.
Show that, in contrast to this,
$\varrho_{\rm ii}(I \cup \{a\}) = 1$ in this case!
\item[d)\hfill]
Find an example where $\varrho_{\rm si}$ scores an item set $I$
high, even though $I$ is composed of two independent subsets!
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 25 \quad\rm Association Rules}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
What are association rules?
With what measures are they evaluated? \\
What is the support of an association rule
(two versions)? \\
What is the confidence of an association rule?
\item[b)\hfill]
How are association rules induced? What steps are needed? \\
How is the induction related to frequent item set mining?
\item[c)\hfill]
With what minimum support do we have to find
frequent item sets for association rule induction?
Is it the minimum support of association rules?
Or does it depend on the choice of the rule support definition?
If yes, how?
\item[d)\hfill]
What relationships hold between the confidence values
of different association rules formed from the same item set?
How can we exploit these relationships in the generation
of association rules?
\end{enumerate}
%-----------------------------------------------------------------------
\subsubsection*{Exercise 26 \quad\rm Association Rules}
\begin{enumerate}\itemsep0pt
\item[a)\hfill]
What are some measures other than minimum support and minimum
confidence with which association rules may be evaluated?
From what values are they usually computed?
\item[b)\hfill]
What is the lift (value) of an association rule
and how is it defined? \\
What is the information gain of an association rule? \\
What is the $\chi^2$-measure of an association rule?
\item[c)\hfill]
Are association rules with
more than one item in the consequent useful? \\
What additional information do they provide?
\end{enumerate}
%-----------------------------------------------------------------------
\end{document}