% Created 2018-03-01 Thu 19:52
\documentclass[11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{fixltx2e}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{float}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{marvosym}
\usepackage{wasysym}
\usepackage{amssymb}
\usepackage{hyperref}
\tolerance=1000
\usepackage{csquotes}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{algpseudocode}
\usepackage[margin=0.75in]{geometry}
\usepackage{nth}
\usepackage{tikz}
\usetikzlibrary{positioning,chains,fit,shapes,shapes.misc,calc,arrows,patterns,decorations.pathreplacing}
\usepackage{ccicons}
\usepackage{calc}
\usepackage{fancyhdr}
\usepackage{fancyvrb}
\usepackage{hyperref}
\usepackage{nopageno}
\usepackage{enumitem}
\usepackage{multicol}
\usepackage{braket}
\algnewcommand\algorithmicforeach{\textbf{for each}}
\algdef{S}[FOR]{ForEach}[1]{\algorithmicforeach\ #1\ \algorithmicdo}
\setlength{\columnseprule}{1pt}
\newcommand{\bonusmarksamt}[1]{\textbf{[#1~BONUS~marks]}}
\newcommand{\bonusmarkamt}[0]{\textbf{[1~BONUS~mark]}}
\newcommand{\marksamt}[1]{\textbf{[#1~marks]}}
\newcommand{\markamt}[0]{\textbf{[1~mark]}}
\newcommand{\fillinMC}[1]{\fillinMCmath{\mbox{#1}}}
\newcommand{\fillinMCmath}[1]{\begin{tikzpicture}\draw circle [radius=0.5em];\end{tikzpicture}\ #1}
\newcommand{\fillinMCsoln}[1]{\fillinMCmathsoln{\mbox{#1}}}
\newcommand{\fillinMCmathsoln}[1]{\begin{tikzpicture}\draw[black, fill=blue] circle [radius=0.5em];\end{tikzpicture}\ #1}
\newcommand{\fillinMCAll}[1]{\fillinMCAllmath{\mbox{#1}}}
\newcommand{\fillinMCAllmath}[1]{\begin{tikzpicture}\draw (0,0) rectangle (1em,1em);\end{tikzpicture}\ #1}
\newcommand{\fillinMCAllsoln}[1]{\fillinMCAllmathsoln{\mbox{#1}}}
\newcommand{\fillinMCAllmathsoln}[1]{\begin{tikzpicture}\draw[black, fill=blue] (0,0) rectangle (1em,1em);\end{tikzpicture}\ #1}
\newcommand{\fillinblank}[1]{\fillinblankmath{\mbox{#1}}}
\newcommand{\fillinblankmath}[1]{\begingroup\setlength{\fboxsep}{1em}\setlength{\fboxrule}{2pt}\fbox{\LARGE\phantom{$#1$}}\endgroup}
\newcommand{\fillinblanksoln}[1]{\fillinblankmathsoln{\mbox{#1}}}
\newcommand{\fillinblankmathsoln}[1]{\begingroup\setlength{\fboxsep}{1em}\setlength{\fboxrule}{2pt}\fbox{{$#1$}}\endgroup}
\newcommand{\answerbox}[1][3\baselineskip]{%
\noindent\setlength{\fboxrule}{2pt}\framebox[0.7\linewidth]{%
\raisebox{2pt}[0pt][#1]{}%
}\par\medskip%
}
\usepackage{collcell,array}
\newcommand{\shiftdown}[1]{\smash{\raisebox{-.5\normalbaselineskip}{#1}}}
\newcolumntype{C}{>{\collectcell\shiftdown}c<{\endcollectcell}}
\newcommand{\shiftup}[1]{\smash{\raisebox{.5\normalbaselineskip}{#1}}}
\def\sequence#1#2{\dsequence{#1}{1}{#2}}
\def\dsequence#1#2#3{#1_{#2}, \ldots, #1_{#3}}
\newtheorem{fact}{Fact}
\def\imax{$\texttt{S}_{\max}$}
\def\iprevmax{previous\_$\texttt{S}_{\max}$}
\def\deleted{\texttt{covered}}
\def\infinity{$\infty$}
\fancyhead{}\fancyfoot[C]{\tiny This work is licensed under a \href{http://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International License. \ccby}\\ For license purposes, the author is the University of British Columbia.}\pagestyle{fancyplain}
%\fancyhead{}\fancyfoot[C]{\tiny This work is licensed under a \href{http://creativecommons.org/licenses/by-nc/4.0/}{Creative Commons Attribution-NonCommercial 4.0 International License. \ccbync}\\ For license purposes, the author is the University of British Columbia.}\pagestyle{fancyplain}
\usepackage{mdframed}
\BeforeBeginEnvironment{minted}{\begin{mdframed}}
\AfterEndEnvironment{minted}{\end{mdframed}}
\date{}
\title{CPSC 320 2017W2: Assignment 3}
\hypersetup{
  pdfkeywords={},
  pdfsubject={},
  pdfcreator={Emacs 25.2.2 (Org mode 8.2.10)}}
\begin{document}

\maketitle
Please submit this assignment via \href{https://gradescope.com/}{GradeScope} at
\url{https://gradescope.com}. Be sure to identify everyone in your group if
you're making a group submission. (Reminder: groups can include a
maximum of three students; we strongly encourage groups of two.)

Remember that this assignment is based on the collected quizzes and
quiz solutions. You will likely want to refer to those where you need
more details on assignment questions.

Submit by the deadline \textbf{Monday 5 Mar at 10PM}. For credit, your group
must make a \textbf{single} submission via one group member's account,
marking all other group members and the pages associated with each
problem in that submission \textbf{using GradeScope's interface}. Your
group's submission \textbf{must}:
\begin{itemize}
\item Be on time.
\item Consist of a single, clearly legible file uploadable to GradeScope
with clearly indicated solutions to the problems. (PDFs produced via
\LaTeX{}, Word, Google Docs, or other editing software work
well. Scanned documents will likely work well. \textbf{High-quality}
photographs are OK if we agree they're legible.)
\item Include prominent numbering that corresponds to the numbering used
in this assignment handout (not the individual quizzes). Put
these \textbf{in order} starting each problem on a new page, ideally. If
not, \textbf{very clearly} and prominently indicate which problem is
answered where!
\item Include at the start of the document the \textbf{ugrad.cs.ubc.ca e-mail
addresses} of each member of your team. (Please do \textbf{NOT} include
your name on the assignment, however.\footnote{If you don't mind private
  information being stored outside Canada and want an extra
  double-check on your identity, include your student number rather
  than your name.})
\item Include at the start of the document the statement: "All group
members have read and followed the \href{http://blogs.ubc.ca/cpsc3202017W1/syllabus/#conduct}{guidelines for academic conduct
in CPSC 320}. As part of those rules, when collaborating with anyone
outside my group, (1) I and my collaborators took no record but
names (and GradeScope information) away, and (2) after a suitable
break, my group created the assignment I am submitting without help
from anyone other than the course staff." (Go read those
guidelines!)
\item Include at the start of the document your outside-group
collaborators' ugrad.cs.ubc.ca IDs, but \textbf{not} their names. (Be sure
to get those IDs when you collaborate!)
\end{itemize}
\section{Ol' McDonald is safer, he hi he hi ho}
\label{sec-1}
Here is pseudocode for a correct  greedy algorithm that solves the McDonald's 
safety committee problem:

\begin{Verbatim}[commandchars=\\\{\}]  
  define produce_safety_committee(array_of_shifts):
      \imax = none           # where s(none) = f(none) = -\infinity
      \iprevmax = none
      
      list_current = empty list
      all_endpoints = all shift endpoints, sorted in increasing order
      
      for endpoint p in all_endpoints:
          let S be the shift to which p belongs
          if p is a left endpoint:
              set \imax to S if f(S) > f(\imax)
              #
              # Shifts starting before f(\iprevmax) are already covered.
              # so we don't need to worry about covering them.
              #  
              if p < f(\iprevmax):
                  mark S as \deleted
              else:
                  add S to list_current
          
          else if S is not marked as \deleted:
              \iprevmax = \imax
              add \imax to the committee
              mark all shifts from list_current as \deleted
              list_current = empty list
\end{Verbatim}

In this question you will complete a proof of correctness of this algorithm.

\begin{enumerate}
\item Let $\sequence Sk$ be the set  of shifts returned by our algorithm. Assume
without  loss of  generality that  $\sequence Sk$  is sorted  by finishing
times. We will  denote the starting and finishing times  of shift $S_j$ by
$s(S_j)$ and $f(S_j)$ respectively. We  first prove that $\sequence Sk$ is
a valid safety committee, by proving that:

\begin{fact}
If $S$ is  a shift such that  $s(S) \le f(S_j)$, then $S$  overlaps one of
$\sequence Sj$.
\end{fact}

\textbf{Proof}: The proof is by induction on $j$. For $j = 1$, observe that \textbf{FILL
IN THIS PART}.

Suppose now that every shift whose starting point is $\le f(S_j)$ overlaps
one  of  $\sequence  Sj$.  Consider  a  shift  $S$  such  that  $s(S)  \le
   f(S_{j+1})$.
\begin{itemize}
\item If $s(S) \le f(S_j)$ then \textbf{FILL IN THIS PART}.
\item Otherwise, \textbf{FILL IN THIS PART}.  Thus $f(S) \ge f(T)$, which means that
it overlaps with $S_{j+1}$.
\end{itemize}
\textbf{End Proof}

\item Let $\sequence Tm$ be the shifts in a smallest safety committee, sorted by
increasing finishing time. Because our algorithm returns a complete safety
committee (select the correct statement):

\begin{tabular}{l}
\fillinMC{$k < m$}\\
\fillinMC{$k \le m$}\\
\fillinMC{$k = m$}\\
\fillinMC{$k \ge m$}\\
\fillinMC{$k > m$}\\
\end{tabular}

First we establish that

\begin{fact}
For $j =  1, \ldots, m$, $f(S_j) \ge f(T_j)$.
\end{fact}

\textbf{Proof}: This is clear  for $j = 1$, because \textbf{FILL IN THIS PART}

Suppose now  that it is true  for $S_j$ and $T_j$,  and consider $S_{j+1}$
and  $T_{j+1}$. Let  $S$ be  the shift  with the  earliest finishing  time
amongst those that start after $f(S_j)$.

\textbf{FILL IN THIS PART}

Hence $f(T_{j+1}) \le f(S_{j+1})$.

\textbf{End Proof}

In order to prove that (select the correct statement)

\begin{tabular}{l}
\fillinMC{$k < m$}\\
\fillinMC{$k \le m$}\\
\fillinMC{$k \ge m$}\\
\fillinMC{$k > m$}\\
\end{tabular}

we only need  to prove that every shift overlaps  an element of $\sequence
   Sm$. Indeed, if there were a shift  $S$ that does not overlap any of them,
then \textbf{FILL IN THIS PART}

This is  clearly impossible,  since in  that case  none of  $\sequence Tm$
would overlap with $S$.
\end{enumerate}
\section{Recurrences resolve runtimes}
\label{sec-2}
\begin{enumerate}
\item Consider the following sorting algorithm:

\begin{Verbatim}[commandchars=\\\{\}]  
define sloth_sort(A, first, last):
  if A[first] < A[last]:
      exchange A[first] and A[last]

  if (first + 1 < last):
    mid = (last - first + 1) // 3     # integer division
    sloth_sort(A, first + mid, last)  # sort last two-thirds
    sloth_sort(A, first, last - mid)  # sort first two-thirds
    sloth_sort(A, first + mid, last)  # sort last two-thirds again
\end{Verbatim}

a. Write a recurrence relation  that describes the worst-case running time
   of function  \verb~sloth_sort~ in terms of  $n$, where $n =  \texttt{last} -
      \texttt{first} + 1$. You can ignore floors and ceilings.

b. What is the worst case running time of algorithm \verb~sloth_sort~ ?

\item Consider now this much less useful algorithm:

\begin{Verbatim}[commandchars=\\\{\}]  
define not_useful(A, first, last):
  if last < first + 4:
      x = A[last] - A[first]
  else:
    x = not_useful(A, first+1, last-1) - not_useful(A, first+2, last-2)
  return x
\end{Verbatim}

Write a recurrence  relation that describes the worst-case  running time of
function  \verb~not_useful~  in  terms  of  $n$, where  $n  =  \texttt{last}  -
   \texttt{first} + 1$.

\item Finally, consider  the following Python algorithm, which we  may see again
later this term:

\begin{Verbatim}[commandchars=\\\{\}]  
def deterministic_select(A, i):
  if len(A) < 5:
    sorted_A = sorted(A)
    return sorted_A[i-1]
 
  blocks = [ ]
  for b in range((len(A)-1) // 5 + 1):  # That is, the ceiling of len(A)/5
    blocks.append(sorted(A[b*5:(b+1)*5]))
 
  medians = [block[len(block)//2] for block in blocks]
  median_of_medians = deterministic_select(medians, len(medians) // 2 + 1)
 
  lesser  = [v for v in A if v <  median_of_medians]
  greater = [v for v in A if v >  median_of_medians]
  moms    = [v for v in A if v == median_of_medians]
 
  if len(lesser) < i <= len(lesser) + len(moms):
    return median_of_medians
  elif len(lesser) >= i:
    return deterministic_select(lesser, i)
  else:
    return deterministic_select(greater, i - len(lesser) - len(moms))
\end{Verbatim}

Knowing that  \verb~0.3 len(A) <  len(lesser) <  0.7 len(A)~, write  a recurrence
relation  that   describes  the   worst-case  running  time   of  function
\verb~deterministic_select~ in terms of $n$, where $n =$ \verb~len(A)~.
\end{enumerate}
\section{Playing the Blame Game}
\label{sec-3}
A distributed computing system composed of $n$ nodes is responsible
for ensuring its own integrity against attempts to subvert the
network. To accomplish this, nodes in the system can assess each
others' integrity, which they always do in pairs. A node in such a
pair with its integrity intact will correctly assess the node it is
paired with to report either "intact" or "subverted". However, a node
that has been subverted may freely report "intact" or "subverted"
regardless of the other node's status.

The goal is for an outside authority to determine which nodes are
intact and which are subverted. If $n/2$ or more nodes have been
subverted, then the authority cannot necessarily determine which nodes
are intact using any strategy based on this kind of pairing. However,
if more than $n/2$ nodes are intact, it is possible to confidently
determine which are which.

Throughout this problem, we assume that \textbf{more} than $n/2$ of the nodes
are intact. Further, we let one "round" of pairings be any number of
pairings overall as long as each node participates in at most one
pairing. (I.e., a round is a matching that may not be perfect.)

\textbf{ONLY PROBLEM IN THIS PART:} Give an algorithm in clear, simple
pseudocode that runs in $O(\lg n)$ rounds and identifies an intact
node, and briefly but clearly justify the correctness of your
algorithm via clear comments interspersed into the algorithm
explaining what it does and why. (\textbf{HINT:} the quiz problems should
lead you in a useful direction!)
\section{Mixed Nets}
\label{sec-4}
You're working on the routing for an anonymization service called a
"mixnet" in which a network of computers pass messages through a
sequence of handoffs from one source computer to eventually reach
another target computer.

To represent this, you have a weakly-connected, directed acyclic graph
(DAG) $G = (V, E)$ composed of designated source and target vertices
$s, t \in V$ and a set of $p > 0$ simple paths (along which $p$
messages pass) each of which starts at $s$, ends at $t$, and includes
at least one vertex in between $s$ and $t$. The paths are also vertex
disjoint besides $s$ and $t$ (i.e., no two paths share any other
vertex). There are no other vertices or edges in the graph. For
example, here are two different graphs both over the same set of
vertices and both with $p=2$:

\begin{minipage}[c]{0.45\textwidth}
\begin{center}
\includegraphics[width=0.9\textwidth]{figures/2017W2-mixnet-graph1.png}

Graph \#1
\end{center}
\end{minipage}
\begin{minipage}[c]{0.45\textwidth}
\begin{center}
\includegraphics[width=0.9\textwidth]{figures/2017W2-mixnet-graph2.png}

Graph \#2
\end{center}
\end{minipage}

In fact, your mixnet actually involves a single set of computers (with
a designated start and target computer) and two entirely separate sets
of paths among those computers, as with these two sample graphs. At
some point as each message passes along its path among the first set
of paths, it switches to using one of the second set of paths instead
(never switching back).

Specifically, you have an overall graph $G$ made up of two subgraphs
$G_1$ and $G_2$ like those specified in the previous part, where one
subgraph's vertices is an exact copy of the other's, i.e., $G_1 =
(V_1, E_1)$ and $G_2 = (V_2, E_2)$, where a vertex $v_1 \in V_1$ if
and only if $v_2 \in V_2$. ($s_1$ and $t_1$ are the start and target
vertices in $G_1$ and their corresponding vertices $s_2$ and $t_2$ are
the start and target in $G_2$.) Each subgraph is based on its own set
of $p$ paths, but $p$ is the same for both. There is \textbf{also} a directed
edge $(v_1, v_2)$ for each vertex $v_1 \in V_1$ \textbf{except} $s_1$ and
$t_1$ leading \emph{from} $G_1$ \emph{to} $G_2$. (There is no edge from $s_1$ to
$s_2$ or $t_1$ to $t_2$.)

So, for the examples above, the overall graph would include both Graph
1 (with each node subscripted like $a_1$) and Graph 2 (with each node
subscripted like $a_2$) plus 4 more edges: $(a_1, a_2), (b_1, b_2),
(c_1, c_2), (d_1, d_2)$.

Your goal is to find $p$ vertex-disjoint paths in the overall graph
that start at $s_1$ in $G_1$ and end at $t_2$ in $G_2$. Thus, no two
paths visit the exact same vertex (besides $s_1$ and
$t_2$). \textbf{Additionally}, you want to ensure that no two paths even
visit the same vertex in \emph{different} subgraphs (i.e., no path contains
$v_1$ if any other path contains $v_2$). We call two paths visiting
the same vertex but in different graphs a "conflict".

\begin{enumerate}
\item In a valid instance of this problem, there is \textbf{always} a path from
$s_1$ to $t_2$. Sketch the key points in a proof of this
fact. (Recall that $p > 0$.)

\item In a valid instance of this problem, there is \textbf{never} any path from
$t_1$ to $t_2$. Sketch the key points in a proof of this fact.

\item We now add the constraint to a valid instance of this problem that
it must have \textbf{some} valid solution. Finish the following solution
to this problem via reduction to STP (i.e., SMP with ties allowed).

Let the first vertex along each of the paths out of $s_1$ be
$v_{1,1}, v_{1,2}, \ldots, v_{1,p}$. Let the last vertex along each
of the paths into $t_2$ be $u_{2,1}, u_{2,2}, \ldots, u_{2,p}$. Let
the men $m_1, \ldots, m_p$ be the vertices $v_{1,1}, v_{1,2},
   \ldots, v_{1,p}$. Let the women $w_1, \ldots, w_p$ be the
vertices $u_{2,1}, u_{2,2}, \ldots, u_{2,p}$.

Let man $m_i$ prefer women $w_j$ to woman $w_k$ when there is a
path from $s_1$ through $v_{1,i}$ to $u_{2,j}$ which transitions
from $G_1$ to $G_2$ after $a$ steps (and no such path that
transitions after fewer than $a$ steps), there is a path from $s_1$
through $v_{1,i}$ to $u_{2,k}$ which transitions from $G_1$ to
$G_2$ after $b$ steps (and no such path that transitions after
fewer than $b$ steps), and \fillinblank{MMMMM}. (Let
all women $w_k$ for which there is no path from $s_1$ through
$v_{1,i}$ to $u_{2,j}$ be tied at the end of $m_i$'s preference
list.)

Let woman $w_i$ prefer man $m_j$ to man $m_k$ when\ldots{} \textbf{COMPLETE
THIS PART}. (Hint: think about an arrangement that in some sense
mirrors the previous set of preferences.)

Solve the resulting STP instance to produce the perfect matching $S
   = \{(w_i, m_j), (w_k, m_l), \ldots\}$.

For each pair $(w_i, m_j)$, use \fillinblank{MMM}, always exploring
edges that transition from $G_1$ to $G_2$ before edges that remain
in $G_1$, to find (and add to the solution set of paths) the first
path leading from $s_1$ to $v_{1,i}$ and then via some number of
edges to $u_{2,j}$ and then to $t_2$.

\item Prove that since some valid solution exists, then some solution to
the STP instance produced by the reduction exists which contains no
instabilities (weak or strong).

(This doesn't complete the proof that the reduction is correct, but
it will present all the key insights.)
\end{enumerate}

\section{Cover Charge}
\label{sec-5}
In  the "minimum  edge cover"  problem, the  input is  a simple,  undirected,
connected  graph $G  = (V,  E)$ with  $|V|  \geq 2$,  and the  output is  the
smallest possible set $E'$ such that $E' \subseteq E$ and for all vertices $v
\in V$,  there is an  edge $\{v, u\}$  (which is the  same as $\{u,  v\}$) in
$E'$. That  is, every vertex  in the  graph is the  endpoint of some  edge in
$E'$.

A maximum matching in a graph $G = (V, E)$ is the largest set $E''$
such that $E'' \subseteq E$ and there are no three vertices $v_1,
v_2, v_3 \in V$ such that $\{v_1, v_2\}$ and $\{v_1, v_3\}$ are in
$E''$.  That is: $E''$ "marries off" as many vertices as possible
without having any one vertex "married" to two or more vertices.

In this part, assume you have a connected graph $G = (V, E)$ and a maximum
matching for the graph $E''$.

\begin{enumerate}
\item Prove that if $|E''| = \frac{|E| + 1}{2}$, then $E''$ is a minimum
edge cover.
\item Give an efficient, optimal greedy algorithm to find a minimum edge
cover for $G = (V, E)$, given a maximum matching in the graph
$E''$, and briefly but clearly justify the correctness of your
algorithm via clear comments interspersed into the algorithm
explaining what it does and why.
\end{enumerate}
\section{Bonus (OPTIONAL)}
\label{sec-6}
Worth 1 bonus point each:

\begin{enumerate}
\item Returning to the problem "Playing the Blame Game", give a short,
extremely clear, and complete proof that if exactly half of the
nodes are subverted (assuming an even number of nodes), no strategy
whatsoever is guaranteed to find an intact node.
\item Returning to the problem "Cover Change", consider the following
greedy algorithm for this problem:

\begin{verbatim}
Sort the vertices in increasing order by degree
Mark all vertices as uncovered
Let the cover E' be empty.
While there are vertices remaining, pick the next one v:
    If v is uncovered:
        If there is any neighbour u of v that is uncovered:
            Find the uncovered neighbour u of v with lowest degree
            Add {u, v} to E' and mark u and v as covered
        Else:
            Pick an arbitrary edge {u, v}, add it to E', and mark v as covered
\end{verbatim}

Prove that this algorithm is not optimal by providing a \textbf{very}
cleanly drawn and clearly explained counterexample. Your
counterexample may not rely for its correctness (as a
counterexample) on tie-breaking behaviour in the algorithm.
\end{enumerate}
% Emacs 25.2.2 (Org mode 8.2.10)
\end{document}