(************** Content-type: application/mathematica ************** CreatedBy='Mathematica 5.0' Mathematica-Compatible Notebook This notebook can be used with any Mathematica-compatible application, such as Mathematica, MathReader or Publicon. The data for the notebook starts with the line containing stars above. To get the notebook into a Mathematica-compatible application, do one of the following: * Save the data starting with the line of stars above into a file with a name ending in .nb, then open the file inside the application; * Copy the data starting with the line of stars above to the clipboard, then use the Paste menu command inside the application. Data for notebooks contains only printable 7-bit ASCII and can be sent directly in email or through ftp in text mode. Newlines can be CR, LF or CRLF (Unix, Macintosh or MS-DOS style). NOTE: If you modify the data for this notebook not in a Mathematica- compatible application, you must delete the line below containing the word CacheID, otherwise Mathematica-compatible applications may try to use invalid cache data. For more information on notebooks and Mathematica-compatible applications, contact Wolfram Research: web: http://www.wolfram.com email: info@wolfram.com phone: +1-217-398-0700 (U.S.) Notebook reader applications are available free of charge from Wolfram Research. 
*******************************************************************) (*CacheID: 232*) (*NotebookFileLineBreakTest NotebookFileLineBreakTest*) (*NotebookOptionsPosition[ 389986, 11535]*) (*NotebookOutlinePosition[ 403329, 11892]*) (* CellTagsIndexPosition[ 400085, 11805]*) (*WindowFrame->Normal*) Notebook[{ Cell[CellGroupData[{ Cell["Notes", "Section 1"], Cell[CellGroupData[{ Cell["Editorial Changes", "Subsection"], Cell["American spelling changed to British spelling.", "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ref:Farvardin1990", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", StyleBox["IEEE Trans. IT", FontSlant->"Italic"], StyleBox[",", FontWeight->"Bold", FontSlant->"Italic"], "\" changed to \"", StyleBox["IEEE Trans. IT,", FontSlant->"Italic"], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ref:Luttrell1988", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"Luttrell S P,1988\" changed to \"Luttrell S P, 1988\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ref:Luttrell1992", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"Proc. IEE Part F\" changed to \"", StyleBox["Proc. IEE Part F", FontSlant->"Italic"], "\"", "." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change1", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"(measured per message symbol)", StyleBox[":", FontWeight->"Bold"], "\" changed to \"(measured per message symbol):\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change15", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ") is the vector of marginal probabilities in layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], Cell[BoxData[ FormBox[Cell[""], TraditionalForm]]], ", ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ") is the vector of marginal probabilities in layer 0\" changed to \"", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ") is the vector of marginal probabilities in layer 0, ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ") is the vector of marginal probabilities in layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], "\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change2", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ \(TraditionalForm\`\(G\_\(i\_1\)\)(P\^\(0 | 1\), Q\^\(0 | 1\))\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_1\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}], TraditionalForm]]], "\", \"", Cell[BoxData[ \(TraditionalForm\`\(G\_\(i\_2\)\)(P\^\(1 | 2\), Q\^\(1 | 2\))\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_2\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)]}], ")"}], TraditionalForm]]], "\", \"", Cell[BoxData[ \(TraditionalForm\`\(G\_\(i\_L\)\)(P\^\(L - 1 | L\), Q\^\(L - 1 | L\))\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_L\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}], TraditionalForm]]], "\", and \"", Cell[BoxData[ \(TraditionalForm\`G(P\^L, Q\^L)\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]], TextAlignment->AlignmentMarker], "\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change3", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontSlant->"Italic"], \(l - 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontSlant->"Italic"], \(l - 1 | l\)]}], ")"}], TraditionalForm]]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)]}], ")"}], TraditionalForm]]], "\" (4 times), and \"", Cell[BoxData[ \(TraditionalForm\`G(P\^L, Q\^L)\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]], TextAlignment->AlignmentMarker], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change4", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}], TraditionalForm]]], "\" replaced by \"", Cell[BoxData[ FormBox[ RowBox[{\(L\_L\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}], TraditionalForm]]], "\" (2 times)." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["PROBLEM", ButtonData:>"Problem:1", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "The notation \"", Cell[BoxData[ FormBox[ RowBox[{\(L\_L\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}], TraditionalForm]]], "\" is confusing because the symbol \"", Cell[BoxData[ \(TraditionalForm\`L\)]], "\" is used with two different meanings." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:1", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:2", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ \(TraditionalForm\`\(\(\ \)\(1\/\(\(\@\(2 \[Pi]\)\) \ \[Sigma]\)\)\)\)]], "\" changed to \"", Cell[BoxData[ FormBox[ FractionBox["1", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]], TraditionalForm]]], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:2", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:3", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:6", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ \(TraditionalForm\`log\ V\/\(\(\@\(2 \[Pi]\)\) \[Sigma]\)\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"log", FractionBox["V", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]]}], TraditionalForm]]], "\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:4", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:7", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ \(TraditionalForm\`log V\^l\/\(\(\@\(2 \[Pi]\)\) \[Sigma]\^l\)\)]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"log", FractionBox[\(V\^l\), SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma]\^l)\), RowBox[{"dim", " ", SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "l"]}]]]}], TraditionalForm]]], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Error:5", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "This equation has appeared from nowhere. I don't think it's correct. There \ is no simple relationship between the objective function for the full joint \ density (the l.h.s. of the equation) and the objective function for the joint \ density with layer ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], " made hidden (the r.h.s. of the equation). This dooms the claimed tighter \ upper bound to oblivion. The expression for ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], TraditionalForm]]], " reveals that it assumes that the Gaussian model is working directly from \ layer ", Cell[BoxData[ \(TraditionalForm\`k + 2\)]], " back to layer ", Cell[BoxData[ \(TraditionalForm\`k\)]], ", whereas in the original objective function it is a cascade of 2 Gaussian \ models, working from layer ", Cell[BoxData[ \(TraditionalForm\`k + 2\)]], " back to layer ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], ", and then back from layer ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`k\)]], ". This does not produce a Gaussian overall, rather it produces a Gaussian \ mixture." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["PROBLEM", ButtonData:>"Problem:2", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "The discussion on finding a least upper bound on ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], TraditionalForm]]], " is very non-constructive. It would be nice to have an ", StyleBox["explicit", FontSlant->"Italic"], " prescription for a suitable choice of ", Cell[BoxData[ \(TraditionalForm\`Pr(y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\)]], "." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change5", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"will be discussed in ", CounterBox["Section", "Sect:Kohonen"], ".\" changed to \"will be discussed in section ", CounterBox["Section", "Sect:Kohonen"], ".\"" }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change15", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"This is discussed in section ", CounterBox["Section", "Sect:TypesOfDensityModel"], ".\" moved to end of next paragraph." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change6", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"This approach defines an upper bound on ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], "\". changed to \"This approach defines an upper bound on ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], "\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change7", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]], TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]], TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], "\" (4 times), and \"", Cell[BoxData[ FormBox[ RowBox[{"K", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}], TraditionalForm]]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}], TraditionalForm]]], "\" (4 times)." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change8", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"because this type of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " a zero entropy ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}], TraditionalForm]]], "\" changed to \"because this type of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " has a zero entropy ", Cell[BoxData[ FormBox[ RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}], TraditionalForm]]], "\", \"", Cell[BoxData[ FormBox[ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{"H", "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}]}]}]}]}], TraditionalForm]], TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}]}]}]}]}], TraditionalForm]], TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], "\", and \"because this type of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " has a large entropy ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}], 
TraditionalForm]]], "\" changed to \"because this type of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " has a large entropy ", Cell[BoxData[ FormBox[ RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}], TraditionalForm]]], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change9", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{"Pr", "(", RowBox[{\(y\_1\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], RowBox[{"Pr", "(", RowBox[{\(y\_2\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], RowBox[{"\[CenterEllipsis]Pr", "(", RowBox[{\(y\_n\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]}], TraditionalForm]]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_1\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "1"}], ")"}], RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_2\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "2"}], ")"}], RowBox[{"\[CenterEllipsis]Pr", "(", RowBox[{ RowBox[{\(y\_n\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "n"}], ")"}], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]}], TraditionalForm]]], "\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change10", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{ RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], TraditionalForm]]], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["ERROR", ButtonData:>"Ed:Change11", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ \(TraditionalForm\`\(Pr(k)\)\[LongRightArrow]1\)]], "\" changed to \"", Cell[BoxData[ \(TraditionalForm\`\(Pr(k)\)\[LongRightArrow]\(1\/K\)\)]], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change12", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) \(A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(\(i\^\[Prime]\)\_\(l + 1\)\)\%\(l + 1\)\)]], "\" changed to \"", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) \(A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)]], "\"." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change13", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\), "c"], TraditionalForm]]], "\" changed to \"", Cell[BoxData[ FormBox[ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], TraditionalForm]]], "\"." 
}], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ ButtonBox["TYPO", ButtonData:>"Ed:Change14", Active->True, ButtonStyle->"Hyperlink"], TextForm]]], "\"", Cell[BoxData[ FormBox[ RowBox[{ StyleBox[ SubsuperscriptBox["Q", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L"], "L"], FontSlant->"Italic"], "=", RowBox[{ SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], "L"], SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], "L"], "\[CenterEllipsis]"}]}], TraditionalForm]]], "\" changed to \"", Cell[BoxData[ FormBox[ RowBox[{ StyleBox[ SubsuperscriptBox["Q", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L"], "L"], FontSlant->"Italic"], "=", RowBox[{ SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L", "1"], "L"], SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L", "2"], "L"], "\[CenterEllipsis]"}]}], TraditionalForm]]], "\"." }], "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell["A unified theory of density models and auto-encoders", "Title"], Cell["Dr S P Luttrell", "Author"], Cell["\<\ This paper appeared as DERA Technical Report, DERA/CIS/CIS5/TR97303, 31 \ October 1997.\ \>", "Text"], Cell[TextData[{ "This paper also appeared (without the executive summary and the \ recommendations, and with some minor formatting changes) as an ", StyleBox["Isaac Newton Institute for Mathematical Sciences Preprint", FontSlant->"Italic"], ", NI97039-NNM, 31 October 1997." }], "Text"], Cell["\<\ \[Copyright]\tCrown Copyright 1997 \tDefence Evaluation and Research Agency \tFarnborough, Hampshire, GU14 6TD, UK\ \>", "Text", TextAlignment->Left], Cell["\<\ This report introduces an objective function for simultaneously optimising \ the density model and transition matrices of a Markov source. 
The chosen \ objective function seeks to minimise the average total number of bits that is \ required to encode the joint state of the Markov source. This may be applied \ to the problem of optimising the bottom-up (recognition model) and top-down \ (generative model) connections in a multilayer neural network. This approach \ unifies many previous results on the optimisation of multilayer unsupervised \ neural networks.\ \>", "Abstract"], Cell[CellGroupData[{ Cell["Executive summary", "Section 1", CounterIncrements->{}], Cell[TextData[{ StyleBox["MoD Contract Number:", FontWeight->"Bold"], " TG10.04.02.04." }], "Text"], Cell[TextData[{ StyleBox["MoD Customer:", FontWeight->"Bold"], " AD/Sc(ICS)SAG, i.e. Assistant Director, Science (Information and \ Communications Services) Science Advisory Group." }], "Text"], Cell[TextData[{ StyleBox["Research Aim:", FontWeight->"Bold"], " To develop a theory of self-organising networks for processing \ information derived from multiple sources (i.e. data fusion)." }], "Text"], Cell[TextData[{ "Results:", StyleBox[" Many previous results on optimising such networks are unified \ into a single approach.", FontWeight->"Plain"] }], "Text", FontWeight->"Bold"], Cell[TextData[{ "Conclusions:", StyleBox[" This unification further demonstrates the validity of previous \ approaches, and provides a systematic framework for future developments.", FontWeight->"Plain"] }], "Text", FontWeight->"Bold"], Cell[TextData[{ "Customer Benefits:", StyleBox[" The area of application is data fusion, in which sensor \ information is collated in a network of interconnected processing nodes. This \ unified theory shows how to optimise this type of data fusion network in a \ systematic way. 
This will lead to many different types of saving, such as a \ reduced need for expert intervention in the design of data fusion networks.", FontWeight->"Plain"] }], "Text", FontWeight->"Bold"], Cell[TextData[{ "Recommendations:", StyleBox[" The above unified approach includes as special cases many \ apparently different previous results. It is recommended that future work, \ including theory and software, be framed in terms of this new unified \ approach.", FontWeight->"Plain"] }], "Text", FontWeight->"Bold"], Cell[TextData[{ "Keywords:", StyleBox[" Density model, auto-encoder, Markov source, folded Markov chain, \ partitioned mixture distribution, adaptive cluster expansion.", FontWeight->"Plain"] }], "Text", FontWeight->"Bold"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " Introduction" }], "Section", CellTags->"Sect:Introduction"], Cell[TextData[{ "There is currently a great deal of interest in modelling probability \ density functions (PDF). This research is motivated by the fact that the \ joint PDF of a set of variables can be used to deduce any conditional PDF \ which involves these variables alone, which thus allows all inference \ problems in the space of these variables to be addressed quantitatively. The \ only limitation of this approach to solving inference problems is that a ", StyleBox["model", FontSlant->"Italic"], " of the PDF is used, rather than the ", StyleBox["actual", FontSlant->"Italic"], " PDF itself, which can lead to inaccurate inferences. The objective \ function for optimising a PDF model is usually to maximise the log-likelihood \ that it could generate the training set: i.e. maximise ", Cell[BoxData[ \(TraditionalForm\`\[LeftAngleBracket]log(model\ probability)\ \[RightAngleBracket]\_\(training\ set\)\)]], "." 
}], "Text"], Cell[TextData[{ "There is also a great deal of interest in the design of optimal \ autoencoders, for encoding input vectors with the intention of reconstructing \ them with minimum average error. This would allow data to be transmitted \ along a limited bandwidth communication link, for instance. The objective \ function for optimising an autoencoder is usually to minimise the average \ squared reconstruction error over the training set: i.e. minimise ", Cell[BoxData[ \(TraditionalForm\`\[LeftAngleBracket]\[LeftBracketingBar]\ \[LeftBracketingBar]vector - reconstruction\[RightBracketingBar]\ \[RightBracketingBar]\^2\[RightAngleBracket]\_\(training\ set\)\)]], "." }], "Text"], Cell[TextData[{ "These two optimisation criteria are different from each other. For \ instance, autoencoding requires that information be retained about the input \ ", StyleBox["itself", FontSlant->"Italic"], ", whereas density modelling requires only that information be retained \ about the ", StyleBox["PDF", FontSlant->"Italic"], " of the input; these are different requirements. However, there is a way \ of expressing the autoencoder objective function which turns out to be \ equivalent to a density modelling objective function, although it turns out \ that the corresponding density model is not of the type that was referred to \ above." }], "Text"], Cell["\<\ The purpose of this report is to derive the theory that relates density \ modelling and autoencoding, and to show how many of the key results obtained \ by the author (during the past decade) may be derived from this theory. 
This \ unified theory may then be used to ensure that future results are backwardly \ compatible with past results.\ \>", "Text"], Cell[TextData[{ "In ", ButtonBox["section", ButtonData:>"Sect:CodingTheory", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:CodingTheory"], " the standard Shannon theory of information is summarised, and its \ application to coding various types of source is derived; in particular, the \ Markov source is discussed, because it is central to the topic of this \ report. In ", ButtonBox["section", ButtonData:>"Sect:ApplicationNN", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:ApplicationNN"], " the application of Markov source coding to unsupervised neural networks \ is discussed in detail, and the connection with folded Markov chains (FMC) \ [", ButtonBox["17", ButtonData:>"Ref:Luttrell1994a", ButtonStyle->"Hyperlink"], "] is derived. In ", ButtonBox["section", ButtonData:>"Sect:TypesOfDensityModel", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:TypesOfDensityModel"], " density modelling of Markov sources is compared with standard density \ modelling using a Helmholtz machine [", ButtonBox["2", ButtonData:>"Ref:DayanHintonNealZemel1995", ButtonStyle->"Hyperlink"], ", ", ButtonBox["3", ButtonData:>"Ref:DayanHinton1996", ButtonStyle->"Hyperlink"], "], which demonstrates a close connection and important differences between \ these two problems. 
The last three sections deal with particular applications \ of density modelling of Markov sources: ", ButtonBox["section", ButtonData:>"Sect:Kohonen", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Kohonen"], " deals with the Kohonen network [", ButtonBox["7", ButtonData:>"Ref:Kohonen1989", ButtonStyle->"Hyperlink"], "], ", ButtonBox["section", ButtonData:>"Sect:PMD", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:PMD"], " deals with partitioned mixture distributions (PMD) [", ButtonBox["18", ButtonData:>"Ref:Luttrell1994b", ButtonStyle->"Hyperlink"], ", ", ButtonBox["20", ButtonData:>"Ref:Luttrell1994d", ButtonStyle->"Hyperlink"], "], and ", ButtonBox["section", ButtonData:>"Sect:ACE", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:ACE"], " deals with the adaptive cluster expansion (ACE) [", ButtonBox["14", ButtonData:>"Ref:Luttrell1991a", ButtonStyle->"Hyperlink"], ", ", ButtonBox["19", ButtonData:>"Ref:Luttrell1994c", ButtonStyle->"Hyperlink"], ", ", ButtonBox["21", ButtonData:>"Ref:Luttrell1996", ButtonStyle->"Hyperlink"], "]." }], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " Coding Theory" }], "Section", CellTags->"Sect:CodingTheory"], Cell[TextData[{ "\n\t\n", ButtonBox["Section", ButtonData:>"Sect:InformationTheory", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:InformationTheory"], ".", CounterBox["Subsection", "Sect:InformationTheory"], " outlines the basic ideas of information theory, and ", ButtonBox["section", ButtonData:>"Sect:SourceCoding", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:SourceCoding"], ".", CounterBox["Subsection", "Sect:SourceCoding"], " describes in detail the process of using a model to code a source. 
In ", ButtonBox["section", ButtonData:>"Sect:MarkovSourceCoding", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:MarkovSourceCoding"], ".", CounterBox["Subsection", "Sect:MarkovSourceCoding"], " this is extended to the case of a Markov source, and in ", ButtonBox["section", ButtonData:>"Sect:DynamicMarkovSourceCoding", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:DynamicMarkovSourceCoding"], ".", CounterBox["Subsection", "Sect:DynamicMarkovSourceCoding"], " this is further extended (in outline only) to the case of a dynamical \ Markov source.\n\t\nSee [", ButtonBox["25", ButtonData:>"Ref:Shannon1948", ButtonStyle->"Hyperlink"], "] for a lucid introduction to information theory, and see [", ButtonBox["23", ButtonData:>"Ref:Rissanen1978", ButtonStyle->"Hyperlink"], ", ", ButtonBox["24", ButtonData:>"Ref:Rissanan1989", ButtonStyle->"Hyperlink"], "] for a discussion of the number of bits required to encode a source using \ a model." }], "Text"], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " Information Theory" }], "Subsection", CellTags->"Sect:InformationTheory"], Cell[TextData[{ "A source of symbols (drawn from an alphabet of ", Cell[BoxData[ \(TraditionalForm\`M\)]], " distinct symbols) is modelled by a vector of probabilities ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]] }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "\[Congruent]", \((P\_1, P\_2, \[CenterEllipsis], P\_M)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "which describes the relative frequency with which each symbol is drawn \ independently from the source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ". 
A trivial example is an unbiassed die, which has ", Cell[BoxData[ \(TraditionalForm\`M = 6\)]], " and ", Cell[BoxData[ \(TraditionalForm\`P\_i = 1\/6\)]], " for ", Cell[BoxData[ \(TraditionalForm\`i = 1, 2, \[CenterEllipsis], 6\)]], "." }], "Text"], Cell[TextData[{ "The ordered sequence of symbols drawn independently from a source may be \ partitioned into subsequences of ", Cell[BoxData[ \(TraditionalForm\`N\)]], " symbols, and each such subsequence will be called a message. If ", Cell[BoxData[ \(TraditionalForm\`N\)]], " is very large, then a message is ", StyleBox["likely", FontSlant->"Italic"], " if the relative frequency of occurrence of its symbols approximates ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", or ", StyleBox["unlikely", FontSlant->"Italic"], " if not. As ", Cell[BoxData[ \(TraditionalForm\`N\[LongRightArrow]\[Infinity]\)]], " the set of messages that is likely is very sharply defined, so that there \ is a set of likely messages all with equal probability of occurring (because \ each likely message has the same relative frequency of occurrence of each \ symbol), and a set of unlikely messages (i.e. all the messages that are not \ likely messages) that have essentially zero probability of occurring. It is \ this separation of messages into a likely set (all with equal probability) \ and an unlikely set (all with zero probability) that underlies information \ theory." }], "Text"], Cell[TextData[{ "A likely message from ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " will be called a likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message. 
The number of times ", Cell[BoxData[ \(TraditionalForm\`n\_i\)]], " that each symbol ", Cell[BoxData[ \(TraditionalForm\`i\)]], " occurs in a ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message of length ", Cell[BoxData[ \(TraditionalForm\`N\)]], " is ", Cell[BoxData[ \(TraditionalForm\`n\_i = N\ P\_i\)]], ", where ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\_\(i = 1\)\%M P\_i = 1\)]], " guarantees that the normalisation condition ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\_\(i = 1\)\%M n\_i = N\)]], " is satisfied. The logarithm of the number of different likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages is given by (using Stirling's approximation ", Cell[BoxData[ \(TraditionalForm\`log\ \(x!\) \[TildeTilde] x\ log\ x - x\)]], " when ", Cell[BoxData[ \(TraditionalForm\`x\)]], " is large)" }], "Text"], Cell[BoxData[{ \(TraditionalForm\`log(\(N!\)\/\(\(n\_1!\) \(n\_2!\) \[CenterEllipsis] \( \ n\_M!\)\)) \[TildeTilde] N log N - N - \[Sum]\+\(i = 1\)\%M\( n\_i\) log n\_i + \[Sum]\+\(i = 1\)\%M n\_i\), "\n", \(TraditionalForm\`\(\(=\)\(\(N log N - \[Sum]\+\(i = 1\)\%M\( n\_i\) log n\_i\)\[IndentingNewLine]\(\(=\)\(\(N log N - \[Sum]\+\(i = 1\)\%M N \( P\_i\) \(log( N P\_i)\)\)\[IndentingNewLine]\(\(=\)\(\(N log N - \[Sum]\+\(i = 1\)\%M N \( P\_i\) log N - \[Sum]\+\(i = 1\)\%M N \( P\_i\) log P\_i\)\[IndentingNewLine]\(\(=\)\(\(-N\) \(\[Sum]\+\(i \ = 1\)\%M\( P\_i\) log P\_i\)\)\)\)\)\)\)\)\)\)}], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:LikelyMessage"], Cell[TextData[{ "Now define the entropy ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " of source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " as the logarithm of the number of different likely ", Cell[BoxData[ FormBox[ StyleBox["P", 
FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages (measured per message symbol):" }], "Text", CellTags->"Ed:Change1"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], "\[Congruent]", \(-\(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ P\_i\)\), "\[GreaterEqual]", "0"}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "Thus ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " is the number of bits per symbol (on average) that are required to encode \ the source (assuming a perfect encoder), because the only messages that the \ source has a finite probability of producing are the likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages that are enumerated in ", ButtonBox["equation", ButtonData:>"Eq:LikelyMessage", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:LikelyMessage"], ". The base of the logarithm determines the base in which the \"bits\" are \ measured. Thus base 2 logarithms correspond to \"bits\" that each have 2 \ states (i.e. binary digits), whereas base 10 logarithms correspond to \ \"bits\" that each have 10 states (i.e. decimal digits). A common error is to \ assume that the base of the logarithm somehow implies a corresponding \ discretisation of ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], ". The logarithm is used only as a convention to control the dynamic range \ of the quantity that is called ", StyleBox["information", FontSlant->"Italic"], "; its effect can be removed by exponentiation using the same base as was \ used for the logarithm in the first place." 
}], "Text"], Cell[TextData[{ "It is usually very difficult to encode the source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " using ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " bits per symbol on average. This is because although the boundary between \ the set of likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages and the set of unlikely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages is sharply defined in principle, in practice it is very hard to \ model mathematically. If this boundary is not precisely defined, then it is \ impossible to compute the value of ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " accurately. In order to ensure that ", StyleBox["all", FontSlant->"Italic"], " of the likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages are accounted for, it is necessary for the mathematical model of \ the boundary to lie ", StyleBox["outside", FontSlant->"Italic"], " the true boundary, which thus overestimates the value of ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], ". This demonstrates that ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " is in fact a lower bound on the true number of bits per symbol that must \ be used to encode the source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "." 
}], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " Source Coding" }], "Subsection", CellTags->"Sect:SourceCoding"], Cell[TextData[{ "The mathematical model of the boundary between the set of likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages and the set of unlikely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages may be derived from a vector of probabilities ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", whose ", Cell[BoxData[ \(TraditionalForm\`M\)]], " elements model the probability of each symbol drawn from an alphabet of \ ", Cell[BoxData[ \(TraditionalForm\`M\)]], " distinct symbols. If ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "=", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"]}], TraditionalForm]]], " then the boundary is modelled perfectly, and hence in principle the lower \ bound ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " on the number of bits per symbol may be attained, although even this is \ difficult to realise constructively in practice. In practical situations ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "\[NotEqual]", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"]}], TraditionalForm]]], " is invariably the case, so the problem of coding a source with an \ inaccurate model cannot be avoided." 
}], "Text"], Cell[TextData[{ "Constructive coding of a source using a model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " requires that ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " be used to generate messages (", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages) which can then be compared with ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages. Since the only ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages that occur are the likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages (these all occur with equal probability because each likely \ message has the same relative frequency of occurrence of each symbol) all we \ need to do in order to calculate the number of bits per symbol that is \ required when using ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " to encode ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is to calculate the probability that a ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message is one of the likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages (the probability that ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " can generate each of the likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages is the same), which is sufficient information to deduce the \ total number of bits per symbol that is required." 
}], "Text"], Cell[TextData[{ "The log-probability ", Cell[BoxData[ FormBox[ RowBox[{\(\[CapitalPi]\_N\), "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " that a ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message is a likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message is" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[CapitalPi]\_N\), "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", \(\(log(\(\(N!\)\/\(\(n\_1!\) \(n\_2!\) \ \[CenterEllipsis]\ \(n\_M!\)\)\) \(Q\_1\%\(n\_1\)\) \(Q\_2\%\(n\_2\)\) \ \[CenterEllipsis]\ Q\_M\%\(n\_M\))\)\[IndentingNewLine]\(\(\[TildeTilde]\)\(\ \[AlignmentMarker]\)\(\(-N\) \(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ P\_i\) + N \(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ Q\_i\)\)\)\[IndentingNewLine]\(\(=\)\(\ \[AlignmentMarker]\)\(\(-N\) \(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ P\_i\/Q\_i\)\)\)\[IndentingNewLine]\(\(\[LessEqual]\)\(\ \[AlignmentMarker]\)\(0\)\)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "which is negative because the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " generates likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages with less than unit probability." 
}], "Text"], Cell[TextData[{ "The model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " must be used to generate enough ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages to ensure that all of the likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages are reproduced. This requires the basic ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " bits per symbol that would be required if ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "=", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"]}], TraditionalForm]]], ", plus some extra bits to compensate for the less than ", Cell[BoxData[ \(TraditionalForm\`100 %\)]], " efficiency (because ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "\[NotEqual]", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"]}], TraditionalForm]]], ") with which ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " generates likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages. 
The number of extra bits per symbol is the relative entropy ", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]] }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", \(\(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ P\_i\/Q\_i\)\[IndentingNewLine]\(\(\[GreaterEqual]\)\(\ \[AlignmentMarker]\)\(0\)\)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "which is ", Cell[BoxData[ FormBox[ RowBox[{"-", FractionBox[ RowBox[{\(\[CapitalPi]\_N\), "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "N"]}], TraditionalForm]]], ", or minus the log-probability that a ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message is a likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message. Thus ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is used to generate exactly the number of extra ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-messages that is required to compensate for the fact that the probability \ that each ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message is a likely ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message is less than unity (i.e. 
", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[CapitalPi]\_N\), "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "\[LessEqual]", "0"}], TraditionalForm]]], ")." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " (i.e. relative entropy) is the amount by which the number of bits per \ symbol exceeds the lower bound ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " (i.e. source entropy). Note that both ", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " and ", Cell[BoxData[ FormBox[ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " are quantities that are realisable only in principle (i.e. they are lower \ bounds on the number of bits that is required in practice), because of the \ well-known practical difficulties associated with using a model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " to design an encoder. 
Define the total number of bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], "+", RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], " as ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]] }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], "+", RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", \(\(\(-\(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ P\_i\)\) + \[Sum]\+\(i = 1\)\%M\( P\_i\) log\ P\_i\/Q\_i\)\[IndentingNewLine]\(\(=\)\(\ \[AlignmentMarker]\)\(-\(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ Q\_i\)\)\)\[IndentingNewLine]\(\(\[GreaterEqual]\)\(\ \[AlignmentMarker]\)\(0\)\)\)}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "This expression for ", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " provides a means of optimising the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ". 
Ideally the number of extra bits that is required to compensate for the \ model's inefficiency should be as small as possible, which requires that the \ optimum model ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], StyleBox["opt", FontSlant->"Italic"]], TraditionalForm]]], " should minimise the objective function ", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " with respect to ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", thus" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], StyleBox["opt", FontSlant->"Italic"]], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{GridBox[{ {\(arg\ min\)}, { StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]} }], RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{GridBox[{ {\(arg\ max\)}, { StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]} }], \(\[Sum]\+\(i = 1\)\%M\( P\_i\) log\ Q\_i\)}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{GridBox[{ {\(arg\ max\)}, { StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]} }], \(log(\(Q\_1\%\(n\_1\)\) \(Q\_2\%\(n\_2\)\) \ \[CenterEllipsis]\ Q\_M\%\(n\_M\))\)}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "where ", Cell[BoxData[ \(TraditionalForm\`log(\(Q\_1\%\(n\_1\)\) \(Q\_2\%\(n\_2\)\) \ \[CenterEllipsis]\ Q\_M\%\(n\_M\))\)]], " is the log-probability that a message of length ", Cell[BoxData[ \(TraditionalForm\`N\)]], " generated by ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is a likely 
", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], "-message. This criterion for optimising a model does not include the \ number of bits that is required to specify the model ", StyleBox["itself", FontSlant->"Italic"], ", such as is used in the minimum description length approach ", "[", ButtonBox["23", ButtonData:>"Ref:Rissanen1978", ButtonStyle->"Hyperlink"], ", ", ButtonBox["24", ButtonData:>"Ref:Rissanan1989", ButtonStyle->"Hyperlink"], "]", "." }], "Text"], Cell[TextData[{ Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " is frequently used as an objective function in density modelling, where \ the optimum model ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], StyleBox["opt", FontSlant->"Italic"]], TraditionalForm]]], " is chosen as the one that maximises the log-probability of generating the \ observed data ", Cell[BoxData[ \(TraditionalForm\`\((n\_1, n\_2, \[CenterEllipsis], n\_M)\)\)]], ". Since ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], StyleBox["opt", FontSlant->"Italic"]], TraditionalForm]]], " must, in some sense, be close to ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", this affords a practical way of ensuring that the optimum model \ probabilities ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], StyleBox["opt", FontSlant->"Italic"]], TraditionalForm]]], " are similar to the source probabilities ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", which is the goal of density modelling." 
}], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " Markov Source Coding" }], "Subsection", CellTags->"Sect:MarkovSourceCoding"], Cell[TextData[{ "The above scheme for using a model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " to code symbols derived from a source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " may be extended to the case where the source and the model are ", Cell[BoxData[ \(TraditionalForm\`L\)]], "-layer Markov chains. Thus split up each of ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " and ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " into separate pieces associated with each layer, or pair of adjacent \ layers." }], "Text"], Cell[BoxData[{ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}], TraditionalForm], "\n", FormBox[ RowBox[{ 
StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}], TraditionalForm]}], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "where ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(k | l\)], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(k | l\)], TraditionalForm]]], ") is the matrix of transition probabilities from layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`k\)]], " of the Markov chain of the source (model), ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ") is the vector of marginal probabilities in layer 0, ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " (", Cell[BoxData[ FormBox[ 
SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ") is the vector of marginal probabilities in layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], ". These two ways of decomposing ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " (and ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ") are equivalent, because a forward pass through a Markov chain may be \ converted into a backward pass through a different Markov chain, whose \ transition probabilities are uniquely determined by applying Bayes' theorem \ to the original Markov chain." }], "Text", CellTags->"Ed:Change15"], Cell[TextData[{ "The number of extra bits per symbol that is required to encode the source \ ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " with the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is given by" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\[CenterEllipsis] \ \(\[Sum]\+\(i\_L = 1\)\%\(M\_L\)\(P\_\(i\_0, i\_1\)\%\(0 | 1\)\) \(P\_\(i\_1, i\_2\)\%\(1 | 2\)\) \[CenterEllipsis]\ \(P\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\) \(P\_\(i\_L\)\%L\) \(log(\(\(P\_\(i\_0, \ i\_1\)\%\(0 | 1\)\) \(P\_\(i\_1, i\_2\)\%\(1 | 2\)\) \[CenterEllipsis]\ \(P\_\ \(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\) P\_\(i\_L\)\%L\)\/\(\(Q\_\(i\_0, i\_1\ \)\%\(0 | 1\)\) \(Q\_\(i\_1, i\_2\)\%\(1 | 2\)\) \[CenterEllipsis]\ \ \(Q\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\) Q\_\(i\_L\)\%L\))\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{\(\(\[Sum]\+\(i\_1 = 1\)\%\(M\_1\)\(P\_\(i\_1\)\%1\) \(\ \[Sum]\+\(i\_0 = 
1\)\%\(M\_0\)\(P\_\(i\_0, i\_1\)\%\(0 | 1\)\) log P\_\(i\_0, i\_1\)\%\(0 | 1\)\/Q\_\(i\_0, i\_1\)\%\(0 \ | 1\)\) + \[Sum]\+\(i\_2 = 1\)\%\(M\_2\)P\_\(i\_2\)\%2\ \[AlignmentMarker]\[AlignmentMarker]\(\[Sum]\+\(i\_1 = \ 1\)\%\(M\_1\)\(P\_\(i\_1, i\_2\)\%\(1 | 2\)\) log P\_\(i\_1, i\_2\)\%\(1 | 2\)\/Q\_\(i\_1, i\_2\)\%\(1 \ | 2\)\)\)\n\[AlignmentMarker]\(\(+\[CenterEllipsis]\) + \[Sum]\+\(i\_L = \ 1\)\%\(M\_L\)\(P\_\(i\_L\)\%L\) \(\[Sum]\+\(i\_\(L - 1\) = 1\)\%\(M\_\(L - \ 1\)\)\(P\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\) log P\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | \ L\)\/Q\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\)\)\n\[AlignmentMarker]\(+\(\ \[Sum]\+\(i\_L = 1\)\%\(M\_L\)\(P\_\(i\_L\)\%L\) log P\_\(i\_L\)\%L\/Q\_\(i\_L\)\%L\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{\(\[Sum]\+\(i\_1 = 1\)\%\(M\_1\)\), RowBox[{\(P\_\(i\_1\)\%1\), RowBox[{\(G\_\(i\_1\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], "+", "\[AlignmentMarker]", RowBox[{\(\[Sum]\+\(i\_2 = 1\)\%\(M\_2\)\), RowBox[{\(P\_\(i\_2\)\%2\), RowBox[{\(G\_\(i\_2\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)]}], ")"}]}]}], "\n", "\[AlignmentMarker]", "+", "\[CenterEllipsis]", "+", RowBox[{\(\[Sum]\+\(i\_L = 1\)\%\(M\_L\)\), RowBox[{\(P\_\(i\_L\)\%L\), RowBox[{\(G\_\(i\_L\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}]}]}], "\n", "\[AlignmentMarker]", "+", RowBox[{"G", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], 
")"}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Ed:Change2"], Cell[TextData[{ "where the suffix ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], " that appears on the ", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)]}], ")"}], TraditionalForm]]], " indicates that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is fixed during the evaluation of ", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)]}], ")"}], TraditionalForm]]], " (i.e. it is the relative entropy of layer ", Cell[BoxData[ \(TraditionalForm\`l - 1\)]], ", given that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is known). 
This may be interpreted as the number of extra bits per symbol \ ", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " that is required to encode the ", Cell[BoxData[ FormBox[ SuperscriptBox["L", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " layer of the Markov chain, plus the sum over layers ", Cell[BoxData[ \(TraditionalForm\`l\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`0 \[LessEqual] l \[LessEqual] L - 1\)]], ") of the number of extra bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)]}], ")"}], TraditionalForm]]], " that is required to make the transition backwards from layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`l - 1\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`1 \[LessEqual] l \[LessEqual] L\)]], ") of the Markov chain (averaged over all states of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " using ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\_\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(G\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l - 1 | l\)]}], ")"}]}]}], TraditionalForm]]], "). 
Using Bayes' theorem, this expression for ", Cell[BoxData[ FormBox[ RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " can be manipulated into a form which starts at layer ", Cell[BoxData[ \(TraditionalForm\`0\)]], ", and then makes forwards transitions from layer to layer, to eventually \ arrive at layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], ". However, in this report, only the backwards pass through the Markov \ chain will be used." }], "Text", CellTags->"Ed:Change3"], Cell[TextData[{ "The total number of bits per symbol that is required to code the source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " with the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " (i.e. 
", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"H", "(", StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ")"}], "+", RowBox[{"G", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], "), which is given by" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]\[AlignmentMarker]", \(\(-\(\[Sum]\+\(i\_1 = \ 1\)\%\(M\_1\)\(P\_\(i\_1\)\%1\) \(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\(P\_\(i\_0, i\_1\)\%\(0 | 1\)\) log\ Q\_\(i\_0, i\_1\)\%\(0 | 1\)\)\)\) - \ \[Sum]\+\(i\_2 = 1\)\%\(M\_2\)P\_\(i\_2\)\%2\ \[AlignmentMarker]\[AlignmentMarker]\(\[Sum]\+\(i\_1 = \ 1\)\%\(M\_1\)\(P\_\(i\_1, i\_2\)\%\(1 | 2\)\) log\ Q\_\(i\_1, i\_2\)\%\(1 | 2\)\)\)}], "\n", "\[AlignmentMarker]", "-", "\[CenterEllipsis]", "-", \(\[Sum]\+\(i\_L = 1\)\%\(M\_L\)\(P\_\(i\_L\)\%L\) \ \(\[Sum]\+\(i\_\(L - 1\) = 1\)\%\(M\_\(L - 1\)\)\(P\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\) log\ Q\_\(i\_\(L - 1\), i\_L\)\%\(L - 1 | L\)\)\)}], "\n", "\[AlignmentMarker]", "-", \(\[Sum]\+\(i\_L = 1\)\%\(M\_L\)\(P\_\(i\_L\)\%L\) log\ Q\_\(i\_L\)\%L\)}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(\(-\(\[Sum]\+\(l = 0\)\%\(L - 1\)\(\[Sum]\+\(i\_\(l + 1\) = \ 1\)\%\(M\_\(l + 1\)\)\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\) \(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) log\ Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)\)\)\) \ - \[Sum]\+\(i\_L = 1\)\%\(M\_L\)\(P\_\(i\_L\)\%L\) log\ Q\_\(i\_L\)\%L\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\), RowBox[{\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", 
FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:NegativeLogLikelihood"], Cell[TextData[{ "where the suffix ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " appears on the ", Cell[BoxData[ FormBox[ RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}], TraditionalForm]]], " because the state of layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " is fixed during the evaluation of ", Cell[BoxData[ FormBox[ RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}], TraditionalForm]]], ". 
This may be interpreted as the total number of bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " that is required to encode the ", Cell[BoxData[ FormBox[ SuperscriptBox["L", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " layer of the Markov chain, plus the sum over layers ", Cell[BoxData[ \(TraditionalForm\`l\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`0 \[LessEqual] l \[LessEqual] L - 1\)]], ") of the total number of bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}], TraditionalForm]]], " that is required to make the transition backwards from layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`l - 1\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`1 \[LessEqual] l \[LessEqual] L\)]], ") of the Markov chain (averaged over all states of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " using ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\), RowBox[{\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}], TraditionalForm]]], ")." }], "Text"], Cell[TextData[{ "This result has a very natural interpretation. 
Both the source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " and the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " are Markov chains, and corresponding parts of the model are matched up \ with corresponding parts of the source. First of all, the number of bits that \ is required to encode the ", Cell[BoxData[ FormBox[ SuperscriptBox["L", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " layer of the source is ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], ". Having done that, the number of bits that is required to encode the ", Cell[BoxData[ FormBox[ RowBox[{"L", "-", SuperscriptBox["1", StyleBox["th", FontSlant->"Italic"]]}], TraditionalForm]]], " layer of the source, given that the state of the ", Cell[BoxData[ FormBox[ SuperscriptBox["L", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " layer is already known, is ", Cell[BoxData[ FormBox[ RowBox[{\(L\_L\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}], TraditionalForm]]], ", which must then be averaged over the alternative possible states of the \ ", Cell[BoxData[ FormBox[ SuperscriptBox["L", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " layer to yield ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_L = 1\)\%\(M\_L\)\), RowBox[{\(P\_\(i\_L\)\%L\), RowBox[{\(L\_L\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)]}], ")"}]}]}], TraditionalForm]]], ". 
This process is then repeated to encode the ", Cell[BoxData[ FormBox[ RowBox[{"L", "-", SuperscriptBox["2", StyleBox["th", FontSlant->"Italic"]]}], TraditionalForm]]], " layer of the source, given that the state of the ", Cell[BoxData[ FormBox[ RowBox[{"L", "-", SuperscriptBox["1", StyleBox["th", FontSlant->"Italic"]]}], TraditionalForm]]], " layer is already known, and so on back to layer ", Cell[BoxData[ \(TraditionalForm\`0\)]], ". This yields precisely the expression for ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " given above." }], "Text", CellTags->{"Ed:Change4", "Problem:1"}] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " Dynamical Markov Source Encoding" }], "Subsection", CellTags->"Sect:DynamicMarkovSourceCoding"], Cell[TextData[{ "The above theory of coding Markov sources, in which both the source ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " and the corresponding model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " are multilayer Markov chains, may be extended to the case where each \ layer has a memory of its own previous state; thus the static Markov source \ becomes a dynamical Markov source (usually with a discretised time index). In \ this case the source and the model are doubly Markov, where, in the simplest \ case, there is one Markov chain linking together different layers at the same \ time slice (as above), and there is another Markov chain linking together the \ same layer at different time slices. There are many possible variations on \ this theme." 
}], "Text"], Cell[TextData[{ "This dynamical source differs from the static source used previously only \ insofar as ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " is now modulated by a prior probability on the states of layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " (in the simplest case), so that the Markov source ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "(", "t", ")"}], TraditionalForm]]], " at time slice ", Cell[BoxData[ \(TraditionalForm\`t\)]], " has a statistical structure that depends directly on ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "(", \(t - 1\), ")"}], TraditionalForm]]], ", and thus indirectly on all ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "(", "\[Tau]", ")"}], TraditionalForm]]], " for ", Cell[BoxData[ \(TraditionalForm\`\[Tau] < t - 1\)]], ". The simplest model ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "(", "t", ")"}], TraditionalForm]]], " that can be used for this dynamical source is the same as was used in the \ case of a static source, but this will not be as efficient a model as one \ which modelled the dynamics of the source. Apart from the introduction of a \ state-dependent prior probability, the entire theory of dynamical sources and \ models is the same as the static theory." 
}], "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " ", "Application To Unsupervised Neural Networks" }], "Section", CellTags->"Sect:ApplicationNN"], Cell[TextData[{ "In ", ButtonBox["section", ButtonData:>"Sect:SourceModelNN", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:SourceModelNN"], ".", CounterBox["Subsection", "Sect:SourceModelNN"], " the theory of Markov source coding (that was presented in ", ButtonBox["section", ButtonData:>"Sect:MarkovSourceCoding", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:MarkovSourceCoding"], ".", CounterBox["Subsection", "Sect:MarkovSourceCoding"], ") is applied to a multilayer neural network. In ", ButtonBox["section", ButtonData:>"Sect:2LayerFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:2LayerFMC"], ".", CounterBox["Subsection", "Sect:2LayerFMC"], " this approach is applied to a 2-layer neural network to obtain a folded \ Markov chain (FMC) network [", ButtonBox["17", ButtonData:>"Ref:Luttrell1994a", ButtonStyle->"Hyperlink"], "], which is generalised to a multilayer neural network in ", ButtonBox["section", ButtonData:>"Sect:CoupledFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:CoupledFMC"], ".", CounterBox["Subsection", "Sect:CoupledFMC"], " to obtain a network of coupled 2-layer FMCs. In ", ButtonBox["section", ButtonData:>"Sect:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Leakage"], ".", CounterBox["Subsection", "Sect:Leakage"], " a crude \"mean field\" approach to optimising this type of multilayer \ network is presented, where the concept of probability leakage is introduced. \ Finally, the problem of coding the output layer of a multilayer network is \ addressed in ", ButtonBox["section", ButtonData:>"Sect:CodeOutputLayer", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:CodeOutputLayer"], ".", CounterBox["Subsection", "Sect:CodeOutputLayer"], "." 
}], "Text"], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Source Model Of Layered Network" }], "Subsection", CellTags->"Sect:SourceModelNN"], Cell[TextData[{ "In this section the optimisation of the joint PDF of the states of all of \ the layers of an ", Cell[BoxData[ \(TraditionalForm\`\((L + 1)\)\)]], "-layer unsupervised neural network will be considered. It turns out that \ this leads to new insight into the optimisation of a multilayer encoder \ network." }], "Text"], Cell[TextData[{ "The Markov chain source ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "=", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}]}], TraditionalForm]]], " (or, equivalently, ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "=", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], TraditionalForm]]], ") may be used to describe the true behaviour (i.e. 
not merely a model) of \ a layered neural network as follows" }], "Text"], Cell[BoxData[{ \(TraditionalForm\`P\_\(i\_0\)\%0 = \[AlignmentMarker]true\ probability\ \ that\ layer\ 0\ has\ state\ i\_0\), "\n", \(TraditionalForm\`P\_\(i\_L\)\%L = \[AlignmentMarker]true\ probability\ \ that\ layer\ L\ has\ state\ i\_L\), "\n", \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\) = \ \[AlignmentMarker]\[AlignmentMarker]true\ probability\ that\ layer\ l + 1\ has\ state\ i\_\(l + 1\)\), "\n", \(TraditionalForm\`\[AlignmentMarker]given\ that\ layer\ l\ has\ state\ i\ \_l\), "\n", \(TraditionalForm\`P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\) = \ \[AlignmentMarker]\[AlignmentMarker]true\ probability\ that\ layer\ l\ has\ \ state\ i\_l\), "\n", \(TraditionalForm\`\[AlignmentMarker]given\ that\ layer\ l + 1\ has\ state\ i\_\(l + 1\)\)}], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "Thus ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is an external source, and ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], TraditionalForm]]], " is an internal source, where external/internal describes whether the \ source is outside/inside the layered network, respectively. ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " is not part of the source itself (i.e. the external source), rather it is \ the way in which layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " of the neural network is connected to layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ". 
There is an analogous interpretation of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " and the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], "." }], "Text"], Cell[TextData[{ "The Markov chain model ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "=", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}]}], TraditionalForm]]], " (or, equivalently, ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "=", "\[AlignmentMarker]", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], TraditionalForm]]], ") may then be used as a model (i.e. 
not actually the true behaviour) of a \ layered neural network as follows" }], "Text"], Cell[BoxData[{ \(TraditionalForm\`Q\_\(i\_0\)\%0 = \[AlignmentMarker]model\ probability\ \ that\ layer\ 0\ has\ state\ i\_0\), "\n", \(TraditionalForm\`Q\_\(i\_L\)\%L = \[AlignmentMarker]model\ probability\ \ that\ layer\ L\ has\ state\ i\_L\), "\n", \(TraditionalForm\`Q\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\) = \ \[AlignmentMarker]\[AlignmentMarker]model\ probability\ that\ layer\ l + 1\ has\ state\ i\_\(l + 1\)\), "\n", \(TraditionalForm\`\[AlignmentMarker]given\ that\ layer\ l\ has\ state\ i\ \_l\), "\n", \(TraditionalForm\`Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\) = \ \[AlignmentMarker]\[AlignmentMarker]model\ probability\ that\ layer\ l\ has\ \ state\ i\_l\), "\n", \(TraditionalForm\`\[AlignmentMarker]given\ that\ layer\ l + 1\ has\ state\ i\_\(l + 1\)\)}], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " has an analogous interpretation to ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", except that it is a model of the source, rather than the true behaviour \ of the source." }], "Text"], Cell[TextData[{ "It turns out to be useful for the true Markov behaviour (i.e. ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ") and the model Markov behaviour (i.e. ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ") to run in opposite directions through the Markov chain. 
Thus ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "=", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}]}], TraditionalForm]]], " (flow of influence from layer ", Cell[BoxData[ \(TraditionalForm\`0\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], " of the Markov chain) and ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "=", "\[AlignmentMarker]", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], TraditionalForm]]], " (flow of influence from layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`0\)]], " of the Markov chain). In the conventional language of neural networks, ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is a \"recognition model\" and ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is a \"generative model\". Note that the terminology \"recognition \ model\" is strictly speaking not accurate in this context, because ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " describes the true behaviour (i.e. it is not merely a model) of a \ multilayer source. 
The effect of the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " on the external source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is to compute (in a stochastic fashion) various functions of the state of \ the source, so the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " can be interpreted as computing \"statistics\" of the external source. \ A better terminology would be to say that ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is a \"multilayer statistic\", rather than a \"recognition model\"." }], "Text"], Cell[TextData[{ "However, terminology depends on one's viewpoint. In Markov chain density \ modelling ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is a source when viewed from the point of view of the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ". 
In conventional density modelling ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is a source when viewed from the point of view of the model ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ", in which case ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], TraditionalForm]]], " is a recognition model and ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " is a generative model. In this report, terminology will thus be used in a \ context-dependent way." }], "Text"], Cell[TextData[{ "Now evaluate the expression for ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " in the case where ", Cell[BoxData[ FormBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " and ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " run in opposite directions through the Markov chain. 
Thus use Bayes' \ theorem in the form" }], "Text"], Cell[BoxData[ \(TraditionalForm\`\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\) = \(P\_\(i\_l\)\%l\) P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\)], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "and define ", Cell[BoxData[ FormBox[ RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}], TraditionalForm]]], " in such a way that it depends on ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\)]], " (flow of influence from layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ") and ", Cell[BoxData[ \(TraditionalForm\`log\ Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)]], " (flow of influence from layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], ")" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}], "\[Congruent]", \(-\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\) log\ Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "The ", Cell[BoxData[ FormBox[ RowBox[{\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}], TraditionalForm]]], " terms in ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", 
FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " (see ", ButtonBox["equation", ButtonData:>"Eq:NegativeLogLikelihood", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:NegativeLogLikelihood"], ") may thus be rewritten as" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\), RowBox[{\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}], "=", RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "whence ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " may finally be written as" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ 
StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:ObjectiveMarkovSource"], Cell[TextData[{ "The various parts of the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], TraditionalForm]]], " term may be interpreted as follows. ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_l\)\%l\)]], " is the source probability that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], " (after propagation of the external source from layer ", Cell[BoxData[ \(TraditionalForm\`0\)]], " via layers ", Cell[BoxData[ \(TraditionalForm\`1, 2, \[CenterEllipsis], l - 1\)]], "), ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\)]], " is the source probability that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " is ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " given that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], ", and ", Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)]], " is the model probability that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], " given that the state of layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " is ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], ". 
Finally, the term ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " is the total number of bits that is required to code the ", Cell[BoxData[ FormBox[ SuperscriptBox["L", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " layer of the network (i.e. its output layer)." }], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "2-Layer Folded Markov Chain Network" }], "Subsection", CellTags->"Sect:2LayerFMC"], Cell[TextData[{ "The expression for ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " in ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource"], " is rather complicated, but it has a simple internal structure which \ allows it to be systematically analysed. 
Thus apply ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource"], " to a 2-layer network to obtain the objective function" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", RowBox[{ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:ObjectiveMarkovSource2Layer"], Cell[TextData[{ "Now change notation in order to make contact with previous results on \ vector quantisers (VQ) [", ButtonBox["9", ButtonData:>"Ref:LindeBuzoGray1980", ButtonStyle->"Hyperlink"], "]" }], "Text"], Cell[BoxData[ FormBox[GridBox[{ { RowBox[{ RowBox[{\(i\_0\), "\[LongRightArrow]", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], StyleBox[" ", FontWeight->"Plain"], RowBox[{ StyleBox[\(\[Sum]\_\(i\_0 = 1\)\%\(M\_0\)\), FontWeight->"Plain"], RowBox[{"\[LongRightArrow]", RowBox[{"\[Integral]", StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]}]}]}]}], \(input\ vector\)}, { RowBox[{ RowBox[{\(i\_1\), "\[LongRightArrow]", StyleBox["y", FontSlant->"Italic"]}], StyleBox[" ", FontWeight->"Plain"], RowBox[{ StyleBox[\(\[Sum]\_\(i\_1 = 1\)\%\(M\_1\)\), FontWeight-> "Plain"], \(\(\[LongRightArrow]\)\(\[Sum]\_\(y = 1\)\%M\)\)}]}], \(output\ code\ index\)}, { RowBox[{\(P\_\(i\_0\)\%0\), 
"\[LongRightArrow]", RowBox[{"Pr", "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}]}], \(input\ PDF\)}, { RowBox[{\(P\_\(i\_1, i\_0\)\%\(1 | 0\)\), "\[LongRightArrow]", RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], \(recognition\ model\)}, { RowBox[{\(Q\_\(i\_0, i\_1\)\%\(0 | 1\)\[LongRightArrow]V\), " ", FractionBox["1", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]], RowBox[{"exp", "(", RowBox[{"-", FractionBox[ SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"], \(2 \[Sigma]\^2\)]}], ")"}]}], \(Gaussian\ generative\ model\)}, {\(Q\_\(i\_1\)\%1\[LongRightArrow]\(Q(y)\)\), \(output\ prior\)} }], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, GridBoxOptions->{ColumnAlignments->{Left}}, CellTags->{"Eq:ObjectiveMarkovSource2LayerNotation", "Error:1"}], Cell[TextData[{ "where ", Cell[BoxData[ FormBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " is a continuous-valued input vector (e.g. the activity pattern in layer \ 0), ", Cell[BoxData[ \(TraditionalForm\`\[Sigma]\)]], " is the (isotropic) variance of the Gaussian generative model, ", Cell[BoxData[ \(TraditionalForm\`V\)]], " is an infinitesimal volume element in input space, and ", Cell[BoxData[ \(TraditionalForm\`y\)]], " is a discrete-valued output index (e.g. the location of the next neuron \ to fire in layer 1). 
This allows ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " to be written as" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{"-", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], RowBox[{"log", "(", RowBox[{"V", FractionBox["1", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]], RowBox[{"exp", "(", RowBox[{"-", FractionBox[ SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"], \(2 \[Sigma]\^2\)]}], ")"}]}], ")"}]}]}]}]}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{ RowBox[{\(1\/\(2 \[Sigma]\^2\)\), RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], 
StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}], "-", RowBox[{"log", FractionBox["V", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Error:2"], Cell[TextData[{ "Now define the 2-layer \"folded Markov chain\" (FMC) objective function ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " as (see [", ButtonBox["17", ButtonData:>"Ref:Luttrell1994a", ButtonStyle->"Hyperlink"], "] for details of the FMC approach)" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "\[Congruent]", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], RowBox[{"\[Integral]", RowBox[{ SuperscriptBox[ StyleBox[ 
RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], "\[Prime]"], RowBox[{"Pr", "(", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "|", "y"}], ")"}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:ObjectiveFMC"], Cell[TextData[{ "and use the symmetry of ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " to write it in the form" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "=", RowBox[{"2", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:ObjectiveVQ"], Cell[TextData[{ "where ", Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}], TraditionalForm]]], " takes the value that minimises ", 
Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " (i.e. ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}], "=", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], StyleBox[" ", FontWeight->"Bold", FontSlant->"Plain"], RowBox[{"Pr", "(", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", "y"}], ")"}], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]}]}], TraditionalForm]]], ")." }], "Text"], Cell[TextData[{ "This allows ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " to be written as" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{\(1\/\(4 \[Sigma]\^2\)\), SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]]}], "-", RowBox[{"log", FractionBox["V", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Error:3"], Cell[TextData[{ "If the cost of coding the output layer (i.e. 
", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]]], ") is ignored, then provided that ", Cell[BoxData[ \(TraditionalForm\`V\)]], " and ", Cell[BoxData[ \(TraditionalForm\`\[Sigma]\)]], " are fixed quantities, the 2-layer Markov source coding objective function \ ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " can be minimised by minimising the 2-layer FMC objective function ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], ". The basic FMC approach can be generalised by replacing the (isotropic) \ variance ", Cell[BoxData[ \(TraditionalForm\`\[Sigma]\)]], " by a vector of (anisotropic) variances, or even a full covariance matrix \ if there is enough training data to permit this." 
}], "Text"], Cell[TextData[{ ButtonBox["Equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], " is also the objective function for a soft vector quantiser (VQ), where ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " is a soft encoder, ", Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}], TraditionalForm]]], " is the reconstruction vector attached to code index ", Cell[BoxData[ \(TraditionalForm\`y\)]], ", and ", Cell[BoxData[ FormBox[ SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"], TraditionalForm]]], " is the squared ", Cell[BoxData[ \(TraditionalForm\`L\^2\)]], " norm of the reconstruction error. A hard VQ (i.e. winner-take-all \ encoder) has ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", SubscriptBox["\[Delta]", RowBox[{"y", ",", RowBox[{"y", "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}]}]]}], TraditionalForm]]], "; this emerges as the optimal form when ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " is minimised w.r.t. ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], ". 
This shows that the VQ objective function (", ButtonBox["equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], ") is closely related to the objective function for 2-layer Markov source \ encoding (", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource2Layer", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource2Layer"], "), provided that the cost of coding the output layer ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]]], " is ignored." }], "Text"], Cell[TextData[{ "The effect of the ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]]], " term in ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " is to encourage ", Cell[BoxData[ \(TraditionalForm\`P\_i\%1\[LongRightArrow]\[Delta]\_\(i, i\_0\)\)]], " (only one state in layer 1 is used) and ", Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"], "\[LongRightArrow]", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], TraditionalForm]]], " (perfect model in layer 1). 
The behaviour ", Cell[BoxData[ \(TraditionalForm\`P\_i\%1\[LongRightArrow]\[Delta]\_\(i, i\_0\)\)]], " is in conflict with the requirements of the ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " term in ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], ", which requires that more than one state in layer 1 is used, in order to \ minimise the reconstruction distortion. There is a tradeoff between increasing \ the number of active states in layer 1 in order to enable the Gaussian \ generative model (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is a Gaussian mixture distribution) to make a good approximation to the \ external source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ", and decreasing the number of active states in layer 1 in order to make \ the average total number of bits ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]]], " that are required to specify an output state as small as possible." }], "Text"], Cell[TextData[{ "In this report the ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]]], " term will usually be omitted. 
The optimal network is then a hard VQ, \ where only 1 output state is active for a given input state, but different \ output states are used for input states that lie in different quantisation \ cells, so the net effect is that all output states are used." }], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Coupled FMC Networks" }], "Subsection", CellTags->"Sect:CoupledFMC"], Cell[TextData[{ "The results of ", ButtonBox["section", ButtonData:>"Sect:2LayerFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:ApplicationNN"], ".", CounterBox["Subsection", "Sect:2LayerFMC"], " will now be generalised to an ", Cell[BoxData[ \(TraditionalForm\`\((L + 1)\)\)]], "-layer network. The objective function for coding a Markov source (", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource"], ") can be written, using a notation which is analogous to that given in ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource2LayerNotation", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource2LayerNotation"], " (where the superscripts are layer indices) as" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"], \(4 \((\[Sigma]\^l)\)\^2\)], "-", RowBox[{"log", FractionBox[\(V\^l\), SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma]\^l)\), RowBox[{"dim", " ", SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "l"]}]]]}]}], ")"}]}], "\[AlignmentMarker]", "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], 
",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->{"Eq:ObjectiveMarkovSourceSimple", "Error:4"}], Cell[TextData[{ "which is a sum of 2-layer FMC objective functions (where each term is \ weighted by ", Cell[BoxData[ \(TraditionalForm\`\((\[Sigma]\^l)\)\^\(-2\)\)]], "), plus an output coding cost ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], ". The layer index ", Cell[BoxData[ \(TraditionalForm\`l\)]], " identifies the input layer of each of the 2-layer FMCs, and the output \ layer of the ", Cell[BoxData[ FormBox[ SuperscriptBox["l", StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " 2-layer FMC is identified with the input layer of the ", Cell[BoxData[ FormBox[ SuperscriptBox[\((l + 1)\), StyleBox["th", FontSlant->"Italic"]], TraditionalForm]]], " 2-layer FMC, which overall yields a chain of ", Cell[BoxData[ \(TraditionalForm\`L\)]], " coupled 2-layer FMCs. This will be called an ", StyleBox["FMC-ladder", FontSlant->"Italic"], ", or simply a ladder." }], "Text"], Cell[TextData[{ "If the cost of coding the output layer is ignored, then the multilayer \ Markov source coding objective function ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " is minimised by minimising the sum of 2-layer FMC objective functions ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\_\(l = 0\)\%\(L - 1\)\), FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"], \(\((\[Sigma]\^l)\)\^2\)]}], TraditionalForm]]], ". 
As the number of network layers is increased, omitting ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " has less and less effect on the overall optimisation, because its contribution \ is swamped by the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\_\(l = 0\)\%\(L - 1\)\), FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"], \(\((\[Sigma]\^l)\)\^2\)]}], TraditionalForm]]], " term." }], "Text"], Cell[TextData[{ "Just as the objective function for a 2-layer FMC (", ButtonBox["equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], ") is equivalent to the objective function for a soft VQ, the objective \ function ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\_\(l = 0\)\%\(L - 1\)\), FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"], \(\((\[Sigma]\^l)\)\^2\)]}], TraditionalForm]]], " for an FMC-ladder is equivalent to the objective function for a chain of \ coupled soft VQs [", ButtonBox["13", ButtonData:>"Ref:Luttrell1990", ButtonStyle->"Hyperlink"], "]. In the simplest case, the VQ connecting layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " encodes the scalar code index ", Cell[BoxData[ \(TraditionalForm\`y\^l\)]], " which is output by the VQ connecting layer ", Cell[BoxData[ \(TraditionalForm\`l - 1\)]], " to layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], ". Clearly, a VQ is not necessarily a good way of encoding ", Cell[BoxData[ \(TraditionalForm\`y\^l\)]], ", because VQs are designed to encode continuous-valued vectors. 
This \ problem will be addressed in the section on probability leakage (", ButtonBox["section", ButtonData:>"Sect:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:ApplicationNN"], ".", CounterBox["Subsection", "Sect:Leakage"], "), where the properties of the output from a VQ are optimised in such a \ way as to approximate what the input of the next VQ expects to see." }], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Probability Leakage" }], "Subsection", CellTags->"Sect:Leakage"], Cell[TextData[{ "The objective function ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\_\(l = 0\)\%\(L - 1\)\), FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"], \(\((\[Sigma]\^l)\)\^2\)]}], TraditionalForm]]], " for an FMC-ladder couples the optimisation of the individual 2-layer FMCs \ together. Because the output of FMC ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is the input of FMC ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 2\)]], "), the optimisation of FMC ", Cell[BoxData[ \(TraditionalForm\`k\)]], " has side effects on the optimisation of FMCs ", Cell[BoxData[ \(TraditionalForm\`k + 1, k + 2, \[CenterEllipsis], L - 1\)]], ". This leads to the effect called self-supervision, in which top-down \ connections from higher to lower network layers are automatically generated, \ to allow the lower layers to process their input more effectively in the \ light of what the higher layers discover in the data [", ButtonBox["15", ButtonData:>"Ref:Luttrell1991b", ButtonStyle->"Hyperlink"], ", ", ButtonBox["16", ButtonData:>"Ref:Luttrell1992", ButtonStyle->"Hyperlink"], "]. 
This can be made explicit in the objective function by grouping the \ terms as follows" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"], \(\((\[Sigma]\^l)\)\^2\)]}], "=", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "0"], \(\((\[Sigma]\^0)\)\^2\)], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "1"], \(\((\[Sigma]\^1)\)\^2\)], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "2"], \(\((\[Sigma]\^2)\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], ")"}]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "From the point of view of FMC ", Cell[BoxData[ \(TraditionalForm\`k\)]], ", the effect of FMCs ", Cell[BoxData[ \(TraditionalForm\`k + 1, k + 2, \[CenterEllipsis], L - 1\)]], " is to add an additional piece of objective function to the basic FMC \ objective function ", Cell[BoxData[ FormBox[ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)], TraditionalForm]]], " thus" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)], "\[LongRightArrow]", FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)]}], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 1\)], \(\((\[Sigma]\^\(k + 1\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], ")"}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell["This expression can be bounded above as follows", "Text"], Cell[BoxData[ FormBox[ RowBox[{ 
RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 1\)], \(\((\[Sigma]\^\(k + 1\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], ")"}]}], "\[LessEqual]", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)], "+", SubscriptBox[ RowBox[{"[", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 1\)], \(\((\[Sigma]\^\(k + 1\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], "]"}], \(worst\ case\)]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "where the input to each of the FMCs ", Cell[BoxData[ \(TraditionalForm\`k + 1, k + 2, \[CenterEllipsis], L - 1\)]], " is assumed to be uniformly distributed in the worst case. Minimising ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], TraditionalForm]]], " then locates a least upper bound on ", Cell[BoxData[ FormBox[ RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 1\)], \(\((\[Sigma]\^\(k + 1\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], ")"}]}], TraditionalForm]]], ", as required." 
}], "Text"], Cell[TextData[{ "A tighter upper bound can be obtained by combining FMC ", Cell[BoxData[ \(TraditionalForm\`k\)]], " and FMC ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], " into a single 3-layer FMC [", ButtonBox["17", ButtonData:>"Ref:Luttrell1994a", ButtonStyle->"Hyperlink"], "] whose input and output are layer ", Cell[BoxData[ \(TraditionalForm\`k\)]], " and layer ", Cell[BoxData[ \(TraditionalForm\`k + 2\)]], " respectively, and there is also a hidden layer ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], "." }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], \(\((\[Sigma]\^k)\)\^2\)], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 1\)], \(\((\[Sigma]\^\(k + 1\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], ")"}]}], "\[LessEqual]", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], \(\((\[Sigma]\^\(k, k + 1\))\)\^2\)], "+", SubscriptBox[ RowBox[{"[", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 2\)], \(\((\[Sigma]\^\(k + 2\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], "]"}], \(worst\ case\)]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, CellTags->"Error:5"], Cell[TextData[{ "where a self-explanatory notation has been used. 
The 3-layer FMC objective \ function (see ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveFMC"], " for the 2-layer case) is given by" }], "Text"], Cell[BoxData[{ FormBox[ RowBox[{ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], "=", "\[AlignmentMarker]", \(\[Sum]\+\(y\^k = 1\)\%\(M\_k\)\(Pr \((y\^k)\)\ \) \(\[Sum]\+\(y\^\(k + 1\) = 1\)\%\(M\_\(k + 1\)\)\(Pr \((y\^\(k + 1\) | y\^k)\)\) \(\[Sum]\+\(y\^\(k + 2\) = 1\)\%\(M\_\(k + \ 2\)\)Pr \((y\^\(k + 2\) | y\^\(k + 1\))\)\)\)\)}], TraditionalForm], "\n", FormBox[ RowBox[{ "\[AlignmentMarker]", \(\[Sum]\+\(\(y\^\[Prime]\)\^k = \ 1\)\%\(M\_k\)\(Pr \((\(y\^\[Prime]\)\^k | \(y\^\[Prime]\)\^\(k + 1\))\)\) \ \(\(\[Sum]\+\(\(y\^\[Prime]\)\^\(k + 1\) = 1\)\%\(M\_\(k + 1\)\)Pr \((\(y\^\ \[Prime]\)\^\(k + 1\) | y\^\(k + 2\))\)\)\n\[AlignmentMarker]\(\(\[Times]\)\(\ \[LeftBracketingBar]\[LeftBracketingBar]y\^k - \(y\^\[Prime]\)\^k\ \[RightBracketingBar]\[RightBracketingBar]\^2\)\)\)\)}], TraditionalForm]}], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "The summations ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(y\^\(k + 2\) = 1\)\%\(M\_\(k + 2\)\)\((\ \[CenterEllipsis])\)\)]], " can be evaluated thus" }], "Text"], Cell[BoxData[ \(TraditionalForm\`Pr(\(y\^\[Prime]\)\^\(k + 1\) | y\^\(k + 1\)) = \[Sum]\+\(y\^\(k + 2\) = 1\)\%\(M\_\(k + 2\)\)\(Pr(\ \(y\^\[Prime]\)\^\(k + 1\) | y\^\(k + 2\))\) \(Pr( y\^\(k + 2\) | y\^\(k + 1\))\)\)], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, CellTags->"Eq:Fold"], Cell["which yields", "Text"], Cell[BoxData[{ FormBox[ RowBox[{ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], "=", "\[AlignmentMarker]", \(\[Sum]\+\(y\^k = 1\)\%\(M\_k\)\(Pr \((y\^k)\)\ \)\[AlignmentMarker]\(\[Sum]\+\(\(y\^\[Prime]\)\^\(k + 1\) = 1\)\%\(M\_\(k + \ 1\)\)\((\[Sum]\+\(y\^\(k + 1\) = 1\)\%\(M\_\(k + 1\)\)\(Pr 
\((\(y\^\[Prime]\)\ \^\(k + 1\) | y\^\(k + 1\))\)\) \(Pr \((y\^\(k + 1\) | y\^k)\)\))\)\)\)}], TraditionalForm], "\n", FormBox[\(\[Sum]\+\(\(y\^\[Prime]\)\^k = \ 1\)\%\(M\_k\)\(Pr(\(y\^\[Prime]\)\^k | \(y\^\[Prime]\)\^\(k + 1\))\) \ \[LeftBracketingBar]\[LeftBracketingBar]y\^k - \(y\^\[Prime]\)\^k\ \[RightBracketingBar]\[RightBracketingBar]\^2\), TraditionalForm]}], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "which may be rearranged (in the same way that ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], " is obtained from ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveFMC"], ") to obtain" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], "=", \(2 \(\[Sum]\+\(y\^k = 1\)\%\(M\_k\)\(Pr( y\^k)\) \(\[Sum]\+\(y\^\(k + 1\) = 1\)\%\(M\_\(k + 1\)\)\((\ \[Sum]\+\(\(y\^\[Prime]\)\^\(k + 1\) = 1\)\%\(M\_\(k + 1\)\)\(Pr( y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\) \ \(Pr(\(y\^\[Prime]\)\^\(k + 1\) | y\^k)\))\) \[LeftBracketingBar]\ \[LeftBracketingBar]y\^k - \(\(y\^\[Prime]\)\^k\)(y\^\(k + 1\))\ \[RightBracketingBar]\[RightBracketingBar]\^2\)\)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "where the notation ", Cell[BoxData[ \(TraditionalForm\`y\^\(k + 1\)\)]], " and ", Cell[BoxData[ \(TraditionalForm\`\(y\^\[Prime]\)\^\(k + 1\)\)]], " have been interchanged for convenience. 
If this result is compared with \ the expression for ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], TraditionalForm]]], ", then it is seen that ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], TraditionalForm]]], " can be obtained from ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], TraditionalForm]]], " by making the replacement" }], "Text"], Cell[BoxData[ \(TraditionalForm\`\(Pr( y\^\(k + 1\) | y\^k)\)\[LongRightArrow]\(\[Sum]\+\(\(y\^\[Prime]\)\^\(k + 1\) = \ 1\)\%\(M\_\(k + 1\)\)\(Pr( y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\) \(Pr(\(y\^\[Prime]\)\ \^\(k + 1\) | y\^k)\)\)\)], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, CellTags->"Eq:Leakage"], Cell[TextData[{ "where the transition matrix element ", Cell[BoxData[ \(TraditionalForm\`Pr(y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\)]], " is given in ", ButtonBox["equation", ButtonData:>"Eq:Fold", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:Fold"], "; it specifies the probability that code index ", Cell[BoxData[ \(TraditionalForm\`\(y\^\[Prime]\)\^\(k + 1\)\)]], " is damaged by the 2-layer FMC ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], " in such a way as to convert it to code index ", Cell[BoxData[ \(TraditionalForm\`y\^\(k + 1\)\)]], ". Numerical values for the ", Cell[BoxData[ \(TraditionalForm\`Pr(y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\)]], " can be assigned assuming a manifestly suboptimal 2-layer FMC ", Cell[BoxData[ \(TraditionalForm\`k + 1\)]], ", which will give an upper bound on ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], TraditionalForm]]], ". 
For instance, ", Cell[BoxData[ \(TraditionalForm\`Pr(y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\)]], " could be modelled by an additive Gaussian noise process, with a zero mean \ and a large enough variance that it guarantees an upper bound on ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], TraditionalForm]]], ". Finally, minimising this upper bound on ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], TraditionalForm]]], " (i.e. ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "k"], TraditionalForm]]], " with the replacement in ", ButtonBox["equation", ButtonData:>"Eq:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:Leakage"], ")", " then locates a least upper bound on ", Cell[BoxData[ FormBox[ RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k, k + 1\)], \(\((\[Sigma]\^\(k, k + 1\))\)\^2\)], "+", RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(k + 2\)], \(\((\[Sigma]\^\(k + 2\))\)\^2\)], "+", \((\[CenterEllipsis]\ \((\[CenterEllipsis]\ \((\ \[CenterEllipsis])\))\))\)}], ")"}]}], TraditionalForm]]], ", as required." }], "Text", CellTags->"Problem:2"], Cell[TextData[{ "The process in ", ButtonBox["equation", ButtonData:>"Eq:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:Leakage"], " is called \"probability leakage\", because the transition matrix ", Cell[BoxData[ \(TraditionalForm\`Pr(y\^\(k + 1\) | \(y\^\[Prime]\)\^\(k + 1\))\)]], " leaks probability from index ", Cell[BoxData[ \(TraditionalForm\`\(y\^\[Prime]\)\^\(k + 1\)\)]], " to index ", Cell[BoxData[ \(TraditionalForm\`y\^\(k + 1\)\)]], ". 
The application of this idea to Kohonen self-organising maps will be \ discussed in ", ButtonBox["section", ButtonData:>"Sect:Kohonen", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Kohonen"], "." }], "Text", CellTags->"Ed:Change5"], Cell["\<\ The above least upper bound approach can be extended to minimising the \ overall objective function as follows:\ \>", "Text"], Cell[TextData[{ "1. Minimise the upper bound on ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(0, 1\)], TraditionalForm]]], " by introducing probability leakage into ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "0"], TraditionalForm]]], "." }], "Text"], Cell[TextData[{ "2. Then minimise ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(1, 2\)], TraditionalForm]]], " by introducing probability leakage into ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "1"], TraditionalForm]]], "." }], "Text"], Cell["3. Etc.", "Text"], Cell[TextData[{ "4. ", "Then minimise ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(L - 2, L - 1\)], TraditionalForm]]], " by introducing probability leakage into ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(L - 2\)], TraditionalForm]]], "." }], "Text"], Cell[TextData[{ "5. Then minimise ", Cell[BoxData[ FormBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(L - 1\)], TraditionalForm]]], ". No probability leakage occurs in network layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], ", because it is the final layer." 
}], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Coding The Output Layer" }], "Subsection", CellTags->"Sect:CodeOutputLayer"], Cell[TextData[{ "The general expression for ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " in ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource"], " is the sum of two terms: an FMC-ladder ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], TraditionalForm]]], ", plus the cost ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " of coding layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], ". The ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " term has precisely the form that is commonly used in density modelling, \ so any convenient density model could be used to parameterise ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " in layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], "." 
}], "Text"], Cell[TextData[{ "A typical implementation of the type of network that minimises ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " thus splits into two pieces corresponding to the two different types of \ term in the objective function. The input space (i.e. layer 0) is connected \ to the output space (i.e. layer ", Cell[BoxData[ \(TraditionalForm\`L\)]], ") by an FMC-ladder corresponding to the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], TraditionalForm]]], " term. In the special case where ", Cell[BoxData[ \(TraditionalForm\`L = 0\)]], " (i.e. no FMC-ladder is used) this approach reduces to standard input \ density modelling. There are various ways in which ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " can be computed, all of which are special cases of the Markov random \ field (MRF) density modelling approach (all finite connectivity density \ models are MRFs). There are various possibilities for this MRF model:" }], "Text"], Cell["\<\ 1. Boltzmann machine (BM). This is the most general type of MRF model \ (assuming that the restriction of the classical BM to binary variables and \ quadratic interactions is not imposed), and can be computationally very \ expensive. This includes all the well-known image modelling MRF approaches, \ with or without hidden variables.\ \>", "Text"], Cell["\<\ 2. Hopfield network. 
This is a zero temperature BM, which is computationally \ cheaper than a finite temperature BM, but is correspondingly less powerful.\ \>", "Text", CellTags->"Ed:Change15"], Cell[TextData[{ "3. Helmholtz machine (HM). This approach defines an upper bound on ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], ", which reduces the computational load that would otherwise occur if a BM \ were used. ", " This is discussed in ", ButtonBox["section", ButtonData:>"Sect:TypesOfDensityModel", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:TypesOfDensityModel"], "." }], "Text", CellTags->"Ed:Change6"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " ", "Two Types of Density Model" }], "Section", CellTags->"Sect:TypesOfDensityModel"], Cell[TextData[{ "This section discusses the relationship between two types of density \ model. The first type is the conventional density model that aims to \ approximate the input probability density (i.e. the objective function is ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], "), and the second type is the one introduced here which aims to \ approximate the joint probability density of a Markov source (i.e. the \ objective function is ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], "). 
In order to relate ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " to ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " it is necessary to introduce additional layers (i.e. layers ", Cell[BoxData[ \(TraditionalForm\`1, 2, \[CenterEllipsis], L\)]], ") into ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " in an appropriate fashion. The Helmholtz machine (HM) [", ButtonBox["5", ButtonData:>"Ref:HintonZemel1994", ButtonStyle->"Hyperlink"], ", ", ButtonBox["6", ButtonData:>"Ref:HintonDayanFreyNeal1995", ButtonStyle->"Hyperlink"], ", ", ButtonBox["2", ButtonData:>"Ref:DayanHintonNealZemel1995", ButtonStyle->"Hyperlink"], ", ", ButtonBox["3", ButtonData:>"Ref:DayanHinton1996", ButtonStyle->"Hyperlink"], "] does this by replacing ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " by a different objective function (which has these additional layers \ present as hidden variables), and which is an upper bound on the original \ objective function ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], ". 
It turns out that the Helmholtz machine (HM) objective function (which is \ generally written as ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], TraditionalForm]]], ") and the folded Markov chain (FMC) objective function (which is generally \ written as ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], ") introduced here are closely related. The essential difference between \ the two is that the Helmholtz machine objective function does ", StyleBox["not", FontSlant->"Italic"], " include the cost of specifying the state of layers ", Cell[BoxData[ \(TraditionalForm\`1, 2, \[CenterEllipsis], L\)]], " given that the state of layer 0 is known (this is known as the \ \"bits-back\" term), which thus allows it to develop distributed codes \ (which are expensive to specify) more easily. It is not clear whether the \ Helmholtz machine objective function is the best approach to distributed \ codes, because there are other ways of encouraging distributed codes to \ develop." }], "Text"], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "FMC versus Helmholtz Machine" }], "Subsection", CellTags->"Sect:FMCvsHM"], Cell[TextData[{ "In the conventional density modelling approach to neural networks, there \ are two basic classes of model. In the case of both unsupervised and \ supervised neural networks the source is ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ", which is the network input (unsupervised case) or the network output \ (supervised case). 
Additionally, in the case of supervised neural networks ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is conditioned on the network input as ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(0 | input\)], TraditionalForm]]], ". Thus in both cases there is only an external source (i.e. source layers \ ", Cell[BoxData[ \(TraditionalForm\`1, 2, \[CenterEllipsis], L\)]], " are not present), which is modelled by ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " (unsupervised case) or ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | input\)], TraditionalForm]]], " (supervised case). ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " or ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | input\)], TraditionalForm]]], " can be modelled in any way that is convenient. 
Frequently a multilayer \ generative model of the form" }], "Text"], Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_0\)\%0 = \[Sum]\+\(i\_1, i\_2, \ \[CenterEllipsis], i\_L\)\(Q\_\(i\_0, i\_1\)\%\(0 | 1\)\) \[CenterEllipsis]\ \(Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) \[CenterEllipsis]\ Q\_\(i\_L\)\%L\)], \ "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "is used, where the ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`1 \[LessEqual] l \[LessEqual] L\)]], ") are hidden variables, which need to be summed over in order to calculate \ the required marginal probability ", Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_0\)\%0\)]], ", and the notation is deliberately chosen to be the same as is used in the \ Markov chain model" }], "Text"], Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_0, i\_1, \[CenterEllipsis], i\_L\) = \ \(Q\_\(i\_0, i\_1\)\%\(0 | 1\)\) \[CenterEllipsis]\ \(Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) \[CenterEllipsis]\ Q\_\(i\_L\)\%L\)], \ "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "Helmholtz machines and FMCs are related to each other. 
Thus the ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " that is minimised in conventional density modelling can be manipulated in \ order to derive ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], TraditionalForm]]] }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "\[LessEqual]", "\[AlignmentMarker]", RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "+", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(\(-\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\(P\_\(i\_0\)\%0\) \(\ \[Sum]\+\(i\_1 = 1\)\%\(M\_1\)\(P\_\(i\_1, i\_0\)\%\(1 | 0\)\) \(log(\(Q\_\(i\_0\)\%0\) Q\_\(i\_1, i\_0\)\%\(1 | 0\))\)\)\)\) + \[Sum]\+\(i\ \_0 = 1\)\%\(M\_0\)\(P\_\(i\_0\)\%0\) \(\[Sum]\+\(i\_1 = \ 1\)\%\(M\_1\)\(P\_\(i\_1, i\_0\)\%\(1 | 0\)\) log\ P\_\(i\_1, i\_0\)\%\(1 | 0\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{"L", "(", RowBox[{ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}], ",", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", 
FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}]}], ")"}], "-", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "-", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}]}]}]}], "\[IndentingNewLine]", "\[Congruent]", "\[AlignmentMarker]", SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "where the inequality follows from ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}], "\[GreaterEqual]", "0"}], TraditionalForm]]], ". Thus the objective function ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " for conventional density modelling is bounded above by the objective \ function ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], TraditionalForm]]], " for optimising a 2-layer HM with one hidden layer. 
The ", Cell[BoxData[ FormBox[ RowBox[{"-", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}]}]}]}], TraditionalForm]]], " term is minus the entropy of layer 1 given that layer 0 is known \ (averaged over layer 0); this is the \"bits-back\" term of ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], TraditionalForm]]], ". The ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " term is the standard Markov source objective function (see ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSource2Layer", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSource2Layer"], "), which may be rearranged in order to make contact with the FMC \ approach as in ", ButtonBox["section", ButtonData:>"Sect:2LayerFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:2LayerFMC"], ".", CounterBox["Subsection", "Sect:2LayerFMC"], ". 
Thus" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], "\[LessEqual]", "\[AlignmentMarker]", RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{\(1\/\(4 \[Sigma]\^2\)\), SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]]}], "-", RowBox[{"log", FractionBox["V", SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma])\), RowBox[{"dim", " ", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]]]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, CellTags->"Error:6"], Cell[TextData[{ "where the inequality follows from ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}], "\[GreaterEqual]", "0"}], TraditionalForm]]], "." }], "Text"], Cell[TextData[{ "Two inequalities ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "\[LessEqual]", SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], "\[LessEqual]", RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], " are used in the above derivation. 
", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "\[LessEqual]", SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]]}], TraditionalForm]]], " arises because ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}]}]}], "\[GreaterEqual]", "0"}], TraditionalForm]]], " (i.e. the model ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], " is imperfect, so that ", Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], "\[NotEqual]", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], TraditionalForm]]], "), and ", Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], "\[LessEqual]", RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], " arises because ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}]}]}], "\[GreaterEqual]", "0"}], TraditionalForm]]], " (i.e. the source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], " is stochastic). 
If the model is perfect (", Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], "=", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], TraditionalForm]]], ") and the source is deterministic (", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], " is such that the state of layer 1 is known once the state of layer 0 is \ given), then the pair of inequalities reduces to ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "=", RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], ". In this special case the objective function for optimising a density \ model in layer 0 is the same as for optimising the corresponding joint \ density model in layers 0 and 1." }], "Text"], Cell[TextData[{ "Now rewrite the expression for ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], TraditionalForm]]], " in order to see how it acts to disrupt the \"sparse coding\" property of \ ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], "." 
}], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], "=", RowBox[{ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], "+", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity], Cell[TextData[{ "The ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], TraditionalForm]]], " part (i.e. the pure 2-layer FMC, which is equal to ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(1\/\(4 \[Sigma]\^2\)\), SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]]}], "-", \(log V\/\(\(\@\(2 \[Pi]\)\) \[Sigma]\)\)}], TraditionalForm]]], " if ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], TraditionalForm]]], " is Gaussian) and the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}], TraditionalForm]]], " part compete with each other when the 2-layer HM objective function is \ minimised. 
Assuming that ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_0\)\%0 > 0\)]], ", the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}], TraditionalForm]]], " part likes to make ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"], TraditionalForm]]], " approximate ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], ", because this reduces ", Cell[BoxData[ FormBox[ RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}], TraditionalForm]]], ". On the other hand, assuming that ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_1\)\%1 > 0\)]], ", the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], TraditionalForm]]], " part likes to make ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], TraditionalForm]]], " approximate ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], ", because this reduces ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ 
StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], TraditionalForm]]], ". These two conditions cannot be met simultaneously, because ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(G\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]}], TraditionalForm]]], " acts to make ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], " \"spread out\" to become like ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"], TraditionalForm]]], ", whereas ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}], TraditionalForm]]], " acts to make ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], TraditionalForm]]], " a \"sparse coder\"." }], "Text", CellTags->"Ed:Change7"], Cell[TextData[{ "The above derivation can be generalised to an ", Cell[BoxData[ \(TraditionalForm\`\((L + 1)\)\)]], "-layer network. 
Firstly, the 2-layer result can be written as" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], GridBox[{ { RowBox[{"\[LessEqual]", "\[AlignmentMarker]", RowBox[{ RowBox[{"L", "(", RowBox[{ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}], ",", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)]}], ")"}]}], ")"}], "-", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}]}]}]}]}], " "}, { RowBox[{"=", RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "-", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(H\_\(i\_0\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ")"}]}]}]}]}], \(2 - layer\ HM\)}, { RowBox[{"\[LessEqual]", RowBox[{\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\), RowBox[{\(P\_\(i\_0\)\%0\), RowBox[{\(K\_\(i\_0\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)]}], ")"}]}]}]}], \(2 - layer\ FMC\)}, { RowBox[{"+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}], RowBox[{"+", RowBox[{"L", "(", RowBox[{ 
SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "1"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "1"]}], ")"}]}]} }]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell["which may be generalised to", "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], GridBox[{ { RowBox[{"\[LessEqual]", "\[AlignmentMarker]", RowBox[{"L", "(", RowBox[{ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], ",", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}]}], ")"}]}], " "}, { RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}]}]}]}]}], " "}, { RowBox[{"=", RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], 
")"}]}]}]}]}]}], \(multilayer\ HM\)}, { RowBox[{"\[LessEqual]", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), \(K\_\(i\_l\)\), RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}]}], \(coupled\ 2 - layer\ FMCs\)}, { RowBox[{"+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], RowBox[{"+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]} }]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "If ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " is deterministic (i.e. 
the state of layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " is derived deterministically from the state of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], "), then ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}]}]}]}], "=", "0"}], TraditionalForm]]], ", in which case ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], TraditionalForm]]], " simplifies to" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["HM", FontSlant->"Italic"]], "=", RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{"(", RowBox[{ RowBox[{\(1\/\(\(\[AlignmentMarker]\)\(4 \((\[Sigma]\^l)\)\^2\ \)\)\), SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], "l"]}], "-", RowBox[{"log", FractionBox[\(V\^l\), SuperscriptBox[\((\(\@\(2 \[Pi]\)\) \[Sigma]\^l)\), RowBox[{"dim", " ", SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "l"]}]]]}]}], ")"}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, CellTags->"Error:7"], Cell[TextData[{ "This highlights an important difference between the FMC and HM approaches: \ the FMC approach encourages the formation of deterministic ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " (because this type of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " has a zero entropy ", Cell[BoxData[ FormBox[ RowBox[{\(H\_\(i\_l\)\), "(", 
SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}], TraditionalForm]]], "), whereas the ", Cell[BoxData[ FormBox[ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}]}]}]}]}], TraditionalForm]]], " term in the HM objective function tries to encourage stochastic ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " (because this type of ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " has a large entropy ", Cell[BoxData[ FormBox[ RowBox[{\(H\_\(i\_l\)\), "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ")"}], TraditionalForm]]], "). Thus the multilayer network produced by optimising a set of coupled \ FMCs tends to have a minimal amount of stochastic behaviour. This is another \ way of saying that the FMC approach naturally leads to sparse codes." }], "Text", CellTags->"Ed:Change8"], Cell[TextData[{ "As in the case of a 2-layer network, if the source is deterministic and \ the model is perfect, then ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "=", RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], ", so that input density optimisation is equivalent to joint density \ optimisation." 
}], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Alternative Viewpoints" }], "Subsection"], Cell[TextData[{ "The relationship between conventional density models and FMCs can be \ stated from the point of view of a conventional density modeller. The goal is \ to build a density model ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " of the source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ", such that the number of bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " required to encode ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is minimised. 
However, if the source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " is transformed through ", Cell[BoxData[ \(TraditionalForm\`L\)]], " layers of a network to produce a transformed source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ", then ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], " is bounded above by the expression ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\), RowBox[{\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], TraditionalForm]]], ", which is the sum of the number of bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " required to encode ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ", plus (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], ") the number of bits per symbol ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\), RowBox[{\(P\_\(i\_\(l + 
1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}], TraditionalForm]]], " required to encode ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " (this is a set of coupled FMCs). The relationship ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}], "=", RowBox[{\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\), RowBox[{\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\), RowBox[{\(L\_\(i\_\(l + 1\)\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], TraditionalForm]]], " was used to obtain this interpretation. 
Thus the problem of encoding the \ source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " can be split into three steps: transform the source from ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " to ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ", encode the transformed source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ", and encode all of the transformations ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], ") to allow the original source to be reconstructed from the transformed \ source. The total number of bits required to encode ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " and ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], ") is then an upper bound on the total number of bits required to encode ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ". In this picture, coupled FMCs are used to connect the original source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " to the transformed source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ", so they connect one conventional density modelling problem (i.e. 
\ optimising ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ") to another (i.e. optimising ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ")." }], "Text"], Cell[TextData[{ "The above description of the relationship between conventional density \ models and FMCs was presented from the point of view of a conventional \ density modeller, who asserts that the goal is to build an optimum (i.e. \ minimum number of bits per symbol) density model ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " of the source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ". From this point of view, the coupled FMCs are merely a means of \ transforming the problem from modelling the source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " to modelling the transformed source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], ". That this process is imperfect is reflected in the fact that more bits \ per symbol are required to encode the transformed source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " (plus the coupled FMCs leading to it) than the original source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ". 
A conventional density modeller might reasonably ask what is the point \ of using FMCs, if they give only an upper bound on the number of bits per \ symbol for encoding the original source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ". However, it is not at all clear that the conventional density modeller \ is using the correct objective function in the first place. Why should the \ number of bits per symbol for encoding the original source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " be especially important? It is as if the world has been separated into an \ external world (i.e. ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ") and an internal world (i.e. the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], "), and a special status is accorded to the external world, which deems \ that it is important to model its density ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " accurately, at the expense of modelling the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " accurately. 
In the FMC approach, this artificial boundary between \ external and internal worlds is removed, because the coupled FMCs model the \ joint density ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], TraditionalForm]]], ", where ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], " and the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " are all accorded equal status. This even-handed approach is much more \ natural than one in which a particular part of the source (i.e. the external \ source) is accorded a special status." 
}], "Text"], Cell[TextData[{ "In the language of multilayer neural networks, the ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 0\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L - 2\)], ",", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(L | L - 1\)]}], ")"}], TraditionalForm]]], " is the source which comprises the bottom-up transformations (or \ recognition models) which generate the states of the internal layers of the \ network, and the ", Cell[BoxData[ FormBox[ RowBox[{"\[AlignmentMarker]\[AlignmentMarker]", RowBox[{"(", RowBox[{ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(0 | 1\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(1 | 2\)], ",", "\[CenterEllipsis]", ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(L - 1 | L\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], TraditionalForm]]], " is the model of the source which comprises the top-down transformations \ (or generative models). Thus the network is self-referential, because it \ forms a model of a source that includes its own internal states, because part \ of the source (i.e. the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], ") is the state of the network layers. This self-referential behaviour is \ present in both the conventional density modelling approach and in the FMC \ approach, but whereas in the former case it is not optimised in an \ even-handed fashion, in the latter case it is optimised in an even-handed \ fashion. 
One could adopt an extreme viewpoint, in which no distinction is \ made at all between the part of the world that is external to the neural \ network, and the part that is internal to the neural network. This is a \ natural approach, because the neural network is itself part of the world, so \ it should therefore be treated in exactly the same way as the part of the \ world that is external to the neural network. In effect, the neural network \ is inevitable; it is the world's way of modelling itself." }], "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " ", "Kohonen Self-Organising Network" }], "Section", CellTags->"Sect:Kohonen"], Cell[TextData[{ "In this section the Kohonen topographic mapping neural network [", ButtonBox["7", ButtonData:>"Ref:Kohonen1989", ButtonStyle->"Hyperlink"], "] will be derived from the 2-layer FMC objective function including \ probability leakage. The result is only approximate, because the training \ algorithm proposed by Kohonen does not correspond to the minimisation of any \ objective function. However, the objective function approach is useful \ nevertheless, because it produces very similar results to Kohonen's original \ proposal, whilst also allowing such neural networks to be treated in a \ unified way." }], "Text"], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "2-Layer Folded Markov Chain Network" }], "Subsection"], Cell[TextData[{ "In ", ButtonBox["section", ButtonData:>"Sect:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Leakage"], ".", CounterBox["Subsection", "Sect:Leakage"], " the concept of probability leakage was introduced as an empirical way of \ modelling the damage that FMCs ", Cell[BoxData[ \(TraditionalForm\`k + 1, k + 2, \[CenterEllipsis], L - 1\)]], " do to the output of FMC ", Cell[BoxData[ \(TraditionalForm\`k\)]], " in the FMC-ladder. 
Probability leakage can readily be implemented by the \ replacement for ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " given in ", ButtonBox["equation", ButtonData:>"Eq:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:Leakage"], ". In the case of the 2-layer FMC discussed in ", ButtonBox["section", ButtonData:>"Sect:2LayerFMC", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:2LayerFMC"], ".", CounterBox["Subsection", "Sect:2LayerFMC"], ", and in particular the objective function given in ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], ", probability leakage leads to the modified objective function" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "=", RowBox[{"2", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{\(\[Sum]\+\(y\^\[Prime] = 1\)\%M\), RowBox[{\(Pr(y | y\^\[Prime])\), RowBox[{"Pr", "(", RowBox[{\(y\^\[Prime]\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "which corresponds to the objective function for a soft VQ with code noise \ modelled by ", 
Cell[BoxData[ \(TraditionalForm\`Pr(y | y\^\[Prime])\)]], "; this is related to earlier results on vector quantisation for \ transmission along a noisy communication channel [", ButtonBox["8", ButtonData:>"Ref:KumazawaKasaharaNamekawa1984", ButtonStyle->"Hyperlink"], ", ", ButtonBox["4", ButtonData:>"Ref:Farvardin1990", ButtonStyle->"Hyperlink"], "]. The corresponding hard VQ (i.e. ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", SubscriptBox["\[Delta]", RowBox[{"y", ",", RowBox[{"y", "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}]}]]}], TraditionalForm]]], ") objective function becomes" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "=", RowBox[{"2", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", RowBox[{"y", "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}]}], ")"}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:ObjectiveSOM"], Cell[TextData[{ "This is a self-organising map (SOM) objective function, because the \ optimal code vectors ", Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}], TraditionalForm]]], " must arrange 
themselves so that not only can they reconstruct the input \ with small average ", Cell[BoxData[ \(TraditionalForm\`L\^2\)]], " distortion, but they must also make the information in the output layer \ robust with respect to the damage that can be caused by probability leakage. \ ", Cell[BoxData[ \(TraditionalForm\`Pr(y | y\^\[Prime])\)]], " can be identified with the topographic neighbourhood function used by \ Kohonen in his SOM algorithm. However, minimisation of the objective function \ in ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveSOM", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveSOM"], " does not quite lead to the same training algorithm as specified by \ Kohonen; the encoding process is no longer a nearest neighbour prescription, \ but a minimum distortion prescription (i.e. pick the winning code index as \ the one that on average will lead to minimum ", Cell[BoxData[ \(TraditionalForm\`L\^2\)]], " reconstruction distortion, when the effects of probability leakage have \ been taken into account). Thus the encoding process anticipates the average \ effect of probability leakage (which in turn models the damage caused by the \ higher layers of the FMC-ladder, as discussed in ", ButtonBox["section", ButtonData:>"Sect:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Leakage"], ".", CounterBox["Subsection", "Sect:Leakage"], ")." }], "Text"], Cell[TextData[{ "If one does not mind the slight difference between the training algorithm \ derived from ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveSOM", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveSOM"], " and Kohonen's original algorithm, then this approach supplies a nice \ interpretation of what the Kohonen algorithm is actually doing. It can be \ understood only by referring to the type of damage that higher layers of the \ network are going to do to the output of layer 1." 
}], "Text"], Cell[TextData[{ "This approach may be generalised to a multilayer network. If leakage is \ introduced in each layer then a multilayer Kohonen network is obtained. See \ [", ButtonBox["10", ButtonData:>"Ref:Luttrell1988", ButtonStyle->"Hyperlink"], "] for a simple application of multilayer Kohonen networks, and see [", ButtonBox["11", ButtonData:>"Ref:Luttrell1989a", ButtonStyle->"Hyperlink"], ", ", ButtonBox["12", ButtonData:>"Ref:Luttrell1989b", ButtonStyle->"Hyperlink"], "] for an early paper written from the FMC point of view." }], "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " ", "Partitioned Mixture Distributions (PMD)" }], "Section", CellTags->"Sect:PMD"], Cell[TextData[{ "This section introduces a useful parameterisation of the conditional \ probability ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " for building the Markov source. Because the multilayer network produced \ by optimising a set of coupled FMCs tends to have a minimal amount of \ stochastic behaviour (see the end of ", ButtonBox["section", ButtonData:>"Sect:FMCvsHM", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:FMCvsHM"], ".", CounterBox["Subsection", "Sect:FMCvsHM"], "), a way of encouraging ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " to form distributed codes must be found, so that more than one state in \ layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " can have high probability, given that the state in layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " is known. Different parts of layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " could then be used to encode different parts of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], ", which would then allow factorial codes to develop. 
It turns out that \ there is a simple way of allowing such codes to develop, called the \ partitioned mixture distribution (PMD) [", ButtonBox["18", ButtonData:>"Ref:Luttrell1994b", ButtonStyle->"Hyperlink"], ", ", ButtonBox["20", ButtonData:>"Ref:Luttrell1994d", ButtonStyle->"Hyperlink"], ", ", ButtonBox["22", ButtonData:>"Ref:Luttrell1997", ButtonStyle->"Hyperlink"], "]. A PMD is essentially a large number of mixture distribution models run \ \"in reverse\" (i.e. used to compute posterior probabilities over class \ labels, rather than to compute probability densities in the mixture \ distribution input space). When these mixture distributions are used to \ connect together layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " and layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ", where each mixture distribution is connected to only part of layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " and layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ", then the resulting network (i.e. a PMD) contains all of the ingredients \ that are necessary for factorial codes to develop." 
}], "Text"], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Multiple Recognition Models" }], "Subsection"], Cell["In the expression for the objective function", "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}], TraditionalForm]]], " term corresponds to a set of coupled FMCs (i.e. an FMC-ladder), in which \ the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " and the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], ") need to be constructed. 
Currently, in an FMC-ladder the generative \ models ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " are parameterised as Gaussian probability densities, whereas the \ recognition models ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " are parameterised in a more general way." }], "Text"], Cell[TextData[{ "The simplest parameterisation of the recognition model ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], TraditionalForm]]], " is" }], "Text"], Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\) = \ \(\(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) P\_\(i\_\(l + 1\)\)\%\(l + 1\)\ \)\/\(\[Sum]\+\(\(i\^\[Prime]\)\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_l, \(i\^\[Prime]\)\_\(l + 1\)\)\%\(l | l + 1\)\) P\_\(\(i\^\ \[Prime]\)\_\(l + 1\)\)\%\(l + 1\)\)\)], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}, CellTags->"Eq:PosteriorProbability"], Cell[TextData[{ "which guarantees the normalisation condition ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)P\_\(i\ \_\(l + 1\), i\_l\)\%\(l + 1 | l\) = 1\)]], "; this is the posterior probability (of class ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " given data ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], ") derived from a mixture distribution ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(\(i\^\[Prime]\)\_\(l + 1\) = 1\)\%\(M\_\(l \ + 1\)\)\(P\_\(i\_l, \(i\^\[Prime]\)\_\(l + 1\)\)\%\(l | l + 1\)\) P\_\(\(i\^\[Prime]\)\_\(l + 1\)\)\%\(l + 1\)\)]], ". 
A limitation of this type of recognition model is that it allows only a \ ", StyleBox["single", FontSlant->"Italic"], " explanation ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " of the data ", Cell[BoxData[ \(TraditionalForm\`i\_l\)]], " (in the case of a hard ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\)]], ") or a probability distribution over ", StyleBox["single", FontSlant->"Italic"], " explanations (in the case of a soft ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\)]], "), so it cannot lead to a factorial encoding of the data." }], "Text"], Cell[TextData[{ "The simplest way of allowing a factorial encoding to develop is to make \ simultaneous use of more than one recognition model. Each recognition model uses \ its own ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], TraditionalForm]]], " vector and ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " matrix to compute a posterior probability of the type shown in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbability", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbability"], ", so that if each recognition model is sensitised to a different piece of \ the data, then a factorial code can develop. This approach can be formalised \ by making the replacement ", Cell[BoxData[ FormBox[ RowBox[{\(i\_\(l + 1\)\), "\[LongRightArrow]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]}], TraditionalForm]]], " in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbability", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbability"], " (i.e. replace the scalar code index by a vector code index, where the \ number of vector components is equal to the number of recognition models). 
If \ the components of ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], TraditionalForm]]], " are determined independently of each other, then their joint posterior \ probability ", Cell[BoxData[ FormBox[ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", \(i\_l\)}], \(l + 1 | l\)], TraditionalForm]]], " is a product of independent posterior probabilities, where each posterior \ probability corresponds to one of the recognition models, and thus has its \ own ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], TraditionalForm]]], " vector and ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " matrix." }], "Text"], Cell[TextData[{ "If this type of posterior probability, which is a product of ", Cell[BoxData[ \(TraditionalForm\`n\)]], " independent factors if there are ", Cell[BoxData[ \(TraditionalForm\`n\)]], " independent recognition models, is then inserted into ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], " (i.e. 
a 2-layer FMC, or equivalently a soft VQ) it yields" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "=", "\[AlignmentMarker]", RowBox[{"2", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y\_1 = 1\)\%M\), RowBox[{\(\[Sum]\+\(y\_2 = 1\)\%M\), RowBox[{"\[CenterEllipsis]", RowBox[{\(\[Sum]\+\(y\_n = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_1\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "1"}], ")"}], RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_2\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "2"}], ")"}], RowBox[{"\[CenterEllipsis]Pr", "(", RowBox[{ RowBox[{\(y\_n\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "n"}], ")"}], "\n", "\[Times]", SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", \(y\_1, y\_2, \[CenterEllipsis], y\_n\), ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "where ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_k\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], TraditionalForm]]], " denotes the posterior probability that (given input ", Cell[BoxData[ FormBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], TraditionalForm]]], ") code index ", Cell[BoxData[ \(TraditionalForm\`y\_k\)]], " occurs in recognition model ", Cell[BoxData[ \(TraditionalForm\`k\)]], ", and ", 
Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", \(y\_1, y\_2, \[CenterEllipsis], y\_n\), ")"}], TraditionalForm]]], " takes the value that minimises ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " (i.e. ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", \(y\_1, y\_2, \[CenterEllipsis], y\_n\), ")"}], "=", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{"d", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_1\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "1"}], ")"}], RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_2\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "2"}], ")"}], RowBox[{"\[CenterEllipsis]Pr", "(", RowBox[{ RowBox[{\(y\_n\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "n"}], ")"}], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]}]}], TraditionalForm]]], "). 
If the ", Cell[BoxData[ \(TraditionalForm\`L\^2\)]], " norm is then expanded thus" }], "Text", CellTags->"Ed:Change9"], Cell[BoxData[ FormBox[ RowBox[{ SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", \(y\_1, y\_2, \[CenterEllipsis], y\_n\), ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"], "\[Congruent]", SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", GridBox[{ { RowBox[{"(", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{\(1\/n\), RowBox[{\(\[Sum]\_\(k = 1\)\%n\), RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", \(y\_k\), ")"}]}]}]}], ")"}]}, { RowBox[{"+", RowBox[{"(", RowBox[{ RowBox[{\(1\/n\), RowBox[{\(\[Sum]\_\(k = 1\)\%n\), RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", \(y\_k\), ")"}]}]}], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", \(y\_1, y\_2, \[CenterEllipsis], y\_n\), ")"}]}], ")"}]}]} }], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "then ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " can be bounded above as follows" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "\[LessEqual]", "\[AlignmentMarker]", RowBox[{"2", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{ StyleBox["d", FontSlant->"Italic"], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", 
FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y\_1 = 1\)\%M\), RowBox[{\(\[Sum]\+\(y\_2 = 1\)\%M\), RowBox[{"\[CenterEllipsis]", RowBox[{\(\[Sum]\+\(y\_n = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_1\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "1"}], ")"}], RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_2\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "2"}], ")"}], RowBox[{"\[CenterEllipsis]Pr", "(", RowBox[{ RowBox[{\(y\_n\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "n"}], ")"}], "\n", "\[Times]", SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{\(1\/n\), RowBox[{\(\[Sum]\+\(k = 1\)\%n\), RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", \(y\_k\), ")"}]}]}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:MultipleRecognition"], Cell[TextData[{ "First of all the ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_k\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], TraditionalForm]]], " are used to produce soft encodings in each of the recognition models (", Cell[BoxData[ \(TraditionalForm\`k = 1, 2, \[CenterEllipsis], n\)]], "), then a sum ", Cell[BoxData[ FormBox[ RowBox[{\(1\/n\), RowBox[{\(\[Sum]\_\(k = 1\)\%n\), RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", \(y\_k\), ")"}]}]}], TraditionalForm]]], " of the vectors ", Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", \(y\_k\), ")"}], TraditionalForm]]], " is used as the 
reconstruction of the input ", Cell[BoxData[ FormBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ". In the special case where hard encodings are used, so that ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{\(y\_k\), "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], "=", SubscriptBox["\[Delta]", RowBox[{\(y\_k\), ",", RowBox[{\(y\_k\), "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}]}]]}], TraditionalForm]]], ", then the upper bound on ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " reduces to" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "\[LessEqual]", RowBox[{"2", RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{ StyleBox["d", FontSlant->"Italic"], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{\(1\/n\), RowBox[{\(\[Sum]\+\(k = 1\)\%n\), RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", RowBox[{\(y\_k\), "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}], ")"}]}]}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker], Cell[TextData[{ "This is related to the objective function for a cooperative vector \ quantiser (CVQ), where ", Cell[BoxData[ \(TraditionalForm\`n\)]], " VQs are used ", StyleBox["independently", FontSlant->"Italic"], " to encode the input, and then a reconstruction is formed from a sum of \ vectors. 
Note that the code vectors used for the encoding operation ", Cell[BoxData[ FormBox[ RowBox[{\(y\_k\), "(", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], ")"}], TraditionalForm]]], " are not necessarily the same as the ", Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox[ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "k"], "(", \(y\_k\), ")"}], TraditionalForm]]], ", except in the special case ", Cell[BoxData[ \(TraditionalForm\`n = 1\)]], "." }], "Text"], Cell[TextData[{ "Suppose that a single recognition model is independently used ", Cell[BoxData[ \(TraditionalForm\`n\)]], " times, rather than ", Cell[BoxData[ \(TraditionalForm\`n\)]], " independent recognition models each independently being used once. This \ corresponds to constraining the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], TraditionalForm]]], " vectors and ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)], TraditionalForm]]], " matrices to be the same for each of the ", Cell[BoxData[ \(TraditionalForm\`n\)]], " recognition models. 
The upper bound on ", Cell[BoxData[ FormBox[ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], TraditionalForm]]], " can be manipulated into the form" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ SubscriptBox["D", StyleBox["FMC", FontSlant->"Italic"]], "\[LessEqual]", "\[AlignmentMarker]", RowBox[{ RowBox[{\(2\/n\), RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{ StyleBox["d", FontSlant->"Italic"], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}], "\n", "+", RowBox[{\(\(2 \((n - 1)\)\)\/n\), RowBox[{"\[Integral]", RowBox[{ StyleBox[ RowBox[{ StyleBox["d", FontSlant->"Italic"], StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}]], " ", RowBox[{ StyleBox["Pr", FontSlant->"Plain"], StyleBox["(", FontSlant->"Plain"], StyleBox["x", FontWeight->"Bold", FontSlant->"Italic"], StyleBox[")", FontSlant->"Plain"]}], SuperscriptBox[ RowBox[{"\[LeftBracketingBar]", RowBox[{"\[LeftBracketingBar]", RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "-", RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}]}]}], "\[RightBracketingBar]"}], "\[RightBracketingBar]"}], "2"]}]}]}]}]}], TraditionalForm]], 
"NumberedEquation", TextAlignment->AlignmentMarker, CellTags->"Eq:SingleRecognition"], Cell[TextData[{ "where the ", Cell[BoxData[ \(TraditionalForm\`k\)]], " index is no longer needed. In the case ", Cell[BoxData[ \(TraditionalForm\`n = 1\)]], " this correctly reduces to ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveVQ", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveVQ"], " (the inequality reduces to an equality in this case). When ", Cell[BoxData[ \(TraditionalForm\`n > 1\)]], " the second term offers the possibility of factorial encoding, because it \ contains a weighted linear combination ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], RowBox[{ SuperscriptBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "\[Prime]"], "(", "y", ")"}]}]}], TraditionalForm]]], " of vectors." }], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ", "Average Over Recognition Models" }], "Subsection"], Cell[TextData[{ "Now combine the above two approaches to factorial encoding, so that a \ single recognition model is used (as in ", ButtonBox["equation", ButtonData:>"Eq:SingleRecognition", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:SingleRecognition"], "), which is parameterised in such a way that it can emulate multiple \ recognition models (as in ", ButtonBox["equation", ButtonData:>"Eq:MultipleRecognition", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:MultipleRecognition"], "). 
The simplest possibility is to make the replacement ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\)\)\%\(l + 1\)\[LongRightArrow] A\_\(k, i\_\(l + 1\)\)\%\(l + 1\) P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)]], " (where ", Cell[BoxData[ \(TraditionalForm\`A\_\(k, i\_\(l + 1\)\)\%\(l + 1\) \[GreaterEqual] 0\)]], ") in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbability", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbability"], ", where ", Cell[BoxData[ \(TraditionalForm\`k\)]], " is a recognition model index which ranges over ", Cell[BoxData[ \(TraditionalForm\`k = 1, 2, \[CenterEllipsis], K\)]], " (note that ", Cell[BoxData[ \(TraditionalForm\`K\)]], " is not constrained to be the same as ", Cell[BoxData[ \(TraditionalForm\`n\)]], "), and to average over ", Cell[BoxData[ \(TraditionalForm\`k\)]], ", to produce" }], "Text"], Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\ \[LongRightArrow]\(1\/K\) \(\[Sum]\+\(k = 1\)\%K\(\( P\_\(i\_l, i\_\(l + \ 1\)\)\%\(l | l + 1\)\) \(A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(i\_\(l + \ 1\)\)\%\(l + 1\)\)\/\(\[Sum]\+\(\(i\^\[Prime]\)\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_l, \(i\^\[Prime]\)\_\(l + 1\)\)\%\(l | l + 1\)\) \(A\_\(k, \(i\ \^\[Prime]\)\_\(l + 1\)\)\%\(l + 1\)\) P\_\(\(i\^\[Prime]\)\_\(l + 1\)\)\%\(l \ + 1\)\)\)\)], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}, CellTags->"Eq:PosteriorProbabilityPMD"], Cell[TextData[{ "In effect, ", Cell[BoxData[ \(TraditionalForm\`K\)]], " recognition models are embedded between layer ", Cell[BoxData[ \(TraditionalForm\`l\)]], " and layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ", and the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["A", FontWeight->"Bold"], \(l + 1\)], TraditionalForm]]], " matrix specifies which indices ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " in layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], 
" are associated with recognition model ", Cell[BoxData[ \(TraditionalForm\`k\)]], "." }], "Text"], Cell[TextData[{ "A partitioned mixture distribution (PMD) is precisely this type of \ multiple embedded recognition model. In the simplest type of PMD the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["A", FontWeight->"Bold"], \(l + 1\)], TraditionalForm]]], " matrix is chosen to contain only 0's and 1's, which are arranged so that \ the ", Cell[BoxData[ \(TraditionalForm\`K\)]], " recognition models partition layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " into ", Cell[BoxData[ \(TraditionalForm\`K\)]], " overlapping patches. Other PMDs are possible. For instance, the ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["A", FontWeight->"Bold"], \(l + 1\)], TraditionalForm]]], " matrix could specify ", Cell[BoxData[ \(TraditionalForm\`K\)]], " recognition models which partition layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " into K ", StyleBox["non", FontSlant->"Italic"], "-overlapping patches. In this case, because the ", Cell[BoxData[ \(TraditionalForm\`n\)]], " code indices are generated independently, they are not guaranteed to \ occupy different partitions (i.e. different recognition models); some \ partitions might have no code indices, some might have only one, and others \ might have more than one, subject only to the constraint that the total \ number was ", Cell[BoxData[ \(TraditionalForm\`n\)]], ". This is not a fundamental problem, because provided that ", Cell[BoxData[ \(TraditionalForm\`n \[GreaterEqual] K\)]], " there is a finite probability (which increases monotonically towards 1 as \ ", Cell[BoxData[ \(TraditionalForm\`n\[LongRightArrow]\[Infinity]\)]], ") that at least one code index occupies each of the ", Cell[BoxData[ \(TraditionalForm\`K\)]], " partitions. 
This is how a single recognition model (when parameterised as \ a PMD given in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbabilityPMD", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbabilityPMD"], ") can emulate multiple recognition models." }], "Text"], Cell[TextData[{ "As in the Kohonen network (see ", ButtonBox["section", ButtonData:>"Sect:Kohonen", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Kohonen"], ") probability leakage (see ", ButtonBox["section", ButtonData:>"Sect:Leakage", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:Leakage"], ButtonBox[".", ButtonData:>"Sect:Leakage", ButtonStyle->"Hyperlink"], CounterBox["Subsection", "Sect:Leakage"], ") can be used to anticipate the damage that higher layers of a multilayer \ network cause, by encouraging the network properties to become \ topographically ordered." }], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " Full Bayesian Average Over Recognition Models" }], "Subsection"], Cell[TextData[{ "One possible criticism of the recognition model given in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbabilityPMD", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbabilityPMD"], " is that it is a mixture of ", Cell[BoxData[ \(TraditionalForm\`K\)]], " recognition models, where each contributing model is assigned the ", StyleBox["same", FontSlant->"Italic"], " weight ", Cell[BoxData[ \(TraditionalForm\`1\/K\)]], ". 
Normally, a posterior probability ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " is decomposed as a sum over contributing model posterior probabilities ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{ RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], TraditionalForm]]], " as follows" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", RowBox[{\(\[Sum]\+\(k = 1\)\%K\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], RowBox[{"Pr", "(", RowBox[{"k", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "where each of the ", Cell[BoxData[ \(TraditionalForm\`K\)]], " recognition models is assigned a ", StyleBox["different", FontSlant->"Italic"], " data-dependent weight ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"k", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], ". 
The conditional probabilities ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{"k", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " and ", Cell[BoxData[ FormBox[ RowBox[{"Pr", "(", RowBox[{ RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], TraditionalForm]]], " can be evaluated to yield" }], "Text", CellTags->"Ed:Change10"], Cell[BoxData[{ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"k", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", FractionBox[ RowBox[{\(\[Sum]\+\(y = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", "y"}], ",", "k"}], ")"}], \(Pr(y | k)\), \(Pr(k)\)}]}], RowBox[{\(\[Sum]\+\(k\^\[Prime] = 1\)\%K\), RowBox[{\(\[Sum]\+\(y\^\[Prime] = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", \(y\^\[Prime]\)}], ",", \(k\^\[Prime]\)}], ")"}], \(Pr( y\^\[Prime] | k\^\[Prime])\), \(Pr(k\^\[Prime])\)}]}]}]]}], TraditionalForm], "\n", FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ",", "k"}], ")"}], "=", "\[AlignmentMarker]", FractionBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", "y"}], ",", "k"}], ")"}], \(Pr( y | k)\), \(Pr(k)\)}], RowBox[{\(\[Sum]\+\(y\^\[Prime] = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", \(y\^\[Prime]\)}], ",", "k"}], ")"}], \(Pr(y\^\[Prime] | k)\), \(Pr(k)\)}]}]]}], TraditionalForm]}], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell["so that", "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", 
RowBox[{\(\[Sum]\+\(k = 1\)\%K\), FractionBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", "y"}], ",", "k"}], ")"}], \(Pr(y | k)\), \(Pr(k)\)}], RowBox[{\(\[Sum]\+\(k\^\[Prime] = 1\)\%K\), RowBox[{\(\[Sum]\+\(y\^\[Prime] = 1\)\%M\), RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", \(y\^\[Prime]\)}], ",", \(k\^\[Prime]\)}], ")"}], \(Pr( y\^\[Prime] | k\^\[Prime])\), \(Pr( k\^\[Prime])\)}]}]}]]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "If the replacements ", Cell[BoxData[ \(TraditionalForm\`\(Pr(k)\)\[LongRightArrow]\(1\/K\)\)]], " and ", Cell[BoxData[ \(TraditionalForm\`\(Pr(y | k)\)\[LongRightArrow] A\_\(k, i\_\(l + 1\)\)\%\(l + 1\) P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)]], " (both of these modulo a constant factor), and ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{ RowBox[{ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], "|", "y"}], ",", "k"}], ")"}], "\[LongRightArrow]", \(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)}], TraditionalForm]]], " are made, then" }], "Text", CellTags->"Ed:Change11"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"y", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "\[LongRightArrow]", \(\[Sum]\+\(k = 1\)\%K\(\( P\_\(i\_l, i\_\(l + 1\ \)\)\%\(l | l + 1\)\) \(A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(i\_\(l + 1\)\ \)\%\(l + 1\)\)\/\(\[Sum]\+\(k\^\[Prime] = \ 1\)\%K\(\[Sum]\+\(\(i\^\[Prime]\)\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_l, \(i\^\[Prime]\)\_\(l + 1\)\)\%\(l | l + 1\)\) \(A\_\(k\^\ \[Prime], \(i\^\[Prime]\)\_\(l + 1\)\)\%\(l + 1\)\) P\_\(\(i\^\[Prime]\)\_\(l \ + 1\)\)\%\(l + 1\)\)\)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}, 
CellTags->"Eq:PosteriorProbabilityPMD2"], Cell[TextData[{ "which is ", StyleBox["not", FontSlant->"Italic"], " the same as the PMD recognition model in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbabilityPMD", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbabilityPMD"], ", which would have been obtained if ", Cell[BoxData[ \(TraditionalForm\`k\)]], " were independent of ", Cell[BoxData[ FormBox[ StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " (i.e. ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"Pr", "(", RowBox[{"k", "|", StyleBox["x", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", \(Pr(k)\)}], TraditionalForm]]], ")." }], "Text"], Cell[TextData[{ "However, the PMD recognition model has a strong advantage over the full \ recognition model in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbabilityPMD2", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbabilityPMD2"], ", because it uses only local connectivity in layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ", which determines the contributions to the sums over ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " and ", Cell[BoxData[ \(TraditionalForm\`k\)]], ". 
In ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbabilityPMD2", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbabilityPMD2"], " the normalisation term in the denominator has a double summation ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(k = 1\)\%K\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\ \(M\_\(l + 1\)\)\(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) \(A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)\)]], ", which involves all pairs of indices ", Cell[BoxData[ \(TraditionalForm\`k\)]], " and ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " which have ", Cell[BoxData[ \(TraditionalForm\`A\_\(k, i\_\(l + 1\)\)\%\(l + 1\) > 0\)]], ", which thus corresponds to long-range lateral interactions in layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ". On the other hand, the PMD recognition model in ", ButtonBox["equation", ButtonData:>"Eq:PosteriorProbabilityPMD", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:PosteriorProbabilityPMD"], " has a normalisation term in the denominator which involves only a single \ summation ", Cell[BoxData[ \(TraditionalForm\`\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\) \(A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\) P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)]], ", so the lateral interactions in layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], " are determined by the structure of the matrix ", Cell[BoxData[ \(TraditionalForm\`A\_\(k, i\_\(l + 1\)\)\%\(l + 1\)\)]], ", which defines only short-range lateral connections (i.e. for a given \ recognition model ", Cell[BoxData[ \(TraditionalForm\`k\)]], ", only a limited number of index values ", Cell[BoxData[ \(TraditionalForm\`i\_\(l + 1\)\)]], " satisfy ", Cell[BoxData[ \(TraditionalForm\`A\_\(k, i\_\(l + 1\)\)\%\(l + 1\) > 0\)]], ")." 
}], "Text", CellTags->"Ed:Change12"], Cell[TextData[{ "As discussed in ", ButtonBox["section", ButtonData:>"Sect:DynamicMarkovSourceCoding", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:DynamicMarkovSourceCoding"], ".", CounterBox["Subsection", "Sect:DynamicMarkovSourceCoding"], ", a PMD can be endowed with a memory of its previous state, just as a \ standard mixture distribution can, to obtain a dynamical PMD [", ButtonBox["22", ButtonData:>"Ref:Luttrell1997", ButtonStyle->"Hyperlink"], "]." }], "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " ", "Adaptive Cluster Expansion (ACE)" }], "Section", CellTags->"Sect:ACE"], Cell[TextData[{ "This section discusses the adaptive cluster expansion (ACE) [", ButtonBox["14", ButtonData:>"Ref:Luttrell1991a", ButtonStyle->"Hyperlink"], ", ", ButtonBox["19", ButtonData:>"Ref:Luttrell1994c", ButtonStyle->"Hyperlink"], ", ", ButtonBox["21", ButtonData:>"Ref:Luttrell1996", ButtonStyle->"Hyperlink"], "], which is a tree-structured density network." 
}], "Text"], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ACE: Tree-Structured Density Network" }], "Subsection"], Cell[TextData[{ "Consider the objective function ", Cell[BoxData[ \(TraditionalForm\`D\)]], " for an ", Cell[BoxData[ \(TraditionalForm\`L + 1\)]], " layer FMC-ladder" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"D", "\[Congruent]", "\[AlignmentMarker]", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\), RowBox[{\(P\_\(i\_l\)\%l\), RowBox[{\(K\_\(i\_l\)\), "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1 | l\)], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], \(l | l + 1\)]}], ")"}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", \(-\(\[Sum]\+\(l = 0\)\%\(L - \ 1\)\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\(P\_\(i\_l\)\%l\) \(\[Sum]\+\(i\_\(l + \ 1\) = 1\)\%\(M\_\(l + 1\)\)\(P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\) log\ Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)\)\)\)}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "Now assume that the ", Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)]], " part of the model (i.e. the Markovian part) is perfect so that ", Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\) = P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], "), and that the ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\)]], " part of the source (i.e. 
the Markovian part) is deterministic so that ", Cell[BoxData[ \(TraditionalForm\`P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\) = \ \[Delta]\_\(i\_\(l + 1\), \(i\_\(l + 1\)\)(i\_l)\)\)]], " (for ", Cell[BoxData[ \(TraditionalForm\`l = 0, 1, \[CenterEllipsis], L - 1\)]], "), in which case ", Cell[BoxData[ \(TraditionalForm\`D\)]], " simplifies as follows" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{"D", "=", "\[AlignmentMarker]", RowBox[{\(-\(\[Sum]\+\(l = 0\)\%\(L - 1\)\(\[Sum]\+\(i\_l = \ 1\)\%\(M\_l\)\(P\_\(i\_l\)\%l\) \(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\) log\ P\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(-\(\[Sum]\+\(l = 0\)\%\(L - 1\)\(\[Sum]\+\(i\_l = \ 1\)\%\(M\_l\)\(P\_\(i\_l\)\%l\) \(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + \ 1\)\)\(\[Delta]\_\(i\_\(l + 1\), \(i\_\(l + 1\)\)(i\_l)\)\) log \(\( \[Delta]\_\(i\_\(l + 1\), \(i\_\(l + 1\)\)(i\ \_l)\)\) P\_\(i\_l\)\%l\)\/P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(\(-\(\[Sum]\+\(l = 0\)\%\(L - 1\)\(\[Sum]\+\(i\_l = 1\)\ \%\(M\_l\)\(P\_\(i\_l\)\%l\) log\ P\_\(i\_l\)\%l\)\)\) + \[Sum]\+\(l = 0\)\%\(L - \ 1\)\(\[Sum]\+\(i\_l = 1\)\%\(M\_l\)\(P\_\(i\_l\)\%l\) \(\[Sum]\+\(i\_\(l + \ 1\) = 1\)\%\(M\_\(l + 1\)\)\(\[Delta]\_\(i\_\(l + 1\), \(i\_\(l + 1\)\)( i\_l)\)\) log\ P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(\(-\(\[Sum]\+\(l = 0\)\%\(L - 1\)\(\[Sum]\+\(i\_l = 1\ \)\%\(M\_l\)\(P\_\(i\_l\)\%l\) log\ P\_\(i\_l\)\%l\)\)\) + \[Sum]\+\(l = 0\)\%\(L \ - 1\)\(\[Sum]\+\(i\_\(l + 1\) = 1\)\%\(M\_\(l + 1\)\)\(P\_\(i\_\(l + 1\)\)\%\(l + 1\)\) log\ P\_\(i\_\(l + 1\)\)\%\(l + 1\)\)\), "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(\(-\(\[Sum]\+\(i\_0 = 1\)\%\(M\_0\)\(P\_\(i\_0\)\%0\ \) log\ P\_\(i\_0\)\%0\)\) + \[Sum]\+\(i\_L = 1\)\%\(M\_L\)\(P\_\(i\_L\)\%L\) log\ P\_\(i\_L\)\%L\), 
"\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"H", "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ")"}], "-", RowBox[{"H", "(", SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ")"}]}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "This is the number of bits per symbol that is required to convert a ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], "-message into a ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], "-message, assuming that the Markovian part of the source is deterministic, \ and that the model is perfect. This result is not very interesting in \ itself." }], "Text"], Cell["\<\ However, if the Markovian part of the source is not only deterministic, but \ is also tree-structured, and the model is similarly tree-structured, then the \ notation must be modified thus\ \>", "Text"], Cell[BoxData[{ FormBox[ RowBox[{ RowBox[{\(i\_l\), "\[LongRightArrow]", "\[AlignmentMarker]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], "=", RowBox[{"(", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", "\[CenterEllipsis]"}], ")"}]}], TraditionalForm], "\n", FormBox[ RowBox[{ RowBox[{\(i\_\(l + 1\)\), "\[LongRightArrow]", "\[AlignmentMarker]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]}], "=", RowBox[{"(", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], ",", "\[CenterEllipsis]"}], ")"}]}], TraditionalForm], "\n", 
FormBox[ RowBox[{ RowBox[{\(P\_\(i\_\(l + 1\), i\_l\)\%\(l + 1 | l\)\), "\[LongRightArrow]", "\[AlignmentMarker]", SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)]}], "=", RowBox[{ RowBox[{ SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"]}], \(l + 1 | l\)], SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"]}], \(l + 1 | l\)], "\[CenterEllipsis]"}], "=", RowBox[{ SubscriptBox["\[Delta]", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], ",", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], "(", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ")"}]}]], SubscriptBox["\[Delta]", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], ",", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], "(", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ")"}]}]], "\[CenterEllipsis]"}]}]}], TraditionalForm], "\n", FormBox[ RowBox[{ RowBox[{\(Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\), "\[LongRightArrow]", "\[AlignmentMarker]", SubsuperscriptBox["Q", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]}], \(l | l + 1\)]}], "=", RowBox[{ SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"]}], \(l | l + 
1\)], SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"]}], \(l | l + 1\)], "\[CenterEllipsis]"}]}], TraditionalForm]}], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "where the components of the vector ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], TraditionalForm]]], " have been partitioned as ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", "\[CenterEllipsis]"}], ")"}], TraditionalForm]]], ", where each ", Cell[BoxData[ FormBox[ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], TraditionalForm]]], " contains a complete set of siblings whose parent is in layer ", Cell[BoxData[ \(TraditionalForm\`l + 1\)]], ", and the components of the vector ", Cell[BoxData[ FormBox[ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], TraditionalForm]]], " have been partitioned as ", Cell[BoxData[ FormBox[ RowBox[{"(", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], ",", "\[CenterEllipsis]"}], ")"}], TraditionalForm]]], ", where ", Cell[BoxData[ FormBox[ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], TraditionalForm]]], " is the parent of ", Cell[BoxData[ FormBox[ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], TraditionalForm]]], ". 
This notation may be used to rearrange ", Cell[BoxData[ \(TraditionalForm\`D\)]], " as follows" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{"D", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{"log", "(", RowBox[{ SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"]}], \(l | l + 1\)], SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"]}], \(l | l + 1\)], "\[CenterEllipsis]"}], ")"}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{\(\[Sum]\+c\), 
RowBox[{"log", " ", SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"]}], \(l | l + 1\)]}]}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{\(\[Sum]\+c\), RowBox[{"log", "(", FractionBox[ RowBox[{ SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"]}], \(l + 1 | l\)], SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], "l"]}], SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], \(l + 1\)]], ")"}]}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{\(\[Sum]\+c\), RowBox[{"log", " ", SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], "l"]}]}]}]}]}]}], "\n", "\[AlignmentMarker]", "-", 
RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{\(\[Sum]\+c\), RowBox[{"log", " ", SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"]}], \(l + 1 | l\)]}]}]}]}]}]}]}], "\n", "\[AlignmentMarker]", "+", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{\(\[Sum]\+c\), RowBox[{"log", " ", SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], \(l + 1\)]}]}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", 
FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{\(\[Sum]\+c\), RowBox[{"log", " ", SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], "l"]}]}]}]}]}]}], "\[AlignmentMarker]", "+", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], \(l + 1\)], RowBox[{\(\[Sum]\+c\), RowBox[{"log", " ", SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], \(l + 1\)]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{ RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}], "\[AlignmentMarker]", "-", RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(component\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}], "\[AlignmentMarker]"}]}]}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "where the fact that ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", 
FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{\(\[Sum]\+c\), RowBox[{"log", " ", SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"]}], \(l + 1 | l\)]}]}]}]}]}]}]}]}], "=", "0"}], TraditionalForm]]], " has been used." }], "Text"], Cell[TextData[{ "This expression for ", Cell[BoxData[ \(TraditionalForm\`D\)]], " can be rewritten in terms of the mutual information ", Cell[BoxData[ FormBox[ RowBox[{"I", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}], TraditionalForm]]], " between the components of cluster ", Cell[BoxData[ FormBox[ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], TraditionalForm]]], " by making use of the following result" }], "Text", CellTags->"Ed:Change13"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{ RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"I", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}], "\[Congruent]", "\[AlignmentMarker]", RowBox[{ RowBox[{\(\[Sum]\+\(component\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}], "-", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}]}], "\[AlignmentMarker]", "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(component\ c\)\), RowBox[{ SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], "l", "c"], "l"], "log", " ", SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], "l", "c"], "l"]}]}]}], "+", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{ UnderscriptBox["\[Sum]", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"]], RowBox[{ SubsuperscriptBox["P", 
SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], "l"], "log", " ", SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], "l"]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell["to yield", "Text"], Cell[BoxData[ FormBox[ RowBox[{"D", "=", "\[AlignmentMarker]\[AlignmentMarker]", RowBox[{ RowBox[{ RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "0"], ")"}]}], "-", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "L"], ")"}]}], "\n", "+", RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}], "-", RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(component\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"I", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}]}], "+", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "0"], ")"}]}], "-", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "L"], ")"}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "Now add ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", 
FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " (the contribution from the output layer) to ", Cell[BoxData[ \(TraditionalForm\`D\)]], " (the objective function for an FMC-ladder) to obtain ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], ", and assume that the model is perfect in the output layer so that ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"], TraditionalForm]]], " is given by ", Cell[BoxData[ FormBox[ RowBox[{ StyleBox[ SubsuperscriptBox["Q", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L"], "L"], FontSlant->"Italic"], "=", RowBox[{ SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L", "1"], "L"], SubsuperscriptBox["P", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "L", "2"], "L"], "\[CenterEllipsis]"}]}], TraditionalForm]]], ". 
This allows ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], TraditionalForm]]], " to be simplified to ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}], "=", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "L"], ")"}]}]}], TraditionalForm]]], ", so that ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " simplifies as [", ButtonBox["14", ButtonData:>"Ref:Luttrell1991a", ButtonStyle->"Hyperlink"], "]" }], "Text", CellTags->"Ed:Change14"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"D", "+", RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "L"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "L"]}], ")"}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"I", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}]}], "+", RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "0"], ")"}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, 
GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "The ", Cell[BoxData[ FormBox[ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"I", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}]}], TraditionalForm]]], " term is (minus) the sum of the mutual informations within all of the \ clusters in the ", Cell[BoxData[ \(TraditionalForm\`L + 1\)]], " layer network, and the ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"H", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "0"], ")"}]}], TraditionalForm]]], " term is constant for a given external source ", Cell[BoxData[ FormBox[ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], TraditionalForm]]], ". This means that minimising ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}], TraditionalForm]]], " is equivalent to maximising ", Cell[BoxData[ FormBox[ RowBox[{\(\[Sum]\+\(l = 1\)\%L\), RowBox[{\(\[Sum]\+\(cluster\ c\)\), RowBox[{"I", "(", SubsuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "c", "l"], ")"}]}]}], TraditionalForm]]], ". This is the maximum mutual information result for ACE networks. The \ mutual information maximisation principle in [", ButtonBox["1", ButtonData:>"Ref:BeckerHinton1992", ButtonStyle->"Hyperlink"], "] is a special case of the above result." 
}], "Text"], Cell[TextData[{ "As was noted in ", ButtonBox["section", ButtonData:>"Sect:FMCvsHM", ButtonStyle->"Hyperlink"], " ", CounterBox["Section", "Sect:FMCvsHM"], ".", CounterBox["Subsection", "Sect:FMCvsHM"], ", if the source is deterministic and the model is perfect (as they are \ here), then ", Cell[BoxData[ FormBox[ RowBox[{ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], "=", RowBox[{"L", "(", RowBox[{ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], ",", StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"]}], ")"}]}], TraditionalForm]]], ", which implies that input density optimisation is equivalent to joint \ density optimisation. This equivalence was used in [", ButtonBox["14", ButtonData:>"Ref:Luttrell1991a", ButtonStyle->"Hyperlink"], "], where the sum-of-mutual-informations objective function was derived by \ minimising ", Cell[BoxData[ FormBox[ RowBox[{"L", "(", RowBox[{ SuperscriptBox[ StyleBox["P", FontWeight->"Bold", FontSlant->"Plain"], "0"], ",", SuperscriptBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], "0"]}], ")"}], TraditionalForm]]], "." 
}], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], ".", CounterBox["Subsection"], " ACE: Hierarchical Vector Quantiser" }], "Subsection"], Cell[TextData[{ "If the above ACE network is modified slightly, so that the model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], " has exactly the same structure as before, but is Gaussian rather than \ perfect, then ", Cell[BoxData[ \(TraditionalForm\`Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\)]], " becomes" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{ RowBox[{\(Q\_\(i\_l, i\_\(l + 1\)\)\%\(l | l + 1\)\), "\[LongRightArrow]", "\[AlignmentMarker]", SubsuperscriptBox["Q", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]}], \(l | l + 1\)]}], "=", RowBox[{ SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"]}], \(l | l + 1\)], SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"]}], \(l | l + 1\)], "\[CenterEllipsis]"}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "where the individual ", Cell[BoxData[ FormBox[ SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"]}], \(l | l + 1\)], TraditionalForm]]], " are Gaussian. The expression for ", Cell[BoxData[ \(TraditionalForm\`D\)]], " (i.e. 
an FMC-ladder without the output term) then becomes (compare ", ButtonBox["equation", ButtonData:>"Eq:ObjectiveMarkovSourceSimple", ButtonStyle->"Hyperlink"], " ", CounterBox["NumberedEquation", "Eq:ObjectiveMarkovSourceSimple"], ")" }], "Text"], Cell[BoxData[ FormBox[ RowBox[{"D", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], \(l + 1\)], ",", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]}], \(l + 1 | l\)], RowBox[{"log", "(", RowBox[{ SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"]}], \(l | l + 1\)], SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"]}], \(l | l + 1\)], "\[CenterEllipsis]"}], ")"}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{ UnderscriptBox["\[Sum]", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], ",", "\[CenterEllipsis]"}]], RowBox[{ 
RowBox[{"(", RowBox[{ SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"]}], \(l + 1 | l\)], SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"]}], \(l + 1 | l\)], "\[CenterEllipsis]"}], ")"}], RowBox[{"log", "(", RowBox[{ SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "1"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "1"]}], \(l | l + 1\)], SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "2"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "2"]}], \(l | l + 1\)], "\[CenterEllipsis]"}], ")"}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{ RowBox[{"-", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), RowBox[{ UnderscriptBox["\[Sum]", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"]], RowBox[{ SubsuperscriptBox["P", SubscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l"], "l"], RowBox[{\(\[Sum]\+c\), RowBox[{ UnderscriptBox["\[Sum]", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"]], RowBox[{ SubsuperscriptBox["P", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"], ",", SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"]}], \(l + 1 | l\)], "log", " ", SubsuperscriptBox["Q", RowBox[{ SubsuperscriptBox[ StyleBox["i", FontWeight->"Bold", FontSlant->"Plain"], "l", "c"], ",", SubsuperscriptBox[ StyleBox["i", FontSlant->"Italic"], \(l + 1\), "c"]}], \(l | l + 1\)]}]}]}]}]}]}]}], "\[IndentingNewLine]", "=", "\[AlignmentMarker]", RowBox[{\(\[Sum]\+\(l = 0\)\%\(L - 1\)\), 
RowBox[{\(\[Sum]\+c\), RowBox[{"(", RowBox[{ FractionBox[ SubsuperscriptBox["D", StyleBox["FMC", FontSlant->"Italic"], \(l, c\)], \(4 \((\[Sigma]\^\(l, c\))\)\^2\)], "-", \(log V\^\(l, c\)\/\(\(\@\(2 \[Pi]\)\) \[Sigma]\^\(l, c\)\ \)\)}], ")"}]}]}]}]}]}]}], TraditionalForm]], "NumberedEquation", TextAlignment->AlignmentMarker, SpanMaxSize->Infinity, GridBoxOptions->{ColumnAlignments->{Left}}], Cell[TextData[{ "which is the objective function for a tree of coupled soft VQs. Thus the \ ACE network, with a Gaussian model ", Cell[BoxData[ FormBox[ StyleBox["Q", FontWeight->"Bold", FontSlant->"Plain"], TraditionalForm]]], ", is a hierarchical vector quantiser, in which each layer encodes the \ clusters in the previous layer [", ButtonBox["11", ButtonData:>"Ref:Luttrell1989a", ButtonStyle->"Hyperlink"], ", ", ButtonBox["12", ButtonData:>"Ref:Luttrell1989b", ButtonStyle->"Hyperlink"], "]." }], "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " ", "Conclusions" }], "Section"], Cell[TextData[{ "The objective function for optimising the density model of a Markov source \ may be applied to the problem of optimising the joint density of all the \ layers of a neural network. This is possible because the joint state of all \ of the network layers may be viewed as a Markov chain of states (each layer \ is connected only to adjacent layers). The objective function may readily be \ shown to be equivalent to a sum of folded Markov chain objective functions, \ each of which connects a pair of adjacent layers, plus a term which specifies \ the cost of building a density model in the output layer. This representation \ makes contact with the results reported in [", ButtonBox["17", ButtonData:>"Ref:Luttrell1994a", ButtonStyle->"Hyperlink"], "], which allows many results to be unified into a single approach (i.e. a \ single objective function)." 
}], "Text"], Cell["\<\ The most significant aspect of this unification is the fact that all layers \ of a neural network are treated on an equal footing, unlike in the \ conventional approach to density modelling where the input layer is accorded \ a special status. For instance, this leads to a modular approach to building \ neural networks, where all of the modules have the same structure.\ \>", "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " Recommendations" }], "Section"], Cell[TextData[StyleBox["The above unified approach includes as special cases \ many apparently different previous results. It is recommended that future \ work, including theory and software, be framed in terms of this new unified \ approach.", FontWeight->"Plain"]], "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " Acknowledgements" }], "Section"], Cell["\<\ I thank Chris Webber for many useful conversations that we had during the \ course of this research. I also thank Peter Dayan and Geoffrey Hinton for \ conversations that we had about the relationship between folded Markov chains \ and Helmholtz machines during the \"Neural Networks and Machine Learning\" \ programme at the Newton Institute in Cambridge.\ \>", "Text"] }, Closed]], Cell[CellGroupData[{ Cell[TextData[{ CounterBox["Section"], " References" }], "Section"], Cell[TextData[{ "1 Becker S and Hinton G E, 1992, ", StyleBox["Nature", FontSlant->"Italic"], ", ", StyleBox["355", FontWeight->"Bold"], ", 161-163, Self-organising neural network that discovers surfaces in \ random-dot stereograms." }], "Reference", CellTags->"Ref:BeckerHinton1992"], Cell[TextData[{ "2 Dayan P, Hinton G E, Neal R M and Zemel R S, 1995, ", StyleBox["Neural Computation", FontSlant->"Italic"], ", ", StyleBox["7", FontWeight->"Bold"], ", 889-904, The Helmholtz machine." 
}], "Reference", CellTags->"Ref:DayanHintonNealZemel1995"], Cell[TextData[{ "3 Dayan P and Hinton G E, 1996, ", StyleBox["Neural Networks", FontSlant->"Italic"], ", ", StyleBox["9", FontWeight->"Bold"], "(8), 1385-1403, Varieties of Helmholtz machine." }], "Reference", CellTags->"Ref:DayanHinton1996"], Cell[TextData[{ "4 Farvardin N, 1990, ", StyleBox["IEEE Trans. IT", FontSlant->"Italic"], ", ", StyleBox["36", FontWeight->"Bold"], "(4), 799-809, A study of vector quantisation for noisy channels." }], "Reference", CellTags->"Ref:Farvardin1990"], Cell[TextData[{ "5 Hinton G E and Zemel R S, 1994, in Cowan J D, Tesauro G, and Alspector J \ (eds), ", StyleBox["Advances in Neural Information Processing Systems", FontSlant->"Italic"], ", ", StyleBox["6", FontWeight->"Bold"], ", San Francisco: Morgan Kaufmann, Autoencoders, minimum description \ length, and Helmholtz free energy." }], "Reference", CellTags->"Ref:HintonZemel1994"], Cell[TextData[{ "6 Hinton G E, Dayan P, Frey B J and Neal R M, 1995, ", StyleBox["Science", FontSlant->"Italic"], ", ", StyleBox["268", FontWeight->"Bold"], ", 1158-1161, The \"wake-sleep\" algorithm for unsupervised neural \ networks." }], "Reference", CellTags->"Ref:HintonDayanFreyNeal1995"], Cell["\<\ 7 Kohonen T, 1989, Springer-Verlag, Self-organisation and associative memory.\ \ \>", "Reference", CellTags->"Ref:Kohonen1989"], Cell[TextData[{ "8 Kumazawa H, Kasahara M and Namekawa T, 1984, ", StyleBox["Electronic. Eng. Japan", FontSlant->"Italic"], ", ", StyleBox["67B", FontWeight->"Bold"], "(4), 39-47, A construction of vector quantisers for noisy channels." }], "Reference", CellTags->"Ref:KumazawaKasaharaNamekawa1984"], Cell[TextData[{ "9 Linde Y, Buzo A and Gray R M, 1980, ", StyleBox["IEEE Trans. COM", FontSlant->"Italic"], ", ", StyleBox["28", FontWeight->"Bold"], ", 84-95, An algorithm for vector quantiser design." 
}], "Reference", CellTags->"Ref:LindeBuzoGray1980"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/ieeenn88/ieeenn88.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "10 Luttrell S P, 1988, ", StyleBox["Proc. 2nd IEEE Int. Conf. on Neural Networks", FontSlant->"Italic"], ", San Diego, ", StyleBox["1", FontWeight->"Bold"], ", 93-100, Self-organising multilayer topographic mappings." }], "Reference", CellTags->"Ref:Luttrell1988"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/ieenn89/ieenn89.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "11 Luttrell S P, 1989, ", StyleBox["Proc. 1st IEE Conf. on Artificial Neural Networks", FontSlant->"Italic"], ", 2-6, Hierarchical self-organising networks." }], "Reference", CellTags->"Ref:Luttrell1989a"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/hiervq/hiervq.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "12 Luttrell S P, 1989, ", StyleBox["Proc. IEE Part I", FontSlant->"Italic"], ", ", StyleBox["136", FontWeight->"Bold"], "(6), 405-413, Hierarchical vector quantisation." }], "Reference", CellTags->"Ref:Luttrell1989b"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/tvq/tvq.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "13 Luttrell S P, 1990, ", StyleBox["IEEE Transactions on Neural Networks", FontSlant->"Italic"], ", ", StyleBox["1", FontWeight->"Bold"], ", 229-232, Derivation of a class of training algorithms." }], "Reference", CellTags->"Ref:Luttrell1990"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/spie91/spie91.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "14 Luttrell S P, 1991, ", StyleBox["Proc. SPIE Conf. 
on Adaptive Signal Processing", FontSlant->"Italic"], ", ", StyleBox["1565", FontWeight->"Bold"], ", 518-528, A hierarchical network for clutter and texture modelling." }], "Reference", CellTags->"Ref:Luttrell1991a"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/ieenn91/ieenn91.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "15 Luttrell S P, 1991, ", StyleBox["Proc. 2nd IEE Conf. on Artificial Neural Networks", FontSlant->"Italic"], ", Bournemouth, 5-9, Self-supervised training of hierarchical vector \ quantisers." }], "Reference", CellTags->"Ref:Luttrell1991b"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/selfsup/selfsup.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "16 Luttrell S P, 1992, ", StyleBox["Proc. IEE Part F", FontSlant->"Italic"], ", ", StyleBox["139", FontWeight->"Bold"], "(6), 371-377, Self-supervised adaptive networks." }], "Reference", CellTags->"Ref:Luttrell1992"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/bayessom/bayessom.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "17 Luttrell S P, 1994, ", StyleBox["Neural Computation", FontSlant->"Italic"], ", ", StyleBox["6", FontWeight->"Bold"], ", 767-794, A Bayesian analysis of self-organising maps." }], "Reference", CellTags->"Ref:Luttrell1994a"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/pmd/pmd.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "18 Luttrell S P, 1994, ", StyleBox["Proc. IEE Vision, Image and Signal Processing", FontSlant->"Italic"], ", ", StyleBox["141", FontWeight->"Bold"], ", 251-260, The partitioned mixture distribution: an adaptive Bayesian \ network for low-level image processing." 
}], "Reference", CellTags->"Ref:Luttrell1994b"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/maxent94/maxent94_1.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "19 Luttrell S P, 1994, ", StyleBox["Proc. 14th Int. MAXENT Workshop", FontSlant->"Italic"], ", 269-278, Kluwer, The cluster expansion: a hierarchical density model." }], "Reference", CellTags->"Ref:Luttrell1994c"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/maxent94/maxent94_2.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "20 Luttrell S P, 1994, ", StyleBox["Proc. 14th Int. MAXENT Workshop", FontSlant->"Italic"], ", 279-286, Kluwer, The partitioned mixture distribution: multiple \ overlapping density models." }], "Reference", CellTags->"Ref:Luttrell1994d"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/itbrain/itbrain.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "21 Luttrell S P, 1996, ", StyleBox["Network", FontSlant->"Italic"], ", ", StyleBox["7", FontWeight->"Bold"], ", 285-290, A discrete firing event analysis of the adaptive cluster \ expansion network." }], "Reference", CellTags->"Ref:Luttrell1996"], Cell[TextData[{ Cell[TextData[{ " ", ButtonBox["OPEN", ButtonData:>{ URL[ "http://www.luttrell.org.uk/papers/ieenn97/ieenn97a.nb"], None}, Active->True, ButtonStyle->"Hyperlink"], " " }]], "22 Luttrell S P, 1997, ", StyleBox["Proc. 5th IEE Conf. on Artificial Neural Networks", FontSlant->"Italic"], ", 59-63, Partitioned mixture distributions: the dynamical case." }], "Reference", CellTags->"Ref:Luttrell1997"], Cell[TextData[{ "23 Rissanen J, 1978, ", StyleBox["Automatica", FontSlant->"Italic"], ", ", StyleBox["14", FontWeight->"Bold"], ", 465-471, Modelling by shortest data description." 
}], "Reference", CellTags->"Ref:Rissanen1978"], Cell["\<\ 24 Rissanen J, 1989, World Scientific, Stochastic complexity in statistical \ enquiry.\ \>", "Reference", CellTags->"Ref:Rissanan1989"], Cell[TextData[{ "25 Shannon C E, 1948, ", StyleBox["Bell Syst. Tech. J.", FontSlant->"Italic"], ", The mathematical theory of communication, ", StyleBox["27", FontWeight->"Bold"], ", 379-423 and 623-656." }], "Reference", CellTags->"Ref:Shannon1948"] }, Closed]] }, Open ]] }, FrontEndVersion->"5.0 for Microsoft Windows", ScreenRectangle->{{0, 1280}, {0, 941}}, WindowToolbars->{}, WindowSize->{665.375, 641}, WindowMargins->{{307.25, Automatic}, {Automatic, 50}}, Magnification->1, StyleDefinitions -> "Report.nb" ] (******************************************************************* Cached data follows. If you edit this Notebook file directly, not using Mathematica, you must remove the line containing CacheID at the top of the file. The cache data will then be recreated when you save this file from within Mathematica. 
*******************************************************************) (*CellTagsOutline CellTagsIndex->{ "Sect:Introduction"->{ Cell[32585, 1097, 109, 5, 70, "Section", CellTags->"Sect:Introduction"]}, "Sect:CodingTheory"->{ Cell[38067, 1257, 110, 5, 70, "Section", CellTags->"Sect:CodingTheory"]}, "Sect:InformationTheory"->{ Cell[39861, 1324, 161, 8, 70, "Subsection", CellTags->"Sect:InformationTheory"]}, "Eq:LikelyMessage"->{ Cell[43943, 1461, 890, 17, 70, "NumberedEquation", CellTags->"Eq:LikelyMessage"]}, "Ed:Change1"->{ Cell[44836, 1480, 644, 22, 70, "Text", CellTags->"Ed:Change1"]}, "Sect:SourceCoding"->{ Cell[49759, 1634, 151, 8, 70, "Subsection", CellTags->"Sect:SourceCoding"]}, "Sect:MarkovSourceCoding"->{ Cell[70451, 2306, 164, 8, 70, "Subsection", CellTags->"Sect:MarkovSourceCoding"]}, "Ed:Change15"->{ Cell[75009, 2445, 2086, 69, 70, "Text", CellTags->"Ed:Change15"], Cell[188873, 5833, 205, 4, 70, "Text", CellTags->"Ed:Change15"]}, "Ed:Change2"->{ Cell[77517, 2533, 4469, 91, 70, "NumberedEquation", CellTags->"Ed:Change2"]}, "Ed:Change3"->{ Cell[81989, 2626, 4290, 130, 70, "Text", CellTags->"Ed:Change3"]}, "Eq:NegativeLogLikelihood"->{ Cell[87504, 2801, 3151, 66, 70, "NumberedEquation", CellTags->"Eq:NegativeLogLikelihood"]}, "Ed:Change4"->{ Cell[94095, 2974, 3787, 114, 70, "Text", CellTags->{"Ed:Change4", "Problem:1"}]}, "Problem:1"->{ Cell[94095, 2974, 3787, 114, 70, "Text", CellTags->{"Ed:Change4", "Problem:1"}]}, "Sect:DynamicMarkovSourceCoding"->{ Cell[97919, 3093, 183, 8, 70, "Subsection", CellTags->"Sect:DynamicMarkovSourceCoding"]}, "Sect:ApplicationNN"->{ Cell[100928, 3183, 147, 6, 70, "Section", CellTags->"Sect:ApplicationNN"]}, "Sect:SourceModelNN"->{ Cell[103063, 3265, 176, 9, 70, "Subsection", CellTags->"Sect:SourceModelNN"]}, "Eq:ObjectiveMarkovSource"->{ Cell[123612, 3885, 1458, 39, 70, "NumberedEquation", CellTags->"Eq:ObjectiveMarkovSource"]}, "Sect:2LayerFMC"->{ Cell[127781, 4014, 176, 9, 70, "Subsection", CellTags->"Sect:2LayerFMC"]}, 
"Eq:ObjectiveMarkovSource2Layer"->{ Cell[128857, 4055, 1317, 36, 70, "NumberedEquation", CellTags->"Eq:ObjectiveMarkovSource2Layer"]}, "Eq:ObjectiveMarkovSource2LayerNotation"->{ Cell[130404, 4102, 3339, 77, 70, "NumberedEquation", CellTags->{"Eq:ObjectiveMarkovSource2LayerNotation", "Error:1"}]}, "Error:1"->{ Cell[130404, 4102, 3339, 77, 70, "NumberedEquation", CellTags->{"Eq:ObjectiveMarkovSource2LayerNotation", "Error:1"}]}, "Error:2"->{ Cell[134727, 4213, 5991, 139, 70, "NumberedEquation", CellTags->"Error:2"]}, "Eq:ObjectiveFMC"->{ Cell[141109, 4368, 2368, 60, 70, "NumberedEquation", CellTags->"Eq:ObjectiveFMC"]}, "Eq:ObjectiveVQ"->{ Cell[143708, 4440, 1831, 47, 70, "NumberedEquation", CellTags->"Eq:ObjectiveVQ"]}, "Error:3"->{ Cell[147260, 4550, 1232, 35, 70, "NumberedEquation", CellTags->"Error:3"]}, "Sect:CoupledFMC"->{ Cell[157323, 4859, 162, 9, 70, "Subsection", CellTags->"Sect:CoupledFMC"]}, "Eq:ObjectiveMarkovSourceSimple"->{ Cell[158406, 4901, 1549, 41, 70, "NumberedEquation", CellTags->{"Eq:ObjectiveMarkovSourceSimple", "Error:4"}]}, "Error:4"->{ Cell[158406, 4901, 1549, 41, 70, "NumberedEquation", CellTags->{"Eq:ObjectiveMarkovSourceSimple", "Error:4"}]}, "Sect:Leakage"->{ Cell[164797, 5094, 158, 9, 70, "Subsection", CellTags->"Sect:Leakage"]}, "Error:5"->{ Cell[171973, 5313, 1375, 36, 70, "NumberedEquation", CellTags->"Error:5"]}, "Eq:Fold"->{ Cell[174909, 5394, 348, 7, 70, "NumberedEquation", CellTags->"Eq:Fold"]}, "Eq:Leakage"->{ Cell[178145, 5489, 382, 9, 70, "NumberedEquation", CellTags->"Eq:Leakage"]}, "Problem:2"->{ Cell[178530, 5500, 2777, 82, 70, "Text", CellTags->"Problem:2"]}, "Ed:Change5"->{ Cell[181310, 5584, 800, 27, 70, "Text", CellTags->"Ed:Change5"]}, "Sect:CodeOutputLayer"->{ Cell[183927, 5685, 170, 9, 70, "Subsection", CellTags->"Sect:CodeOutputLayer"]}, "Ed:Change6"->{ Cell[189081, 5839, 796, 25, 70, "Text", CellTags->"Ed:Change6"]}, "Sect:TypesOfDensityModel"->{ Cell[189926, 5870, 136, 6, 70, "Section", 
CellTags->"Sect:TypesOfDensityModel"]}, "Sect:FMCvsHM"->{ Cell[195038, 6022, 167, 9, 70, "Subsection", CellTags->"Sect:FMCvsHM"]}, "Error:6"->{ Cell[206269, 6343, 1456, 41, 70, "NumberedEquation", CellTags->"Error:6"]}, "Ed:Change7"->{ Cell[215195, 6612, 6715, 201, 70, "Text", CellTags->"Ed:Change7"]}, "Error:7"->{ Cell[232995, 7093, 1373, 37, 70, "NumberedEquation", CellTags->"Error:7"]}, "Ed:Change8"->{ Cell[234371, 7132, 2277, 63, 70, "Text", CellTags->"Ed:Change8"]}, "Sect:Kohonen"->{ Cell[255003, 7726, 129, 6, 70, "Section", CellTags->"Sect:Kohonen"]}, "Eq:ObjectiveSOM"->{ Cell[260444, 7895, 1886, 48, 70, "NumberedEquation", CellTags->"Eq:ObjectiveSOM"]}, "Sect:PMD"->{ Cell[265386, 8033, 133, 6, 70, "Section", CellTags->"Sect:PMD"]}, "Eq:PosteriorProbability"->{ Cell[271852, 8237, 504, 9, 70, "NumberedEquation", CellTags->"Eq:PosteriorProbability"]}, "Ed:Change9"->{ Cell[279817, 8449, 2617, 80, 70, "Text", CellTags->"Ed:Change9"]}, "Eq:MultipleRecognition"->{ Cell[285283, 8601, 3197, 70, 70, "NumberedEquation", CellTags->"Eq:MultipleRecognition"]}, "Eq:SingleRecognition"->{ Cell[294543, 8855, 3850, 92, 70, "NumberedEquation", CellTags->"Eq:SingleRecognition"]}, "Eq:PosteriorProbabilityPMD"->{ Cell[301269, 9050, 649, 11, 70, "NumberedEquation", CellTags->"Eq:PosteriorProbabilityPMD"]}, "Ed:Change10"->{ Cell[307670, 9255, 1000, 34, 70, "Text", CellTags->"Ed:Change10"]}, "Ed:Change11"->{ Cell[312245, 9390, 766, 23, 70, "Text", CellTags->"Ed:Change11"]}, "Eq:PosteriorProbabilityPMD2"->{ Cell[313014, 9415, 842, 18, 70, "NumberedEquation", CellTags->"Eq:PosteriorProbabilityPMD2"]}, "Ed:Change12"->{ Cell[314708, 9468, 2663, 74, 70, "Text", CellTags->"Ed:Change12"]}, "Sect:ACE"->{ Cell[318211, 9574, 126, 6, 70, "Section", CellTags->"Sect:ACE"]}, "Ed:Change13"->{ Cell[350969, 10354, 669, 21, 70, "Text", CellTags->"Ed:Change13"]}, "Ed:Change14"->{ Cell[356398, 10498, 3463, 109, 70, "Text", CellTags->"Ed:Change14"]}, "Ref:BeckerHinton1992"->{ Cell[380305, 11154, 304, 10, 
70, "Reference", CellTags->"Ref:BeckerHinton1992"]}, "Ref:DayanHintonNealZemel1995"->{ Cell[380612, 11166, 281, 9, 70, "Reference", CellTags->"Ref:DayanHintonNealZemel1995"]}, "Ref:DayanHinton1996"->{ Cell[380896, 11177, 262, 9, 70, "Reference", CellTags->"Ref:DayanHinton1996"]}, "Ref:Farvardin1990"->{ Cell[381161, 11188, 266, 9, 70, "Reference", CellTags->"Ref:Farvardin1990"]}, "Ref:HintonZemel1994"->{ Cell[381430, 11199, 405, 11, 70, "Reference", CellTags->"Ref:HintonZemel1994"]}, "Ref:HintonDayanFreyNeal1995"->{ Cell[381838, 11212, 314, 10, 70, "Reference", CellTags->"Ref:HintonDayanFreyNeal1995"]}, "Ref:Kohonen1989"->{ Cell[382155, 11224, 139, 4, 70, "Reference", CellTags->"Ref:Kohonen1989"]}, "Ref:KumazawaKasaharaNamekawa1984"->{ Cell[382297, 11230, 319, 9, 70, "Reference", CellTags->"Ref:KumazawaKasaharaNamekawa1984"]}, "Ref:LindeBuzoGray1980"->{ Cell[382619, 11241, 274, 9, 70, "Reference", CellTags->"Ref:LindeBuzoGray1980"]}, "Ref:Luttrell1988"->{ Cell[382896, 11252, 524, 19, 70, "Reference", CellTags->"Ref:Luttrell1988"]}, "Ref:Luttrell1989a"->{ Cell[383423, 11273, 448, 15, 70, "Reference", CellTags->"Ref:Luttrell1989a"]}, "Ref:Luttrell1989b"->{ Cell[383874, 11290, 466, 18, 70, "Reference", CellTags->"Ref:Luttrell1989b"]}, "Ref:Luttrell1990"->{ Cell[384343, 11310, 486, 18, 70, "Reference", CellTags->"Ref:Luttrell1990"]}, "Ref:Luttrell1991a"->{ Cell[384832, 11330, 518, 18, 70, "Reference", CellTags->"Ref:Luttrell1991a"]}, "Ref:Luttrell1991b"->{ Cell[385353, 11350, 484, 16, 70, "Reference", CellTags->"Ref:Luttrell1991b"]}, "Ref:Luttrell1992"->{ Cell[385840, 11368, 468, 18, 70, "Reference", CellTags->"Ref:Luttrell1992"]}, "Ref:Luttrell1994a"->{ Cell[386311, 11388, 485, 19, 70, "Reference", CellTags->"Ref:Luttrell1994a"]}, "Ref:Luttrell1994b"->{ Cell[386799, 11409, 553, 19, 70, "Reference", CellTags->"Ref:Luttrell1994b"]}, "Ref:Luttrell1994c"->{ Cell[387355, 11430, 469, 16, 70, "Reference", CellTags->"Ref:Luttrell1994c"]}, "Ref:Luttrell1994d"->{ Cell[387827, 
11448, 493, 17, 70, "Reference", CellTags->"Ref:Luttrell1994d"]}, "Ref:Luttrell1996"->{ Cell[388323, 11467, 497, 19, 70, "Reference", CellTags->"Ref:Luttrell1996"]}, "Ref:Luttrell1997"->{ Cell[388823, 11488, 462, 15, 70, "Reference", CellTags->"Ref:Luttrell1997"]}, "Ref:Rissanen1978"->{ Cell[389288, 11505, 247, 9, 70, "Reference", CellTags->"Ref:Rissanen1978"]}, "Ref:Rissanan1989"->{ Cell[389538, 11516, 147, 4, 70, "Reference", CellTags->"Ref:Rissanan1989"]}, "Ref:Shannon1948"->{ Cell[389688, 11522, 270, 9, 70, "Reference", CellTags->"Ref:Shannon1948"]} } *) (*CellTagsIndex CellTagsIndex->{ {"Sect:Introduction", 390668, 11553}, {"Sect:CodingTheory", 390777, 11556}, {"Sect:InformationTheory", 390891, 11559}, {"Eq:LikelyMessage", 391007, 11562}, {"Ed:Change1", 391118, 11565}, {"Sect:SourceCoding", 391218, 11568}, {"Sect:MarkovSourceCoding", 391336, 11571}, {"Ed:Change15", 391448, 11574}, {"Ed:Change2", 391618, 11579}, {"Ed:Change3", 391724, 11582}, {"Eq:NegativeLogLikelihood", 391833, 11585}, {"Ed:Change4", 391953, 11588}, {"Problem:1", 392062, 11591}, {"Sect:DynamicMarkovSourceCoding", 392192, 11594}, {"Sect:ApplicationNN", 392318, 11597}, {"Sect:SourceModelNN", 392430, 11600}, {"Eq:ObjectiveMarkovSource", 392551, 11603}, {"Sect:2LayerFMC", 392676, 11606}, {"Eq:ObjectiveMarkovSource2Layer", 392799, 11609}, {"Eq:ObjectiveMarkovSource2LayerNotation", 392954, 11612}, {"Error:1", 393099, 11615}, {"Error:2", 393244, 11618}, {"Eq:ObjectiveFMC", 393354, 11621}, {"Eq:ObjectiveVQ", 393470, 11624}, {"Error:3", 393578, 11627}, {"Sect:CoupledFMC", 393687, 11630}, {"Eq:ObjectiveMarkovSourceSimple", 393811, 11633}, {"Error:4", 393948, 11636}, {"Sect:Leakage", 394090, 11639}, {"Error:5", 394188, 11642}, {"Eq:Fold", 394289, 11645}, {"Eq:Leakage", 394391, 11648}, {"Problem:2", 394495, 11651}, {"Ed:Change5", 394589, 11654}, {"Sect:CodeOutputLayer", 394693, 11657}, {"Ed:Change6", 394802, 11660}, {"Sect:TypesOfDensityModel", 394910, 11663}, {"Sect:FMCvsHM", 395022, 11666}, {"Error:6", 
395120, 11669}, {"Ed:Change7", 395224, 11672}, {"Error:7", 395317, 11675}, {"Ed:Change8", 395421, 11678}, {"Sect:Kohonen", 395518, 11681}, {"Eq:ObjectiveSOM", 395621, 11684}, {"Sect:PMD", 395731, 11687}, {"Eq:PosteriorProbability", 395838, 11690}, {"Ed:Change9", 395956, 11693}, {"Eq:MultipleRecognition", 396063, 11696}, {"Eq:SingleRecognition", 396192, 11699}, {"Eq:PosteriorProbabilityPMD", 396325, 11702}, {"Ed:Change10", 396448, 11705}, {"Ed:Change11", 396545, 11708}, {"Eq:PosteriorProbabilityPMD2", 396657, 11711}, {"Ed:Change12", 396781, 11714}, {"Sect:ACE", 396875, 11717}, {"Ed:Change13", 396970, 11720}, {"Ed:Change14", 397067, 11723}, {"Ref:BeckerHinton1992", 397175, 11726}, {"Ref:DayanHintonNealZemel1995", 397303, 11729}, {"Ref:DayanHinton1996", 397429, 11732}, {"Ref:Farvardin1990", 397544, 11735}, {"Ref:HintonZemel1994", 397659, 11738}, {"Ref:HintonDayanFreyNeal1995", 397785, 11741}, {"Ref:Kohonen1989", 397907, 11744}, {"Ref:KumazawaKasaharaNamekawa1984", 398033, 11747}, {"Ref:LindeBuzoGray1980", 398165, 11750}, {"Ref:Luttrell1988", 398281, 11753}, {"Ref:Luttrell1989a", 398394, 11756}, {"Ref:Luttrell1989b", 398508, 11759}, {"Ref:Luttrell1990", 398621, 11762}, {"Ref:Luttrell1991a", 398734, 11765}, {"Ref:Luttrell1991b", 398848, 11768}, {"Ref:Luttrell1992", 398961, 11771}, {"Ref:Luttrell1994a", 399074, 11774}, {"Ref:Luttrell1994b", 399188, 11777}, {"Ref:Luttrell1994c", 399302, 11780}, {"Ref:Luttrell1994d", 399416, 11783}, {"Ref:Luttrell1996", 399529, 11786}, {"Ref:Luttrell1997", 399641, 11789}, {"Ref:Rissanen1978", 399753, 11792}, {"Ref:Rissanan1989", 399864, 11795}, {"Ref:Shannon1948", 399974, 11798} } *) (*NotebookFileOutline Notebook[{ Cell[CellGroupData[{ Cell[1776, 53, 26, 0, 70, "Section 1"], Cell[CellGroupData[{ Cell[1827, 57, 39, 0, 70, "Subsection"], Cell[1869, 59, 62, 0, 70, "Text"], Cell[1934, 61, 416, 17, 70, "Text"], Cell[2353, 80, 261, 8, 70, "Text"], Cell[2617, 90, 311, 12, 70, "Text"], Cell[2931, 104, 326, 11, 70, "Text"], Cell[3260, 117, 2015, 
72, 70, "Text"], Cell[5278, 191, 2338, 75, 70, "Text"], Cell[7619, 268, 1526, 50, 70, "Text"], Cell[9148, 320, 1062, 35, 70, "Text"], Cell[10213, 357, 768, 25, 70, "Text"], Cell[10984, 384, 768, 27, 70, "Text"], Cell[11755, 413, 960, 33, 70, "Text"], Cell[12718, 448, 853, 28, 70, "Text"], Cell[13574, 478, 1470, 42, 70, "Text"], Cell[15047, 522, 662, 20, 70, "Text"], Cell[15712, 544, 371, 14, 70, "Text"], Cell[16086, 560, 328, 11, 70, "Text"], Cell[16417, 573, 1077, 34, 70, "Text"], Cell[17497, 609, 1996, 63, 70, "Text"], Cell[19496, 674, 3264, 100, 70, "Text"], Cell[22763, 776, 2251, 67, 70, "Text"], Cell[25017, 845, 688, 24, 70, "Text"], Cell[25708, 871, 381, 13, 70, "Text"], Cell[26092, 886, 674, 17, 70, "Text"], Cell[26769, 905, 595, 22, 70, "Text"], Cell[27367, 929, 1743, 54, 70, "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[29159, 989, 69, 0, 70, "Title"], Cell[29231, 991, 33, 0, 70, "Author"], Cell[29267, 993, 111, 3, 70, "Text"], Cell[29381, 998, 296, 6, 70, "Text"], Cell[29680, 1006, 161, 5, 70, "Text"], Cell[29844, 1013, 591, 9, 70, "Abstract"], Cell[CellGroupData[{ Cell[30460, 1026, 63, 1, 70, "Section 1", CounterIncrements->{}], Cell[30526, 1029, 107, 4, 70, "Text"], Cell[30636, 1035, 201, 5, 70, "Text"], Cell[30840, 1042, 212, 5, 70, "Text"], Cell[31055, 1049, 191, 6, 70, "Text"], Cell[31249, 1057, 245, 6, 70, "Text"], Cell[31497, 1065, 482, 9, 70, "Text"], Cell[31982, 1076, 330, 8, 70, "Text"], Cell[32315, 1086, 233, 6, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[32585, 1097, 109, 5, 70, "Section", CellTags->"Sect:Introduction"], Cell[32697, 1104, 962, 19, 70, "Text"], Cell[33662, 1125, 695, 12, 70, "Text"], Cell[34360, 1139, 677, 15, 70, "Text"], Cell[35040, 1156, 364, 6, 70, "Text"], Cell[35407, 1164, 2623, 88, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[38067, 1257, 110, 5, 70, "Section", CellTags->"Sect:CodingTheory"], Cell[38180, 1264, 1656, 56, 70, "Text"], Cell[CellGroupData[{ Cell[39861, 1324, 161, 8, 70, 
"Subsection", CellTags->"Sect:InformationTheory"], Cell[40025, 1334, 327, 10, 70, "Text"], Cell[40355, 1346, 274, 8, 70, "NumberedEquation"], Cell[40632, 1356, 542, 18, 70, "Text"], Cell[41177, 1376, 1312, 31, 70, "Text"], Cell[42492, 1409, 1448, 50, 70, "Text"], Cell[43943, 1461, 890, 17, 70, "NumberedEquation", CellTags->"Eq:LikelyMessage"], Cell[44836, 1480, 644, 22, 70, "Text", CellTags->"Ed:Change1"], Cell[45483, 1504, 347, 9, 70, "NumberedEquation"], Cell[45833, 1515, 1621, 41, 70, "Text"], Cell[47457, 1558, 2265, 71, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[49759, 1634, 151, 8, 70, "Subsection", CellTags->"Sect:SourceCoding"], Cell[49913, 1644, 1865, 57, 70, "Text"], Cell[51781, 1703, 2420, 78, 70, "Text"], Cell[54204, 1783, 696, 25, 70, "Text"], Cell[54903, 1810, 975, 20, 70, "NumberedEquation"], Cell[55881, 1832, 408, 14, 70, "Text"], Cell[56292, 1848, 2160, 72, 70, "Text"], Cell[58455, 1922, 544, 14, 70, "NumberedEquation"], Cell[59002, 1938, 1946, 65, 70, "Text"], Cell[60951, 2005, 2360, 72, 70, "Text"], Cell[63314, 2079, 1262, 31, 70, "NumberedEquation"], Cell[64579, 2112, 1452, 47, 70, "Text"], Cell[66034, 2161, 1570, 44, 70, "NumberedEquation"], Cell[67607, 2207, 1016, 35, 70, "Text"], Cell[68626, 2244, 1788, 57, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[70451, 2306, 164, 8, 70, "Subsection", CellTags->"Sect:MarkovSourceCoding"], Cell[70618, 2316, 912, 30, 70, "Text"], Cell[71533, 2348, 3473, 95, 70, "NumberedEquation"], Cell[75009, 2445, 2086, 69, 70, "Text", CellTags->"Ed:Change15"], Cell[77098, 2516, 416, 15, 70, "Text"], Cell[77517, 2533, 4469, 91, 70, "NumberedEquation", CellTags->"Ed:Change2"], Cell[81989, 2626, 4290, 130, 70, "Text", CellTags->"Ed:Change3"], Cell[86282, 2758, 1219, 41, 70, "Text"], Cell[87504, 2801, 3151, 66, 70, "NumberedEquation", CellTags->"Eq:NegativeLogLikelihood"], Cell[90658, 2869, 3434, 103, 70, "Text"], Cell[94095, 2974, 3787, 114, 70, "Text", CellTags->{"Ed:Change4", "Problem:1"}] }, Closed]], 
Cell[CellGroupData[{ Cell[97919, 3093, 183, 8, 70, "Subsection", CellTags->"Sect:DynamicMarkovSourceCoding"], Cell[98105, 3103, 966, 21, 70, "Text"], Cell[99074, 3126, 1805, 51, 70, "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[100928, 3183, 147, 6, 70, "Section", CellTags->"Sect:ApplicationNN"], Cell[101078, 3191, 1960, 70, 70, "Text"], Cell[CellGroupData[{ Cell[103063, 3265, 176, 9, 70, "Subsection", CellTags->"Sect:SourceModelNN"], Cell[103242, 3276, 346, 8, 70, "Text"], Cell[103591, 3286, 1960, 56, 70, "Text"], Cell[105554, 3344, 865, 15, 70, "NumberedEquation"], Cell[106422, 3361, 1832, 57, 70, "Text"], Cell[108257, 3420, 1979, 56, 70, "Text"], Cell[110239, 3478, 869, 15, 70, "NumberedEquation"], Cell[111111, 3495, 432, 14, 70, "Text"], Cell[111546, 3511, 4356, 128, 70, "Text"], Cell[115905, 3641, 2366, 71, 70, "Text"], Cell[118274, 3714, 767, 26, 70, "Text"], Cell[119044, 3742, 250, 4, 70, "NumberedEquation"], Cell[119297, 3748, 1010, 34, 70, "Text"], Cell[120310, 3784, 675, 17, 70, "NumberedEquation"], Cell[120988, 3803, 1075, 35, 70, "Text"], Cell[122066, 3840, 1167, 28, 70, "NumberedEquation"], Cell[123236, 3870, 373, 13, 70, "Text"], Cell[123612, 3885, 1458, 39, 70, "NumberedEquation", CellTags->"Eq:ObjectiveMarkovSource"], Cell[125073, 3926, 2671, 83, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[127781, 4014, 176, 9, 70, "Subsection", CellTags->"Sect:2LayerFMC"], Cell[127960, 4025, 894, 28, 70, "Text"], Cell[128857, 4055, 1317, 36, 70, "NumberedEquation", CellTags->"Eq:ObjectiveMarkovSource2Layer"], Cell[130177, 4093, 224, 7, 70, "Text"], Cell[130404, 4102, 3339, 77, 70, "NumberedEquation", CellTags->{"Eq:ObjectiveMarkovSource2LayerNotation", "Error:1"}], Cell[133746, 4181, 978, 30, 70, "Text"], Cell[134727, 4213, 5991, 139, 70, "NumberedEquation", CellTags->"Error:2"], Cell[140721, 4354, 385, 12, 70, "Text"], Cell[141109, 4368, 2368, 60, 70, "NumberedEquation", CellTags->"Eq:ObjectiveFMC"], Cell[143480, 4430, 225, 8, 70, "Text"], 
Cell[143708, 4440, 1831, 47, 70, "NumberedEquation", CellTags->"Eq:ObjectiveVQ"], Cell[145542, 4489, 1343, 44, 70, "Text"], Cell[146888, 4535, 369, 13, 70, "Text"], Cell[147260, 4550, 1232, 35, 70, "NumberedEquation", CellTags->"Error:3"], Cell[148495, 4587, 1484, 44, 70, "Text"], Cell[149982, 4633, 3409, 106, 70, "Text"], Cell[153394, 4741, 3164, 93, 70, "Text"], Cell[156561, 4836, 725, 18, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[157323, 4859, 162, 9, 70, "Subsection", CellTags->"Sect:CoupledFMC"], Cell[157488, 4870, 915, 29, 70, "Text"], Cell[158406, 4901, 1549, 41, 70, "NumberedEquation", CellTags->{"Eq:ObjectiveMarkovSourceSimple", "Error:4"}], Cell[159958, 4944, 1350, 41, 70, "Text"], Cell[161311, 4987, 1641, 46, 70, "Text"], Cell[162955, 5035, 1805, 54, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[164797, 5094, 158, 9, 70, "Subsection", CellTags->"Sect:Leakage"], Cell[164958, 5105, 1446, 39, 70, "Text"], Cell[166407, 5146, 1142, 30, 70, "NumberedEquation"], Cell[167552, 5178, 542, 17, 70, "Text"], Cell[168097, 5197, 849, 23, 70, "NumberedEquation"], Cell[168949, 5222, 63, 0, 70, "Text"], Cell[169015, 5224, 1317, 34, 70, "NumberedEquation"], Cell[170335, 5260, 1039, 28, 70, "Text"], Cell[171377, 5290, 593, 21, 70, "Text"], Cell[171973, 5313, 1375, 36, 70, "NumberedEquation", CellTags->"Error:5"], Cell[173351, 5351, 316, 10, 70, "Text"], Cell[173670, 5363, 1040, 21, 70, "NumberedEquation"], Cell[174713, 5386, 193, 6, 70, "Text"], Cell[174909, 5394, 348, 7, 70, "NumberedEquation", CellTags->"Eq:Fold"], Cell[175260, 5403, 28, 0, 70, "Text"], Cell[175291, 5405, 839, 17, 70, "NumberedEquation"], Cell[176133, 5424, 423, 16, 70, "Text"], Cell[176559, 5442, 741, 16, 70, "NumberedEquation"], Cell[177303, 5460, 839, 27, 70, "Text"], Cell[178145, 5489, 382, 9, 70, "NumberedEquation", CellTags->"Eq:Leakage"], Cell[178530, 5500, 2777, 82, 70, "Text", CellTags->"Problem:2"], Cell[181310, 5584, 800, 27, 70, "Text", CellTags->"Ed:Change5"], Cell[182113, 5613, 
135, 3, 70, "Text"], Cell[182251, 5618, 420, 14, 70, "Text"], Cell[182674, 5634, 406, 14, 70, "Text"], Cell[183083, 5650, 23, 0, 70, "Text"], Cell[183109, 5652, 426, 15, 70, "Text"], Cell[183538, 5669, 352, 11, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[183927, 5685, 170, 9, 70, "Subsection", CellTags->"Sect:CodeOutputLayer"], Cell[184100, 5696, 2477, 77, 70, "Text"], Cell[186580, 5775, 1929, 48, 70, "Text"], Cell[188512, 5825, 358, 6, 70, "Text"], Cell[188873, 5833, 205, 4, 70, "Text", CellTags->"Ed:Change15"], Cell[189081, 5839, 796, 25, 70, "Text", CellTags->"Ed:Change6"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[189926, 5870, 136, 6, 70, "Section", CellTags->"Sect:TypesOfDensityModel"], Cell[190065, 5878, 4948, 140, 70, "Text"], Cell[CellGroupData[{ Cell[195038, 6022, 167, 9, 70, "Subsection", CellTags->"Sect:FMCvsHM"], Cell[195208, 6033, 2014, 59, 70, "Text"], Cell[197225, 6094, 365, 8, 70, "NumberedEquation"], Cell[197593, 6104, 471, 13, 70, "Text"], Cell[198067, 6119, 327, 7, 70, "NumberedEquation"], Cell[198397, 6128, 715, 21, 70, "Text"], Cell[199115, 6151, 4370, 103, 70, "NumberedEquation"], Cell[203488, 6256, 2778, 85, 70, "Text"], Cell[206269, 6343, 1456, 41, 70, "NumberedEquation", CellTags->"Error:6"], Cell[207728, 6386, 367, 12, 70, "Text"], Cell[208098, 6400, 5369, 158, 70, "Text"], Cell[213470, 6560, 429, 15, 70, "Text"], Cell[213902, 6577, 1290, 33, 70, "NumberedEquation"], Cell[215195, 6612, 6715, 201, 70, "Text", CellTags->"Ed:Change7"], Cell[221913, 6815, 203, 5, 70, "Text"], Cell[222119, 6822, 4487, 107, 70, "NumberedEquation"], Cell[226609, 6931, 43, 0, 70, "Text"], Cell[226655, 6933, 5243, 122, 70, "NumberedEquation"], Cell[231901, 7057, 1091, 34, 70, "Text"], Cell[232995, 7093, 1373, 37, 70, "NumberedEquation", CellTags->"Error:7"], Cell[234371, 7132, 2277, 63, 70, "Text", CellTags->"Ed:Change8"], Cell[236651, 7197, 916, 26, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[237604, 7228, 133, 8, 70, "Subsection"], 
Cell[237740, 7238, 8994, 264, 70, "Text"], Cell[246737, 7504, 5059, 141, 70, "Text"], Cell[251799, 7647, 3155, 73, 70, "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[255003, 7726, 129, 6, 70, "Section", CellTags->"Sect:Kohonen"], Cell[255135, 7734, 647, 12, 70, "Text"], Cell[CellGroupData[{ Cell[255807, 7750, 146, 8, 70, "Subsection"], Cell[255956, 7760, 1498, 52, 70, "Text"], Cell[257457, 7814, 1941, 47, 70, "NumberedEquation"], Cell[259401, 7863, 1040, 30, 70, "Text"], Cell[260444, 7895, 1886, 48, 70, "NumberedEquation", CellTags->"Eq:ObjectiveSOM"], Cell[262333, 7945, 1856, 49, 70, "Text"], Cell[264192, 7996, 556, 13, 70, "Text"], Cell[264751, 8011, 586, 16, 70, "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[265386, 8033, 133, 6, 70, "Section", CellTags->"Sect:PMD"], Cell[265522, 8041, 2566, 73, 70, "Text"], Cell[CellGroupData[{ Cell[268113, 8118, 138, 8, 70, "Subsection"], Cell[268254, 8128, 60, 0, 70, "Text"], Cell[268317, 8130, 1440, 39, 70, "NumberedEquation"], Cell[269760, 8171, 1810, 53, 70, "Text"], Cell[271573, 8226, 276, 9, 70, "Text"], Cell[271852, 8237, 504, 9, 70, "NumberedEquation", CellTags->"Eq:PosteriorProbability"], Cell[272359, 8248, 1340, 36, 70, "Text"], Cell[273702, 8286, 2694, 78, 70, "Text"], Cell[276399, 8366, 505, 15, 70, "Text"], Cell[276907, 8383, 2907, 64, 70, "NumberedEquation"], Cell[279817, 8449, 2617, 80, 70, "Text", CellTags->"Ed:Change9"], Cell[282437, 8531, 2626, 58, 70, "NumberedEquation"], Cell[285066, 8591, 214, 8, 70, "Text"], Cell[285283, 8601, 3197, 70, 70, "NumberedEquation", CellTags->"Eq:MultipleRecognition"], Cell[288483, 8673, 2108, 64, 70, "Text"], Cell[290594, 8739, 1936, 48, 70, "NumberedEquation"], Cell[292533, 8789, 981, 30, 70, "Text"], Cell[293517, 8821, 1023, 32, 70, "Text"], Cell[294543, 8855, 3850, 92, 70, "NumberedEquation", CellTags->"Eq:SingleRecognition"], Cell[298396, 8949, 1125, 35, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[299558, 8989, 142, 8, 70, "Subsection"], 
Cell[299703, 8999, 1563, 49, 70, "Text"], Cell[301269, 9050, 649, 11, 70, "NumberedEquation", CellTags->"Eq:PosteriorProbabilityPMD"], Cell[301921, 9063, 686, 26, 70, "Text"], Cell[302610, 9091, 2231, 62, 70, "Text"], Cell[304844, 9155, 668, 23, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[305549, 9183, 150, 7, 70, "Subsection"], Cell[305702, 9192, 1129, 35, 70, "Text"], Cell[306834, 9229, 833, 24, 70, "NumberedEquation"], Cell[307670, 9255, 1000, 34, 70, "Text", CellTags->"Ed:Change10"], Cell[308673, 9291, 2255, 60, 70, "NumberedEquation"], Cell[310931, 9353, 23, 0, 70, "Text"], Cell[310957, 9355, 1285, 33, 70, "NumberedEquation"], Cell[312245, 9390, 766, 23, 70, "Text", CellTags->"Ed:Change11"], Cell[313014, 9415, 842, 18, 70, "NumberedEquation", CellTags->"Eq:PosteriorProbabilityPMD2"], Cell[313859, 9435, 846, 31, 70, "Text"], Cell[314708, 9468, 2663, 74, 70, "Text", CellTags->"Ed:Change12"], Cell[317374, 9544, 258, 5, 70, "Text"], Cell[317635, 9551, 527, 17, 70, "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[318211, 9574, 126, 6, 70, "Section", CellTags->"Sect:ACE"], Cell[318340, 9582, 418, 14, 70, "Text"], Cell[CellGroupData[{ Cell[318783, 9600, 141, 7, 70, "Subsection"], Cell[318927, 9609, 201, 8, 70, "Text"], Cell[319131, 9619, 1183, 25, 70, "NumberedEquation"], Cell[320317, 9646, 961, 25, 70, "Text"], Cell[321281, 9673, 2764, 51, 70, "NumberedEquation"], Cell[324048, 9726, 624, 18, 70, "Text"], Cell[324675, 9746, 212, 4, 70, "Text"], Cell[324890, 9752, 5151, 134, 70, "NumberedEquation"], Cell[330044, 9888, 2116, 68, 70, "Text"], Cell[332163, 9958, 16662, 341, 70, "NumberedEquation"], Cell[348828, 10301, 2138, 51, 70, "Text"], Cell[350969, 10354, 669, 21, 70, "Text", CellTags->"Ed:Change13"], Cell[351641, 10377, 2344, 59, 70, "NumberedEquation"], Cell[353988, 10438, 24, 0, 70, "Text"], Cell[354015, 10440, 2380, 56, 70, "NumberedEquation"], Cell[356398, 10498, 3463, 109, 70, "Text", CellTags->"Ed:Change14"], Cell[359864, 10609, 1586, 42, 70, 
"NumberedEquation"], Cell[361453, 10653, 2065, 61, 70, "Text"], Cell[363521, 10716, 1723, 54, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[365281, 10775, 139, 7, 70, "Subsection"], Cell[365423, 10784, 427, 12, 70, "Text"], Cell[365853, 10798, 1431, 37, 70, "NumberedEquation"], Cell[367287, 10837, 797, 25, 70, "Text"], Cell[368087, 10864, 9204, 191, 70, "NumberedEquation"], Cell[377294, 11057, 574, 18, 70, "Text"] }, Closed]] }, Closed]], Cell[CellGroupData[{ Cell[377917, 11081, 81, 5, 70, "Section"], Cell[378001, 11088, 896, 15, 70, "Text"], Cell[378900, 11105, 395, 6, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[379332, 11116, 79, 4, 70, "Section"], Cell[379414, 11122, 277, 4, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[379728, 11131, 80, 4, 70, "Section"], Cell[379811, 11137, 380, 6, 70, "Text"] }, Closed]], Cell[CellGroupData[{ Cell[380228, 11148, 74, 4, 70, "Section"], Cell[380305, 11154, 304, 10, 70, "Reference", CellTags->"Ref:BeckerHinton1992"], Cell[380612, 11166, 281, 9, 70, "Reference", CellTags->"Ref:DayanHintonNealZemel1995"], Cell[380896, 11177, 262, 9, 70, "Reference", CellTags->"Ref:DayanHinton1996"], Cell[381161, 11188, 266, 9, 70, "Reference", CellTags->"Ref:Farvardin1990"], Cell[381430, 11199, 405, 11, 70, "Reference", CellTags->"Ref:HintonZemel1994"], Cell[381838, 11212, 314, 10, 70, "Reference", CellTags->"Ref:HintonDayanFreyNeal1995"], Cell[382155, 11224, 139, 4, 70, "Reference", CellTags->"Ref:Kohonen1989"], Cell[382297, 11230, 319, 9, 70, "Reference", CellTags->"Ref:KumazawaKasaharaNamekawa1984"], Cell[382619, 11241, 274, 9, 70, "Reference", CellTags->"Ref:LindeBuzoGray1980"], Cell[382896, 11252, 524, 19, 70, "Reference", CellTags->"Ref:Luttrell1988"], Cell[383423, 11273, 448, 15, 70, "Reference", CellTags->"Ref:Luttrell1989a"], Cell[383874, 11290, 466, 18, 70, "Reference", CellTags->"Ref:Luttrell1989b"], Cell[384343, 11310, 486, 18, 70, "Reference", CellTags->"Ref:Luttrell1990"], Cell[384832, 11330, 518, 18, 70, 
"Reference", CellTags->"Ref:Luttrell1991a"], Cell[385353, 11350, 484, 16, 70, "Reference", CellTags->"Ref:Luttrell1991b"], Cell[385840, 11368, 468, 18, 70, "Reference", CellTags->"Ref:Luttrell1992"], Cell[386311, 11388, 485, 19, 70, "Reference", CellTags->"Ref:Luttrell1994a"], Cell[386799, 11409, 553, 19, 70, "Reference", CellTags->"Ref:Luttrell1994b"], Cell[387355, 11430, 469, 16, 70, "Reference", CellTags->"Ref:Luttrell1994c"], Cell[387827, 11448, 493, 17, 70, "Reference", CellTags->"Ref:Luttrell1994d"], Cell[388323, 11467, 497, 19, 70, "Reference", CellTags->"Ref:Luttrell1996"], Cell[388823, 11488, 462, 15, 70, "Reference", CellTags->"Ref:Luttrell1997"], Cell[389288, 11505, 247, 9, 70, "Reference", CellTags->"Ref:Rissanen1978"], Cell[389538, 11516, 147, 4, 70, "Reference", CellTags->"Ref:Rissanan1989"], Cell[389688, 11522, 270, 9, 70, "Reference", CellTags->"Ref:Shannon1948"] }, Closed]] }, Open ]] } ] *) (******************************************************************* End of Mathematica Notebook file. *******************************************************************)