diff --git a/book/_toc.yml b/book/_toc.yml
index 65b1613..b6672b1 100644
--- a/book/_toc.yml
+++ b/book/_toc.yml
@@ -6,4 +6,5 @@ root: intro
 chapters:
 - file: probability_theory
 - file: queries
+- file: graphical_models
 - file: general_remarks
\ No newline at end of file
diff --git a/book/graphical_models.md b/book/graphical_models.md
index 6691bcc..6e5fd8b 100644
--- a/book/graphical_models.md
+++ b/book/graphical_models.md
@@ -1,5 +1,10 @@
 # Probabilistic Graphical Models
 
+This chapter introduces probabilistic graphical models,
+which are a powerful tool for modeling complex systems.
+We will cover the basics of Bayesian networks and Markov random fields,
+and discuss their applications and limitations in machine learning.
+
 ## Motivation
 
 ## Bayesian Networks
diff --git a/book/queries.md b/book/queries.md
index b762c2b..46cc530 100644
--- a/book/queries.md
+++ b/book/queries.md
@@ -155,7 +155,6 @@ fig.show()
 
 The plot shows the normal distribution that is the maximum likelihood estimate for the data if we assume the data is
 i. i. d. and drawn from a normal distribution.
-
 ## Marginals
 
 The marginal query is the next interesting quantity we investigate.
@@ -238,24 +237,41 @@ color_distribution = distribution.marginal([color])
 print(tabulate.tabulate(color_distribution.to_tabulate(), tablefmt="fancy_grid"))
 ```
 
-A final remark on the marginal method, is that every inference method that is part of the marginal query class is still efficiently calculable. A problem is that, it sometimes destroys a property that is needed for finding the mode of the distribution. We will investigate later what that means.
+A final remark on the marginal method is that every inference method in the marginal query class is
+still efficiently calculable.
+A problem is that it sometimes destroys a property that is needed for finding the mode of the distribution.
+We will investigate later what that means.
 
 ## Moments
 
-The final interesting object that belongs to the marginal query class are the moments of a distribution. The (central) moment of a distribution is defined as the following integral:
+The final interesting quantity that belongs to the marginal query class is the moment of a distribution.
 
-$$\mathbb{M}_n(x) = E((x-E(x))^n) = \int (x-\mu)^n p(x) dx$$
-
-In this equation there are multiple symbols that should be discussed. Firstly, $n$ is the order of a moment. Second, $\mu$ is the center of the moment. If the center is $0$, the moment is called a central moment. The most common moments are the expectation given as
+````{prf:definition} Moment Query class
+:label: def-moment
+
+The moment of a distribution is defined as the integral
+
+$$\mathbb{M}_n(x) = E((x-\mu)^n) = \int (x-\mu)^n p(x) dx,$$
+
+where $n$ is the order of the moment and $\mu$ is the center of the moment.
+If the center is the expectation, the moment is called a central moment.
+
+````
+
+The most common moments are the expectation given as
+
+$$E(x) = \int x \, p(x) dx,$$
+
+which is a moment query with $n=1$ and center $0$.
+
+The variance is the second central moment and is given by
 
-$$E(x) = \int x p(x) dx$$,
+$$Var(x) = \int (x-E(x))^2 p(x) dx,$$
 
-which is a moment query with $n=1$ and center $0$. The variance is the second central moment and given by
+which is a moment query with the mean as the center and $n=2$.
 
-$$Var(x) = E((x-E(x))^2) = \int (x-E(x))^2 p(x) dx$$,
+While higher-order moments $(n>2)$ exist, they have little practical use and are not further discussed here.
 
-which is a moment query with the mean as the center and $n=2$. 
While higher order moments exist ($n>2$), they have little practical use and are not further discussed here.
 
 The interface to calculate moments is the `moment` method of the distribution object.
 
 ```{code-cell} ipython3
@@ -267,19 +283,27 @@ print("Raw variance:", distribution.moment(VariableMap({sepal_length: 2}), Varia
 print("Third moment with center 3:", distribution.moment(VariableMap({sepal_length: 3}), VariableMap({sepal_length: 3})))
 ```
 
-As we can see, mean and variance are shortcut by their names since they are so common. Furthermore, we can calculate all moments that exist by plugging in the order and center that we want as VariableMap.
+As we can see, the expectation and variance have shortcut methods named after them since they are so common.
+
+Furthermore, we can calculate any moment by plugging in the order and center that we want as a `VariableMap`.
 
 ## Mode query
 
-The final important quantity of a probability distribution is the mode. The mode refers to the region where the denisty is maximized. Formally,
+The next important quantity of a probability distribution is the mode.
+The mode refers to the region where the density is maximized.
+Formally,
+
+````{prf:definition} Mode Query class
+:label: def-mode
+
+The mode of a distribution is defined as the set where the density is maximal, i.e.,
 
 $$\hat{x} = \underset{x \in \mathcal{X}}{arg \,max} p(x). $$
+````
 
 While common literature describes the mode under a condition, we can omit such a description since we already defined that the conditional distribution is part of the marginal query class. Hence, the mode under a condition is just the chain of the condition and mode methods.
 
 A common perception of the mode is, that it is the single point of highest density, such as in the example below.
-
 ```{code-cell} ipython3
 
 distribution = GaussianDistribution(Continuous("x"), 0, 1)
@@ -292,7 +316,7 @@ However, the mode is more accurately described as the set of points with the hig
 
 ```{code-cell} ipython3
 condition = closed(-float("inf"), -1) | closed(1, float("inf"))
 distribution, _ = distribution.conditional(SimpleEvent({distribution.variables[0]: condition}).as_composite_set())
-go.Figure(distribution.plot()).show()
+go.Figure(distribution.plot(), distribution.plotly_layout()).show()
 ```
 
 We can see that conditioning a Gaussian on such an event already creates a mode that has two points. Furthermore, modes can be sets of infinite many points, such as shown below.
@@ -303,20 +327,92 @@ uniform = UniformDistribution(Continuous("x"), open(-1, 1).simple_sets[0])
 go.Figure(uniform.plot(), uniform.plotly_layout()).show()
 ```
 
-The mode of the uniform distribution is the entire interval of the uniform distribution $(-1, 1)$. The mode is particular useful when we want to find the best (most likely) solution to a problem und not just any.
+The mode of the uniform distribution is its entire interval $(-1, 1)$.
+The mode is particularly useful when we want to find the best (most likely) solution to a problem and not just any.
 
 ## Sampling
 
-Sampling very gucci
+
+Sampling refers to the generation of random samples from a distribution.
+Defining sampling formally is a bit tricky, as it is not a query in the sense of the other queries.
+However, it is an important part of probabilistic modeling, as it allows us to generate examples from a distribution.
+If you are interested in a rigorous definition of sampling, I recommend {cite:p}`Sampling2017StackExchange`.
+
+For most practical purposes, the intuitive notion of drawing random samples is all we need.
+Let's look at an example from the Gaussian distribution.
+
+```{code-cell} ipython3
+distribution = GaussianDistribution(Continuous("x"), 0, 1)
+samples = distribution.sample(2)
+print(samples)
+```
+
+As we can see, we simply drew two samples from the Gaussian distribution.
 
 ## Monte Carlo Estimate
 
+In {prf:ref}`def-marginal`, we defined the marginal query class as the integration over events of the product algebra.
+In practice, quantities such as the marginal probability are often intractable to compute.
+Furthermore, there are integrals that are not part of the marginal query class and yet are interesting to calculate.
+One way to approximate such integrals is to use Monte Carlo methods.
+
+````{prf:definition} Monte Carlo Estimate
+:label: def-monte-carlo
+
+Consider $k$ independent samples $x_1, ..., x_k$ from a multidimensional random variable with a certain pdf $p(x)$.
+The Monte Carlo estimate approximates multidimensional integrals of the form
+
+$$
+\int f(x) p(x) dx
+$$
+
+by using the empirical expectation of the function $f$ evaluated at the samples
+
+$$
+\int f(x) p(x) dx \approx \frac{1}{k} \sum_{i=1}^k f(x_i).
+$$
+
+The Monte Carlo estimate is sometimes referred to as the expectation of the function $f$ under the distribution $p$.
+````
+
+The Monte Carlo estimate is a powerful tool for approximating integrals of almost arbitrary form.
+
+In general, Monte Carlo methods are integral approximations and not tied to probability distributions.
+Until now, we described our integrals in terms of random events.
+Let $E$ be a random event as we know it and consider the indicator function
+
+$$
+\mathbb{1}_E(x) = \begin{cases} 1 & \text{if } x \in E \\ 0 & \text{else} \end{cases}
+$$
+
+Then,
+
+$$
+\int \mathbb{1}_E(x) p(x) dx
+$$
+
+calculates $P(E)$.
+The Monte Carlo estimate then yields the approximation
+
+$$
+P(E) \approx \frac{1}{k} \sum_{i=1}^k \mathbb{1}_E(x_i).
+$$
+
+Since Monte Carlo estimates do not constrain the form of $f$, we can use them to approximate any integral, such as
+$P(x < y)$ or similar complex integrals.
+For further examples of Monte Carlo estimates,
+I recommend [this Monte Carlo integration example](https://en.wikipedia.org/wiki/Monte_Carlo_method).
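+
+To make {prf:ref}`def-monte-carlo` concrete, here is a minimal sketch that approximates $P(E)$ for the event
+$E = \{-1 \leq x \leq 1\}$ under a standard Gaussian by averaging the indicator function over samples,
+and compares the estimate to the exact probability.
+It assumes that `sample` returns an array with one column per variable, as in the sampling example above.
+
+```{code-cell} ipython3
+import numpy as np
+
+x = Continuous("x")
+distribution = GaussianDistribution(x, 0, 1)
+event = SimpleEvent({x: closed(-1, 1)}).as_composite_set()
+
+# Draw k samples and flatten them to one value per sample (single variable assumed).
+samples = np.asarray(distribution.sample(1000)).reshape(-1)
+
+# Average the indicator function of the event over all samples.
+indicator = (samples >= -1) & (samples <= 1)
+print("Monte Carlo estimate:", indicator.mean())
+print("Exact probability:", distribution.probability(event))
+```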
 
 ## Practical Example
 
-In practice, probabilities can be used in robotics. Consider the kitchen scenario from the [product algebra notebook](https://random-events.readthedocs.io/en/latest/notebooks/independent_constraints.html).
+In practice, probabilities can be used in robotics.
+Consider the kitchen scenario from the [product algebra notebook](https://random-events.readthedocs.io/en/latest/conceptual_guide.html#application).
 
 ```{code-cell} ipython3
+
+x = Continuous("x")
+y = Continuous("y")
+
 kitchen = SimpleEvent({x: closed(0, 6.6), y: closed(0, 7)}).as_composite_set()
 refrigerator = SimpleEvent({x: closed(5, 6), y: closed(6.3, 7)}).as_composite_set()
 top_kitchen_island = SimpleEvent({x: closed(0, 5), y: closed(6.5, 7)}).as_composite_set()
@@ -330,14 +426,20 @@ fig = go.Figure(free_space.plot(), free_space.plotly_layout())
 fig.show()
 ```
 
-Let's now say, that we want a position to access the fridge. We want to be as close to the fridge as possible. Naturally, we need a gaussian model for that. We now construct a gaussian distribution over the free space to describe locations and their probabilities to access the fridge.
+Let's now say that we want a position to access the fridge.
+We want to be as close to the fridge as possible.
+Naturally, we need a Gaussian model for that.
+We now construct a Gaussian distribution over the free space
+to describe locations and their probability of being a good position to access the fridge.
 
 ```{code-cell} ipython3
-from probabilistic_model.probabilistic_circuit.probabilistic_circuit import DecomposableProductUnit
+from probabilistic_model.probabilistic_circuit.probabilistic_circuit import ProductUnit
 from probabilistic_model.probabilistic_circuit.distributions import GaussianDistribution
-p_x = GaussianDistribution(Continuous("x"), 5.5, 0.5)
-p_y = GaussianDistribution(Continuous("y"), 6.65, 0.5)
-p_xy = DecomposableProductUnit()
+
+
+p_x = GaussianDistribution(x, 5.5, 0.5)
+p_y = GaussianDistribution(y, 6.65, 0.5)
+p_xy = ProductUnit()
 p_xy.add_subcircuit(p_x)
 p_xy.add_subcircuit(p_y)
 p_xy = p_xy.probabilistic_circuit
@@ -345,7 +447,9 @@ fig = go.Figure(p_xy.plot(), p_xy.plotly_layout())
 fig.show()
 ```
 
-We now want to filter for all positions that are available in the kitchen. Hence, we need to condition our probability distribution on the free space of the kitchen. We can do this by invoking the `conditional` method of the distribution object.
+We now want to filter for all positions that are available in the kitchen.
+Hence, we need to condition our probability distribution on the free space of the kitchen.
+We can do this by invoking the `conditional` method of the distribution object.
 
 ```{code-cell} ipython3
 distribution, _ = p_xy.conditional(free_space)
 fig = go.Figure(distribution.plot(number_of_samples=10000), distribution.plotly_layout())
 fig.show()
 ```
 
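+Before we conclude, note that we can also apply the sampling interface from above to the conditioned distribution,
+for example to draw concrete candidate positions for accessing the fridge.
+This is a minimal sketch that relies only on the `sample` method introduced earlier.
+
+```{code-cell} ipython3
+# Draw five candidate (x, y) positions from the conditioned distribution.
+candidate_positions = distribution.sample(5)
+print(candidate_positions)
+```
+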
-As you can see, we can express our belief of good accessing positions for the fridge using probability theory. This idea scales to many more complex scenarios, such as the localization of a robot in a room or the prediction of the next state of a system. However, this is out of scope for this tutorial.
-
+As you can see, we can express our belief about good positions for accessing the fridge using probability theory.
+This idea scales to many more complex scenarios,
+such as the localization of a robot in a room or the prediction of the next state of a system.
+However, this is out of scope for this tutorial.
 
 ```{bibliography}
 ```
diff --git a/book/references.bib b/book/references.bib
index 435ea7f..834bcbf 100644
--- a/book/references.bib
+++ b/book/references.bib
@@ -136,4 +136,13 @@ @book{mackay2003information
 series = {Probability Theory and Stochastic Modelling 99},
 edition = {3},
 volume = {}
+}
+
+@misc{Sampling2017StackExchange,
+    title = {How does one formally define sampling from a probability distribution?},
+    author = {kccu (https://math.stackexchange.com/users/255727/kccu)},
+    howpublished = {Mathematics Stack Exchange},
+    note = {URL: https://math.stackexchange.com/q/2336295 (version: 2017-06-26)},
+    eprint = {https://math.stackexchange.com/q/2336295},
+    url = {https://math.stackexchange.com/q/2336295}
}
\ No newline at end of file
diff --git a/src/probabilistic_model/distributions/uniform.py b/src/probabilistic_model/distributions/uniform.py
index 6d49990..c62ccc6 100644
--- a/src/probabilistic_model/distributions/uniform.py
+++ b/src/probabilistic_model/distributions/uniform.py
@@ -20,13 +20,13 @@ def log_likelihood_without_bounds_check(self, x: np.array) -> np.array:
     def cdf(self, x: np.array) -> np.array:
         result = (x - self.lower) / (self.upper - self.lower)
         result = np.minimum(1, np.maximum(0, result))
-        return result
+        return result[:, 0]
 
     def univariate_log_mode(self) -> Tuple[AbstractCompositeSet, float]:
         return self.interval.as_composite_set(), self.log_pdf_value()
 
     def log_conditional_from_non_singleton_simple_interval(self, interval: SimpleInterval) -> Tuple[Self, float]:
-        probability = self.cdf(interval.upper) - self.cdf(interval.lower)
+        probability = self.probability_of_simple_event(SimpleEvent({self.variable: interval}))
         return self.__class__(self.variable, interval), np.log(probability)
 
     def sample(self, amount: int) -> np.array:
@@ -99,7 +99,7 @@ def pdf_trace(self) -> go.Scatter:
 
     def cdf_trace(self) -> go.Scatter:
         x = self.x_axis_points_for_plotly()
-        cdf_values = [value if value is None else self.cdf(value) for value in x]
+        cdf_values = [value if value is None else self.cdf(np.array([[value]]))[0] for value in x]
         cdf_trace = go.Scatter(x=x, y=cdf_values, mode='lines', name="Cumulative Distribution Function")
         return cdf_trace
 
diff --git a/src/probabilistic_model/learning/nyga_distribution.py b/src/probabilistic_model/learning/nyga_distribution.py
index 3f07fa7..4d82091 100644
--- a/src/probabilistic_model/learning/nyga_distribution.py
+++ b/src/probabilistic_model/learning/nyga_distribution.py
@@ -490,7 +490,7 @@ def cdf_trace(self) -> go.Scatter:
         for subcircuit in self.subcircuits:
             x_values += [subcircuit.interval.lower, subcircuit.interval.upper]
         x_values = sorted(x_values)
-        y_values = self.cdf(np.array(x_values))
+        y_values = self.cdf(np.array(x_values).reshape(-1, 1))
         return go.Scatter(x=x_values, y=y_values, mode="lines", name=CDF_TRACE_NAME, line=dict(color=CDF_TRACE_COLOR))
 
     def plot(self, **kwargs) -> List[go.Scatter]:
diff --git a/src/probabilistic_model/probabilistic_model.py b/src/probabilistic_model/probabilistic_model.py
index d39af15..b7b8d75 100644
--- a/src/probabilistic_model/probabilistic_model.py
+++ b/src/probabilistic_model/probabilistic_model.py
@@ -431,7 +431,7 @@ def multivariate_mode_traces(self):
             mode, _ = self.mode()
             mode_traces = mode.plot(color=MODE_TRACE_COLOR)
             for trace in mode_traces:
-                trace.update(name=MODE_TRACE_NAME)
+                trace.update(name=MODE_TRACE_NAME, mode="lines+markers")
         except IntractableError:
             mode_traces = []
         return mode_traces
diff --git a/test/test_distributions/test_uniform.py b/test/test_distributions/test_uniform.py
index 761a354..25ec074 100644
--- a/test/test_distributions/test_uniform.py
+++ b/test/test_distributions/test_uniform.py
@@ -25,7 +25,7 @@ def test_probability_of_domain(self):
         self.assertEqual(self.distribution.probability(self.distribution.support()), 1)
 
     def test_cdf(self):
-        cdf = self.distribution.cdf(np.array([-1, 1, 2]))
+        cdf = self.distribution.cdf(np.array([-1, 1, 2]).reshape(-1, 1))
         self.assertEqual(cdf[0], 0)
         self.assertEqual(cdf[1], 0.5)
         self.assertEqual(cdf[2], 1)
diff --git a/test/test_probabilistic_circuits/test_probabilistic_circuit.py b/test/test_probabilistic_circuits/test_probabilistic_circuit.py
index e474529..1fafc39 100644
--- a/test/test_probabilistic_circuits/test_probabilistic_circuit.py
+++ b/test/test_probabilistic_circuits/test_probabilistic_circuit.py
@@ -569,7 +569,7 @@ def test_plot(self):
         sum_unit.normalize()
         traces = sum_unit.plot()
         self.assertGreater(len(traces), 0)
-        # go.Figure(sum_unit.plot(), sum_unit.plotly_layout()).show()
+        # go.Figure(traces, sum_unit.plotly_layout()).show()
 
 
 class MultivariateGaussianTestCase(unittest.TestCase, ShowMixin):