From 5b11e9cd42d08e76b86eacf103b1fa7794e21bff Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Fri, 18 Dec 2015 17:01:46 +0000
Subject: iommu/dma: Add some missing #includes

dma-iommu.c was naughtily relying on an implicit transitive #include of
linux/vmalloc.h, which is apparently not present on some architectures.
Add that, plus a couple more headers for other functions which are used
similarly.

Reported-by: kbuild test robot
Signed-off-by: Robin Murphy
Signed-off-by: Joerg Roedel
---
 drivers/iommu/dma-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 3a20db4f8604..4168668f5dd4 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,10 +21,13 @@
 
 #include <linux/device.h>
 #include <linux/dma-iommu.h>
+#include <linux/gfp.h>
 #include <linux/huge_mm.h>
 #include <linux/iommu.h>
 #include <linux/iova.h>
 #include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/vmalloc.h>
 
 int iommu_dma_init(void)
 {
--
cgit v1.2.3

From 0a9afeda8057bcedc997278db193914d32c4003b Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Fri, 18 Dec 2015 17:01:48 +0000
Subject: iommu/dma: Avoid unlikely high-order allocations

Doug reports that the equivalent page allocator on 32-bit ARM exhibits
particularly pathological behaviour under memory pressure when
fragmentation is high, where allocating a 4MB buffer takes tens of
seconds and the number of calls to alloc_pages() is over 9000![1]

We can drastically improve that situation without losing the other
benefits of high-order allocations when they would succeed, by assuming
memory pressure is relatively constant over the course of an
allocation, and not retrying allocations at orders we know to have
failed before. This way, the best-case behaviour remains unchanged, and
in the worst case we should see at most a dozen or so (MAX_ORDER - 1)
failed attempts before falling back to single pages for the remainder
of the buffer.

[1]: http://lists.infradead.org/pipermail/linux-arm-kernel/2015-December/394660.html

Reported-by: Douglas Anderson
Signed-off-by: Robin Murphy
Signed-off-by: Joerg Roedel
---
 drivers/iommu/dma-iommu.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4168668f5dd4..2e7417f98116 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -194,6 +194,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 {
 	struct page **pages;
 	unsigned int i = 0, array_size = count * sizeof(*pages);
+	unsigned int order = MAX_ORDER;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -207,14 +208,15 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 
 	while (count) {
 		struct page *page = NULL;
-		int j, order = __fls(count);
+		int j;
 
 		/*
		 * Higher-order allocations are a convenience rather
		 * than a necessity, hence using __GFP_NORETRY until
		 * falling back to single-page allocations.
		 */
-		for (order = min(order, MAX_ORDER); order > 0; order--) {
+		for (order = min_t(unsigned int, order, __fls(count));
+		     order > 0; order--) {
 			page = alloc_pages(gfp | __GFP_NORETRY, order);
 			if (!page)
 				continue;
--
cgit v1.2.3
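To see why hoisting 'order' out of the loop bounds the worst case, the
strategy can be sketched as a standalone userspace C analogue (not kernel
code: try_alloc_order() below is a hypothetical stand-in for
alloc_pages(gfp | __GFP_NORETRY, order), and the fragmentation cutoff is
invented). The key property matches the patch: once an order has failed,
it is never attempted again for the rest of the buffer.

/*
 * Standalone analogue of the allocation loop after the patch: 'order'
 * persists across chunks, so a failed order stays failed for good.
 * All names and thresholds here are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_ORDER	11	/* mirrors the common kernel default */
#define PAGE_SIZE	4096UL

/* Hypothetical allocator: pretend fragmentation defeats orders > 4. */
static void *try_alloc_order(unsigned int order)
{
	if (order > 4)
		return NULL;
	return malloc(PAGE_SIZE << order);
}

/* Allocate 'count' pages in chunks, largest still-plausible order first. */
static unsigned long alloc_in_chunks(unsigned long count)
{
	unsigned int order = MAX_ORDER;	/* highest order not yet known to fail */
	unsigned long attempts = 0;

	while (count) {
		void *chunk = NULL;

		/* Clamp to the remaining size, as min_t(..., __fls(count)) does. */
		while (order > 0 && (1UL << order) > count)
			order--;
		/* Walk downwards; never climb back to an order that failed. */
		for (; order > 0; order--) {
			attempts++;
			chunk = try_alloc_order(order);
			if (chunk)
				break;
		}
		if (!chunk) {		/* single-page fallback (order 0) */
			attempts++;
			chunk = try_alloc_order(0);
			if (!chunk)
				return attempts;	/* truly out of memory */
		}
		count -= 1UL << order;
		free(chunk);	/* demo only: a real caller keeps the chunk */
	}
	return attempts;
}

int main(void)
{
	/* A 4MB buffer is 1024 pages; count the allocator calls needed. */
	printf("%lu allocation attempts\n", alloc_in_chunks(1024));
	return 0;
}

With the cutoff above, orders 10 down to 5 each fail exactly once, and
every remaining chunk succeeds on the first try, rather than paying the
same run of failures for every chunk of the buffer.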
From f64232eee61906f22942224af2a9fd40e26e592e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Tue, 22 Dec 2015 20:01:06 +0100
Subject: iommu/ipmmu-vmsa: Don't truncate ttbr if LPAE is not enabled

If CONFIG_PHYS_ADDR_T_64BIT=n:

    drivers/iommu/ipmmu-vmsa.c: In function 'ipmmu_domain_init_context':
    drivers/iommu/ipmmu-vmsa.c:434:2: warning: right shift count >= width of type
      ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32);
      ^

As io_pgtable_cfg.arm_lpae_s1_cfg.ttbr[] is an array of u64s, assigning
it to a phys_addr_t may truncate it. Make ttbr u64 to fix this.

Signed-off-by: Geert Uytterhoeven
Signed-off-by: Joerg Roedel
---
 drivers/iommu/ipmmu-vmsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8cf605fa9946..dfb868e2d129 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -295,7 +295,7 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
 
 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 {
-	phys_addr_t ttbr;
+	u64 ttbr;
 
 	/*
	 * Allocate the page table operations.
--
cgit v1.2.3

From 164afb1d85b872907cfac048b46c094db596d529 Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Mon, 4 Jan 2016 16:19:42 +0000
Subject: iommu/dma: Use correct offset in map_sg

When mapping a non-page-aligned scatterlist entry, we copy the original
offset to the output DMA address before aligning it to hand off to
iommu_map_sg(), then later add the IOVA page address portion to get the
final mapped address. However, when the IOVA page size is smaller than
the CPU page size, it is the offset within the IOVA page we want, not
that within the CPU page, which can easily be larger than an IOVA page
and thus result in an incorrect final address.

Fix the bug by taking only the IOVA-aligned part of the offset as the
basis of the DMA address, not the whole thing.

Signed-off-by: Robin Murphy
Signed-off-by: Joerg Roedel
---
 drivers/iommu/dma-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 2e7417f98116..72d6182666cb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -458,7 +458,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
 		size_t s_offset = iova_offset(iovad, s->offset);
 		size_t s_length = s->length;
 
-		sg_dma_address(s) = s->offset;
+		sg_dma_address(s) = s_offset;
 		sg_dma_len(s) = s_length;
 		s->offset -= s_offset;
 		s_length = iova_align(iovad, s_length + s_offset);
--
cgit v1.2.3
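The offset arithmetic behind that last fix can be made concrete with a
small standalone C sketch (illustrative only: iova_offset() and
iova_align() are reimplemented locally assuming a power-of-two granule,
and the numbers are invented). With a 4KB IOVA granule inside a 16KB CPU
page, a buffer offset of 9000 bytes must contribute only 9000 mod 4096 =
808 to the DMA address; folding in the whole 9000 points past the first
IOVA page of the mapping.

/*
 * Standalone illustration of the map_sg offset fix (not kernel code;
 * helpers and values are invented). The granule is the IOVA page size.
 */
#include <stdio.h>

static unsigned long iova_offset(unsigned long granule, unsigned long off)
{
	return off & (granule - 1);	/* offset inside one IOVA page */
}

static unsigned long iova_align(unsigned long granule, unsigned long size)
{
	return (size + granule - 1) & ~(granule - 1);	/* round up */
}

int main(void)
{
	unsigned long granule = 4096;		/* IOVA page size */
	unsigned long cpu_offset = 9000;	/* offset within a 16KB CPU page */
	unsigned long length = 2000;		/* bytes of payload */
	unsigned long iova_base = 0x100000;	/* pretend IOVA allocation */

	unsigned long s_offset = iova_offset(granule, cpu_offset);

	/* Before the fix: whole CPU-page offset folded into the address. */
	printf("buggy DMA address: 0x%lx\n", iova_base + cpu_offset);
	/* After the fix: only the sub-granule part belongs there. */
	printf("fixed DMA address: 0x%lx\n", iova_base + s_offset);
	/* Length actually mapped, rounded out to whole IOVA pages. */
	printf("mapped length:     %lu\n", iova_align(granule, length + s_offset));
	return 0;
}

Here the mapping itself starts at the granule-aligned base of the buffer,
so the buggy address lands 8192 bytes past the data; only the sub-granule
remainder of the offset may ride along in the DMA address.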