Discussion:
[PATCH 03/10] arm64/iommu: implement support for DMA_ATTR_NON_CONSISTENT
Christoph Hellwig
2018-12-08 17:36:55 UTC
DMA_ATTR_NON_CONSISTENT forces contiguous allocations as we don't
want to remap, and is otherwise forced down the same path as if we
were always on a coherent device. No new code is required except for
a few conditionals.
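
For context, a driver using this attribute would look roughly like the
sketch below (illustrative only, example_use() is a hypothetical caller);
the point is that the caller, not the DMA layer, does the cache
maintenance around device accesses:

/* Illustrative sketch, not part of this patch. */
static int example_use(struct device *dev)
{
	size_t len = PAGE_SIZE;
	dma_addr_t dma;
	void *buf;

	buf = dma_alloc_attrs(dev, len, &dma, GFP_KERNEL,
			      DMA_ATTR_NON_CONSISTENT);
	if (!buf)
		return -ENOMEM;

	memset(buf, 0, len);		/* CPU touches the buffer */
	/* hand ownership to the device before it does DMA */
	dma_sync_single_for_device(dev, dma, len, DMA_BIDIRECTIONAL);
	/* ... device DMA happens here ... */
	/* take ownership back before the CPU reads the result */
	dma_sync_single_for_cpu(dev, dma, len, DMA_BIDIRECTIONAL);

	dma_free_attrs(dev, len, buf, dma, DMA_ATTR_NON_CONSISTENT);
	return 0;
}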

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/arm64/mm/dma-mapping.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index d39b60113539..0010688ca30e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -240,7 +240,8 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
dma_free_from_pool(addr, size);
addr = NULL;
}
- } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ } else if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS |
+ DMA_ATTR_NON_CONSISTENT)) {
pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
struct page *page;

@@ -256,7 +257,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
return NULL;
}

- if (coherent) {
+ if (coherent || (attrs & DMA_ATTR_NON_CONSISTENT)) {
memset(addr, 0, size);
return addr;
}
@@ -309,7 +310,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
if (dma_in_atomic_pool(cpu_addr, size)) {
iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
dma_free_from_pool(cpu_addr, size);
- } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ } else if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS |
+ DMA_ATTR_NON_CONSISTENT)) {
struct page *page = vmalloc_to_page(cpu_addr);

iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
@@ -342,10 +344,11 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;

- if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NON_CONSISTENT)) {
unsigned long pfn;

- if (dev_is_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev) ||
+ (attrs & DMA_ATTR_NON_CONSISTENT))
pfn = virt_to_pfn(cpu_addr);
else
pfn = vmalloc_to_pfn(cpu_addr);
@@ -366,10 +369,11 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct vm_struct *area = find_vm_area(cpu_addr);

- if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+ if (attrs & (DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NON_CONSISTENT)) {
struct page *page;

- if (dev_is_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev) ||
+ (attrs & DMA_ATTR_NON_CONSISTENT))
page = virt_to_page(cpu_addr);
else
page = vmalloc_to_page(cpu_addr);
--
2.19.2
Christoph Hellwig
2018-12-08 17:36:54 UTC
There is no need to have an additional kernel mapping for a contiguous
allocation if the device already is DMA coherent, so skip it.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/arm64/mm/dma-mapping.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4c0f498069e8..d39b60113539 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -255,13 +255,18 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
size >> PAGE_SHIFT);
return NULL;
}
+
+ if (coherent) {
+ memset(addr, 0, size);
+ return addr;
+ }
+
addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
prot,
__builtin_return_address(0));
if (addr) {
memset(addr, 0, size);
- if (!coherent)
- __dma_flush_area(page_to_virt(page), iosize);
+ __dma_flush_area(page_to_virt(page), iosize);
} else {
iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
dma_release_from_contiguous(dev, page,
@@ -309,7 +314,9 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,

iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
- dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+
+ if (!dev_is_dma_coherent(dev))
+ dma_common_free_remap(cpu_addr, size, VM_USERMAP);
} else if (is_vmalloc_addr(cpu_addr)){
struct vm_struct *area = find_vm_area(cpu_addr);

@@ -336,11 +343,12 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
return ret;

if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- /*
- * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
- * hence in the vmalloc space.
- */
- unsigned long pfn = vmalloc_to_pfn(cpu_addr);
+ unsigned long pfn;
+
+ if (dev_is_dma_coherent(dev))
+ pfn = virt_to_pfn(cpu_addr);
+ else
+ pfn = vmalloc_to_pfn(cpu_addr);
return __swiotlb_mmap_pfn(vma, pfn, size);
}

@@ -359,11 +367,12 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
struct vm_struct *area = find_vm_area(cpu_addr);

if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- /*
- * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
- * hence in the vmalloc space.
- */
- struct page *page = vmalloc_to_page(cpu_addr);
+ struct page *page;
+
+ if (dev_is_dma_coherent(dev))
+ page = virt_to_page(cpu_addr);
+ else
+ page = vmalloc_to_page(cpu_addr);
return __swiotlb_get_sgtable_page(sgt, page, size);
}
--
2.19.2
Christoph Hellwig
2018-12-10 19:25:08 UTC
Post by Christoph Hellwig
There is no need to have an additional kernel mapping for a contiguous
allocation if the device already is DMA coherent, so skip it.
FWIW, the "need" was that it kept the code in this path simple and the
mapping behaviour consistent with the regular iommu_dma_alloc() case. One
could quite easily retort that there is no need for the extra complexity of
this patch, since vmalloc is cheap on a 64-bit architecture ;)
Heh. Well, without the remap we do less work, we prepare for a simple
implementation of DMA_ATTR_NON_CONSISTENT, and also prepare the code
to be more easily reusable for architectures that don't do remapping of
DMA allocations at all.
Post by Christoph Hellwig
if (addr) {
memset(addr, 0, size);
- if (!coherent)
- __dma_flush_area(page_to_virt(page), iosize);
+ __dma_flush_area(page_to_virt(page), iosize);
Oh poo - seems I missed it at the time but the existing logic here is
wrong. Let me send a separate fix to flip those statements into the correct
order...
Yes, flushing the remapped alias only after zeroing it looks odd.
Christoph Hellwig
2018-12-08 17:36:59 UTC
Move the alloc / free routines down the file so that we can easily use
the map / unmap helpers to implement non-consistent allocations.

Also drop the _coherent postfix to match the method name.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/sparc/kernel/pci_sun4v.c | 229 +++++++++++++++++-----------------
1 file changed, 114 insertions(+), 115 deletions(-)

diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index fa0e42b4cbfb..b95c70136559 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -171,87 +171,6 @@ static inline long iommu_batch_end(u64 mask)
return iommu_batch_flush(p, mask);
}

-static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addrp, gfp_t gfp,
- unsigned long attrs)
-{
- u64 mask;
- unsigned long flags, order, first_page, npages, n;
- unsigned long prot = 0;
- struct iommu *iommu;
- struct atu *atu;
- struct iommu_map_table *tbl;
- struct page *page;
- void *ret;
- long entry;
- int nid;
-
- size = IO_PAGE_ALIGN(size);
- order = get_order(size);
- if (unlikely(order >= MAX_ORDER))
- return NULL;
-
- npages = size >> IO_PAGE_SHIFT;
-
- if (attrs & DMA_ATTR_WEAK_ORDERING)
- prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
-
- nid = dev->archdata.numa_node;
- page = alloc_pages_node(nid, gfp, order);
- if (unlikely(!page))
- return NULL;
-
- first_page = (unsigned long) page_address(page);
- memset((char *)first_page, 0, PAGE_SIZE << order);
-
- iommu = dev->archdata.iommu;
- atu = iommu->atu;
-
- mask = dev->coherent_dma_mask;
- if (mask <= DMA_BIT_MASK(32))
- tbl = &iommu->tbl;
- else
- tbl = &atu->tbl;
-
- entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
- (unsigned long)(-1), 0);
-
- if (unlikely(entry == IOMMU_ERROR_CODE))
- goto range_alloc_fail;
-
- *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
- ret = (void *) first_page;
- first_page = __pa(first_page);
-
- local_irq_save(flags);
-
- iommu_batch_start(dev,
- (HV_PCI_MAP_ATTR_READ | prot |
- HV_PCI_MAP_ATTR_WRITE),
- entry);
-
- for (n = 0; n < npages; n++) {
- long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
- if (unlikely(err < 0L))
- goto iommu_map_fail;
- }
-
- if (unlikely(iommu_batch_end(mask) < 0L))
- goto iommu_map_fail;
-
- local_irq_restore(flags);
-
- return ret;
-
-iommu_map_fail:
- local_irq_restore(flags);
- iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
-
-range_alloc_fail:
- free_pages(first_page, order);
- return NULL;
-}
-
unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
unsigned long iotsb_num,
struct pci_bus *bus_dev)
@@ -316,38 +235,6 @@ static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
local_irq_restore(flags);
}

-static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
- dma_addr_t dvma, unsigned long attrs)
-{
- struct pci_pbm_info *pbm;
- struct iommu *iommu;
- struct atu *atu;
- struct iommu_map_table *tbl;
- unsigned long order, npages, entry;
- unsigned long iotsb_num;
- u32 devhandle;
-
- npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
- iommu = dev->archdata.iommu;
- pbm = dev->archdata.host_controller;
- atu = iommu->atu;
- devhandle = pbm->devhandle;
-
- if (dvma <= DMA_BIT_MASK(32)) {
- tbl = &iommu->tbl;
- iotsb_num = 0; /* we don't care for legacy iommu */
- } else {
- tbl = &atu->tbl;
- iotsb_num = atu->iotsb->iotsb_num;
- }
- entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
- dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
- iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
- order = get_order(size);
- if (order < 10)
- free_pages((unsigned long)cpu, order);
-}
-
static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t sz,
enum dma_data_direction direction,
@@ -671,6 +558,118 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
local_irq_restore(flags);
}

+static void *dma_4v_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs)
+{
+ u64 mask;
+ unsigned long flags, order, first_page, npages, n;
+ unsigned long prot = 0;
+ struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ struct page *page;
+ void *ret;
+ long entry;
+ int nid;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (unlikely(order >= MAX_ORDER))
+ return NULL;
+
+ npages = size >> IO_PAGE_SHIFT;
+
+ if (attrs & DMA_ATTR_WEAK_ORDERING)
+ prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
+
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ return NULL;
+
+ first_page = (unsigned long) page_address(page);
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ iommu = dev->archdata.iommu;
+ atu = iommu->atu;
+
+ mask = dev->coherent_dma_mask;
+ if (mask <= DMA_BIT_MASK(32))
+ tbl = &iommu->tbl;
+ else
+ tbl = &atu->tbl;
+
+ entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
+ (unsigned long)(-1), 0);
+
+ if (unlikely(entry == IOMMU_ERROR_CODE))
+ goto range_alloc_fail;
+
+ *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ first_page = __pa(first_page);
+
+ local_irq_save(flags);
+
+ iommu_batch_start(dev,
+ (HV_PCI_MAP_ATTR_READ | prot |
+ HV_PCI_MAP_ATTR_WRITE),
+ entry);
+
+ for (n = 0; n < npages; n++) {
+ long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
+ if (unlikely(err < 0L))
+ goto iommu_map_fail;
+ }
+
+ if (unlikely(iommu_batch_end(mask) < 0L))
+ goto iommu_map_fail;
+
+ local_irq_restore(flags);
+
+ return ret;
+
+iommu_map_fail:
+ local_irq_restore(flags);
+ iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+
+range_alloc_fail:
+ free_pages(first_page, order);
+ return NULL;
+}
+
+static void dma_4v_free(struct device *dev, size_t size, void *cpu,
+ dma_addr_t dvma, unsigned long attrs)
+{
+ struct pci_pbm_info *pbm;
+ struct iommu *iommu;
+ struct atu *atu;
+ struct iommu_map_table *tbl;
+ unsigned long order, npages, entry;
+ unsigned long iotsb_num;
+ u32 devhandle;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = dev->archdata.iommu;
+ pbm = dev->archdata.host_controller;
+ atu = iommu->atu;
+ devhandle = pbm->devhandle;
+
+ if (dvma <= DMA_BIT_MASK(32)) {
+ tbl = &iommu->tbl;
+ iotsb_num = 0; /* we don't care for legacy iommu */
+ } else {
+ tbl = &atu->tbl;
+ iotsb_num = atu->iotsb->iotsb_num;
+ }
+ entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+ dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+ iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+}
+
static int dma_4v_supported(struct device *dev, u64 device_mask)
{
struct iommu *iommu = dev->archdata.iommu;
@@ -689,8 +688,8 @@ static int dma_4v_supported(struct device *dev, u64 device_mask)
}

static const struct dma_map_ops sun4v_dma_ops = {
- .alloc = dma_4v_alloc_coherent,
- .free = dma_4v_free_coherent,
+ .alloc = dma_4v_alloc,
+ .free = dma_4v_free,
.map_page = dma_4v_map_page,
.unmap_page = dma_4v_unmap_page,
.map_sg = dma_4v_map_sg,
--
2.19.2
David Miller
2018-12-09 04:58:47 UTC
From: Christoph Hellwig <***@lst.de>
Date: Sat, 8 Dec 2018 09:36:59 -0800
Post by Christoph Hellwig
Move the alloc / free routines down the file so that we can easily use
the map / unmap helpers to implement non-consistent allocations.
Also drop the _coherent postfix to match the method name.
Acked-by: David S. Miller <***@davemloft.net>
Christoph Hellwig
2018-12-08 17:37:01 UTC
Memory declared using dma_declare_coherent is ioremapped and thus not
always suitable for our tightened DMA_ATTR_NON_CONSISTENT definition.

Skip it given that all the existing callers don't pass
DMA_ATTR_NON_CONSISTENT anyway.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
include/linux/dma-mapping.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 7799c2b27849..8c81fa5d1f44 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -521,7 +521,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
BUG_ON(!ops);
WARN_ON_ONCE(dev && !dev->coherent_dma_mask);

- if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
+ if (!(attrs & DMA_ATTR_NON_CONSISTENT) &&
+ dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;

/* let the implementation decide on the zone to allocate from: */
--
2.19.2
Christoph Hellwig
2018-12-08 17:36:57 UTC
Move the alloc / free routines down the file so that we can easily use
the map / unmap helpers to implement non-consistent allocations.

Also drop the _coherent postfix to match the method name.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/sparc/kernel/iommu.c | 135 +++++++++++++++++++-------------------
1 file changed, 67 insertions(+), 68 deletions(-)

diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 0626bae5e3da..4bf0497e0704 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -195,72 +195,6 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
}
}

-static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addrp, gfp_t gfp,
- unsigned long attrs)
-{
- unsigned long order, first_page;
- struct iommu *iommu;
- struct page *page;
- int npages, nid;
- iopte_t *iopte;
- void *ret;
-
- size = IO_PAGE_ALIGN(size);
- order = get_order(size);
- if (order >= 10)
- return NULL;
-
- nid = dev->archdata.numa_node;
- page = alloc_pages_node(nid, gfp, order);
- if (unlikely(!page))
- return NULL;
-
- first_page = (unsigned long) page_address(page);
- memset((char *)first_page, 0, PAGE_SIZE << order);
-
- iommu = dev->archdata.iommu;
-
- iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
-
- if (unlikely(iopte == NULL)) {
- free_pages(first_page, order);
- return NULL;
- }
-
- *dma_addrp = (iommu->tbl.table_map_base +
- ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
- ret = (void *) first_page;
- npages = size >> IO_PAGE_SHIFT;
- first_page = __pa(first_page);
- while (npages--) {
- iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
- IOPTE_WRITE |
- (first_page & IOPTE_PAGE));
- iopte++;
- first_page += IO_PAGE_SIZE;
- }
-
- return ret;
-}
-
-static void dma_4u_free_coherent(struct device *dev, size_t size,
- void *cpu, dma_addr_t dvma,
- unsigned long attrs)
-{
- struct iommu *iommu;
- unsigned long order, npages;
-
- npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
- iommu = dev->archdata.iommu;
-
- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
-
- order = get_order(size);
- if (order < 10)
- free_pages((unsigned long)cpu, order);
-}
-
static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t sz,
enum dma_data_direction direction,
@@ -742,6 +676,71 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
spin_unlock_irqrestore(&iommu->lock, flags);
}

+static void *dma_4u_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs)
+{
+ unsigned long order, first_page;
+ struct iommu *iommu;
+ struct page *page;
+ int npages, nid;
+ iopte_t *iopte;
+ void *ret;
+
+ size = IO_PAGE_ALIGN(size);
+ order = get_order(size);
+ if (order >= 10)
+ return NULL;
+
+ nid = dev->archdata.numa_node;
+ page = alloc_pages_node(nid, gfp, order);
+ if (unlikely(!page))
+ return NULL;
+
+ first_page = (unsigned long) page_address(page);
+ memset((char *)first_page, 0, PAGE_SIZE << order);
+
+ iommu = dev->archdata.iommu;
+
+ iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
+
+ if (unlikely(iopte == NULL)) {
+ free_pages(first_page, order);
+ return NULL;
+ }
+
+ *dma_addrp = (iommu->tbl.table_map_base +
+ ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
+ ret = (void *) first_page;
+ npages = size >> IO_PAGE_SHIFT;
+ first_page = __pa(first_page);
+ while (npages--) {
+ iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
+ IOPTE_WRITE |
+ (first_page & IOPTE_PAGE));
+ iopte++;
+ first_page += IO_PAGE_SIZE;
+ }
+
+ return ret;
+}
+
+static void dma_4u_free(struct device *dev, size_t size, void *cpu,
+ dma_addr_t dvma, unsigned long attrs)
+{
+ struct iommu *iommu;
+ unsigned long order, npages;
+
+ npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+ iommu = dev->archdata.iommu;
+
+ iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+ order = get_order(size);
+ if (order < 10)
+ free_pages((unsigned long)cpu, order);
+}
+
+
static int dma_4u_supported(struct device *dev, u64 device_mask)
{
struct iommu *iommu = dev->archdata.iommu;
@@ -758,8 +757,8 @@ static int dma_4u_supported(struct device *dev, u64 device_mask)
}

static const struct dma_map_ops sun4u_dma_ops = {
- .alloc = dma_4u_alloc_coherent,
- .free = dma_4u_free_coherent,
+ .alloc = dma_4u_alloc,
+ .free = dma_4u_free,
.map_page = dma_4u_map_page,
.unmap_page = dma_4u_unmap_page,
.map_sg = dma_4u_map_sg,
--
2.19.2
David Miller
2018-12-09 04:58:28 UTC
From: Christoph Hellwig <***@lst.de>
Date: Sat, 8 Dec 2018 09:36:57 -0800
Post by Christoph Hellwig
Move the alloc / free routines down the file so that we can easily use
the map / unmap helpers to implement non-consistent allocations.
Also drop the _coherent postfix to match the method name.
Acked-by: David S. Miller <***@davemloft.net>
Christoph Hellwig
2018-12-08 17:36:56 UTC
For the iommu ops we can just use the implementation for DMA coherent
devices. For the regular ops we need to mix and match a bit so that
we either use the CMA allocator without remapping, but with a special
error handling case for highmem pages, or the simple allocator.
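
For reference, the buffer allocator selection in __dma_alloc ends up
roughly like this after the patch (simplified sketch, leaving out the
DMA_API_DEBUG and error paths):

	if (cma)
		buf->allocator = &cma_allocator;	/* no remap for NON_CONSISTENT */
	else if (is_coherent || (attrs & DMA_ATTR_NON_CONSISTENT))
		buf->allocator = &simple_allocator;
	else if (allowblock)
		buf->allocator = &remap_allocator;
	else
		buf->allocator = &pool_allocator;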

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/arm/mm/dma-mapping.c | 49 ++++++++++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 2cfb17bad1e6..b3b66b41c450 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -49,6 +49,7 @@ struct arm_dma_alloc_args {
const void *caller;
bool want_vaddr;
int coherent_flag;
+ bool nonconsistent_flag;
};

struct arm_dma_free_args {
@@ -57,6 +58,7 @@ struct arm_dma_free_args {
void *cpu_addr;
struct page *page;
bool want_vaddr;
+ bool nonconsistent_flag;
};

#define NORMAL 0
@@ -348,7 +350,8 @@ static void __dma_free_buffer(struct page *page, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag, gfp_t gfp);
+ int coherent_flag, bool nonconsistent_flag,
+ gfp_t gfp);

static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
@@ -405,7 +408,7 @@ static int __init atomic_pool_init(void)
if (dev_get_cma_area(NULL))
ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
&page, atomic_pool_init, true, NORMAL,
- GFP_KERNEL);
+ false, GFP_KERNEL);
else
ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
&page, atomic_pool_init, true);
@@ -579,7 +582,8 @@ static int __free_from_pool(void *start, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag, gfp_t gfp)
+ int coherent_flag, bool nonconsistent_flag,
+ gfp_t gfp)
{
unsigned long order = get_order(size);
size_t count = size >> PAGE_SHIFT;
@@ -595,12 +599,16 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
if (!want_vaddr)
goto out;

+ if (nonconsistent_flag) {
+ if (PageHighMem(page))
+ goto fail;
+ goto out;
+ }
+
if (PageHighMem(page)) {
ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
- if (!ptr) {
- dma_release_from_contiguous(dev, page, count);
- return NULL;
- }
+ if (!ptr)
+ goto fail;
} else {
__dma_remap(page, size, prot);
ptr = page_address(page);
@@ -609,12 +617,15 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
out:
*ret_page = page;
return ptr;
+ fail:
+ dma_release_from_contiguous(dev, page, count);
+ return NULL;
}

static void __free_from_contiguous(struct device *dev, struct page *page,
- void *cpu_addr, size_t size, bool want_vaddr)
+ void *cpu_addr, size_t size, bool remapped)
{
- if (want_vaddr) {
+ if (remapped) {
if (PageHighMem(page))
__dma_free_remap(cpu_addr, size);
else
@@ -635,7 +646,11 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
struct page **ret_page)
{
struct page *page;
- /* __alloc_simple_buffer is only called when the device is coherent */
+ /*
+ * __alloc_simple_buffer is only called when the device is coherent,
+ * or if the caller explicitly asked for an allocation that is not
+ * consistent.
+ */
page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
if (!page)
return NULL;
@@ -667,13 +682,15 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
return __alloc_from_contiguous(args->dev, args->size, args->prot,
ret_page, args->caller,
args->want_vaddr, args->coherent_flag,
+ args->nonconsistent_flag,
args->gfp);
}

static void cma_allocator_free(struct arm_dma_free_args *args)
{
__free_from_contiguous(args->dev, args->page, args->cpu_addr,
- args->size, args->want_vaddr);
+ args->size,
+ args->want_vaddr || args->nonconsistent_flag);
}

static struct arm_dma_allocator cma_allocator = {
@@ -735,6 +752,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
.caller = caller,
.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
.coherent_flag = is_coherent ? COHERENT : NORMAL,
+ .nonconsistent_flag = (attrs & DMA_ATTR_NON_CONSISTENT),
};

#ifdef CONFIG_DMA_API_DEBUG
@@ -773,7 +791,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,

if (cma)
buf->allocator = &cma_allocator;
- else if (is_coherent)
+ else if (is_coherent || (attrs & DMA_ATTR_NON_CONSISTENT))
buf->allocator = &simple_allocator;
else if (allowblock)
buf->allocator = &remap_allocator;
@@ -874,6 +892,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
.cpu_addr = cpu_addr,
.page = page,
.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
+ .nonconsistent_flag = (attrs & DMA_ATTR_NON_CONSISTENT),
};

buf = arm_dma_buffer_find(cpu_addr);
@@ -1562,7 +1581,8 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
- return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL);
+ return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs,
+ (attrs & DMA_ATTR_NON_CONSISTENT) ? COHERENT : NORMAL);
}

static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size,
@@ -1650,7 +1670,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
void arm_iommu_free_attrs(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t handle, unsigned long attrs)
{
- __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL);
+ __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs,
+ (attrs & DMA_ATTR_NON_CONSISTENT) ? COHERENT : NORMAL);
}

void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
--
2.19.2
Ezequiel Garcia
2018-12-08 22:52:04 UTC
Post by Christoph Hellwig
For the iommu ops we can just use the implementation for DMA coherent
devices. For the regular ops we need to mix and match a bit so that
we either use the CMA allocator without remapping, but with a special
error handling case for highmem pages, or the simple allocator.
---
arch/arm/mm/dma-mapping.c | 49 ++++++++++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 14 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 2cfb17bad1e6..b3b66b41c450 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -49,6 +49,7 @@ struct arm_dma_alloc_args {
const void *caller;
bool want_vaddr;
int coherent_flag;
+ bool nonconsistent_flag;
};
struct arm_dma_free_args {
@@ -57,6 +58,7 @@ struct arm_dma_free_args {
void *cpu_addr;
struct page *page;
bool want_vaddr;
+ bool nonconsistent_flag;
};
#define NORMAL 0
@@ -348,7 +350,8 @@ static void __dma_free_buffer(struct page *page, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag, gfp_t gfp);
+ int coherent_flag, bool nonconsistent_flag,
+ gfp_t gfp);
static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
pgprot_t prot, struct page **ret_page,
@@ -405,7 +408,7 @@ static int __init atomic_pool_init(void)
if (dev_get_cma_area(NULL))
ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
&page, atomic_pool_init, true, NORMAL,
- GFP_KERNEL);
+ false, GFP_KERNEL);
else
ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
&page, atomic_pool_init, true);
@@ -579,7 +582,8 @@ static int __free_from_pool(void *start, size_t size)
static void *__alloc_from_contiguous(struct device *dev, size_t size,
pgprot_t prot, struct page **ret_page,
const void *caller, bool want_vaddr,
- int coherent_flag, gfp_t gfp)
+ int coherent_flag, bool nonconsistent_flag,
+ gfp_t gfp)
{
unsigned long order = get_order(size);
size_t count = size >> PAGE_SHIFT;
@@ -595,12 +599,16 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
if (!want_vaddr)
goto out;
+ if (nonconsistent_flag) {
+ if (PageHighMem(page))
+ goto fail;
+ goto out;
+ }
+
if (PageHighMem(page)) {
ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
- if (!ptr) {
- dma_release_from_contiguous(dev, page, count);
- return NULL;
- }
+ if (!ptr)
+ goto fail;
} else {
__dma_remap(page, size, prot);
ptr = page_address(page);
@@ -609,12 +617,15 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
out:
*ret_page = page;
return ptr;
+ fail:
+ dma_release_from_contiguous(dev, page, count);
+ return NULL;
}
static void __free_from_contiguous(struct device *dev, struct page *page,
- void *cpu_addr, size_t size, bool want_vaddr)
+ void *cpu_addr, size_t size, bool remapped)
{
- if (want_vaddr) {
+ if (remapped) {
if (PageHighMem(page))
__dma_free_remap(cpu_addr, size);
else
@@ -635,7 +646,11 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
struct page **ret_page)
{
struct page *page;
- /* __alloc_simple_buffer is only called when the device is coherent */
+ /*
+ * __alloc_simple_buffer is only called when the device is coherent,
+ * or if the caller explicitly asked for an allocation that is not
+ * consistent.
+ */
page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
if (!page)
return NULL;
@@ -667,13 +682,15 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
return __alloc_from_contiguous(args->dev, args->size, args->prot,
ret_page, args->caller,
args->want_vaddr, args->coherent_flag,
+ args->nonconsistent_flag,
args->gfp);
}
static void cma_allocator_free(struct arm_dma_free_args *args)
{
__free_from_contiguous(args->dev, args->page, args->cpu_addr,
- args->size, args->want_vaddr);
+ args->size,
+ args->want_vaddr || args->nonconsistent_flag);
}
static struct arm_dma_allocator cma_allocator = {
@@ -735,6 +752,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
.caller = caller,
.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
.coherent_flag = is_coherent ? COHERENT : NORMAL,
+ .nonconsistent_flag = (attrs & DMA_ATTR_NON_CONSISTENT),
};
#ifdef CONFIG_DMA_API_DEBUG
@@ -773,7 +791,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (cma)
buf->allocator = &cma_allocator;
- else if (is_coherent)
+ else if (is_coherent || (attrs & DMA_ATTR_NON_CONSISTENT))
buf->allocator = &simple_allocator;
Reading through your code I can't really see where the pgprot is changed
for non-consistent requests. Namely, __get_dma_pgprot only
returns writecombine or coherent memory.

Regards,
Ezequiel
Christoph Hellwig
2018-12-10 19:16:34 UTC
Post by Ezequiel Garcia
Post by Christoph Hellwig
#ifdef CONFIG_DMA_API_DEBUG
@@ -773,7 +791,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (cma)
buf->allocator = &cma_allocator;
- else if (is_coherent)
+ else if (is_coherent || (attrs & DMA_ATTR_NON_CONSISTENT))
buf->allocator = &simple_allocator;
Reading through your code I can't really see where the pgprot is changed
for non-consistent requests. Namely, __get_dma_pgprot only
returns writecombine or coherent memory.
We don't look at the pgprot at all for the simple allocator, and
don't look at prot for the DMA_ATTR_NON_CONSISTENT case in the
CMA allocator, so this should not be a problem. However, we need to
take DMA_ATTR_NON_CONSISTENT into account for calculating the mmap
pgprot, with something like this as an incremental patch:

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index b3b66b41c450..6ac7e430a47c 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -873,7 +873,8 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
- vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+ if (!(attrs & DMA_ATTR_NON_CONSISTENT))
+ vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
}
Christoph Hellwig
2018-12-08 17:37:02 UTC
We got rid of the odd behavior of selectively returning consistent or
non-consistent memory, and now want the normal dma_sync_single_* functions
to be used for strict ownership transfers. While dma_cache_sync hasn't been
removed from the tree yet, it should not be used by any new callers, so the
documentation for it is dropped here.
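
For driver writers, the conversion away from dma_cache_sync looks roughly
like this (hypothetical buffer allocated with DMA_ATTR_NON_CONSISTENT,
dma_handle being its DMA address):

	/* before (deprecated): partial maintenance via dma_cache_sync */
	dma_cache_sync(dev, vaddr, size, DMA_TO_DEVICE);

	/* after: explicit ownership transfers */
	dma_sync_single_for_device(dev, dma_handle, size, DMA_TO_DEVICE);
	/* ... device reads the buffer ... */
	dma_sync_single_for_cpu(dev, dma_handle, size, DMA_FROM_DEVICE);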

Signed-off-by: Christoph Hellwig <***@lst.de>
---
Documentation/DMA-API.txt | 30 ++++--------------------------
Documentation/DMA-attributes.txt | 9 +++++----
include/linux/dma-mapping.h | 3 +++
3 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index ac66ae2509a9..c81fe8a4aeec 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -518,20 +518,9 @@ API at all.
dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flag, unsigned long attrs)

-Identical to dma_alloc_coherent() except that when the
-DMA_ATTR_NON_CONSISTENT flags is passed in the attrs argument, the
-platform will choose to return either consistent or non-consistent memory
-as it sees fit. By using this API, you are guaranteeing to the platform
-that you have all the correct and necessary sync points for this memory
-in the driver should it choose to return non-consistent memory.
-
-Note: where the platform can return consistent memory, it will
-guarantee that the sync points become nops.
-
-Warning: Handling non-consistent memory is a real pain. You should
-only use this API if you positively know your driver will be
-required to work on one of the rare (usually non-PCI) architectures
-that simply cannot make consistent memory.
+Similar to dma_alloc_coherent(), except that the behavior can be controlled
+in more detail using the attrs argument. See Documentation/DMA-attributes.txt
+for more details.

::

@@ -540,7 +529,7 @@ that simply cannot make consistent memory.
dma_addr_t dma_handle, unsigned long attrs)

Free memory allocated by the dma_alloc_attrs(). All parameters common
-parameters must identical to those otherwise passed to dma_fre_coherent,
+parameters must identical to those otherwise passed to dma_free_coherent,
and the attrs argument must be identical to the attrs passed to
dma_alloc_attrs().

@@ -560,17 +549,6 @@ memory or doing partial flushes.
into the width returned by this call. It will also always be a power
of two for easy alignment.

-::
-
- void
- dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction)
-
-Do a partial sync of memory that was allocated by dma_alloc_attrs() with
-the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and
-continuing on for size. Again, you *must* observe the cache line
-boundaries when doing this.
-
::

int
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 8f8d97f65d73..2bb3fc0a621b 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -46,10 +46,11 @@ behavior.
DMA_ATTR_NON_CONSISTENT
-----------------------

-DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
-consistent or non-consistent memory as it sees fit. By using this API,
-you are guaranteeing to the platform that you have all the correct and
-necessary sync points for this memory in the driver.
+DMA_ATTR_NON_CONSISTENT specifies that the memory returned is not
+required to be consistent. The memory is owned by the device when
+returned from this function, and ownership must be explicitly
+transferred to the CPU using dma_sync_single_for_cpu, and back to the
+device using dma_sync_single_for_device.

DMA_ATTR_NO_KERNEL_MAPPING
--------------------------
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8c81fa5d1f44..8757ad5087c4 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -432,6 +432,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)

+/*
+ * Don't use in new code, use dma_sync_single_for_{device,cpu} instead.
+ */
static inline void
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir)
--
2.19.2
Christoph Hellwig
2018-12-08 17:36:53 UTC
If DMA_ATTR_NON_CONSISTENT is passed in the flags we can always just
use the dma_direct_alloc_pages implementation, given that the callers
will take care of any cache maintenance on ownership transfers between
the CPU and the device.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/arc/mm/dma.c | 21 ++++++--------------
arch/mips/mm/dma-noncoherent.c | 5 ++---
arch/openrisc/kernel/dma.c | 23 +++++++++-------------
arch/parisc/kernel/pci-dma.c | 35 ++++++++++++----------------------
kernel/dma/direct.c | 4 ++--
5 files changed, 31 insertions(+), 57 deletions(-)

diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index db203ff69ccf..135759d4ea8c 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -24,7 +24,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
struct page *page;
phys_addr_t paddr;
void *kvaddr;
- bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT);

/*
* __GFP_HIGHMEM flag is cleared by upper layer functions
@@ -46,14 +45,10 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* A coherent buffer needs MMU mapping to enforce non-cachability.
* kvaddr is kernel Virtual address (0x7000_0000 based).
*/
- if (need_coh) {
- kvaddr = ioremap_nocache(paddr, size);
- if (kvaddr == NULL) {
- __free_pages(page, order);
- return NULL;
- }
- } else {
- kvaddr = (void *)(u32)paddr;
+ kvaddr = ioremap_nocache(paddr, size);
+ if (kvaddr == NULL) {
+ __free_pages(page, order);
+ return NULL;
}

/*
@@ -66,9 +61,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
* Currently flush_cache_vmap nukes the L1 cache completely which
* will be optimized as a separate commit
*/
- if (need_coh)
- dma_cache_wback_inv(paddr, size);
-
+ dma_cache_wback_inv(paddr, size);
return kvaddr;
}

@@ -78,9 +71,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
phys_addr_t paddr = dma_handle;
struct page *page = virt_to_page(paddr);

- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- iounmap((void __force __iomem *)vaddr);
-
+ iounmap((void __force __iomem *)vaddr);
__free_pages(page, get_order(size));
}

diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index cb38461391cb..7576cd7193ba 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
void *ret;

ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
- if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+ if (ret) {
dma_cache_wback_inv((unsigned long) ret, size);
ret = (void *)UNCAC_ADDR(ret);
}
@@ -61,8 +61,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
- if (!(attrs & DMA_ATTR_NON_CONSISTENT))
- cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
+ cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}

diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index 159336adfa2f..483adbb000bb 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -98,15 +98,13 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,

va = (unsigned long)page;

- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
- /*
- * We need to iterate through the pages, clearing the dcache for
- * them and setting the cache-inhibit bit.
- */
- if (walk_page_range(va, va + size, &walk)) {
- free_pages_exact(page, size);
- return NULL;
- }
+ /*
+ * We need to iterate through the pages, clearing the dcache for
+ * them and setting the cache-inhibit bit.
+ */
+ if (walk_page_range(va, va + size, &walk)) {
+ free_pages_exact(page, size);
+ return NULL;
}

return (void *)va;
@@ -122,11 +120,8 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
.mm = &init_mm
};

- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) {
- /* walk_page_range shouldn't be able to fail here */
- WARN_ON(walk_page_range(va, va + size, &walk));
- }
-
+ /* walk_page_range shouldn't be able to fail here */
+ WARN_ON(walk_page_range(va, va + size, &walk));
free_pages_exact(vaddr, size);
}

diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 04c48f1ef3fb..6780449e3e8b 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -421,29 +421,18 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size,
return (void *)vaddr;
}

-static void *pcx_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
+static inline bool cpu_supports_coherent_area(void)
{
- void *addr;
-
- if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0)
- return NULL;
-
- addr = (void *)__get_free_pages(flag, get_order(size));
- if (addr)
- *dma_handle = (dma_addr_t)virt_to_phys(addr);
-
- return addr;
+ return boot_cpu_data.cpu_type == pcxl2 ||
+ boot_cpu_data.cpu_type == pcxl;
}

void *arch_dma_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
-
- if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl)
+ if (cpu_supports_coherent_area())
return pcxl_dma_alloc(dev, size, dma_handle, gfp, attrs);
- else
- return pcx_dma_alloc(dev, size, dma_handle, gfp, attrs);
+ return NULL;
}

void arch_dma_free(struct device *dev, size_t size, void *vaddr,
@@ -451,14 +440,14 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
{
int order = get_order(size);

- if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
- size = 1 << (order + PAGE_SHIFT);
- unmap_uncached_pages((unsigned long)vaddr, size);
- pcxl_free_range((unsigned long)vaddr, size);
+ if (WARN_ON_ONCE(!cpu_supports_coherent_area()))
+ return;

- vaddr = __va(dma_handle);
- }
- free_pages((unsigned long)vaddr, get_order(size));
+ size = 1 << (order + PAGE_SHIFT);
+ unmap_uncached_pages((unsigned long)vaddr, size);
+ pcxl_free_range((unsigned long)vaddr, size);
+
+ free_pages((unsigned long)__va(dma_handle), get_order(size));
}

void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 308f88a750c8..4efe1188fd2e 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -206,7 +206,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT))
return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}
@@ -214,7 +214,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
void dma_direct_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT))
arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
else
dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
--
2.19.2
Christoph Hellwig
2018-12-08 17:36:58 UTC
Just allocate the memory and use map_page to map the memory.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/sparc/kernel/iommu.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 4bf0497e0704..4ce24c9dc691 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -699,14 +699,19 @@ static void *dma_4u_alloc(struct device *dev, size_t size,
first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order);

+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ *dma_addrp = dma_4u_map_page(dev, page, 0, size,
+ DMA_BIDIRECTIONAL, 0);
+ if (*dma_addrp == DMA_MAPPING_ERROR)
+ goto out_free_page;
+ return page_address(page);
+ }
+
iommu = dev->archdata.iommu;

iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
-
- if (unlikely(iopte == NULL)) {
- free_pages(first_page, order);
- return NULL;
- }
+ if (unlikely(iopte == NULL))
+ goto out_free_page;

*dma_addrp = (iommu->tbl.table_map_base +
((iopte - iommu->page_table) << IO_PAGE_SHIFT));
@@ -722,18 +727,26 @@ static void *dma_4u_alloc(struct device *dev, size_t size,
}

return ret;
+
+out_free_page:
+ free_pages(first_page, order);
+ return NULL;
}

static void dma_4u_free(struct device *dev, size_t size, void *cpu,
dma_addr_t dvma, unsigned long attrs)
{
- struct iommu *iommu;
- unsigned long order, npages;
+ unsigned long order;

- npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
- iommu = dev->archdata.iommu;
+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ dma_4u_unmap_page(dev, dvma, size, DMA_BIDIRECTIONAL, 0);
+ } else {
+ struct iommu *iommu = dev->archdata.iommu;

- iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+ iommu_tbl_range_free(&iommu->tbl, dvma,
+ IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT,
+ IOMMU_ERROR_CODE);
+ }

order = get_order(size);
if (order < 10)
--
2.19.2
David Miller
2018-12-09 04:58:40 UTC
From: Christoph Hellwig <***@lst.de>
Date: Sat, 8 Dec 2018 09:36:58 -0800
Post by Christoph Hellwig
Just allocate the memory and use map_page to map the memory.
Acked-by: David S. Miller <***@davemloft.net>
Christoph Hellwig
2018-12-08 17:37:00 UTC
Just allocate the memory and use map_page to map the memory.

Signed-off-by: Christoph Hellwig <***@lst.de>
---
arch/sparc/kernel/pci_sun4v.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)

diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index b95c70136559..24a76ecf2986 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -590,6 +590,14 @@ static void *dma_4v_alloc(struct device *dev, size_t size,
first_page = (unsigned long) page_address(page);
memset((char *)first_page, 0, PAGE_SIZE << order);

+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ *dma_addrp = dma_4v_map_page(dev, page, 0, size,
+ DMA_BIDIRECTIONAL, 0);
+ if (*dma_addrp == DMA_MAPPING_ERROR)
+ goto range_alloc_fail;
+ return page_address(page);
+ }
+
iommu = dev->archdata.iommu;
atu = iommu->atu;

@@ -649,6 +657,11 @@ static void dma_4v_free(struct device *dev, size_t size, void *cpu,
unsigned long iotsb_num;
u32 devhandle;

+ if (attrs & DMA_ATTR_NON_CONSISTENT) {
+ dma_4v_unmap_page(dev, dvma, size, DMA_BIDIRECTIONAL, 0);
+ goto free_pages;
+ }
+
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
@@ -665,6 +678,7 @@ static void dma_4v_free(struct device *dev, size_t size, void *cpu,
entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
+free_pages:
order = get_order(size);
if (order < 10)
free_pages((unsigned long)cpu, order);
--
2.19.2
David Miller
2018-12-09 04:58:55 UTC
From: Christoph Hellwig <***@lst.de>
Date: Sat, 8 Dec 2018 09:37:00 -0800
Post by Christoph Hellwig
Just allocate the memory and use map_page to map the memory.
Acked-by: David S. Miller <***@davemloft.net>
