155 files changed, 17825 insertions, 2445 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index ebee55537a05..3471c903f4ff 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -161,3 +161,4 @@ obj-$(CONFIG_POWERCAP)		+= powercap/
 obj-$(CONFIG_MCB)		+= mcb/
 obj-$(CONFIG_RAS)		+= ras/
 obj-$(CONFIG_THUNDERBOLT)	+= thunderbolt/
+obj-$(CONFIG_CORESIGHT)		+= hwtracing/coresight/
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index d24fa1964eb8..6d4e44ea74ac 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -800,7 +800,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
 				result =
 					thermal_zone_bind_cooling_device
 					(thermal, trip, cdev,
-					 THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+					 THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+					 THERMAL_WEIGHT_DEFAULT);
 			else
 				result =
 					thermal_zone_unbind_cooling_device
@@ -824,7 +825,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
 			if (bind)
 				result = thermal_zone_bind_cooling_device
 					(thermal, trip, cdev,
-					 THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+					 THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+					 THERMAL_WEIGHT_DEFAULT);
 			else
 				result = thermal_zone_unbind_cooling_device
 					(thermal, trip, cdev);
@@ -841,7 +843,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
 				result = thermal_zone_bind_cooling_device
 						(thermal, THERMAL_TRIPS_NONE,
 						 cdev, THERMAL_NO_LIMIT,
-						 THERMAL_NO_LIMIT);
+						 THERMAL_NO_LIMIT,
+						 THERMAL_WEIGHT_DEFAULT);
 			else
 				result = thermal_zone_unbind_cooling_device
 						(thermal, THERMAL_TRIPS_NONE,
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 47bbdc1b5be3..a4ac490dd784 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -336,7 +336,7 @@ int amba_device_add(struct amba_device *dev, struct resource *parent)
 
 		amba_put_disable_pclk(dev);
 
-		if (cid == AMBA_CID)
+		if (cid == AMBA_CID || cid == CORESIGHT_CID)
 			dev->periphid = pid;
 
 		if (!dev->periphid)
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 50e8bd00e431..fe95d6ca047f 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -866,7 +866,6 @@ fw_create_instance(struct firmware *firmware, const char *fw_name,
 
 	fw_priv = kzalloc(sizeof(*fw_priv), GFP_KERNEL);
 	if (!fw_priv) {
-		dev_err(device, "%s: kmalloc failed\n", __func__);
 		fw_priv = ERR_PTR(-ENOMEM);
 		goto exit;
 	}
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 317e0e491ea0..b387fb91885e 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1011,6 +1011,7 @@ int __init platform_bus_init(void)
 	error =  bus_register(&platform_bus_type);
 	if (error)
 		device_unregister(&platform_bus);
+	of_platform_register_reconfig_notifier();
 	return error;
 }
 
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 1cb8544598d5..49fddd1964cc 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_PM)	+= sysfs.o generic_ops.o common.o qos.o runtime.o
 obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
-obj-$(CONFIG_PM_OPP)	+= opp.o
+obj-$(CONFIG_PM_OPP)	+= opp/
 obj-$(CONFIG_PM_GENERIC_DOMAINS)	+=  domain.o domain_governor.o
 obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 78369305e069..308e9ffa4abf 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -80,10 +80,8 @@ int pm_clk_add(struct device *dev, const char *con_id)
 		return -EINVAL;
 
 	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
-	if (!ce) {
-		dev_err(dev, "Not enough memory for clock entry.\n");
+	if (!ce)
 		return -ENOMEM;
-	}
 
 	if (con_id) {
 		ce->con_id = kstrdup(con_id, GFP_KERNEL);
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index b0f138806bbc..f32b802b98f4 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -19,8 +19,8 @@
  * @dev: Device to handle.
  *
  * If power.subsys_data is NULL, point it to a new object, otherwise increment
- * its reference counter.  Return 1 if a new object has been created, otherwise
- * return 0 or error code.
+ * its reference counter.  Return 0 if new object has been created or refcount
+ * increased, otherwise negative error code.
  */
 int dev_pm_get_subsys_data(struct device *dev)
 {
@@ -56,13 +56,11 @@ EXPORT_SYMBOL_GPL(dev_pm_get_subsys_data);
  * @dev: Device to handle.
  *
  * If the reference counter of power.subsys_data is zero after dropping the
- * reference, power.subsys_data is removed.  Return 1 if that happens or 0
- * otherwise.
+ * reference, power.subsys_data is removed.
  */
-int dev_pm_put_subsys_data(struct device *dev)
+void dev_pm_put_subsys_data(struct device *dev)
 {
 	struct pm_subsys_data *psd;
-	int ret = 1;
 
 	spin_lock_irq(&dev->power.lock);
 
@@ -70,18 +68,14 @@ int dev_pm_put_subsys_data(struct device *dev)
 	if (!psd)
 		goto out;
 
-	if (--psd->refcount == 0) {
+	if (--psd->refcount == 0)
 		dev->power.subsys_data = NULL;
-	} else {
+	else
 		psd = NULL;
-		ret = 0;
-	}
 
  out:
 	spin_unlock_irq(&dev->power.lock);
 	kfree(psd);
-
-	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_put_subsys_data);
 
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
deleted file mode 100644
index 89ced955fafa..000000000000
--- a/drivers/base/power/opp.c
+++ /dev/null
@@ -1,678 +0,0 @@
-/*
- * Generic OPP Interface
- *
- * Copyright (C) 2009-2010 Texas Instruments Incorporated.
- *	Nishanth Menon
- *	Romit Dasgupta
- *	Kevin Hilman
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
-#include <linux/pm_opp.h>
-#include <linux/of.h>
-#include <linux/export.h>
-
-/*
- * Internal data structure organization with the OPP layer library is as
- * follows:
- * dev_opp_list (root)
- *	|- device 1 (represents voltage domain 1)
- *	|	|- opp 1 (availability, freq, voltage)
- *	|	|- opp 2 ..
- *	...	...
- *	|	`- opp n ..
- *	|- device 2 (represents the next voltage domain)
- *	...
- *	`- device m (represents mth voltage domain)
- * device 1, 2.. are represented by dev_opp structure while each opp
- * is represented by the opp structure.
- */
-
-/**
- * struct dev_pm_opp - Generic OPP description structure
- * @node:	opp list node. The nodes are maintained throughout the lifetime
- *		of boot. It is expected only an optimal set of OPPs are
- *		added to the library by the SoC framework.
- *		RCU usage: opp list is traversed with RCU locks. node
- *		modification is possible realtime, hence the modifications
- *		are protected by the dev_opp_list_lock for integrity.
- *		IMPORTANT: the opp nodes should be maintained in increasing
- *		order.
- * @available:	true/false - marks if this OPP as available or not
- * @rate:	Frequency in hertz
- * @u_volt:	Nominal voltage in microvolts corresponding to this OPP
- * @dev_opp:	points back to the device_opp struct this opp belongs to
- * @head:	RCU callback head used for deferred freeing
- *
- * This structure stores the OPP information for a given device.
- */
-struct dev_pm_opp {
-	struct list_head node;
-
-	bool available;
-	unsigned long rate;
-	unsigned long u_volt;
-
-	struct device_opp *dev_opp;
-	struct rcu_head head;
-};
-
-/**
- * struct device_opp - Device opp structure
- * @node:	list node - contains the devices with OPPs that
- *		have been registered. Nodes once added are not modified in this
- *		list.
- *		RCU usage: nodes are not modified in the list of device_opp,
- *		however addition is possible and is secured by dev_opp_list_lock
- * @dev:	device pointer
- * @head:	notifier head to notify the OPP availability changes.
- * @opp_list:	list of opps
- *
- * This is an internal data structure maintaining the link to opps attached to
- * a device. This structure is not meant to be shared to users as it is
- * meant for book keeping and private to OPP library
- */
-struct device_opp {
-	struct list_head node;
-
-	struct device *dev;
-	struct srcu_notifier_head head;
-	struct list_head opp_list;
-};
-
-/*
- * The root of the list of all devices. All device_opp structures branch off
- * from here, with each device_opp containing the list of opp it supports in
- * various states of availability.
- */
-static LIST_HEAD(dev_opp_list);
-/* Lock to allow exclusive modification to the device and opp lists */
-static DEFINE_MUTEX(dev_opp_list_lock);
-
-/**
- * find_device_opp() - find device_opp struct using device pointer
- * @dev:	device pointer used to lookup device OPPs
- *
- * Search list of device OPPs for one containing matching device. Does a RCU
- * reader operation to grab the pointer needed.
- *
- * Returns pointer to 'struct device_opp' if found, otherwise -ENODEV or
- * -EINVAL based on type of error.
- *
- * Locking: This function must be called under rcu_read_lock(). device_opp
- * is a RCU protected pointer. This means that device_opp is valid as long
- * as we are under RCU lock.
- */
-static struct device_opp *find_device_opp(struct device *dev)
-{
-	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
-
-	if (unlikely(IS_ERR_OR_NULL(dev))) {
-		pr_err("%s: Invalid parameters\n", __func__);
-		return ERR_PTR(-EINVAL);
-	}
-
-	list_for_each_entry_rcu(tmp_dev_opp, &dev_opp_list, node) {
-		if (tmp_dev_opp->dev == dev) {
-			dev_opp = tmp_dev_opp;
-			break;
-		}
-	}
-
-	return dev_opp;
-}
-
-/**
- * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an available opp
- * @opp:	opp for which voltage has to be returned for
- *
- * Return voltage in micro volt corresponding to the opp, else
- * return 0
- *
- * Locking: This function must be called under rcu_read_lock(). opp is a rcu
- * protected pointer. This means that opp which could have been fetched by
- * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
- * under RCU lock. The pointer returned by the opp_find_freq family must be
- * used in the same section as the usage of this function with the pointer
- * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
- * pointer.
- */
-unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
-{
-	struct dev_pm_opp *tmp_opp;
-	unsigned long v = 0;
-
-	tmp_opp = rcu_dereference(opp);
-	if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available)
-		pr_err("%s: Invalid parameters\n", __func__);
-	else
-		v = tmp_opp->u_volt;
-
-	return v;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage);
-
-/**
- * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp
- * @opp:	opp for which frequency has to be returned for
- *
- * Return frequency in hertz corresponding to the opp, else
- * return 0
- *
- * Locking: This function must be called under rcu_read_lock(). opp is a rcu
- * protected pointer. This means that opp which could have been fetched by
- * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
- * under RCU lock. The pointer returned by the opp_find_freq family must be
- * used in the same section as the usage of this function with the pointer
- * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
- * pointer.
- */
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
-{
-	struct dev_pm_opp *tmp_opp;
-	unsigned long f = 0;
-
-	tmp_opp = rcu_dereference(opp);
-	if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available)
-		pr_err("%s: Invalid parameters\n", __func__);
-	else
-		f = tmp_opp->rate;
-
-	return f;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
-
-/**
- * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
- * @dev:	device for which we do this operation
- *
- * This function returns the number of available opps if there are any,
- * else returns 0 if none or the corresponding error value.
- *
- * Locking: This function must be called under rcu_read_lock(). This function
- * internally references two RCU protected structures: device_opp and opp which
- * are safe as long as we are under a common RCU locked section.
- */
-int dev_pm_opp_get_opp_count(struct device *dev)
-{
-	struct device_opp *dev_opp;
-	struct dev_pm_opp *temp_opp;
-	int count = 0;
-
-	dev_opp = find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		int r = PTR_ERR(dev_opp);
-		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
-		return r;
-	}
-
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
-		if (temp_opp->available)
-			count++;
-	}
-
-	return count;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
-
-/**
- * dev_pm_opp_find_freq_exact() - search for an exact frequency
- * @dev:		device for which we do this operation
- * @freq:		frequency to search for
- * @available:		true/false - match for available opp
- *
- * Searches for exact match in the opp list and returns pointer to the matching
- * opp if found, else returns ERR_PTR in case of error and should be handled
- * using IS_ERR. Error return values can be:
- * EINVAL:	for bad pointer
- * ERANGE:	no match found for search
- * ENODEV:	if device not found in list of registered devices
- *
- * Note: available is a modifier for the search. if available=true, then the
- * match is for exact matching frequency and is available in the stored OPP
- * table. if false, the match is for exact frequency which is not available.
- *
- * This provides a mechanism to enable an opp which is not available currently
- * or the opposite as well.
- *
- * Locking: This function must be called under rcu_read_lock(). opp is a rcu
- * protected pointer. The reason for the same is that the opp pointer which is
- * returned will remain valid for use with opp_get_{voltage, freq} only while
- * under the locked area. The pointer returned must be used prior to unlocking
- * with rcu_read_unlock() to maintain the integrity of the pointer.
- */
-struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
-					      unsigned long freq,
-					      bool available)
-{
-	struct device_opp *dev_opp;
-	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
-
-	dev_opp = find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		int r = PTR_ERR(dev_opp);
-		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
-		return ERR_PTR(r);
-	}
-
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
-		if (temp_opp->available == available &&
-				temp_opp->rate == freq) {
-			opp = temp_opp;
-			break;
-		}
-	}
-
-	return opp;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
-
-/**
- * dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq
- * @dev:	device for which we do this operation
- * @freq:	Start frequency
- *
- * Search for the matching ceil *available* OPP from a starting freq
- * for a device.
- *
- * Returns matching *opp and refreshes *freq accordingly, else returns
- * ERR_PTR in case of error and should be handled using IS_ERR. Error return
- * values can be:
- * EINVAL:	for bad pointer
- * ERANGE:	no match found for search
- * ENODEV:	if device not found in list of registered devices
- *
- * Locking: This function must be called under rcu_read_lock(). opp is a rcu
- * protected pointer. The reason for the same is that the opp pointer which is
- * returned will remain valid for use with opp_get_{voltage, freq} only while
- * under the locked area. The pointer returned must be used prior to unlocking
- * with rcu_read_unlock() to maintain the integrity of the pointer.
- */
-struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
-					     unsigned long *freq)
-{
-	struct device_opp *dev_opp;
-	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
-
-	if (!dev || !freq) {
-		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
-		return ERR_PTR(-EINVAL);
-	}
-
-	dev_opp = find_device_opp(dev);
-	if (IS_ERR(dev_opp))
-		return ERR_CAST(dev_opp);
-
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
-		if (temp_opp->available && temp_opp->rate >= *freq) {
-			opp = temp_opp;
-			*freq = opp->rate;
-			break;
-		}
-	}
-
-	return opp;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
-
-/**
- * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq
- * @dev:	device for which we do this operation
- * @freq:	Start frequency
- *
- * Search for the matching floor *available* OPP from a starting freq
- * for a device.
- *
- * Returns matching *opp and refreshes *freq accordingly, else returns
- * ERR_PTR in case of error and should be handled using IS_ERR. Error return
- * values can be:
- * EINVAL:	for bad pointer
- * ERANGE:	no match found for search
- * ENODEV:	if device not found in list of registered devices
- *
- * Locking: This function must be called under rcu_read_lock(). opp is a rcu
- * protected pointer. The reason for the same is that the opp pointer which is
- * returned will remain valid for use with opp_get_{voltage, freq} only while
- * under the locked area. The pointer returned must be used prior to unlocking
- * with rcu_read_unlock() to maintain the integrity of the pointer.
- */
-struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
-					      unsigned long *freq)
-{
-	struct device_opp *dev_opp;
-	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
-
-	if (!dev || !freq) {
-		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
-		return ERR_PTR(-EINVAL);
-	}
-
-	dev_opp = find_device_opp(dev);
-	if (IS_ERR(dev_opp))
-		return ERR_CAST(dev_opp);
-
-	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
-		if (temp_opp->available) {
-			/* go to the next node, before choosing prev */
-			if (temp_opp->rate > *freq)
-				break;
-			else
-				opp = temp_opp;
-		}
-	}
-	if (!IS_ERR(opp))
-		*freq = opp->rate;
-
-	return opp;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
-
-/**
- * dev_pm_opp_add()  - Add an OPP table from a table definitions
- * @dev:	device for which we do this operation
- * @freq:	Frequency in Hz for this OPP
- * @u_volt:	Voltage in uVolts for this OPP
- *
- * This function adds an opp definition to the opp list and returns status.
- * The opp is made available by default and it can be controlled using
- * dev_pm_opp_enable/disable functions.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0:		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST:	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM:	Memory allocation failure
- */
-int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
-{
-	struct device_opp *dev_opp = NULL;
-	struct dev_pm_opp *opp, *new_opp;
-	struct list_head *head;
-
-	/* allocate new OPP node */
-	new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
-	if (!new_opp) {
-		dev_warn(dev, "%s: Unable to create new OPP node\n", __func__);
-		return -ENOMEM;
-	}
-
-	/* Hold our list modification lock here */
-	mutex_lock(&dev_opp_list_lock);
-
-	/* Check for existing list for 'dev' */
-	dev_opp = find_device_opp(dev);
-	if (IS_ERR(dev_opp)) {
-		/*
-		 * Allocate a new device OPP table. In the infrequent case
-		 * where a new device is needed to be added, we pay this
-		 * penalty.
-		 */
-		dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
-		if (!dev_opp) {
-			mutex_unlock(&dev_opp_list_lock);
-			kfree(new_opp);
-			dev_warn(dev,
-				"%s: Unable to create device OPP structure\n",
-				__func__);
-			return -ENOMEM;
-		}
-
-		dev_opp->dev = dev;
-		srcu_init_notifier_head(&dev_opp->head);
-		INIT_LIST_HEAD(&dev_opp->opp_list);
-
-		/* Secure the device list modification */
-		list_add_rcu(&dev_opp->node, &dev_opp_list);
-	}
-
-	/* populate the opp table */
-	new_opp->dev_opp = dev_opp;
-	new_opp->rate = freq;
-	new_opp->u_volt = u_volt;
-	new_opp->available = true;
-
-	/*
-	 * Insert new OPP in order of increasing frequency
-	 * and discard if already present
-	 */
-	head = &dev_opp->opp_list;
-	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
-		if (new_opp->rate <= opp->rate)
-			break;
-		else
-			head = &opp->node;
-	}
-
-	/* Duplicate OPPs ? */
-	if (new_opp->rate == opp->rate) {
-		int ret = opp->available && new_opp->u_volt == opp->u_volt ?
-			0 : -EEXIST;
-
-		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
-			 __func__, opp->rate, opp->u_volt, opp->available,
-			 new_opp->rate, new_opp->u_volt, new_opp->available);
-		mutex_unlock(&dev_opp_list_lock);
-		kfree(new_opp);
-		return ret;
-	}
-
-	list_add_rcu(&new_opp->node, head);
-	mutex_unlock(&dev_opp_list_lock);
-
-	/*
-	 * Notify the changes in the availability of the operable
-	 * frequency/voltage list.
-	 */
-	srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_add);
-
-/**
- * opp_set_availability() - helper to set the availability of an opp
- * @dev:		device for which we do this operation
- * @freq:		OPP frequency to modify availability
- * @availability_req:	availability status requested for this opp
- *
- * Set the availability of an OPP with an RCU operation, opp_{enable,disable}
- * share a common logic which is isolated here.
- *
- * Returns -EINVAL for bad pointers, -ENOMEM if no memory available for the
- * copy operation, returns 0 if no modifcation was done OR modification was
- * successful.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks to
- * keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex locking or synchronize_rcu() blocking calls cannot be used.
- */
-static int opp_set_availability(struct device *dev, unsigned long freq,
-		bool availability_req)
-{
-	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
-	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
-	int r = 0;
-
-	/* keep the node allocated */
-	new_opp = kmalloc(sizeof(*new_opp), GFP_KERNEL);
-	if (!new_opp) {
-		dev_warn(dev, "%s: Unable to create OPP\n", __func__);
-		return -ENOMEM;
-	}
-
-	mutex_lock(&dev_opp_list_lock);
-
-	/* Find the device_opp */
-	list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
-		if (dev == tmp_dev_opp->dev) {
-			dev_opp = tmp_dev_opp;
-			break;
-		}
-	}
-	if (IS_ERR(dev_opp)) {
-		r = PTR_ERR(dev_opp);
-		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
-		goto unlock;
-	}
-
-	/* Do we have the frequency? */
-	list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) {
-		if (tmp_opp->rate == freq) {
-			opp = tmp_opp;
-			break;
-		}
-	}
-	if (IS_ERR(opp)) {
-		r = PTR_ERR(opp);
-		goto unlock;
-	}
-
-	/* Is update really needed? */
-	if (opp->available == availability_req)
-		goto unlock;
-	/* copy the old data over */
-	*new_opp = *opp;
-
-	/* plug in new node */
-	new_opp->available = availability_req;
-
-	list_replace_rcu(&opp->node, &new_opp->node);
-	mutex_unlock(&dev_opp_list_lock);
-	kfree_rcu(opp, head);
-
-	/* Notify the change of the OPP availability */
-	if (availability_req)
-		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE,
-					 new_opp);
-	else
-		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE,
-					 new_opp);
-
-	return 0;
-
-unlock:
-	mutex_unlock(&dev_opp_list_lock);
-	kfree(new_opp);
-	return r;
-}
-
-/**
- * dev_pm_opp_enable() - Enable a specific OPP
- * @dev:	device for which we do this operation
- * @freq:	OPP frequency to enable
- *
- * Enables a provided opp. If the operation is valid, this returns 0, else the
- * corresponding error value. It is meant to be used for users an OPP available
- * after being temporarily made unavailable with dev_pm_opp_disable.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Hence this function indirectly uses RCU and mutex locks to keep the
- * integrity of the internal data structures. Callers should ensure that
- * this function is *NOT* called under RCU protection or in contexts where
- * mutex locking or synchronize_rcu() blocking calls cannot be used.
- */
-int dev_pm_opp_enable(struct device *dev, unsigned long freq)
-{
-	return opp_set_availability(dev, freq, true);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
-
-/**
- * dev_pm_opp_disable() - Disable a specific OPP
- * @dev:	device for which we do this operation
- * @freq:	OPP frequency to disable
- *
- * Disables a provided opp. If the operation is valid, this returns
- * 0, else the corresponding error value. It is meant to be a temporary
- * control by users to make this OPP not available until the circumstances are
- * right to make it available again (with a call to dev_pm_opp_enable).
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Hence this function indirectly uses RCU and mutex locks to keep the
- * integrity of the internal data structures. Callers should ensure that
- * this function is *NOT* called under RCU protection or in contexts where
- * mutex locking or synchronize_rcu() blocking calls cannot be used.
- */
-int dev_pm_opp_disable(struct device *dev, unsigned long freq)
-{
-	return opp_set_availability(dev, freq, false);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
-
-/**
- * dev_pm_opp_get_notifier() - find notifier_head of the device with opp
- * @dev:	device pointer used to lookup device OPPs.
- */
-struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
-{
-	struct device_opp *dev_opp = find_device_opp(dev);
-
-	if (IS_ERR(dev_opp))
-		return ERR_CAST(dev_opp); /* matching type */
-
-	return &dev_opp->head;
-}
-
-#ifdef CONFIG_OF
-/**
- * of_init_opp_table() - Initialize opp table from device tree
- * @dev:	device pointer used to lookup device OPPs.
- *
- * Register the initial OPP table with the OPP library for given device.
- */
-int of_init_opp_table(struct device *dev)
-{
-	const struct property *prop;
-	const __be32 *val;
-	int nr;
-
-	prop = of_find_property(dev->of_node, "operating-points", NULL);
-	if (!prop)
-		return -ENODEV;
-	if (!prop->value)
-		return -ENODATA;
-
-	/*
-	 * Each OPP is a set of tuples consisting of frequency and
-	 * voltage like <freq-kHz vol-uV>.
-	 */
-	nr = prop->length / sizeof(u32);
-	if (nr % 2) {
-		dev_err(dev, "%s: Invalid OPP list\n", __func__);
-		return -EINVAL;
-	}
-
-	val = prop->value;
-	while (nr) {
-		unsigned long freq = be32_to_cpup(val++) * 1000;
-		unsigned long volt = be32_to_cpup(val++);
-
-		if (dev_pm_opp_add(dev, freq, volt))
-			dev_warn(dev, "%s: Failed to add OPP %ld\n",
-				 __func__, freq);
-		nr -= 2;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(of_init_opp_table);
-#endif
diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile
new file mode 100644
index 000000000000..33c1e18c41a4
--- /dev/null
+++ b/drivers/base/power/opp/Makefile
@@ -0,0 +1,2 @@
+ccflags-$(CONFIG_DEBUG_DRIVER)	:= -DDEBUG
+obj-y				+= core.o cpu.o
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
new file mode 100644
index 000000000000..ed19c41d782e
--- /dev/null
+++ b/drivers/base/power/opp/core.c
@@ -0,0 +1,2008 @@
+/*
+ * Generic OPP Interface
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/clk.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/export.h>
+#include <linux/regulator/consumer.h>
+
+#include "opp.h"
+
+/*
+ * The root of the list of all devices. All device_opp structures branch off
+ * from here, with each device_opp containing the list of opp it supports in
+ * various states of availability.
+ */
+static LIST_HEAD(dev_opp_list);
+/* Lock to allow exclusive modification to the device and opp lists */
+DEFINE_MUTEX(dev_opp_list_lock);
+
+#define opp_rcu_lockdep_assert()					\
+do {									\
+	rcu_lockdep_assert(rcu_read_lock_held() ||			\
+				lockdep_is_held(&dev_opp_list_lock),	\
+			   "Missing rcu_read_lock() or "		\
+			   "dev_opp_list_lock protection");		\
+} while (0)
+
+static struct device_list_opp *_find_list_dev(const struct device *dev,
+					      struct device_opp *dev_opp)
+{
+	struct device_list_opp *list_dev;
+
+	list_for_each_entry(list_dev, &dev_opp->dev_list, node)
+		if (list_dev->dev == dev)
+			return list_dev;
+
+	return NULL;
+}
+
+static struct device_opp *_managed_opp(const struct device_node *np)
+{
+	struct device_opp *dev_opp;
+
+	list_for_each_entry_rcu(dev_opp, &dev_opp_list, node) {
+		if (dev_opp->np == np) {
+			/*
+			 * Multiple devices can point to the same OPP table and
+			 * so will have same node-pointer, np.
+			 *
+			 * But the OPPs will be considered as shared only if the
+			 * OPP table contains a "opp-shared" property.
+			 */
+			return dev_opp->shared_opp ? dev_opp : NULL;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * _find_device_opp() - find device_opp struct using device pointer
+ * @dev:	device pointer used to lookup device OPPs
+ *
+ * Search list of device OPPs for one containing matching device. Does a RCU
+ * reader operation to grab the pointer needed.
+ *
+ * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or
+ * -EINVAL based on type of error.
+ *
+ * Locking: For readers, this function must be called under rcu_read_lock().
+ * device_opp is a RCU protected pointer, which means that device_opp is valid
+ * as long as we are under RCU lock.
+ *
+ * For Writers, this function must be called with dev_opp_list_lock held.
+ */
+struct device_opp *_find_device_opp(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	opp_rcu_lockdep_assert();
+
+	if (IS_ERR_OR_NULL(dev)) {
+		pr_err("%s: Invalid parameters\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	list_for_each_entry_rcu(dev_opp, &dev_opp_list, node)
+		if (_find_list_dev(dev, dev_opp))
+			return dev_opp;
+
+	return ERR_PTR(-ENODEV);
+}
+
+/**
+ * dev_pm_opp_get_voltage() - Gets the voltage corresponding to an available opp
+ * @opp:	opp for which voltage has to be returned for
+ *
+ * Return: voltage in micro volt corresponding to the opp, else
+ * return 0
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. This means that opp which could have been fetched by
+ * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
+ * under RCU lock. The pointer returned by the opp_find_freq family must be
+ * used in the same section as the usage of this function with the pointer
+ * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
+ * pointer.
+ */
+unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
+{
+	struct dev_pm_opp *tmp_opp;
+	unsigned long v = 0;
+
+	opp_rcu_lockdep_assert();
+
+	tmp_opp = rcu_dereference(opp);
+	if (IS_ERR_OR_NULL(tmp_opp) || !tmp_opp->available)
+		pr_err("%s: Invalid parameters\n", __func__);
+	else
+		v = tmp_opp->u_volt;
+
+	return v;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_voltage);
+
+/**
+ * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp
+ * @opp:	opp for which frequency has to be returned for
+ *
+ * Return: frequency in hertz corresponding to the opp, else
+ * return 0
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. This means that opp which could have been fetched by
+ * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
+ * under RCU lock. The pointer returned by the opp_find_freq family must be
+ * used in the same section as the usage of this function with the pointer
+ * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
+ * pointer.
+ */
+unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+{
+	struct dev_pm_opp *tmp_opp;
+	unsigned long f = 0;
+
+	opp_rcu_lockdep_assert();
+
+	tmp_opp = rcu_dereference(opp);
+	if (IS_ERR_OR_NULL(tmp_opp) || !tmp_opp->available)
+		pr_err("%s: Invalid parameters\n", __func__);
+	else
+		f = tmp_opp->rate;
+
+	return f;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
+
+/**
+ * dev_pm_opp_is_turbo() - Returns if opp is turbo OPP or not
+ * @opp: opp for which turbo mode is being verified
+ *
+ * Turbo OPPs are not for normal use, and can be enabled (under certain
+ * conditions) for short duration of times to finish high throughput work
+ * quickly. Running on them for longer times may overheat the chip.
+ *
+ * Return: true if opp is turbo opp, else false.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. This means that opp which could have been fetched by
+ * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
+ * under RCU lock. The pointer returned by the opp_find_freq family must be
+ * used in the same section as the usage of this function with the pointer
+ * prior to unlocking with rcu_read_unlock() to maintain the integrity of the
+ * pointer.
+ */
+bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp)
+{
+	struct dev_pm_opp *tmp_opp;
+
+	opp_rcu_lockdep_assert();
+
+	tmp_opp = rcu_dereference(opp);
+	if (IS_ERR_OR_NULL(tmp_opp) || !tmp_opp->available) {
+		pr_err("%s: Invalid parameters\n", __func__);
+		return false;
+	}
+
+	return tmp_opp->turbo;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo);
+
+/**
+ * dev_pm_opp_get_max_clock_latency() - Get max clock latency in nanoseconds
+ * @dev:	device for which we do this operation
+ *
+ * Return: This function returns the max clock latency in nanoseconds.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	unsigned long clock_latency_ns;
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		clock_latency_ns = 0;
+	else
+		clock_latency_ns = dev_opp->clock_latency_ns_max;
+
+	rcu_read_unlock();
+	return clock_latency_ns;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency);
+
+/**
+ * dev_pm_opp_get_max_volt_latency() - Get max voltage latency in nanoseconds
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns the max voltage latency in nanoseconds.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *opp;
+	struct regulator *reg;
+	unsigned long latency_ns = 0;
+	unsigned long min_uV = ~0, max_uV = 0;
+	int ret;
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	reg = dev_opp->regulator;
+	if (IS_ERR(reg)) {
+		/* Regulator may not be required for device */
+		if (reg)
+			dev_err(dev, "%s: Invalid regulator (%ld)\n", __func__,
+				PTR_ERR(reg));
+		rcu_read_unlock();
+		return 0;
+	}
+
+	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+		if (!opp->available)
+			continue;
+
+		if (opp->u_volt_min < min_uV)
+			min_uV = opp->u_volt_min;
+		if (opp->u_volt_max > max_uV)
+			max_uV = opp->u_volt_max;
+	}
+
+	rcu_read_unlock();
+
+	/*
+	 * The caller needs to ensure that dev_opp (and hence the regulator)
+	 * isn't freed, while we are executing this routine.
+	 */
+	ret = regulator_set_voltage_time(reg, min_uV, max_uV);
+	if (ret > 0)
+		latency_ns = ret * 1000;
+
+	return latency_ns;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_volt_latency);
+
+/**
+ * dev_pm_opp_get_max_transition_latency() - Get max transition latency in
+ *					     nanoseconds
+ * @dev: device for which we do this operation
+ *
+ * Return: This function returns the max transition latency, in nanoseconds, to
+ * switch from one OPP to other.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev)
+{
+	return dev_pm_opp_get_max_volt_latency(dev) +
+		dev_pm_opp_get_max_clock_latency(dev);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_transition_latency);
+
+/**
+ * dev_pm_opp_get_suspend_opp() - Get suspend opp
+ * @dev:	device for which we do this operation
+ *
+ * Return: This function returns pointer to the suspend opp if it is
+ * defined and available, otherwise it returns NULL.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason for the same is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	opp_rcu_lockdep_assert();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp) || !dev_opp->suspend_opp ||
+	    !dev_opp->suspend_opp->available)
+		return NULL;
+
+	return dev_opp->suspend_opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp);
+
+/**
+ * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list
+ * @dev:	device for which we do this operation
+ *
+ * Return: This function returns the number of available opps if there are any,
+ * else returns 0 if none or the corresponding error value.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+int dev_pm_opp_get_opp_count(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *temp_opp;
+	int count = 0;
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		count = PTR_ERR(dev_opp);
+		dev_err(dev, "%s: device OPP not found (%d)\n",
+			__func__, count);
+		goto out_unlock;
+	}
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available)
+			count++;
+	}
+
+out_unlock:
+	rcu_read_unlock();
+	return count;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
+
+/**
+ * dev_pm_opp_find_freq_exact() - search for an exact frequency
+ * @dev:		device for which we do this operation
+ * @freq:		frequency to search for
+ * @available:		true/false - match for available opp
+ *
+ * Return: Searches for exact match in the opp list and returns pointer to the
+ * matching opp if found, else returns ERR_PTR in case of error and should
+ * be handled using IS_ERR. Error return values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * Note: available is a modifier for the search. if available=true, then the
+ * match is for exact matching frequency and is available in the stored OPP
+ * table. if false, the match is for exact frequency which is not available.
+ *
+ * This provides a mechanism to enable an opp which is not available currently
+ * or the opposite as well.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason for the same is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+					      unsigned long freq,
+					      bool available)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+
+	opp_rcu_lockdep_assert();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		int r = PTR_ERR(dev_opp);
+		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
+		return ERR_PTR(r);
+	}
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available == available &&
+				temp_opp->rate == freq) {
+			opp = temp_opp;
+			break;
+		}
+	}
+
+	return opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
+
+/**
+ * dev_pm_opp_find_freq_ceil() - Search for an rounded ceil freq
+ * @dev:	device for which we do this operation
+ * @freq:	Start frequency
+ *
+ * Search for the matching ceil *available* OPP from a starting freq
+ * for a device.
+ *
+ * Return: matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason for the same is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+					     unsigned long *freq)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+
+	opp_rcu_lockdep_assert();
+
+	if (!dev || !freq) {
+		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		return ERR_CAST(dev_opp);
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available && temp_opp->rate >= *freq) {
+			opp = temp_opp;
+			*freq = opp->rate;
+			break;
+		}
+	}
+
+	return opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
+
+/**
+ * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq
+ * @dev:	device for which we do this operation
+ * @freq:	Start frequency
+ *
+ * Search for the matching floor *available* OPP from a starting freq
+ * for a device.
+ *
+ * Return: matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason for the same is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+					      unsigned long *freq)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+
+	opp_rcu_lockdep_assert();
+
+	if (!dev || !freq) {
+		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		return ERR_CAST(dev_opp);
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available) {
+			/* go to the next node, before choosing prev */
+			if (temp_opp->rate > *freq)
+				break;
+			else
+				opp = temp_opp;
+		}
+	}
+	if (!IS_ERR(opp))
+		*freq = opp->rate;
+
+	return opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
+
+/*
+ * The caller needs to ensure that device_opp (and hence the clk) isn't freed,
+ * while clk returned here is used.
+ */
+static struct clk *_get_opp_clk(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct clk *clk;
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "%s: device opp doesn't exist\n", __func__);
+		clk = ERR_CAST(dev_opp);
+		goto unlock;
+	}
+
+	clk = dev_opp->clk;
+	if (IS_ERR(clk))
+		dev_err(dev, "%s: No clock available for the device\n",
+			__func__);
+
+unlock:
+	rcu_read_unlock();
+	return clk;
+}
+
+static int _set_opp_voltage(struct device *dev, struct regulator *reg,
+			    unsigned long u_volt, unsigned long u_volt_min,
+			    unsigned long u_volt_max)
+{
+	int ret;
+
+	/* Regulator not available for device */
+	if (IS_ERR(reg)) {
+		dev_dbg(dev, "%s: regulator not available: %ld\n", __func__,
+			PTR_ERR(reg));
+		return 0;
+	}
+
+	dev_dbg(dev, "%s: voltages (mV): %lu %lu %lu\n", __func__, u_volt_min,
+		u_volt, u_volt_max);
+
+	ret = regulator_set_voltage_triplet(reg, u_volt_min, u_volt,
+					    u_volt_max);
+	if (ret)
+		dev_err(dev, "%s: failed to set voltage (%lu %lu %lu mV): %d\n",
+			__func__, u_volt_min, u_volt, u_volt_max, ret);
+
+	return ret;
+}
+
+/**
+ * dev_pm_opp_set_rate() - Configure new OPP based on frequency
+ * @dev:	 device for which we do this operation
+ * @target_freq: frequency to achieve
+ *
+ * This configures the power-supplies and clock source to the levels specified
+ * by the OPP corresponding to the target_freq.
+ *
+ * Locking: This function takes rcu_read_lock().
+ */
+int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *old_opp, *opp;
+	struct regulator *reg;
+	struct clk *clk;
+	unsigned long freq, old_freq;
+	unsigned long u_volt, u_volt_min, u_volt_max;
+	unsigned long ou_volt, ou_volt_min, ou_volt_max;
+	int ret;
+
+	if (unlikely(!target_freq)) {
+		dev_err(dev, "%s: Invalid target frequency %lu\n", __func__,
+			target_freq);
+		return -EINVAL;
+	}
+
+	clk = _get_opp_clk(dev);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	freq = clk_round_rate(clk, target_freq);
+	if ((long)freq <= 0)
+		freq = target_freq;
+
+	old_freq = clk_get_rate(clk);
+
+	/* Return early if nothing to do */
+	if (old_freq == freq) {
+		dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+			__func__, freq);
+		return 0;
+	}
+
+	rcu_read_lock();
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "%s: device opp doesn't exist\n", __func__);
+		rcu_read_unlock();
+		return PTR_ERR(dev_opp);
+	}
+
+	old_opp = dev_pm_opp_find_freq_ceil(dev, &old_freq);
+	if (!IS_ERR(old_opp)) {
+		ou_volt = old_opp->u_volt;
+		ou_volt_min = old_opp->u_volt_min;
+		ou_volt_max = old_opp->u_volt_max;
+	} else {
+		dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n",
+			__func__, old_freq, PTR_ERR(old_opp));
+	}
+
+	opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+	if (IS_ERR(opp)) {
+		ret = PTR_ERR(opp);
+		dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n",
+			__func__, freq, ret);
+		rcu_read_unlock();
+		return ret;
+	}
+
+	u_volt = opp->u_volt;
+	u_volt_min = opp->u_volt_min;
+	u_volt_max = opp->u_volt_max;
+
+	reg = dev_opp->regulator;
+
+	rcu_read_unlock();
+
+	/* Scaling up? Scale voltage before frequency */
+	if (freq > old_freq) {
+		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
+				       u_volt_max);
+		if (ret)
+			goto restore_voltage;
+	}
+
+	/* Change frequency */
+
+	dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n",
+		__func__, old_freq, freq);
+
+	ret = clk_set_rate(clk, freq);
+	if (ret) {
+		dev_err(dev, "%s: failed to set clock rate: %d\n", __func__,
+			ret);
+		goto restore_voltage;
+	}
+
+	/* Scaling down? Scale voltage after frequency */
+	if (freq < old_freq) {
+		ret = _set_opp_voltage(dev, reg, u_volt, u_volt_min,
+				       u_volt_max);
+		if (ret)
+			goto restore_freq;
+	}
+
+	return 0;
+
+restore_freq:
+	if (clk_set_rate(clk, old_freq))
+		dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n",
+			__func__, old_freq);
+restore_voltage:
+	/* This shouldn't harm even if the voltages weren't updated earlier */
+	if (!IS_ERR(old_opp))
+		_set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate);
+
+/* List-dev Helpers */
+static void _kfree_list_dev_rcu(struct rcu_head *head)
+{
+	struct device_list_opp *list_dev;
+
+	list_dev = container_of(head, struct device_list_opp, rcu_head);
+	kfree_rcu(list_dev, rcu_head);
+}
+
+static void _remove_list_dev(struct device_list_opp *list_dev,
+			     struct device_opp *dev_opp)
+{
+	list_del(&list_dev->node);
+	call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head,
+		  _kfree_list_dev_rcu);
+}
+
+struct device_list_opp *_add_list_dev(const struct device *dev,
+				      struct device_opp *dev_opp)
+{
+	struct device_list_opp *list_dev;
+
+	list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL);
+	if (!list_dev)
+		return NULL;
+
+	/* Initialize list-dev */
+	list_dev->dev = dev;
+	list_add_rcu(&list_dev->node, &dev_opp->dev_list);
+
+	return list_dev;
+}
+
+/**
+ * _add_device_opp() - Find device OPP table or allocate a new one
+ * @dev:	device for which we do this operation
+ *
+ * It tries to find an existing table first, if it couldn't find one, it
+ * allocates a new OPP table and returns that.
+ *
+ * Return: valid device_opp pointer if success, else NULL.
+ */
+static struct device_opp *_add_device_opp(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct device_list_opp *list_dev;
+	struct device_node *np;
+	int ret;
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (!IS_ERR(dev_opp))
+		return dev_opp;
+
+	/*
+	 * Allocate a new device OPP table. In the infrequent case where a new
+	 * device is needed to be added, we pay this penalty.
+	 */
+	dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
+	if (!dev_opp)
+		return NULL;
+
+	INIT_LIST_HEAD(&dev_opp->dev_list);
+
+	list_dev = _add_list_dev(dev, dev_opp);
+	if (!list_dev) {
+		kfree(dev_opp);
+		return NULL;
+	}
+
+	/*
+	 * Only required for backward compatibility with v1 bindings, but isn't
+	 * harmful for other cases. And so we do it unconditionally.
+	 */
+	np = of_node_get(dev->of_node);
+	if (np) {
+		u32 val;
+
+		if (!of_property_read_u32(np, "clock-latency", &val))
+			dev_opp->clock_latency_ns_max = val;
+		of_property_read_u32(np, "voltage-tolerance",
+				     &dev_opp->voltage_tolerance_v1);
+		of_node_put(np);
+	}
+
+	/* Set regulator to a non-NULL error value */
+	dev_opp->regulator = ERR_PTR(-ENXIO);
+
+	/* Find clk for the device */
+	dev_opp->clk = clk_get(dev, NULL);
+	if (IS_ERR(dev_opp->clk)) {
+		ret = PTR_ERR(dev_opp->clk);
+		if (ret != -EPROBE_DEFER)
+			dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__,
+				ret);
+	}
+
+	srcu_init_notifier_head(&dev_opp->srcu_head);
+	INIT_LIST_HEAD(&dev_opp->opp_list);
+
+	/* Secure the device list modification */
+	list_add_rcu(&dev_opp->node, &dev_opp_list);
+	return dev_opp;
+}
+
+/**
+ * _kfree_device_rcu() - Free device_opp RCU handler
+ * @head:	RCU head
+ */
+static void _kfree_device_rcu(struct rcu_head *head)
+{
+	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
+
+	kfree_rcu(device_opp, rcu_head);
+}
+
+/**
+ * _remove_device_opp() - Removes a device OPP table
+ * @dev_opp: device OPP table to be removed.
+ *
+ * Removes/frees device OPP table it it doesn't contain any OPPs.
+ */
+static void _remove_device_opp(struct device_opp *dev_opp)
+{
+	struct device_list_opp *list_dev;
+
+	if (!list_empty(&dev_opp->opp_list))
+		return;
+
+	if (dev_opp->supported_hw)
+		return;
+
+	if (dev_opp->prop_name)
+		return;
+
+	if (!IS_ERR(dev_opp->regulator))
+		return;
+
+	/* Release clk */
+	if (!IS_ERR(dev_opp->clk))
+		clk_put(dev_opp->clk);
+
+	list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp,
+				    node);
+
+	_remove_list_dev(list_dev, dev_opp);
+
+	/* dev_list must be empty now */
+	WARN_ON(!list_empty(&dev_opp->dev_list));
+
+	list_del_rcu(&dev_opp->node);
+	call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head,
+		  _kfree_device_rcu);
+}
+
+/**
+ * _kfree_opp_rcu() - Free OPP RCU handler
+ * @head:	RCU head
+ */
+static void _kfree_opp_rcu(struct rcu_head *head)
+{
+	struct dev_pm_opp *opp = container_of(head, struct dev_pm_opp, rcu_head);
+
+	kfree_rcu(opp, rcu_head);
+}
+
+/**
+ * _opp_remove()  - Remove an OPP from a table definition
+ * @dev_opp:	points back to the device_opp struct this opp belongs to
+ * @opp:	pointer to the OPP to remove
+ * @notify:	OPP_EVENT_REMOVE notification should be sent or not
+ *
+ * This function removes an opp definition from the opp list.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * It is assumed that the caller holds required mutex for an RCU updater
+ * strategy.
+ */
+static void _opp_remove(struct device_opp *dev_opp,
+			struct dev_pm_opp *opp, bool notify)
+{
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	if (notify)
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+	list_del_rcu(&opp->node);
+	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
+
+	_remove_device_opp(dev_opp);
+}
+
+/**
+ * dev_pm_opp_remove()  - Remove an OPP from OPP list
+ * @dev:	device for which we do this operation
+ * @freq:	OPP to remove with matching 'freq'
+ *
+ * This function removes an opp from the opp list.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_remove(struct device *dev, unsigned long freq)
+{
+	struct dev_pm_opp *opp;
+	struct device_opp *dev_opp;
+	bool found = false;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		goto unlock;
+
+	list_for_each_entry(opp, &dev_opp->opp_list, node) {
+		if (opp->rate == freq) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n",
+			 __func__, freq);
+		goto unlock;
+	}
+
+	_opp_remove(dev_opp, opp, true);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
+
+static struct dev_pm_opp *_allocate_opp(struct device *dev,
+					struct device_opp **dev_opp)
+{
+	struct dev_pm_opp *opp;
+
+	/* allocate new OPP node */
+	opp = kzalloc(sizeof(*opp), GFP_KERNEL);
+	if (!opp)
+		return NULL;
+
+	INIT_LIST_HEAD(&opp->node);
+
+	*dev_opp = _add_device_opp(dev);
+	if (!*dev_opp) {
+		kfree(opp);
+		return NULL;
+	}
+
+	return opp;
+}
+
+static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
+		    struct device_opp *dev_opp)
+{
+	struct dev_pm_opp *opp;
+	struct list_head *head = &dev_opp->opp_list;
+
+	/*
+	 * Insert new OPP in order of increasing frequency and discard if
+	 * already present.
+	 *
+	 * Need to use &dev_opp->opp_list in the condition part of the 'for'
+	 * loop, don't replace it with head otherwise it will become an infinite
+	 * loop.
+	 */
+	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+		if (new_opp->rate > opp->rate) {
+			head = &opp->node;
+			continue;
+		}
+
+		if (new_opp->rate < opp->rate)
+			break;
+
+		/* Duplicate OPPs */
+		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
+			 __func__, opp->rate, opp->u_volt, opp->available,
+			 new_opp->rate, new_opp->u_volt, new_opp->available);
+
+		return opp->available && new_opp->u_volt == opp->u_volt ?
+			0 : -EEXIST;
+	}
+
+	new_opp->dev_opp = dev_opp;
+	list_add_rcu(&new_opp->node, head);
+
+	return 0;
+}
+
+/**
+ * _opp_add_v1() - Allocate a OPP based on v1 bindings.
+ * @dev:	device for which we do this operation
+ * @freq:	Frequency in Hz for this OPP
+ * @u_volt:	Voltage in uVolts for this OPP
+ * @dynamic:	Dynamically added OPPs.
+ *
+ * This function adds an opp definition to the opp list and returns status.
+ * The opp is made available by default and it can be controlled using
+ * dev_pm_opp_enable/disable functions and may be removed by dev_pm_opp_remove.
+ *
+ * NOTE: "dynamic" parameter impacts OPPs added by the dev_pm_opp_of_add_table
+ * and freed by dev_pm_opp_of_remove_table.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ */
+static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
+		       bool dynamic)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *new_opp;
+	unsigned long tol;
+	int ret;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	new_opp = _allocate_opp(dev, &dev_opp);
+	if (!new_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* populate the opp table */
+	new_opp->rate = freq;
+	tol = u_volt * dev_opp->voltage_tolerance_v1 / 100;
+	new_opp->u_volt = u_volt;
+	new_opp->u_volt_min = u_volt - tol;
+	new_opp->u_volt_max = u_volt + tol;
+	new_opp->available = true;
+	new_opp->dynamic = dynamic;
+
+	ret = _opp_add(dev, new_opp, dev_opp);
+	if (ret)
+		goto free_opp;
+
+	mutex_unlock(&dev_opp_list_lock);
+
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
+	return 0;
+
+free_opp:
+	_opp_remove(dev_opp, new_opp, false);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+	return ret;
+}
+
+/* TODO: Support multiple regulators */
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
+			      struct device_opp *dev_opp)
+{
+	u32 microvolt[3] = {0};
+	u32 val;
+	int count, ret;
+	struct property *prop = NULL;
+	char name[NAME_MAX];
+
+	/* Search for "opp-microvolt-<name>" */
+	if (dev_opp->prop_name) {
+		snprintf(name, sizeof(name), "opp-microvolt-%s",
+			 dev_opp->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microvolt" */
+		sprintf(name, "opp-microvolt");
+		prop = of_find_property(opp->np, name, NULL);
+
+		/* Missing property isn't a problem, but an invalid entry is */
+		if (!prop)
+			return 0;
+	}
+
+	count = of_property_count_u32_elems(opp->np, name);
+	if (count < 0) {
+		dev_err(dev, "%s: Invalid %s property (%d)\n",
+			__func__, name, count);
+		return count;
+	}
+
+	/* There can be one or three elements here */
+	if (count != 1 && count != 3) {
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
+			__func__, name, count);
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
+	if (ret) {
+		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
+		return -EINVAL;
+	}
+
+	opp->u_volt = microvolt[0];
+
+	if (count == 1) {
+		opp->u_volt_min = opp->u_volt;
+		opp->u_volt_max = opp->u_volt;
+	} else {
+		opp->u_volt_min = microvolt[1];
+		opp->u_volt_max = microvolt[2];
+	}
+
+	/* Search for "opp-microamp-<name>" */
+	prop = NULL;
+	if (dev_opp->prop_name) {
+		snprintf(name, sizeof(name), "opp-microamp-%s",
+			 dev_opp->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microamp" */
+		sprintf(name, "opp-microamp");
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (prop && !of_property_read_u32(opp->np, name, &val))
+		opp->u_amp = val;
+
+	return 0;
+}
+
+/**
+ * dev_pm_opp_set_supported_hw() - Set supported platforms
+ * @dev: Device for which supported-hw has to be set.
+ * @versions: Array of hierarchy of versions to match.
+ * @count: Number of elements in the array.
+ *
+ * This is required only for the V2 bindings, and it enables a platform to
+ * specify the hierarchy of versions it supports. OPP layer will then enable
+ * OPPs, which are available for those versions, based on its 'opp-supported-hw'
+ * property.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
+				unsigned int count)
+{
+	struct device_opp *dev_opp;
+	int ret = 0;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	/* Do we already have a version hierarchy associated with dev_opp? */
+	if (dev_opp->supported_hw) {
+		dev_err(dev, "%s: Already have supported hardware list\n",
+			__func__);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions),
+					GFP_KERNEL);
+	if (!dev_opp->supported_hw) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	dev_opp->supported_hw_count = count;
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
+
+/**
+ * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw
+ * @dev: Device for which supported-hw has to be put.
+ *
+ * This is required only for the V2 bindings, and is called for a matching
+ * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure
+ * will not be freed.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_supported_hw(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	if (!dev_opp->supported_hw) {
+		dev_err(dev, "%s: Doesn't have supported hardware list\n",
+			__func__);
+		goto unlock;
+	}
+
+	kfree(dev_opp->supported_hw);
+	dev_opp->supported_hw = NULL;
+	dev_opp->supported_hw_count = 0;
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw);
+
+/**
+ * dev_pm_opp_set_prop_name() - Set prop-extn name
+ * @dev: Device for which the prop-name has to be set.
+ * @name: name to postfix to properties.
+ *
+ * This is required only for the V2 bindings, and it enables a platform to
+ * specify the extn to be used for certain property names. The properties to
+ * which the extension will apply are opp-microvolt and opp-microamp. OPP core
+ * should postfix the property name with -<name> while looking for them.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
+{
+	struct device_opp *dev_opp;
+	int ret = 0;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	/* Do we already have a prop-name associated with dev_opp? */
+	if (dev_opp->prop_name) {
+		dev_err(dev, "%s: Already have prop-name %s\n", __func__,
+			dev_opp->prop_name);
+		ret = -EBUSY;
+		goto err;
+	}
+
+	dev_opp->prop_name = kstrdup(name, GFP_KERNEL);
+	if (!dev_opp->prop_name) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
+
+/**
+ * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name
+ * @dev: Device for which the prop-name has to be put.
+ *
+ * This is required only for the V2 bindings, and is called for a matching
+ * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure
+ * will not be freed.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_prop_name(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	if (!dev_opp->prop_name) {
+		dev_err(dev, "%s: Doesn't have a prop-name\n", __func__);
+		goto unlock;
+	}
+
+	kfree(dev_opp->prop_name);
+	dev_opp->prop_name = NULL;
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name);
+
+/**
+ * dev_pm_opp_set_regulator() - Set regulator name for the device
+ * @dev: Device for which regulator name is being set.
+ * @name: Name of the regulator.
+ *
+ * In order to support OPP switching, OPP layer needs to know the name of the
+ * device's regulator, as the core would be required to switch voltages as well.
+ *
+ * This must be called before any OPPs are initialized for the device.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_regulator(struct device *dev, const char *name)
+{
+	struct device_opp *dev_opp;
+	struct regulator *reg;
+	int ret;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _add_device_opp(dev);
+	if (!dev_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	/* This should be called before OPPs are initialized */
+	if (WARN_ON(!list_empty(&dev_opp->opp_list))) {
+		ret = -EBUSY;
+		goto err;
+	}
+
+	/* Already have a regulator set */
+	if (WARN_ON(!IS_ERR(dev_opp->regulator))) {
+		ret = -EBUSY;
+		goto err;
+	}
+	/* Allocate the regulator */
+	reg = regulator_get_optional(dev, name);
+	if (IS_ERR(reg)) {
+		ret = PTR_ERR(reg);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "%s: no regulator (%s) found: %d\n",
+				__func__, name, ret);
+		goto err;
+	}
+
+	dev_opp->regulator = reg;
+
+	mutex_unlock(&dev_opp_list_lock);
+	return 0;
+
+err:
+	_remove_device_opp(dev_opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_regulator);
+
+/**
+ * dev_pm_opp_put_regulator() - Releases resources blocked for regulator
+ * @dev: Device for which regulator was set.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_put_regulator(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' first */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp));
+		goto unlock;
+	}
+
+	if (IS_ERR(dev_opp->regulator)) {
+		dev_err(dev, "%s: Doesn't have regulator set\n", __func__);
+		goto unlock;
+	}
+
+	/* Make sure there are no concurrent readers while updating dev_opp */
+	WARN_ON(!list_empty(&dev_opp->opp_list));
+
+	regulator_put(dev_opp->regulator);
+	dev_opp->regulator = ERR_PTR(-ENXIO);
+
+	/* Try freeing device_opp if this was the last blocking resource */
+	_remove_device_opp(dev_opp);
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
+
+static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp,
+			      struct device_node *np)
+{
+	unsigned int count = dev_opp->supported_hw_count;
+	u32 version;
+	int ret;
+
+	if (!dev_opp->supported_hw)
+		return true;
+
+	while (count--) {
+		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
+						 &version);
+		if (ret) {
+			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
+				 __func__, count, ret);
+			return false;
+		}
+
+		/* Both of these are bitwise masks of the versions */
+		if (!(version & dev_opp->supported_hw[count]))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
+ * @dev:	device for which we do this operation
+ * @np:		device node
+ *
+ * This function adds an opp definition to the opp list and returns status. The
+ * opp can be controlled using dev_pm_opp_enable/disable functions and may be
+ * removed by dev_pm_opp_remove.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -EINVAL	Failed parsing the OPP node
+ */
+static int _opp_add_static_v2(struct device *dev, struct device_node *np)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *new_opp;
+	u64 rate;
+	u32 val;
+	int ret;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	new_opp = _allocate_opp(dev, &dev_opp);
+	if (!new_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	ret = of_property_read_u64(np, "opp-hz", &rate);
+	if (ret < 0) {
+		dev_err(dev, "%s: opp-hz not found\n", __func__);
+		goto free_opp;
+	}
+
+	/* Check if the OPP supports hardware's hierarchy of versions or not */
+	if (!_opp_is_supported(dev, dev_opp, np)) {
+		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
+		goto free_opp;
+	}
+
+	/*
+	 * Rate is defined as an unsigned long in clk API, and so casting
+	 * explicitly to its type. Must be fixed once rate is 64 bit
+	 * guaranteed in clk API.
+	 */
+	new_opp->rate = (unsigned long)rate;
+	new_opp->turbo = of_property_read_bool(np, "turbo-mode");
+
+	new_opp->np = np;
+	new_opp->dynamic = false;
+	new_opp->available = true;
+
+	if (!of_property_read_u32(np, "clock-latency-ns", &val))
+		new_opp->clock_latency_ns = val;
+
+	ret = opp_parse_supplies(new_opp, dev, dev_opp);
+	if (ret)
+		goto free_opp;
+
+	ret = _opp_add(dev, new_opp, dev_opp);
+	if (ret)
+		goto free_opp;
+
+	/* OPP to select on device suspend */
+	if (of_property_read_bool(np, "opp-suspend")) {
+		if (dev_opp->suspend_opp)
+			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
+				 __func__, dev_opp->suspend_opp->rate,
+				 new_opp->rate);
+		else
+			dev_opp->suspend_opp = new_opp;
+	}
+
+	if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max)
+		dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns;
+
+	mutex_unlock(&dev_opp_list_lock);
+
+	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
+		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
+		 new_opp->u_volt_min, new_opp->u_volt_max,
+		 new_opp->clock_latency_ns);
+
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
+	return 0;
+
+free_opp:
+	_opp_remove(dev_opp, new_opp, false);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+	return ret;
+}
+
+/**
+ * dev_pm_opp_add()  - Add an OPP table from a table definitions
+ * @dev:	device for which we do this operation
+ * @freq:	Frequency in Hz for this OPP
+ * @u_volt:	Voltage in uVolts for this OPP
+ *
+ * This function adds an opp definition to the opp list and returns status.
+ * The opp is made available by default and it can be controlled using
+ * dev_pm_opp_enable/disable functions.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ */
+int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+{
+	return _opp_add_v1(dev, freq, u_volt, true);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_add);
+
+/**
+ * _opp_set_availability() - helper to set the availability of an opp
+ * @dev:		device for which we do this operation
+ * @freq:		OPP frequency to modify availability
+ * @availability_req:	availability status requested for this opp
+ *
+ * Set the availability of an OPP with an RCU operation, opp_{enable,disable}
+ * share a common logic which is isolated here.
+ *
+ * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
+ * copy operation, returns 0 if no modification was done OR modification was
+ * successful.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks to
+ * keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex locking or synchronize_rcu() blocking calls cannot be used.
+ */
+static int _opp_set_availability(struct device *dev, unsigned long freq,
+				 bool availability_req)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
+	int r = 0;
+
+	/* keep the node allocated */
+	new_opp = kmalloc(sizeof(*new_opp), GFP_KERNEL);
+	if (!new_opp)
+		return -ENOMEM;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Find the device_opp */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		r = PTR_ERR(dev_opp);
+		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
+		goto unlock;
+	}
+
+	/* Do we have the frequency? */
+	list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) {
+		if (tmp_opp->rate == freq) {
+			opp = tmp_opp;
+			break;
+		}
+	}
+	if (IS_ERR(opp)) {
+		r = PTR_ERR(opp);
+		goto unlock;
+	}
+
+	/* Is update really needed? */
+	if (opp->available == availability_req)
+		goto unlock;
+	/* copy the old data over */
+	*new_opp = *opp;
+
+	/* plug in new node */
+	new_opp->available = availability_req;
+
+	list_replace_rcu(&opp->node, &new_opp->node);
+	mutex_unlock(&dev_opp_list_lock);
+	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu);
+
+	/* Notify the change of the OPP availability */
+	if (availability_req)
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE,
+					 new_opp);
+	else
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE,
+					 new_opp);
+
+	return 0;
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+	kfree(new_opp);
+	return r;
+}
+
+/**
+ * dev_pm_opp_enable() - Enable a specific OPP
+ * @dev:	device for which we do this operation
+ * @freq:	OPP frequency to enable
+ *
+ * Enables a provided opp. If the operation is valid, this returns 0, else the
+ * corresponding error value. It is meant to be used for users an OPP available
+ * after being temporarily made unavailable with dev_pm_opp_disable.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU and mutex locks to keep the
+ * integrity of the internal data structures. Callers should ensure that
+ * this function is *NOT* called under RCU protection or in contexts where
+ * mutex locking or synchronize_rcu() blocking calls cannot be used.
+ *
+ * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
+ * copy operation, returns 0 if no modification was done OR modification was
+ * successful.
+ */
+int dev_pm_opp_enable(struct device *dev, unsigned long freq)
+{
+	return _opp_set_availability(dev, freq, true);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
+
+/**
+ * dev_pm_opp_disable() - Disable a specific OPP
+ * @dev:	device for which we do this operation
+ * @freq:	OPP frequency to disable
+ *
+ * Disables a provided opp. If the operation is valid, this returns
+ * 0, else the corresponding error value. It is meant to be a temporary
+ * control by users to make this OPP not available until the circumstances are
+ * right to make it available again (with a call to dev_pm_opp_enable).
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU and mutex locks to keep the
+ * integrity of the internal data structures. Callers should ensure that
+ * this function is *NOT* called under RCU protection or in contexts where
+ * mutex locking or synchronize_rcu() blocking calls cannot be used.
+ *
+ * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
+ * copy operation, returns 0 if no modification was done OR modification was
+ * successful.
+ */
+int dev_pm_opp_disable(struct device *dev, unsigned long freq)
+{
+	return _opp_set_availability(dev, freq, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
+
+/**
+ * dev_pm_opp_get_notifier() - find notifier_head of the device with opp
+ * @dev:	device pointer used to lookup device OPPs.
+ *
+ * Return: pointer to  notifier head if found, otherwise -ENODEV or
+ * -EINVAL based on type of error casted as pointer. value must be checked
+ *  with IS_ERR to determine valid pointer or error result.
+ *
+ * Locking: This function must be called under rcu_read_lock(). dev_opp is a RCU
+ * protected pointer. The reason for the same is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
+{
+	struct device_opp *dev_opp = _find_device_opp(dev);
+
+	if (IS_ERR(dev_opp))
+		return ERR_CAST(dev_opp); /* matching type */
+
+	return &dev_opp->srcu_head;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
+
+#ifdef CONFIG_OF
+/**
+ * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
+ *				  entries
+ * @dev:	device pointer used to lookup device OPPs.
+ *
+ * Free OPPs created using static entries present in DT.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_remove_table(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct dev_pm_opp *opp, *tmp;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' */
+	dev_opp = _find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		int error = PTR_ERR(dev_opp);
+
+		if (error != -ENODEV)
+			WARN(1, "%s: dev_opp: %d\n",
+			     IS_ERR_OR_NULL(dev) ?
+					"Invalid device" : dev_name(dev),
+			     error);
+		goto unlock;
+	}
+
+	/* Find if dev_opp manages a single device */
+	if (list_is_singular(&dev_opp->dev_list)) {
+		/* Free static OPPs */
+		list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) {
+			if (!opp->dynamic)
+				_opp_remove(dev_opp, opp, true);
+		}
+	} else {
+		_remove_list_dev(_find_list_dev(dev, dev_opp), dev_opp);
+	}
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
+
+/* Returns opp descriptor node for a device, caller must do of_node_put() */
+struct device_node *_of_get_opp_desc_node(struct device *dev)
+{
+	/*
+	 * TODO: Support for multiple OPP tables.
+	 *
+	 * There should be only ONE phandle present in "operating-points-v2"
+	 * property.
+	 */
+
+	return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
+}
+
+/* Initializes OPP tables based on new bindings */
+static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
+{
+	struct device_node *np;
+	struct device_opp *dev_opp;
+	int ret = 0, count = 0;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _managed_opp(opp_np);
+	if (dev_opp) {
+		/* OPPs are already managed */
+		if (!_add_list_dev(dev, dev_opp))
+			ret = -ENOMEM;
+		mutex_unlock(&dev_opp_list_lock);
+		return ret;
+	}
+	mutex_unlock(&dev_opp_list_lock);
+
+	/* We have opp-list node now, iterate over it and add OPPs */
+	for_each_available_child_of_node(opp_np, np) {
+		count++;
+
+		ret = _opp_add_static_v2(dev, np);
+		if (ret) {
+			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
+				ret);
+			goto free_table;
+		}
+	}
+
+	/* There should be one of more OPP defined */
+	if (WARN_ON(!count))
+		return -ENOENT;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _find_device_opp(dev);
+	if (WARN_ON(IS_ERR(dev_opp))) {
+		ret = PTR_ERR(dev_opp);
+		mutex_unlock(&dev_opp_list_lock);
+		goto free_table;
+	}
+
+	dev_opp->np = opp_np;
+	dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+
+	mutex_unlock(&dev_opp_list_lock);
+
+	return 0;
+
+free_table:
+	dev_pm_opp_of_remove_table(dev);
+
+	return ret;
+}
+
+/* Initializes OPP tables based on old-deprecated bindings */
+static int _of_add_opp_table_v1(struct device *dev)
+{
+	const struct property *prop;
+	const __be32 *val;
+	int nr;
+
+	prop = of_find_property(dev->of_node, "operating-points", NULL);
+	if (!prop)
+		return -ENODEV;
+	if (!prop->value)
+		return -ENODATA;
+
+	/*
+	 * Each OPP is a set of tuples consisting of frequency and
+	 * voltage like <freq-kHz vol-uV>.
+	 */
+	nr = prop->length / sizeof(u32);
+	if (nr % 2) {
+		dev_err(dev, "%s: Invalid OPP list\n", __func__);
+		return -EINVAL;
+	}
+
+	val = prop->value;
+	while (nr) {
+		unsigned long freq = be32_to_cpup(val++) * 1000;
+		unsigned long volt = be32_to_cpup(val++);
+
+		if (_opp_add_v1(dev, freq, volt, false))
+			dev_warn(dev, "%s: Failed to add OPP %ld\n",
+				 __func__, freq);
+		nr -= 2;
+	}
+
+	return 0;
+}
+
+/**
+ * dev_pm_opp_of_add_table() - Initialize opp table from device tree
+ * @dev:	device pointer used to lookup device OPPs.
+ *
+ * Register the initial OPP table with the OPP library for given device.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -ENODEV	when 'operating-points' property is not found or is invalid data
+ *		in device node.
+ * -ENODATA	when empty 'operating-points' property is found
+ * -EINVAL	when invalid entries are found in opp-v2 table
+ */
+int dev_pm_opp_of_add_table(struct device *dev)
+{
+	struct device_node *opp_np;
+	int ret;
+
+	/*
+	 * OPPs have two version of bindings now. The older one is deprecated,
+	 * try for the new binding first.
+	 */
+	opp_np = _of_get_opp_desc_node(dev);
+	if (!opp_np) {
+		/*
+		 * Try old-deprecated bindings for backward compatibility with
+		 * older dtbs.
+		 */
+		return _of_add_opp_table_v1(dev);
+	}
+
+	ret = _of_add_opp_table_v2(dev, opp_np);
+	of_node_put(opp_np);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
+#endif
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
new file mode 100644
index 000000000000..9f0c15570f64
--- /dev/null
+++ b/drivers/base/power/opp/cpu.c
@@ -0,0 +1,271 @@
+/*
+ * Generic OPP helper interface for CPU device
+ *
+ * Copyright (C) 2009-2014 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include "opp.h"
+
+#ifdef CONFIG_CPU_FREQ
+
+/**
+ * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
+ * @dev:	device for which we do this operation
+ * @table:	Cpufreq table returned back to caller
+ *
+ * Generate a cpufreq table for a provided device- this assumes that the
+ * opp list is already initialized and ready for usage.
+ *
+ * This function allocates required memory for the cpufreq table. It is
+ * expected that the caller does the required maintenance such as freeing
+ * the table as required.
+ *
+ * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
+ * if no memory available for the operation (table is not populated), returns 0
+ * if successful and table is populated.
+ *
+ * WARNING: It is  important for the callers to ensure refreshing their copy of
+ * the table if any of the mentioned functions have been invoked in the interim.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Since we just use the regular accessor functions to access the internal data
+ * structures, we use RCU read lock inside this function. As a result, users of
+ * this function DONOT need to use explicit locks for invoking.
+ */
+int dev_pm_opp_init_cpufreq_table(struct device *dev,
+				  struct cpufreq_frequency_table **table)
+{
+	struct dev_pm_opp *opp;
+	struct cpufreq_frequency_table *freq_table = NULL;
+	int i, max_opps, ret = 0;
+	unsigned long rate;
+
+	rcu_read_lock();
+
+	max_opps = dev_pm_opp_get_opp_count(dev);
+	if (max_opps <= 0) {
+		ret = max_opps ? max_opps : -ENODATA;
+		goto out;
+	}
+
+	freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC);
+	if (!freq_table) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0, rate = 0; i < max_opps; i++, rate++) {
+		/* find next rate */
+		opp = dev_pm_opp_find_freq_ceil(dev, &rate);
+		if (IS_ERR(opp)) {
+			ret = PTR_ERR(opp);
+			goto out;
+		}
+		freq_table[i].driver_data = i;
+		freq_table[i].frequency = rate / 1000;
+
+		/* Is Boost/turbo opp ? */
+		if (dev_pm_opp_is_turbo(opp))
+			freq_table[i].flags = CPUFREQ_BOOST_FREQ;
+	}
+
+	freq_table[i].driver_data = i;
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
+
+	*table = &freq_table[0];
+
+out:
+	rcu_read_unlock();
+	if (ret)
+		kfree(freq_table);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
+
+/**
+ * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
+ * @dev:	device for which we do this operation
+ * @table:	table to free
+ *
+ * Free up the table allocated by dev_pm_opp_init_cpufreq_table
+ */
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
+				   struct cpufreq_frequency_table **table)
+{
+	if (!table)
+		return;
+
+	kfree(*table);
+	*table = NULL;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
+#endif	/* CONFIG_CPU_FREQ */
+
+/* Required only for V1 bindings, as v2 can manage it from DT itself */
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+{
+	struct device_list_opp *list_dev;
+	struct device_opp *dev_opp;
+	struct device *dev;
+	int cpu, ret = 0;
+
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = _find_device_opp(cpu_dev);
+	if (IS_ERR(dev_opp)) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	for_each_cpu(cpu, cpumask) {
+		if (cpu == cpu_dev->id)
+			continue;
+
+		dev = get_cpu_device(cpu);
+		if (!dev) {
+			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
+				__func__, cpu);
+			continue;
+		}
+
+		list_dev = _add_list_dev(dev, dev_opp);
+		if (!list_dev) {
+			dev_err(dev, "%s: failed to add list-dev for cpu%d device\n",
+				__func__, cpu);
+			continue;
+		}
+	}
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
+
+#ifdef CONFIG_OF
+void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
+{
+	struct device *cpu_dev;
+	int cpu;
+
+	WARN_ON(cpumask_empty(cpumask));
+
+	for_each_cpu(cpu, cpumask) {
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__,
+			       cpu);
+			continue;
+		}
+
+		dev_pm_opp_of_remove_table(cpu_dev);
+	}
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
+
+int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
+{
+	struct device *cpu_dev;
+	int cpu, ret = 0;
+
+	WARN_ON(cpumask_empty(cpumask));
+
+	for_each_cpu(cpu, cpumask) {
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__,
+			       cpu);
+			continue;
+		}
+
+		ret = dev_pm_opp_of_add_table(cpu_dev);
+		if (ret) {
+			pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
+			       __func__, cpu, ret);
+
+			/* Free all other OPPs */
+			dev_pm_opp_of_cpumask_remove_table(cpumask);
+			break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
+
+/*
+ * Works only for OPP v2 bindings.
+ *
+ * Returns -ENOENT if operating-points-v2 bindings aren't supported.
+ */
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+{
+	struct device_node *np, *tmp_np;
+	struct device *tcpu_dev;
+	int cpu, ret = 0;
+
+	/* Get OPP descriptor node */
+	np = _of_get_opp_desc_node(cpu_dev);
+	if (!np) {
+		dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
+		return -ENOENT;
+	}
+
+	cpumask_set_cpu(cpu_dev->id, cpumask);
+
+	/* OPPs are shared ? */
+	if (!of_property_read_bool(np, "opp-shared"))
+		goto put_cpu_node;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == cpu_dev->id)
+			continue;
+
+		tcpu_dev = get_cpu_device(cpu);
+		if (!tcpu_dev) {
+			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
+				__func__, cpu);
+			ret = -ENODEV;
+			goto put_cpu_node;
+		}
+
+		/* Get OPP descriptor node */
+		tmp_np = _of_get_opp_desc_node(tcpu_dev);
+		if (!tmp_np) {
+			dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
+				__func__);
+			ret = -ENOENT;
+			goto put_cpu_node;
+		}
+
+		/* CPUs are sharing opp node */
+		if (np == tmp_np)
+			cpumask_set_cpu(cpu, cpumask);
+
+		of_node_put(tmp_np);
+	}
+
+put_cpu_node:
+	of_node_put(np);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
+#endif
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
new file mode 100644
index 000000000000..d8e3999fbb2c
--- /dev/null
+++ b/drivers/base/power/opp/opp.h
@@ -0,0 +1,172 @@
+/*
+ * Generic OPP Interface
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __DRIVER_OPP_H__
+#define __DRIVER_OPP_H__
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pm_opp.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/limits.h>
+
+struct clk;
+struct regulator;
+
+/* Lock to allow exclusive modification to the device and opp lists */
+extern struct mutex dev_opp_list_lock;
+
+/*
+ * Internal data structure organization with the OPP layer library is as
+ * follows:
+ * dev_opp_list (root)
+ *	|- device 1 (represents voltage domain 1)
+ *	|	|- opp 1 (availability, freq, voltage)
+ *	|	|- opp 2 ..
+ *	...	...
+ *	|	`- opp n ..
+ *	|- device 2 (represents the next voltage domain)
+ *	...
+ *	`- device m (represents mth voltage domain)
+ * device 1, 2.. are represented by dev_opp structure while each opp
+ * is represented by the opp structure.
+ */
+
+/**
+ * struct dev_pm_opp - Generic OPP description structure
+ * @node:	opp list node. The nodes are maintained throughout the lifetime
+ *		of boot. It is expected only an optimal set of OPPs are
+ *		added to the library by the SoC framework.
+ *		RCU usage: opp list is traversed with RCU locks. node
+ *		modification is possible realtime, hence the modifications
+ *		are protected by the dev_opp_list_lock for integrity.
+ *		IMPORTANT: the opp nodes should be maintained in increasing
+ *		order.
+ * @available:	true/false - marks if this OPP as available or not
+ * @dynamic:	not-created from static DT entries.
+ * @turbo:	true if turbo (boost) OPP
+ * @rate:	Frequency in hertz
+ * @u_volt:	Target voltage in microvolts corresponding to this OPP
+ * @u_volt_min:	Minimum voltage in microvolts corresponding to this OPP
+ * @u_volt_max:	Maximum voltage in microvolts corresponding to this OPP
+ * @u_amp:	Maximum current drawn by the device in microamperes
+ * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
+ *		frequency from any other OPP's frequency.
+ * @dev_opp:	points back to the device_opp struct this opp belongs to
+ * @rcu_head:	RCU callback head used for deferred freeing
+ * @np:		OPP's device node.
+ *
+ * This structure stores the OPP information for a given device.
+ */
+struct dev_pm_opp {
+	struct list_head node;
+
+	bool available;
+	bool dynamic;
+	bool turbo;
+	unsigned long rate;
+
+	unsigned long u_volt;
+	unsigned long u_volt_min;
+	unsigned long u_volt_max;
+	unsigned long u_amp;
+	unsigned long clock_latency_ns;
+
+	struct device_opp *dev_opp;
+	struct rcu_head rcu_head;
+
+	struct device_node *np;
+};
+
+/**
+ * struct device_list_opp - devices managed by 'struct device_opp'
+ * @node:	list node
+ * @dev:	device to which the struct object belongs
+ * @rcu_head:	RCU callback head used for deferred freeing
+ *
+ * This is an internal data structure maintaining the list of devices that are
+ * managed by 'struct device_opp'.
+ */
+struct device_list_opp {
+	struct list_head node;
+	const struct device *dev;
+	struct rcu_head rcu_head;
+};
+
+/**
+ * struct device_opp - Device opp structure
+ * @node:	list node - contains the devices with OPPs that
+ *		have been registered. Nodes once added are not modified in this
+ *		list.
+ *		RCU usage: nodes are not modified in the list of device_opp,
+ *		however addition is possible and is secured by dev_opp_list_lock
+ * @srcu_head:	notifier head to notify the OPP availability changes.
+ * @rcu_head:	RCU callback head used for deferred freeing
+ * @dev_list:	list of devices that share these OPPs
+ * @opp_list:	list of opps
+ * @np:		struct device_node pointer for opp's DT node.
+ * @clock_latency_ns_max: Max clock latency in nanoseconds.
+ * @shared_opp: OPP is shared between multiple devices.
+ * @suspend_opp: Pointer to OPP to be used during device suspend.
+ * @supported_hw: Array of version number to support.
+ * @supported_hw_count: Number of elements in supported_hw array.
+ * @prop_name: A name to postfix to many DT properties, while parsing them.
+ * @clk: Device's clock handle
+ * @regulator: Supply regulator
+ * @dentry:	debugfs dentry pointer of the real device directory (not links).
+ * @dentry_name: Name of the real dentry.
+ *
+ * @voltage_tolerance_v1: In percentage, for v1 bindings only.
+ *
+ * This is an internal data structure maintaining the link to opps attached to
+ * a device. This structure is not meant to be shared to users as it is
+ * meant for book keeping and private to OPP library.
+ *
+ * Because the opp structures can be used from both rcu and srcu readers, we
+ * need to wait for the grace period of both of them before freeing any
+ * resources. And so we have used kfree_rcu() from within call_srcu() handlers.
+ */
+struct device_opp {
+	struct list_head node;
+
+	struct srcu_notifier_head srcu_head;
+	struct rcu_head rcu_head;
+	struct list_head dev_list;
+	struct list_head opp_list;
+
+	struct device_node *np;
+	unsigned long clock_latency_ns_max;
+
+	/* For backward compatibility with v1 bindings */
+	unsigned int voltage_tolerance_v1;
+
+	bool shared_opp;
+	struct dev_pm_opp *suspend_opp;
+
+	unsigned int *supported_hw;
+	unsigned int supported_hw_count;
+	const char *prop_name;
+	struct clk *clk;
+	struct regulator *regulator;
+
+};
+
+/* Routines internal to opp core */
+struct device_opp *_find_device_opp(struct device *dev);
+struct device_list_opp *_add_list_dev(const struct device *dev,
+				      struct device_opp *dev_opp);
+struct device_node *_of_get_opp_desc_node(struct device *dev);
+
+#endif		/* __DRIVER_OPP_H__ */
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 40c53dc1937e..92eeae40fe72 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -1,6 +1,5 @@
 # CPUfreq core
 obj-$(CONFIG_CPU_FREQ)			+= cpufreq.o freq_table.o
-obj-$(CONFIG_PM_OPP)			+= cpufreq_opp.o
 
 # CPUfreq stats
 obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index a46c223c2506..e1a6ba66a7f5 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -289,6 +289,8 @@ static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
 
 	clk_put(clk[cluster]);
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
+	if (arm_bL_ops->free_opp_table)
+		arm_bL_ops->free_opp_table(cpu_dev);
 	dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
 }
 
@@ -337,7 +339,7 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
 	if (ret) {
 		dev_err(cpu_dev, "%s: failed to init cpufreq table, cpu: %d, err: %d\n",
 				__func__, cpu_dev->id, ret);
-		goto out;
+		goto free_opp_table;
 	}
 
 	name[12] = cluster + '0';
@@ -354,6 +356,9 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
 	ret = PTR_ERR(clk[cluster]);
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
 
+free_opp_table:
+	if (arm_bL_ops->free_opp_table)
+		arm_bL_ops->free_opp_table(cpu_dev);
 out:
 	dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
 			cluster);
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index 70f18fc12d4a..b88889d9387e 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -25,13 +25,16 @@
 
 struct cpufreq_arm_bL_ops {
 	char name[CPUFREQ_NAME_LEN];
-	int (*get_transition_latency)(struct device *cpu_dev);
 
 	/*
 	 * This must set opp table for cpu_dev in a similar way as done by
-	 * of_init_opp_table().
+	 * dev_pm_opp_of_add_table().
 	 */
 	int (*init_opp_table)(struct device *cpu_dev);
+
+	/* Optional */
+	int (*get_transition_latency)(struct device *cpu_dev);
+	void (*free_opp_table)(struct device *cpu_dev);
 };
 
 int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c
index 4550f6976768..16ddeefe9443 100644
--- a/drivers/cpufreq/arm_big_little_dt.c
+++ b/drivers/cpufreq/arm_big_little_dt.c
@@ -54,7 +54,7 @@ static int dt_init_opp_table(struct device *cpu_dev)
 		return -ENOENT;
 	}
 
-	ret = of_init_opp_table(cpu_dev);
+	ret = dev_pm_opp_of_add_table(cpu_dev);
 	of_node_put(np);
 
 	return ret;
@@ -82,6 +82,7 @@ static struct cpufreq_arm_bL_ops dt_bL_ops = {
 	.name	= "dt-bl",
 	.get_transition_latency = dt_get_transition_latency,
 	.init_opp_table = dt_init_opp_table,
+	.free_opp_table = dev_pm_opp_of_remove_table,
 };
 
 static int generic_bL_probe(struct platform_device *pdev)
@@ -105,7 +106,6 @@ static int generic_bL_remove(struct platform_device *pdev)
 static struct platform_driver generic_bL_platdrv = {
 	.driver = {
 		.name	= "arm-bL-cpufreq-dt",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= generic_bL_probe,
 	.remove		= generic_bL_remove,
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index bdb6951d0978..90d64081ddb3 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -36,6 +36,12 @@ struct private_data {
 	unsigned int voltage_tolerance; /* in percentage */
 };
 
+static struct freq_attr *cpufreq_dt_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,   /* Extra space for boost-attr if required */
+	NULL,
+};
+
 static int set_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	struct dev_pm_opp *opp;
@@ -58,6 +64,8 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
 	old_freq = clk_get_rate(cpu_clk) / 1000;
 
 	if (!IS_ERR(cpu_reg)) {
+		unsigned long opp_freq;
+
 		rcu_read_lock();
 		opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_Hz);
 		if (IS_ERR(opp)) {
@@ -67,13 +75,16 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
 			return PTR_ERR(opp);
 		}
 		volt = dev_pm_opp_get_voltage(opp);
+		opp_freq = dev_pm_opp_get_freq(opp);
 		rcu_read_unlock();
 		tol = volt * priv->voltage_tolerance / 100;
 		volt_old = regulator_get_voltage(cpu_reg);
+		dev_dbg(cpu_dev, "Found OPP: %ld kHz, %ld uV\n",
+			opp_freq / 1000, volt);
 	}
 
 	dev_dbg(cpu_dev, "%u MHz, %ld mV --> %u MHz, %ld mV\n",
-		old_freq / 1000, volt_old ? volt_old / 1000 : -1,
+		old_freq / 1000, (volt_old > 0) ? volt_old / 1000 : -1,
 		new_freq / 1000, volt ? volt / 1000 : -1);
 
 	/* scaling up?  scale voltage before frequency */
@@ -89,7 +100,7 @@ static int set_target(struct cpufreq_policy *policy, unsigned int index)
 	ret = clk_set_rate(cpu_clk, freq_exact);
 	if (ret) {
 		dev_err(cpu_dev, "failed to set clock rate: %d\n", ret);
-		if (!IS_ERR(cpu_reg))
+		if (!IS_ERR(cpu_reg) && volt_old > 0)
 			regulator_set_voltage_tol(cpu_reg, volt_old, tol);
 		return ret;
 	}
@@ -179,21 +190,21 @@ try_again:
 
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
-	struct cpufreq_dt_platform_data *pd;
 	struct cpufreq_frequency_table *freq_table;
-	struct thermal_cooling_device *cdev;
 	struct device_node *np;
 	struct private_data *priv;
 	struct device *cpu_dev;
 	struct regulator *cpu_reg;
 	struct clk *cpu_clk;
+	struct dev_pm_opp *suspend_opp;
 	unsigned long min_uV = ~0, max_uV = 0;
 	unsigned int transition_latency;
+	bool need_update = false;
 	int ret;
 
 	ret = allocate_resources(policy->cpu, &cpu_dev, &cpu_reg, &cpu_clk);
 	if (ret) {
-		pr_err("%s: Failed to allocate resources\n: %d", __func__, ret);
+		pr_err("%s: Failed to allocate resources: %d\n", __func__, ret);
 		return ret;
 	}
 
@@ -204,18 +215,71 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		goto out_put_reg_clk;
 	}
 
-	/* OPPs might be populated at runtime, don't check for error here */
-	of_init_opp_table(cpu_dev);
+	/* Get OPP-sharing information from "operating-points-v2" bindings */
+	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus);
+	if (ret) {
+		/*
+		 * operating-points-v2 not supported, fallback to old method of
+		 * finding shared-OPPs for backward compatibility.
+		 */
+		if (ret == -ENOENT)
+			need_update = true;
+		else
+			goto out_node_put;
+	}
+
+	/*
+	 * Initialize OPP tables for all policy->cpus. They will be shared by
+	 * all CPUs which have marked their CPUs shared with OPP bindings.
+	 *
+	 * For platforms not using operating-points-v2 bindings, we do this
+	 * before updating policy->cpus. Otherwise, we will end up creating
+	 * duplicate OPPs for policy->cpus.
+	 *
+	 * OPPs might be populated at runtime, don't check for error here
+	 */
+	dev_pm_opp_of_cpumask_add_table(policy->cpus);
+
+	/*
+	 * But we need OPP table to function so if it is not there let's
+	 * give platform code chance to provide it for us.
+	 */
+	ret = dev_pm_opp_get_opp_count(cpu_dev);
+	if (ret <= 0) {
+		pr_debug("OPP table is not ready, deferring probe\n");
+		ret = -EPROBE_DEFER;
+		goto out_free_opp;
+	}
+
+	if (need_update) {
+		struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
+
+		if (!pd || !pd->independent_clocks)
+			cpumask_setall(policy->cpus);
+
+		/*
+		 * OPP tables are initialized only for policy->cpu, do it for
+		 * others as well.
+		 */
+		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus);
+		if (ret)
+			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+				__func__, ret);
+
+		of_property_read_u32(np, "clock-latency", &transition_latency);
+	} else {
+		transition_latency = dev_pm_opp_get_max_clock_latency(cpu_dev);
+	}
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
 		ret = -ENOMEM;
-		goto out_put_node;
+		goto out_free_opp;
 	}
 
 	of_property_read_u32(np, "voltage-tolerance", &priv->voltage_tolerance);
 
-	if (of_property_read_u32(np, "clock-latency", &transition_latency))
+	if (!transition_latency)
 		transition_latency = CPUFREQ_ETERNAL;
 
 	if (!IS_ERR(cpu_reg)) {
@@ -265,48 +329,47 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		goto out_free_priv;
 	}
 
-	/*
-	 * For now, just loading the cooling device;
-	 * thermal DT code takes care of matching them.
-	 */
-	if (of_find_property(np, "#cooling-cells", NULL)) {
-		cdev = of_cpufreq_cooling_register(np, cpu_present_mask);
-		if (IS_ERR(cdev))
-			dev_err(cpu_dev,
-				"running cpufreq without cooling device: %ld\n",
-				PTR_ERR(cdev));
-		else
-			priv->cdev = cdev;
-	}
-
 	priv->cpu_dev = cpu_dev;
 	priv->cpu_reg = cpu_reg;
 	policy->driver_data = priv;
 
 	policy->clk = cpu_clk;
+
+	rcu_read_lock();
+	suspend_opp = dev_pm_opp_get_suspend_opp(cpu_dev);
+	if (suspend_opp)
+		policy->suspend_freq = dev_pm_opp_get_freq(suspend_opp) / 1000;
+	rcu_read_unlock();
+
 	ret = cpufreq_table_validate_and_show(policy, freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
 			ret);
-		goto out_cooling_unregister;
+		goto out_free_cpufreq_table;
 	}
 
-	policy->cpuinfo.transition_latency = transition_latency;
+	/* Support turbo/boost mode */
+	if (policy_has_boost_freq(policy)) {
+		/* This gets disabled by core on driver unregister */
+		ret = cpufreq_enable_boost_support();
+		if (ret)
+			goto out_free_cpufreq_table;
+		cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
+	}
 
-	pd = cpufreq_get_driver_data();
-	if (!pd || !pd->independent_clocks)
-		cpumask_setall(policy->cpus);
+	policy->cpuinfo.transition_latency = transition_latency;
 
 	of_node_put(np);
 
 	return 0;
 
-out_cooling_unregister:
-	cpufreq_cooling_unregister(priv->cdev);
+out_free_cpufreq_table:
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 out_free_priv:
 	kfree(priv);
-out_put_node:
+out_free_opp:
+	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
+out_node_put:
 	of_node_put(np);
 out_put_reg_clk:
 	clk_put(cpu_clk);
@@ -322,6 +385,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 
 	cpufreq_cooling_unregister(priv->cdev);
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
+	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	clk_put(policy->clk);
 	if (!IS_ERR(priv->cpu_reg))
 		regulator_put(priv->cpu_reg);
@@ -330,6 +394,33 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 	return 0;
 }
 
+static void cpufreq_ready(struct cpufreq_policy *policy)
+{
+	struct private_data *priv = policy->driver_data;
+	struct device_node *np = of_node_get(priv->cpu_dev->of_node);
+
+	if (WARN_ON(!np))
+		return;
+
+	/*
+	 * For now, just loading the cooling device;
+	 * thermal DT code takes care of matching them.
+	 */
+	if (of_find_property(np, "#cooling-cells", NULL)) {
+		priv->cdev = of_cpufreq_cooling_register(np,
+							 policy->related_cpus);
+		if (IS_ERR(priv->cdev)) {
+			dev_err(priv->cpu_dev,
+				"running cpufreq without cooling device: %ld\n",
+				PTR_ERR(priv->cdev));
+
+			priv->cdev = NULL;
+		}
+	}
+
+	of_node_put(np);
+}
+
 static struct cpufreq_driver dt_cpufreq_driver = {
 	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
 	.verify = cpufreq_generic_frequency_table_verify,
@@ -337,8 +428,10 @@ static struct cpufreq_driver dt_cpufreq_driver = {
 	.get = cpufreq_generic_get,
 	.init = cpufreq_init,
 	.exit = cpufreq_exit,
+	.ready = cpufreq_ready,
 	.name = "cpufreq-dt",
-	.attr = cpufreq_generic_attr,
+	.attr = cpufreq_dt_attr,
+	.suspend = cpufreq_generic_suspend,
 };
 
 static int dt_cpufreq_probe(struct platform_device *pdev)
@@ -381,13 +474,13 @@ static int dt_cpufreq_remove(struct platform_device *pdev)
 static struct platform_driver dt_cpufreq_platdrv = {
 	.driver = {
 		.name	= "cpufreq-dt",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= dt_cpufreq_probe,
 	.remove		= dt_cpufreq_remove,
 };
 module_platform_driver(dt_cpufreq_platdrv);
 
+MODULE_ALIAS("platform:cpufreq-dt");
 MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
 MODULE_AUTHOR("Shawn Guo <shawn.guo@linaro.org>");
 MODULE_DESCRIPTION("Generic cpufreq driver");
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 90e8deb6c15e..8277d62dd301 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1288,8 +1288,13 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	up_write(&policy->rwsem);
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
+
 	up_read(&cpufreq_rwsem);
 
+	/* Callback for handling stuff after policy is ready */
+	if (cpufreq_driver->ready)
+		cpufreq_driver->ready(policy);
+
 	pr_debug("initialization complete\n");
 
 	return 0;
@@ -2406,6 +2411,49 @@ int cpufreq_boost_supported(void)
 }
 EXPORT_SYMBOL_GPL(cpufreq_boost_supported);
 
+static int create_boost_sysfs_file(void)
+{
+	int ret;
+
+	if (!cpufreq_boost_supported())
+		return 0;
+
+	/*
+	 * Check if driver provides function to enable boost -
+	 * if not, use cpufreq_boost_set_sw as default
+	 */
+	if (!cpufreq_driver->set_boost)
+		cpufreq_driver->set_boost = cpufreq_boost_set_sw;
+
+	ret = cpufreq_sysfs_create_file(&boost.attr);
+	if (ret)
+		pr_err("%s: cannot register global BOOST sysfs file\n",
+		       __func__);
+
+	return ret;
+}
+
+static void remove_boost_sysfs_file(void)
+{
+	if (cpufreq_boost_supported())
+		cpufreq_sysfs_remove_file(&boost.attr);
+}
+
+int cpufreq_enable_boost_support(void)
+{
+	if (!cpufreq_driver)
+		return -EINVAL;
+
+	if (cpufreq_boost_supported())
+		return 0;
+
+	cpufreq_driver->boost_supported = true;
+
+	/* This will get removed on driver unregister */
+	return create_boost_sysfs_file();
+}
+EXPORT_SYMBOL_GPL(cpufreq_enable_boost_support);
+
 int cpufreq_boost_enabled(void)
 {
 	return cpufreq_driver->boost_enabled;
@@ -2455,21 +2503,12 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	cpufreq_driver = driver_data;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	if (cpufreq_boost_supported()) {
-		/*
-		 * Check if driver provides function to enable boost -
-		 * if not, use cpufreq_boost_set_sw as default
-		 */
-		if (!cpufreq_driver->set_boost)
-			cpufreq_driver->set_boost = cpufreq_boost_set_sw;
+	if (driver_data->setpolicy)
+		driver_data->flags |= CPUFREQ_CONST_LOOPS;
 
-		ret = cpufreq_sysfs_create_file(&boost.attr);
-		if (ret) {
-			pr_err("%s: cannot register global BOOST sysfs file\n",
-			       __func__);
-			goto err_null_driver;
-		}
-	}
+	ret = create_boost_sysfs_file();
+	if (ret)
+		goto err_null_driver;
 
 	ret = subsys_interface_register(&cpufreq_interface);
 	if (ret)
@@ -2501,8 +2540,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 err_if_unreg:
 	subsys_interface_unregister(&cpufreq_interface);
 err_boost_unreg:
-	if (cpufreq_boost_supported())
-		cpufreq_sysfs_remove_file(&boost.attr);
+	remove_boost_sysfs_file();
 err_null_driver:
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 	cpufreq_driver = NULL;
@@ -2529,9 +2567,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 	pr_debug("unregistering driver %s\n", driver->name);
 
 	subsys_interface_unregister(&cpufreq_interface);
-	if (cpufreq_boost_supported())
-		cpufreq_sysfs_remove_file(&boost.attr);
-
+	remove_boost_sysfs_file();
 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
 
 	down_write(&cpufreq_rwsem);
diff --git a/drivers/cpufreq/cpufreq_opp.c b/drivers/cpufreq/cpufreq_opp.c
deleted file mode 100644
index 773bcde893c0..000000000000
--- a/drivers/cpufreq/cpufreq_opp.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Generic OPP helper interface for CPUFreq drivers
- *
- * Copyright (C) 2009-2014 Texas Instruments Incorporated.
- *	Nishanth Menon
- *	Romit Dasgupta
- *	Kevin Hilman
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/cpufreq.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/pm_opp.h>
-#include <linux/rcupdate.h>
-#include <linux/slab.h>
-
-/**
- * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
- * @dev:	device for which we do this operation
- * @table:	Cpufreq table returned back to caller
- *
- * Generate a cpufreq table for a provided device- this assumes that the
- * opp list is already initialized and ready for usage.
- *
- * This function allocates required memory for the cpufreq table. It is
- * expected that the caller does the required maintenance such as freeing
- * the table as required.
- *
- * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
- * if no memory available for the operation (table is not populated), returns 0
- * if successful and table is populated.
- *
- * WARNING: It is  important for the callers to ensure refreshing their copy of
- * the table if any of the mentioned functions have been invoked in the interim.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Since we just use the regular accessor functions to access the internal data
- * structures, we use RCU read lock inside this function. As a result, users of
- * this function DONOT need to use explicit locks for invoking.
- */
-int dev_pm_opp_init_cpufreq_table(struct device *dev,
-				  struct cpufreq_frequency_table **table)
-{
-	struct dev_pm_opp *opp;
-	struct cpufreq_frequency_table *freq_table = NULL;
-	int i, max_opps, ret = 0;
-	unsigned long rate;
-
-	rcu_read_lock();
-
-	max_opps = dev_pm_opp_get_opp_count(dev);
-	if (max_opps <= 0) {
-		ret = max_opps ? max_opps : -ENODATA;
-		goto out;
-	}
-
-	freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_ATOMIC);
-	if (!freq_table) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0, rate = 0; i < max_opps; i++, rate++) {
-		/* find next rate */
-		opp = dev_pm_opp_find_freq_ceil(dev, &rate);
-		if (IS_ERR(opp)) {
-			ret = PTR_ERR(opp);
-			goto out;
-		}
-		freq_table[i].driver_data = i;
-		freq_table[i].frequency = rate / 1000;
-	}
-
-	freq_table[i].driver_data = i;
-	freq_table[i].frequency = CPUFREQ_TABLE_END;
-
-	*table = &freq_table[0];
-
-out:
-	rcu_read_unlock();
-	if (ret)
-		kfree(freq_table);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
-
-/**
- * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
- * @dev:	device for which we do this operation
- * @table:	table to free
- *
- * Free up the table allocated by dev_pm_opp_init_cpufreq_table
- */
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
-				   struct cpufreq_frequency_table **table)
-{
-	if (!table)
-		return;
-
-	kfree(*table);
-	*table = NULL;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index 28a16dc6e02e..7e336d20c184 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -169,7 +169,6 @@ static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
 static struct platform_driver davinci_cpufreq_driver = {
 	.driver = {
 		.name	 = "cpufreq-davinci",
-		.owner	 = THIS_MODULE,
 	},
 	.remove = __exit_p(davinci_cpufreq_remove),
 };
diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 4bebc1b5db48..5c3ec1dd4921 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c
@@ -69,7 +69,6 @@ static int dbx500_cpufreq_probe(struct platform_device *pdev)
 static struct platform_driver dbx500_cpufreq_plat_driver = {
 	.driver = {
 		.name = "cpufreq-ux500",
-		.owner = THIS_MODULE,
 	},
 	.probe = dbx500_cpufreq_probe,
 };
diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c
index 1e0ec57bf6e3..f99a0b0b7c06 100644
--- a/drivers/cpufreq/exynos-cpufreq.c
+++ b/drivers/cpufreq/exynos-cpufreq.c
@@ -211,7 +211,6 @@ err_vdd_arm:
 static struct platform_driver exynos_cpufreq_platdrv = {
 	.driver = {
 		.name	= "exynos-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe = exynos_cpufreq_probe,
 };
diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index f33f25b483ca..c0f3373706f4 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -360,7 +360,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev)
 		goto err_put_node;
 	}
 
-	ret = of_init_opp_table(dvfs_info->dev);
+	ret = dev_pm_opp_of_add_table(dvfs_info->dev);
 	if (ret) {
 		dev_err(dvfs_info->dev, "failed to init OPP table: %d\n", ret);
 		goto err_put_node;
@@ -371,7 +371,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(dvfs_info->dev,
 			"failed to init cpufreq table: %d\n", ret);
-		goto err_put_node;
+		goto err_free_opp;
 	}
 	dvfs_info->freq_count = dev_pm_opp_get_opp_count(dvfs_info->dev);
 	exynos_sort_descend_freq_table();
@@ -423,6 +423,8 @@ static int exynos_cpufreq_probe(struct platform_device *pdev)
 
 err_free_table:
 	dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
+err_free_opp:
+	dev_pm_opp_of_remove_table(dvfs_info->dev);
 err_put_node:
 	of_node_put(np);
 	dev_err(&pdev->dev, "%s: failed initialization\n", __func__);
@@ -433,13 +435,13 @@ static int exynos_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&exynos_driver);
 	dev_pm_opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
+	dev_pm_opp_of_remove_table(dvfs_info->dev);
 	return 0;
 }
 
 static struct platform_driver exynos_cpufreq_platdrv = {
 	.driver = {
 		.name	= "exynos5440-cpufreq",
-		.owner	= THIS_MODULE,
 		.of_match_table = exynos_cpufreq_match,
 	},
 	.probe		= exynos_cpufreq_probe,
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index df14766a8e06..d1a0cbfdba63 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -18,6 +18,21 @@
  *                     FREQUENCY TABLE HELPERS                       *
  *********************************************************************/
 
+bool policy_has_boost_freq(struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *pos, *table = policy->freq_table;
+
+	if (!table)
+		return false;
+
+	cpufreq_for_each_valid_entry(pos, table)
+		if (pos->flags & CPUFREQ_BOOST_FREQ)
+			return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(policy_has_boost_freq);
+
 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 				    struct cpufreq_frequency_table *table)
 {
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index c2d30765bf3d..84fbc8e8fc56 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -31,6 +31,7 @@ static struct clk *step_clk;
 static struct clk *pll2_pfd2_396m_clk;
 
 static struct device *cpu_dev;
+static bool free_opp;
 static struct cpufreq_frequency_table *freq_table;
 static unsigned int transition_latency;
 
@@ -201,17 +202,20 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	 */
 	num = dev_pm_opp_get_opp_count(cpu_dev);
 	if (num < 0) {
-		ret = of_init_opp_table(cpu_dev);
+		ret = dev_pm_opp_of_add_table(cpu_dev);
 		if (ret < 0) {
 			dev_err(cpu_dev, "failed to init OPP table: %d\n", ret);
 			goto put_reg;
 		}
 
+		/* Because we have added the OPPs here, we must free them */
+		free_opp = true;
+
 		num = dev_pm_opp_get_opp_count(cpu_dev);
 		if (num < 0) {
 			ret = num;
 			dev_err(cpu_dev, "no OPP table is found: %d\n", ret);
-			goto put_reg;
+			goto out_free_opp;
 		}
 	}
 
@@ -306,6 +310,9 @@ soc_opp_out:
 
 free_freq_table:
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
+out_free_opp:
+	if (free_opp)
+		dev_pm_opp_of_remove_table(cpu_dev);
 put_reg:
 	if (!IS_ERR(arm_reg))
 		regulator_put(arm_reg);
@@ -332,6 +339,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev)
 {
 	cpufreq_unregister_driver(&imx6q_cpufreq_driver);
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
+	if (free_opp)
+		dev_pm_opp_of_remove_table(cpu_dev);
 	regulator_put(arm_reg);
 	if (!IS_ERR(pu_reg))
 		regulator_put(pu_reg);
@@ -348,7 +357,6 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev)
 static struct platform_driver imx6q_cpufreq_platdrv = {
 	.driver = {
 		.name	= "imx6q-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= imx6q_cpufreq_probe,
 	.remove		= imx6q_cpufreq_remove,
diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c
index 6bd69adc3c5e..129e266f7621 100644
--- a/drivers/cpufreq/integrator-cpufreq.c
+++ b/drivers/cpufreq/integrator-cpufreq.c
@@ -226,7 +226,6 @@ static const struct of_device_id integrator_cpufreq_match[] = {
 static struct platform_driver integrator_cpufreq_driver = {
 	.driver = {
 		.name = "integrator-cpufreq",
-		.owner = THIS_MODULE,
 		.of_match_table = integrator_cpufreq_match,
 	},
 	.remove = __exit_p(integrator_cpufreq_remove),
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index 7906d4acfe40..be42f103db60 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -183,7 +183,6 @@ static struct platform_driver kirkwood_cpufreq_platform_driver = {
 	.remove = kirkwood_cpufreq_remove,
 	.driver = {
 		.name = "kirkwood-cpufreq",
-		.owner = THIS_MODULE,
 	},
 };
 
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index 9fa177206032..fc897babab55 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -130,7 +130,6 @@ MODULE_DEVICE_TABLE(platform, platform_device_ids);
 static struct platform_driver platform_driver = {
 	.driver = {
 		.name = "loongson2_cpufreq",
-		.owner = THIS_MODULE,
 	},
 	.id_table = platform_device_ids,
 };
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 5f69c9aa703c..e3866e0d5bf8 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -195,7 +195,6 @@ static int omap_cpufreq_remove(struct platform_device *pdev)
 static struct platform_driver omap_cpufreq_platdrv = {
 	.driver = {
 		.name	= "omap-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= omap_cpufreq_probe,
 	.remove		= omap_cpufreq_remove,
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 567caa6313ff..b0dac7d6ba31 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -656,7 +656,6 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev)
 static struct platform_driver s5pv210_cpufreq_platdrv = {
 	.driver = {
 		.name	= "s5pv210-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe = s5pv210_cpufreq_probe,
 };
diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c
index 38678396636d..4894924a3ca2 100644
--- a/drivers/cpufreq/spear-cpufreq.c
+++ b/drivers/cpufreq/spear-cpufreq.c
@@ -236,7 +236,6 @@ out_put_node:
 static struct platform_driver spear_cpufreq_platdrv = {
 	.driver = {
 		.name	= "spear-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= spear_cpufreq_probe,
 };
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 7f7c9c01b44e..433e93fd4900 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -60,7 +60,6 @@ static int ve_spc_cpufreq_remove(struct platform_device *pdev)
 static struct platform_driver ve_spc_cpufreq_platdrv = {
 	.driver = {
 		.name	= "vexpress-spc-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= ve_spc_cpufreq_probe,
 	.remove		= ve_spc_cpufreq_remove,
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 061407d59520..887196e9b50c 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -1009,9 +1009,9 @@ error_out:
  *		notifier_to_errno() to decode this value
  */
 static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
-			     void *update)
+			     void *data)
 {
-	struct of_prop_reconfig *upd = update;
+	struct of_reconfig_data *upd = data;
 	struct nx842_devdata *local_devdata;
 	struct device_node *node = NULL;
 
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index aef6a95adef5..bef2579be1ec 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -1,6 +1,14 @@
 #
 # Makefile for linux kernel
 #
+
+#
+# ARM64 maps efi runtime services in userspace addresses
+# which don't have KASAN shadow. So dereference of these addresses
+# in efi_call_virt() will cause crash if this code instrumented.
+#
+KASAN_SANITIZE_runtime-wrappers.o	:= n
+
 obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index b14bc2b9fb4d..c5533c76c202 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -19,6 +19,7 @@ KBUILD_CFLAGS			:= $(cflags-y) \
 				   $(call cc-option,-fno-stack-protector)
 
 GCOV_PROFILE			:= n
+KASAN_SANITIZE			:= n
 
 lib-y				:= efi-stub-helper.o
 lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 304ab295ca1a..d1ba39c3ee4e 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -5,6 +5,10 @@
 /* error code which can't be mistaken for valid address */
 #define EFI_ERROR	(~0UL)
 
+#undef memcpy
+#undef memset
+#undef memmove
+
 void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
 
 efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 87743dd1df48..c91a4fb596e4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -35,9 +35,6 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 
-#define DIV_ROUND_CLOSEST_ULL(ll, d)	\
-({ unsigned long long _tmp = (ll)+(d)/2; do_div(_tmp, d); _tmp; })
-
 /**
  * _wait_for - magic (register) wait macro
  *
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 8bc193f81333..ce5a385150c8 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -30,6 +30,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include "intel_drv.h"
 
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index d16dbb33a531..e7c8bf9093ea 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -176,6 +176,10 @@ static struct attribute *lm75_attrs[] = {
 };
 ATTRIBUTE_GROUPS(lm75);
 
+static const struct thermal_zone_of_device_ops lm75_of_thermal_ops = {
+	.get_temp = lm75_read_temp,
+};
+
 /*-----------------------------------------------------------------------*/
 
 /* device probe and removal */
@@ -291,10 +295,9 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (IS_ERR(data->hwmon_dev))
 		return PTR_ERR(data->hwmon_dev);
 
-	data->tz = thermal_zone_of_sensor_register(data->hwmon_dev,
-						   0,
+	data->tz = thermal_zone_of_sensor_register(data->hwmon_dev, 0,
 						   data->hwmon_dev,
-						   lm75_read_temp, NULL);
+						   &lm75_of_thermal_ops);
 	if (IS_ERR(data->tz))
 		data->tz = NULL;
 
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index 31597c504879..b2c90bdc2dd1 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -495,6 +495,10 @@ static const struct attribute_group ntc_attr_group = {
 	.attrs = ntc_attributes,
 };
 
+static const struct thermal_zone_of_device_ops ntc_of_thermal_ops = {
+	.get_temp = ntc_read_temp,
+};
+
 static int ntc_thermistor_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id =
@@ -588,7 +592,7 @@ static int ntc_thermistor_probe(struct platform_device *pdev)
 								pdev_id->name);
 
 	data->tz = thermal_zone_of_sensor_register(data->dev, 0, data->dev,
-						ntc_read_temp, NULL);
+						   &ntc_of_thermal_ops);
 	if (IS_ERR(data->tz)) {
 		dev_dbg(&pdev->dev, "Failed to register to thermal fw.\n");
 		data->tz = NULL;
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index 51719956cc03..ba9f478f64ee 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -158,6 +158,10 @@ ATTRIBUTE_GROUPS(tmp102);
 #define TMP102_CONFIG  (TMP102_CONF_TM | TMP102_CONF_EM | TMP102_CONF_CR1)
 #define TMP102_CONFIG_RD_ONLY (TMP102_CONF_R0 | TMP102_CONF_R1 | TMP102_CONF_AL)
 
+static const struct thermal_zone_of_device_ops tmp102_of_thermal_ops = {
+	.get_temp = tmp102_read_temp,
+};
+
 static int tmp102_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
@@ -215,7 +219,7 @@ static int tmp102_probe(struct i2c_client *client,
 	}
 	tmp102->hwmon_dev = hwmon_dev;
 	tmp102->tz = thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev,
-						     tmp102_read_temp, NULL);
+						     &tmp102_of_thermal_ops);
 	if (IS_ERR(tmp102->tz))
 		tmp102->tz = NULL;
 
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
new file mode 100644
index 000000000000..fc1f1ae7a49d
--- /dev/null
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -0,0 +1,61 @@
+#
+# Coresight configuration
+#
+menuconfig CORESIGHT
+	bool "CoreSight Tracing Support"
+	select ARM_AMBA
+	help
+	  This framework provides a kernel interface for the CoreSight debug
+	  and trace drivers to register themselves with. It's intended to build
+	  a topological view of the CoreSight components based on a DT
+	  specification and configure the right serie of components when a
+	  trace source gets enabled.
+
+if CORESIGHT
+config CORESIGHT_LINKS_AND_SINKS
+	bool "CoreSight Link and Sink drivers"
+	help
+	  This enables support for CoreSight link and sink drivers that are
+	  responsible for transporting and collecting the trace data
+	  respectively.  Link and sinks are dynamically aggregated with a trace
+	  entity at run time to form a complete trace path.
+
+config CORESIGHT_LINK_AND_SINK_TMC
+	bool "Coresight generic TMC driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Trace Memory Controller driver.
+	  Depending on its configuration the device can act as a link (embedded
+	  trace router - ETR) or sink (embedded trace FIFO).  The driver
+	  complies with the generic implementation of the component without
+	  special enhancement or added features.
+
+config CORESIGHT_SINK_TPIU
+	bool "Coresight generic TPIU driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Trace Port Interface Unit driver,
+	  responsible for bridging the gap between the on-chip coresight
+	  components and a trace for bridging the gap between the on-chip
+	  coresight components and a trace port collection engine, typically
+	  connected to an external host for use case capturing more traces than
+	  the on-board coresight memory can handle.
+
+config CORESIGHT_SINK_ETBV10
+	bool "Coresight ETBv1.0 driver"
+	depends on CORESIGHT_LINKS_AND_SINKS
+	help
+	  This enables support for the Embedded Trace Buffer version 1.0 driver
+	  that complies with the generic implementation of the component without
+	  special enhancement or added features.
+
+config CORESIGHT_SOURCE_ETM3X
+	bool "CoreSight Embedded Trace Macrocell 3.x driver"
+	depends on !ARM64
+	select CORESIGHT_LINKS_AND_SINKS
+	help
+	  This driver provides support for processor ETM3.x and PTM1.x modules,
+	  which allows tracing the instructions that a processor is executing
+	  This is primarily useful for instruction level tracing.  Depending
+	  the ETM version data tracing may also be available.
+endif
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
new file mode 100644
index 000000000000..4b4bec890ef5
--- /dev/null
+++ b/drivers/hwtracing/coresight/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for CoreSight drivers.
+#
+obj-$(CONFIG_CORESIGHT) += coresight.o
+obj-$(CONFIG_OF) += of_coresight.o
+obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o
+obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o
+obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o
+obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o \
+					   coresight-replicator.o
+obj-$(CONFIG_CORESIGHT_SOURCE_ETM3X) += coresight-etm3x.o coresight-etm-cp14.o
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
new file mode 100644
index 000000000000..40049869aecd
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -0,0 +1,527 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/clk.h>
+#include <linux/seq_file.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+
+#include "coresight-priv.h"
+
+#define ETB_RAM_DEPTH_REG	0x004
+#define ETB_STATUS_REG		0x00c
+#define ETB_RAM_READ_DATA_REG	0x010
+#define ETB_RAM_READ_POINTER	0x014
+#define ETB_RAM_WRITE_POINTER	0x018
+#define ETB_TRG			0x01c
+#define ETB_CTL_REG		0x020
+#define ETB_RWD_REG		0x024
+#define ETB_FFSR		0x300
+#define ETB_FFCR		0x304
+#define ETB_ITMISCOP0		0xee0
+#define ETB_ITTRFLINACK		0xee4
+#define ETB_ITTRFLIN		0xee8
+#define ETB_ITATBDATA0		0xeeC
+#define ETB_ITATBCTR2		0xef0
+#define ETB_ITATBCTR1		0xef4
+#define ETB_ITATBCTR0		0xef8
+
+/* register description */
+/* STS - 0x00C */
+#define ETB_STATUS_RAM_FULL	BIT(0)
+/* CTL - 0x020 */
+#define ETB_CTL_CAPT_EN		BIT(0)
+/* FFCR - 0x304 */
+#define ETB_FFCR_EN_FTC		BIT(0)
+#define ETB_FFCR_FON_MAN	BIT(6)
+#define ETB_FFCR_STOP_FI	BIT(12)
+#define ETB_FFCR_STOP_TRIGGER	BIT(13)
+
+#define ETB_FFCR_BIT		6
+#define ETB_FFSR_BIT		1
+#define ETB_FRAME_SIZE_WORDS	4
+
+/**
+ * struct etb_drvdata - specifics associated to an ETB component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @csdev:	component vitals needed by the framework.
+ * @miscdev:	specifics to handle "/dev/xyz.etb" entry.
+ * @clk:	the clock this component is associated to.
+ * @spinlock:	only one at a time pls.
+ * @in_use:	synchronise user space access to etb buffer.
+ * @buf:	area of memory where ETB buffer content gets sent.
+ * @buffer_depth: size of @buf.
+ * @enable:	this ETB is being used.
+ * @trigger_cntr: amount of words to store after a trigger.
+ */
+struct etb_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	struct miscdevice	miscdev;
+	struct clk		*clk;
+	spinlock_t		spinlock;
+	atomic_t		in_use;
+	u8			*buf;
+	u32			buffer_depth;
+	bool			enable;
+	u32			trigger_cntr;
+};
+
+static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
+{
+	int ret;
+	u32 depth = 0;
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	/* RO registers don't need locking */
+	depth = readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG);
+
+	clk_disable_unprepare(drvdata->clk);
+	return depth;
+}
+
+static void etb_enable_hw(struct etb_drvdata *drvdata)
+{
+	int i;
+	u32 depth;
+
+	CS_UNLOCK(drvdata->base);
+
+	depth = drvdata->buffer_depth;
+	/* reset write RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+	/* clear entire RAM buffer */
+	for (i = 0; i < depth; i++)
+		writel_relaxed(0x0, drvdata->base + ETB_RWD_REG);
+
+	/* reset write RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
+	/* reset read RAM pointer address */
+	writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + ETB_TRG);
+	writel_relaxed(ETB_FFCR_EN_FTC | ETB_FFCR_STOP_TRIGGER,
+		       drvdata->base + ETB_FFCR);
+	/* ETB trace capture enable */
+	writel_relaxed(ETB_CTL_CAPT_EN, drvdata->base + ETB_CTL_REG);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int etb_enable(struct coresight_device *csdev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret;
+	unsigned long flags;
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	etb_enable_hw(drvdata);
+	drvdata->enable = true;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "ETB enabled\n");
+	return 0;
+}
+
+static void etb_disable_hw(struct etb_drvdata *drvdata)
+{
+	u32 ffcr;
+
+	CS_UNLOCK(drvdata->base);
+
+	ffcr = readl_relaxed(drvdata->base + ETB_FFCR);
+	/* stop formatter when a stop has completed */
+	ffcr |= ETB_FFCR_STOP_FI;
+	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
+	/* manually generate a flush of the system */
+	ffcr |= ETB_FFCR_FON_MAN;
+	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
+
+	if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
+		dev_err(drvdata->dev,
+			"timeout observed when probing at offset %#x\n",
+			ETB_FFCR);
+	}
+
+	/* disable trace capture */
+	writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
+
+	if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
+		dev_err(drvdata->dev,
+			"timeout observed when probing at offset %#x\n",
+			ETB_FFCR);
+	}
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etb_dump_hw(struct etb_drvdata *drvdata)
+{
+	int i;
+	u8 *buf_ptr;
+	u32 read_data, depth;
+	u32 read_ptr, write_ptr;
+	u32 frame_off, frame_endoff;
+
+	CS_UNLOCK(drvdata->base);
+
+	read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER);
+	write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER);
+
+	frame_off = write_ptr % ETB_FRAME_SIZE_WORDS;
+	frame_endoff = ETB_FRAME_SIZE_WORDS - frame_off;
+	if (frame_off) {
+		dev_err(drvdata->dev,
+			"write_ptr: %lu not aligned to formatter frame size\n",
+			(unsigned long)write_ptr);
+		dev_err(drvdata->dev, "frameoff: %lu, frame_endoff: %lu\n",
+			(unsigned long)frame_off, (unsigned long)frame_endoff);
+		write_ptr += frame_endoff;
+	}
+
+	if ((readl_relaxed(drvdata->base + ETB_STATUS_REG)
+		      & ETB_STATUS_RAM_FULL) == 0)
+		writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER);
+	else
+		writel_relaxed(write_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+
+	depth = drvdata->buffer_depth;
+	buf_ptr = drvdata->buf;
+	for (i = 0; i < depth; i++) {
+		read_data = readl_relaxed(drvdata->base +
+					  ETB_RAM_READ_DATA_REG);
+		*buf_ptr++ = read_data >> 0;
+		*buf_ptr++ = read_data >> 8;
+		*buf_ptr++ = read_data >> 16;
+		*buf_ptr++ = read_data >> 24;
+	}
+
+	if (frame_off) {
+		buf_ptr -= (frame_endoff * 4);
+		for (i = 0; i < frame_endoff; i++) {
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+			*buf_ptr++ = 0x0;
+		}
+	}
+
+	writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void etb_disable(struct coresight_device *csdev)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	etb_disable_hw(drvdata);
+	etb_dump_hw(drvdata);
+	drvdata->enable = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	dev_info(drvdata->dev, "ETB disabled\n");
+}
+
+static const struct coresight_ops_sink etb_sink_ops = {
+	.enable		= etb_enable,
+	.disable	= etb_disable,
+};
+
+static const struct coresight_ops etb_cs_ops = {
+	.sink_ops	= &etb_sink_ops,
+};
+
+static void etb_dump(struct etb_drvdata *drvdata)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->enable) {
+		etb_disable_hw(drvdata);
+		etb_dump_hw(drvdata);
+		etb_enable_hw(drvdata);
+	}
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "ETB dumped\n");
+}
+
+static int etb_open(struct inode *inode, struct file *file)
+{
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+
+	if (atomic_cmpxchg(&drvdata->in_use, 0, 1))
+		return -EBUSY;
+
+	dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__);
+	return 0;
+}
+
+static ssize_t etb_read(struct file *file, char __user *data,
+				size_t len, loff_t *ppos)
+{
+	u32 depth;
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+
+	etb_dump(drvdata);
+
+	depth = drvdata->buffer_depth;
+	if (*ppos + len > depth * 4)
+		len = depth * 4 - *ppos;
+
+	if (copy_to_user(data, drvdata->buf + *ppos, len)) {
+		dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__);
+		return -EFAULT;
+	}
+
+	*ppos += len;
+
+	dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n",
+		__func__, len, (int)(depth * 4 - *ppos));
+	return len;
+}
+
+static int etb_release(struct inode *inode, struct file *file)
+{
+	struct etb_drvdata *drvdata = container_of(file->private_data,
+						   struct etb_drvdata, miscdev);
+	atomic_set(&drvdata->in_use, 0);
+
+	dev_dbg(drvdata->dev, "%s: released\n", __func__);
+	return 0;
+}
+
+static const struct file_operations etb_fops = {
+	.owner		= THIS_MODULE,
+	.open		= etb_open,
+	.read		= etb_read,
+	.release	= etb_release,
+	.llseek		= no_llseek,
+};
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	int ret;
+	unsigned long flags;
+	u32 etb_rdr, etb_sr, etb_rrp, etb_rwp;
+	u32 etb_trg, etb_cr, etb_ffsr, etb_ffcr;
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		goto out;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	CS_UNLOCK(drvdata->base);
+
+	etb_rdr = readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG);
+	etb_sr = readl_relaxed(drvdata->base + ETB_STATUS_REG);
+	etb_rrp = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER);
+	etb_rwp = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER);
+	etb_trg = readl_relaxed(drvdata->base + ETB_TRG);
+	etb_cr = readl_relaxed(drvdata->base + ETB_CTL_REG);
+	etb_ffsr = readl_relaxed(drvdata->base + ETB_FFSR);
+	etb_ffcr = readl_relaxed(drvdata->base + ETB_FFCR);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	return sprintf(buf,
+		       "Depth:\t\t0x%x\n"
+		       "Status:\t\t0x%x\n"
+		       "RAM read ptr:\t0x%x\n"
+		       "RAM wrt ptr:\t0x%x\n"
+		       "Trigger cnt:\t0x%x\n"
+		       "Control:\t0x%x\n"
+		       "Flush status:\t0x%x\n"
+		       "Flush ctrl:\t0x%x\n",
+		       etb_rdr, etb_sr, etb_rrp, etb_rwp,
+		       etb_trg, etb_cr, etb_ffsr, etb_ffcr);
+out:
+	return -EINVAL;
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t trigger_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->trigger_cntr;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_cntr_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_cntr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_cntr);
+
+static struct attribute *coresight_etb_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	&dev_attr_status.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_etb);
+
+static int etb_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etb_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc *desc;
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->clk = adev->pclk;
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	drvdata->buffer_depth = etb_get_buffer_depth(drvdata);
+	clk_disable_unprepare(drvdata->clk);
+
+	if (drvdata->buffer_depth < 0)
+		return -EINVAL;
+
+	drvdata->buf = devm_kzalloc(dev,
+				    drvdata->buffer_depth * 4, GFP_KERNEL);
+	if (!drvdata->buf)
+		return -ENOMEM;
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	desc->type = CORESIGHT_DEV_TYPE_SINK;
+	desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+	desc->ops = &etb_cs_ops;
+	desc->pdata = pdata;
+	desc->dev = dev;
+	desc->groups = coresight_etb_groups;
+	drvdata->csdev = coresight_register(desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	drvdata->miscdev.name = pdata->name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &etb_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret)
+		goto err_misc_register;
+
+	dev_info(dev, "ETB initialized\n");
+	return 0;
+
+err_misc_register:
+	coresight_unregister(drvdata->csdev);
+	return ret;
+}
+
+static int etb_remove(struct amba_device *adev)
+{
+	struct etb_drvdata *drvdata = amba_get_drvdata(adev);
+
+	misc_deregister(&drvdata->miscdev);
+	coresight_unregister(drvdata->csdev);
+	return 0;
+}
+
+static struct amba_id etb_ids[] = {
+	{
+		.id	= 0x0003b907,
+		.mask	= 0x0003ffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver etb_driver = {
+	.drv = {
+		.name	= "coresight-etb10",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= etb_probe,
+	.remove		= etb_remove,
+	.id_table	= etb_ids,
+};
+
+module_amba_driver(etb_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CoreSight Embedded Trace Buffer driver");
diff --git a/drivers/hwtracing/coresight/coresight-etm-cp14.c b/drivers/hwtracing/coresight/coresight-etm-cp14.c
new file mode 100644
index 000000000000..12a220682117
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm-cp14.c
@@ -0,0 +1,591 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <asm/hardware/cp14.h>
+
+#include "coresight-etm.h"
+
+int etm_readl_cp14(u32 reg, unsigned int *val)
+{
+	switch (reg) {
+	case ETMCR:
+		*val = etm_read(ETMCR);
+		return 0;
+	case ETMCCR:
+		*val = etm_read(ETMCCR);
+		return 0;
+	case ETMTRIGGER:
+		*val = etm_read(ETMTRIGGER);
+		return 0;
+	case ETMSR:
+		*val = etm_read(ETMSR);
+		return 0;
+	case ETMSCR:
+		*val = etm_read(ETMSCR);
+		return 0;
+	case ETMTSSCR:
+		*val = etm_read(ETMTSSCR);
+		return 0;
+	case ETMTEEVR:
+		*val = etm_read(ETMTEEVR);
+		return 0;
+	case ETMTECR1:
+		*val = etm_read(ETMTECR1);
+		return 0;
+	case ETMFFLR:
+		*val = etm_read(ETMFFLR);
+		return 0;
+	case ETMACVRn(0):
+		*val = etm_read(ETMACVR0);
+		return 0;
+	case ETMACVRn(1):
+		*val = etm_read(ETMACVR1);
+		return 0;
+	case ETMACVRn(2):
+		*val = etm_read(ETMACVR2);
+		return 0;
+	case ETMACVRn(3):
+		*val = etm_read(ETMACVR3);
+		return 0;
+	case ETMACVRn(4):
+		*val = etm_read(ETMACVR4);
+		return 0;
+	case ETMACVRn(5):
+		*val = etm_read(ETMACVR5);
+		return 0;
+	case ETMACVRn(6):
+		*val = etm_read(ETMACVR6);
+		return 0;
+	case ETMACVRn(7):
+		*val = etm_read(ETMACVR7);
+		return 0;
+	case ETMACVRn(8):
+		*val = etm_read(ETMACVR8);
+		return 0;
+	case ETMACVRn(9):
+		*val = etm_read(ETMACVR9);
+		return 0;
+	case ETMACVRn(10):
+		*val = etm_read(ETMACVR10);
+		return 0;
+	case ETMACVRn(11):
+		*val = etm_read(ETMACVR11);
+		return 0;
+	case ETMACVRn(12):
+		*val = etm_read(ETMACVR12);
+		return 0;
+	case ETMACVRn(13):
+		*val = etm_read(ETMACVR13);
+		return 0;
+	case ETMACVRn(14):
+		*val = etm_read(ETMACVR14);
+		return 0;
+	case ETMACVRn(15):
+		*val = etm_read(ETMACVR15);
+		return 0;
+	case ETMACTRn(0):
+		*val = etm_read(ETMACTR0);
+		return 0;
+	case ETMACTRn(1):
+		*val = etm_read(ETMACTR1);
+		return 0;
+	case ETMACTRn(2):
+		*val = etm_read(ETMACTR2);
+		return 0;
+	case ETMACTRn(3):
+		*val = etm_read(ETMACTR3);
+		return 0;
+	case ETMACTRn(4):
+		*val = etm_read(ETMACTR4);
+		return 0;
+	case ETMACTRn(5):
+		*val = etm_read(ETMACTR5);
+		return 0;
+	case ETMACTRn(6):
+		*val = etm_read(ETMACTR6);
+		return 0;
+	case ETMACTRn(7):
+		*val = etm_read(ETMACTR7);
+		return 0;
+	case ETMACTRn(8):
+		*val = etm_read(ETMACTR8);
+		return 0;
+	case ETMACTRn(9):
+		*val = etm_read(ETMACTR9);
+		return 0;
+	case ETMACTRn(10):
+		*val = etm_read(ETMACTR10);
+		return 0;
+	case ETMACTRn(11):
+		*val = etm_read(ETMACTR11);
+		return 0;
+	case ETMACTRn(12):
+		*val = etm_read(ETMACTR12);
+		return 0;
+	case ETMACTRn(13):
+		*val = etm_read(ETMACTR13);
+		return 0;
+	case ETMACTRn(14):
+		*val = etm_read(ETMACTR14);
+		return 0;
+	case ETMACTRn(15):
+		*val = etm_read(ETMACTR15);
+		return 0;
+	case ETMCNTRLDVRn(0):
+		*val = etm_read(ETMCNTRLDVR0);
+		return 0;
+	case ETMCNTRLDVRn(1):
+		*val = etm_read(ETMCNTRLDVR1);
+		return 0;
+	case ETMCNTRLDVRn(2):
+		*val = etm_read(ETMCNTRLDVR2);
+		return 0;
+	case ETMCNTRLDVRn(3):
+		*val = etm_read(ETMCNTRLDVR3);
+		return 0;
+	case ETMCNTENRn(0):
+		*val = etm_read(ETMCNTENR0);
+		return 0;
+	case ETMCNTENRn(1):
+		*val = etm_read(ETMCNTENR1);
+		return 0;
+	case ETMCNTENRn(2):
+		*val = etm_read(ETMCNTENR2);
+		return 0;
+	case ETMCNTENRn(3):
+		*val = etm_read(ETMCNTENR3);
+		return 0;
+	case ETMCNTRLDEVRn(0):
+		*val = etm_read(ETMCNTRLDEVR0);
+		return 0;
+	case ETMCNTRLDEVRn(1):
+		*val = etm_read(ETMCNTRLDEVR1);
+		return 0;
+	case ETMCNTRLDEVRn(2):
+		*val = etm_read(ETMCNTRLDEVR2);
+		return 0;
+	case ETMCNTRLDEVRn(3):
+		*val = etm_read(ETMCNTRLDEVR3);
+		return 0;
+	case ETMCNTVRn(0):
+		*val = etm_read(ETMCNTVR0);
+		return 0;
+	case ETMCNTVRn(1):
+		*val = etm_read(ETMCNTVR1);
+		return 0;
+	case ETMCNTVRn(2):
+		*val = etm_read(ETMCNTVR2);
+		return 0;
+	case ETMCNTVRn(3):
+		*val = etm_read(ETMCNTVR3);
+		return 0;
+	case ETMSQ12EVR:
+		*val = etm_read(ETMSQ12EVR);
+		return 0;
+	case ETMSQ21EVR:
+		*val = etm_read(ETMSQ21EVR);
+		return 0;
+	case ETMSQ23EVR:
+		*val = etm_read(ETMSQ23EVR);
+		return 0;
+	case ETMSQ31EVR:
+		*val = etm_read(ETMSQ31EVR);
+		return 0;
+	case ETMSQ32EVR:
+		*val = etm_read(ETMSQ32EVR);
+		return 0;
+	case ETMSQ13EVR:
+		*val = etm_read(ETMSQ13EVR);
+		return 0;
+	case ETMSQR:
+		*val = etm_read(ETMSQR);
+		return 0;
+	case ETMEXTOUTEVRn(0):
+		*val = etm_read(ETMEXTOUTEVR0);
+		return 0;
+	case ETMEXTOUTEVRn(1):
+		*val = etm_read(ETMEXTOUTEVR1);
+		return 0;
+	case ETMEXTOUTEVRn(2):
+		*val = etm_read(ETMEXTOUTEVR2);
+		return 0;
+	case ETMEXTOUTEVRn(3):
+		*val = etm_read(ETMEXTOUTEVR3);
+		return 0;
+	case ETMCIDCVRn(0):
+		*val = etm_read(ETMCIDCVR0);
+		return 0;
+	case ETMCIDCVRn(1):
+		*val = etm_read(ETMCIDCVR1);
+		return 0;
+	case ETMCIDCVRn(2):
+		*val = etm_read(ETMCIDCVR2);
+		return 0;
+	case ETMCIDCMR:
+		*val = etm_read(ETMCIDCMR);
+		return 0;
+	case ETMIMPSPEC0:
+		*val = etm_read(ETMIMPSPEC0);
+		return 0;
+	case ETMIMPSPEC1:
+		*val = etm_read(ETMIMPSPEC1);
+		return 0;
+	case ETMIMPSPEC2:
+		*val = etm_read(ETMIMPSPEC2);
+		return 0;
+	case ETMIMPSPEC3:
+		*val = etm_read(ETMIMPSPEC3);
+		return 0;
+	case ETMIMPSPEC4:
+		*val = etm_read(ETMIMPSPEC4);
+		return 0;
+	case ETMIMPSPEC5:
+		*val = etm_read(ETMIMPSPEC5);
+		return 0;
+	case ETMIMPSPEC6:
+		*val = etm_read(ETMIMPSPEC6);
+		return 0;
+	case ETMIMPSPEC7:
+		*val = etm_read(ETMIMPSPEC7);
+		return 0;
+	case ETMSYNCFR:
+		*val = etm_read(ETMSYNCFR);
+		return 0;
+	case ETMIDR:
+		*val = etm_read(ETMIDR);
+		return 0;
+	case ETMCCER:
+		*val = etm_read(ETMCCER);
+		return 0;
+	case ETMEXTINSELR:
+		*val = etm_read(ETMEXTINSELR);
+		return 0;
+	case ETMTESSEICR:
+		*val = etm_read(ETMTESSEICR);
+		return 0;
+	case ETMEIBCR:
+		*val = etm_read(ETMEIBCR);
+		return 0;
+	case ETMTSEVR:
+		*val = etm_read(ETMTSEVR);
+		return 0;
+	case ETMAUXCR:
+		*val = etm_read(ETMAUXCR);
+		return 0;
+	case ETMTRACEIDR:
+		*val = etm_read(ETMTRACEIDR);
+		return 0;
+	case ETMVMIDCVR:
+		*val = etm_read(ETMVMIDCVR);
+		return 0;
+	case ETMOSLSR:
+		*val = etm_read(ETMOSLSR);
+		return 0;
+	case ETMOSSRR:
+		*val = etm_read(ETMOSSRR);
+		return 0;
+	case ETMPDCR:
+		*val = etm_read(ETMPDCR);
+		return 0;
+	case ETMPDSR:
+		*val = etm_read(ETMPDSR);
+		return 0;
+	default:
+		*val = 0;
+		return -EINVAL;
+	}
+}
+
+int etm_writel_cp14(u32 reg, u32 val)
+{
+	switch (reg) {
+	case ETMCR:
+		etm_write(val, ETMCR);
+		break;
+	case ETMTRIGGER:
+		etm_write(val, ETMTRIGGER);
+		break;
+	case ETMSR:
+		etm_write(val, ETMSR);
+		break;
+	case ETMTSSCR:
+		etm_write(val, ETMTSSCR);
+		break;
+	case ETMTEEVR:
+		etm_write(val, ETMTEEVR);
+		break;
+	case ETMTECR1:
+		etm_write(val, ETMTECR1);
+		break;
+	case ETMFFLR:
+		etm_write(val, ETMFFLR);
+		break;
+	case ETMACVRn(0):
+		etm_write(val, ETMACVR0);
+		break;
+	case ETMACVRn(1):
+		etm_write(val, ETMACVR1);
+		break;
+	case ETMACVRn(2):
+		etm_write(val, ETMACVR2);
+		break;
+	case ETMACVRn(3):
+		etm_write(val, ETMACVR3);
+		break;
+	case ETMACVRn(4):
+		etm_write(val, ETMACVR4);
+		break;
+	case ETMACVRn(5):
+		etm_write(val, ETMACVR5);
+		break;
+	case ETMACVRn(6):
+		etm_write(val, ETMACVR6);
+		break;
+	case ETMACVRn(7):
+		etm_write(val, ETMACVR7);
+		break;
+	case ETMACVRn(8):
+		etm_write(val, ETMACVR8);
+		break;
+	case ETMACVRn(9):
+		etm_write(val, ETMACVR9);
+		break;
+	case ETMACVRn(10):
+		etm_write(val, ETMACVR10);
+		break;
+	case ETMACVRn(11):
+		etm_write(val, ETMACVR11);
+		break;
+	case ETMACVRn(12):
+		etm_write(val, ETMACVR12);
+		break;
+	case ETMACVRn(13):
+		etm_write(val, ETMACVR13);
+		break;
+	case ETMACVRn(14):
+		etm_write(val, ETMACVR14);
+		break;
+	case ETMACVRn(15):
+		etm_write(val, ETMACVR15);
+		break;
+	case ETMACTRn(0):
+		etm_write(val, ETMACTR0);
+		break;
+	case ETMACTRn(1):
+		etm_write(val, ETMACTR1);
+		break;
+	case ETMACTRn(2):
+		etm_write(val, ETMACTR2);
+		break;
+	case ETMACTRn(3):
+		etm_write(val, ETMACTR3);
+		break;
+	case ETMACTRn(4):
+		etm_write(val, ETMACTR4);
+		break;
+	case ETMACTRn(5):
+		etm_write(val, ETMACTR5);
+		break;
+	case ETMACTRn(6):
+		etm_write(val, ETMACTR6);
+		break;
+	case ETMACTRn(7):
+		etm_write(val, ETMACTR7);
+		break;
+	case ETMACTRn(8):
+		etm_write(val, ETMACTR8);
+		break;
+	case ETMACTRn(9):
+		etm_write(val, ETMACTR9);
+		break;
+	case ETMACTRn(10):
+		etm_write(val, ETMACTR10);
+		break;
+	case ETMACTRn(11):
+		etm_write(val, ETMACTR11);
+		break;
+	case ETMACTRn(12):
+		etm_write(val, ETMACTR12);
+		break;
+	case ETMACTRn(13):
+		etm_write(val, ETMACTR13);
+		break;
+	case ETMACTRn(14):
+		etm_write(val, ETMACTR14);
+		break;
+	case ETMACTRn(15):
+		etm_write(val, ETMACTR15);
+		break;
+	case ETMCNTRLDVRn(0):
+		etm_write(val, ETMCNTRLDVR0);
+		break;
+	case ETMCNTRLDVRn(1):
+		etm_write(val, ETMCNTRLDVR1);
+		break;
+	case ETMCNTRLDVRn(2):
+		etm_write(val, ETMCNTRLDVR2);
+		break;
+	case ETMCNTRLDVRn(3):
+		etm_write(val, ETMCNTRLDVR3);
+		break;
+	case ETMCNTENRn(0):
+		etm_write(val, ETMCNTENR0);
+		break;
+	case ETMCNTENRn(1):
+		etm_write(val, ETMCNTENR1);
+		break;
+	case ETMCNTENRn(2):
+		etm_write(val, ETMCNTENR2);
+		break;
+	case ETMCNTENRn(3):
+		etm_write(val, ETMCNTENR3);
+		break;
+	case ETMCNTRLDEVRn(0):
+		etm_write(val, ETMCNTRLDEVR0);
+		break;
+	case ETMCNTRLDEVRn(1):
+		etm_write(val, ETMCNTRLDEVR1);
+		break;
+	case ETMCNTRLDEVRn(2):
+		etm_write(val, ETMCNTRLDEVR2);
+		break;
+	case ETMCNTRLDEVRn(3):
+		etm_write(val, ETMCNTRLDEVR3);
+		break;
+	case ETMCNTVRn(0):
+		etm_write(val, ETMCNTVR0);
+		break;
+	case ETMCNTVRn(1):
+		etm_write(val, ETMCNTVR1);
+		break;
+	case ETMCNTVRn(2):
+		etm_write(val, ETMCNTVR2);
+		break;
+	case ETMCNTVRn(3):
+		etm_write(val, ETMCNTVR3);
+		break;
+	case ETMSQ12EVR:
+		etm_write(val, ETMSQ12EVR);
+		break;
+	case ETMSQ21EVR:
+		etm_write(val, ETMSQ21EVR);
+		break;
+	case ETMSQ23EVR:
+		etm_write(val, ETMSQ23EVR);
+		break;
+	case ETMSQ31EVR:
+		etm_write(val, ETMSQ31EVR);
+		break;
+	case ETMSQ32EVR:
+		etm_write(val, ETMSQ32EVR);
+		break;
+	case ETMSQ13EVR:
+		etm_write(val, ETMSQ13EVR);
+		break;
+	case ETMSQR:
+		etm_write(val, ETMSQR);
+		break;
+	case ETMEXTOUTEVRn(0):
+		etm_write(val, ETMEXTOUTEVR0);
+		break;
+	case ETMEXTOUTEVRn(1):
+		etm_write(val, ETMEXTOUTEVR1);
+		break;
+	case ETMEXTOUTEVRn(2):
+		etm_write(val, ETMEXTOUTEVR2);
+		break;
+	case ETMEXTOUTEVRn(3):
+		etm_write(val, ETMEXTOUTEVR3);
+		break;
+	case ETMCIDCVRn(0):
+		etm_write(val, ETMCIDCVR0);
+		break;
+	case ETMCIDCVRn(1):
+		etm_write(val, ETMCIDCVR1);
+		break;
+	case ETMCIDCVRn(2):
+		etm_write(val, ETMCIDCVR2);
+		break;
+	case ETMCIDCMR:
+		etm_write(val, ETMCIDCMR);
+		break;
+	case ETMIMPSPEC0:
+		etm_write(val, ETMIMPSPEC0);
+		break;
+	case ETMIMPSPEC1:
+		etm_write(val, ETMIMPSPEC1);
+		break;
+	case ETMIMPSPEC2:
+		etm_write(val, ETMIMPSPEC2);
+		break;
+	case ETMIMPSPEC3:
+		etm_write(val, ETMIMPSPEC3);
+		break;
+	case ETMIMPSPEC4:
+		etm_write(val, ETMIMPSPEC4);
+		break;
+	case ETMIMPSPEC5:
+		etm_write(val, ETMIMPSPEC5);
+		break;
+	case ETMIMPSPEC6:
+		etm_write(val, ETMIMPSPEC6);
+		break;
+	case ETMIMPSPEC7:
+		etm_write(val, ETMIMPSPEC7);
+		break;
+	case ETMSYNCFR:
+		etm_write(val, ETMSYNCFR);
+		break;
+	case ETMEXTINSELR:
+		etm_write(val, ETMEXTINSELR);
+		break;
+	case ETMTESSEICR:
+		etm_write(val, ETMTESSEICR);
+		break;
+	case ETMEIBCR:
+		etm_write(val, ETMEIBCR);
+		break;
+	case ETMTSEVR:
+		etm_write(val, ETMTSEVR);
+		break;
+	case ETMAUXCR:
+		etm_write(val, ETMAUXCR);
+		break;
+	case ETMTRACEIDR:
+		etm_write(val, ETMTRACEIDR);
+		break;
+	case ETMVMIDCVR:
+		etm_write(val, ETMVMIDCVR);
+		break;
+	case ETMOSLAR:
+		etm_write(val, ETMOSLAR);
+		break;
+	case ETMOSSRR:
+		etm_write(val, ETMOSSRR);
+		break;
+	case ETMPDCR:
+		etm_write(val, ETMPDCR);
+		break;
+	case ETMPDSR:
+		etm_write(val, ETMPDSR);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
new file mode 100644
index 000000000000..501c5fac8a45
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -0,0 +1,251 @@
+/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CORESIGHT_CORESIGHT_ETM_H
+#define _CORESIGHT_CORESIGHT_ETM_H
+
+#include <linux/spinlock.h>
+#include "coresight-priv.h"
+
+/*
+ * Device registers:
+ * 0x000 - 0x2FC: Trace         registers
+ * 0x300 - 0x314: Management    registers
+ * 0x318 - 0xEFC: Trace         registers
+ *
+ * Coresight registers
+ * 0xF00 - 0xF9C: Management    registers
+ * 0xFA0 - 0xFA4: Management    registers in PFTv1.0
+ *                Trace         registers in PFTv1.1
+ * 0xFA8 - 0xFFC: Management    registers
+ */
+
+/* Trace registers (0x000-0x2FC) */
+#define ETMCR			0x000
+#define ETMCCR			0x004
+#define ETMTRIGGER		0x008
+#define ETMSR			0x010
+#define ETMSCR			0x014
+#define ETMTSSCR		0x018
+#define ETMTECR2		0x01c
+#define ETMTEEVR		0x020
+#define ETMTECR1		0x024
+#define ETMFFLR			0x02c
+#define ETMACVRn(n)		(0x040 + (n * 4))
+#define ETMACTRn(n)		(0x080 + (n * 4))
+#define ETMCNTRLDVRn(n)		(0x140 + (n * 4))
+#define ETMCNTENRn(n)		(0x150 + (n * 4))
+#define ETMCNTRLDEVRn(n)	(0x160 + (n * 4))
+#define ETMCNTVRn(n)		(0x170 + (n * 4))
+#define ETMSQ12EVR		0x180
+#define ETMSQ21EVR		0x184
+#define ETMSQ23EVR		0x188
+#define ETMSQ31EVR		0x18c
+#define ETMSQ32EVR		0x190
+#define ETMSQ13EVR		0x194
+#define ETMSQR			0x19c
+#define ETMEXTOUTEVRn(n)	(0x1a0 + (n * 4))
+#define ETMCIDCVRn(n)		(0x1b0 + (n * 4))
+#define ETMCIDCMR		0x1bc
+#define ETMIMPSPEC0		0x1c0
+#define ETMIMPSPEC1		0x1c4
+#define ETMIMPSPEC2		0x1c8
+#define ETMIMPSPEC3		0x1cc
+#define ETMIMPSPEC4		0x1d0
+#define ETMIMPSPEC5		0x1d4
+#define ETMIMPSPEC6		0x1d8
+#define ETMIMPSPEC7		0x1dc
+#define ETMSYNCFR		0x1e0
+#define ETMIDR			0x1e4
+#define ETMCCER			0x1e8
+#define ETMEXTINSELR		0x1ec
+#define ETMTESSEICR		0x1f0
+#define ETMEIBCR		0x1f4
+#define ETMTSEVR		0x1f8
+#define ETMAUXCR		0x1fc
+#define ETMTRACEIDR		0x200
+#define ETMVMIDCVR		0x240
+/* Management registers (0x300-0x314) */
+#define ETMOSLAR		0x300
+#define ETMOSLSR		0x304
+#define ETMOSSRR		0x308
+#define ETMPDCR			0x310
+#define ETMPDSR			0x314
+#define ETM_MAX_ADDR_CMP	16
+#define ETM_MAX_CNTR		4
+#define ETM_MAX_CTXID_CMP	3
+
+/* Register definition */
+/* ETMCR - 0x00 */
+#define ETMCR_PWD_DWN		BIT(0)
+#define ETMCR_STALL_MODE	BIT(7)
+#define ETMCR_ETM_PRG		BIT(10)
+#define ETMCR_ETM_EN		BIT(11)
+#define ETMCR_CYC_ACC		BIT(12)
+#define ETMCR_CTXID_SIZE	(BIT(14)|BIT(15))
+#define ETMCR_TIMESTAMP_EN	BIT(28)
+/* ETMCCR - 0x04 */
+#define ETMCCR_FIFOFULL		BIT(23)
+/* ETMPDCR - 0x310 */
+#define ETMPDCR_PWD_UP		BIT(3)
+/* ETMTECR1 - 0x024 */
+#define ETMTECR1_ADDR_COMP_1	BIT(0)
+#define ETMTECR1_INC_EXC	BIT(24)
+#define ETMTECR1_START_STOP	BIT(25)
+/* ETMCCER - 0x1E8 */
+#define ETMCCER_TIMESTAMP	BIT(22)
+
+#define ETM_MODE_EXCLUDE	BIT(0)
+#define ETM_MODE_CYCACC		BIT(1)
+#define ETM_MODE_STALL		BIT(2)
+#define ETM_MODE_TIMESTAMP	BIT(3)
+#define ETM_MODE_CTXID		BIT(4)
+#define ETM_MODE_ALL		0x1f
+
+#define ETM_SQR_MASK		0x3
+#define ETM_TRACEID_MASK	0x3f
+#define ETM_EVENT_MASK		0x1ffff
+#define ETM_SYNC_MASK		0xfff
+#define ETM_ALL_MASK		0xffffffff
+
+#define ETMSR_PROG_BIT		1
+#define ETM_SEQ_STATE_MAX_VAL	(0x2)
+#define PORT_SIZE_MASK		(GENMASK(21, 21) | GENMASK(6, 4))
+
+#define ETM_HARD_WIRE_RES_A	/* Hard wired, always true */	\
+				((0x0f << 0)	|		\
+				/* Resource index A */		\
+				(0x06 << 4))
+
+#define ETM_ADD_COMP_0		/* Single addr comparator 1 */	\
+				((0x00 << 7)	|		\
+				/* Resource index B */		\
+				(0x00 << 11))
+
+#define ETM_EVENT_NOT_A		BIT(14) /* NOT(A) */
+
+#define ETM_DEFAULT_EVENT_VAL	(ETM_HARD_WIRE_RES_A	|	\
+				 ETM_ADD_COMP_0		|	\
+				 ETM_EVENT_NOT_A)
+/**
+ * struct etm_drvdata - specifics associated to an ETM component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @csdev:	component vitals needed by the framework.
+ * @clk:	the clock this component is associated to.
+ * @spinlock:	only one at a time pls.
+ * @cpu:	the cpu this component is affined to.
+ * @port_size:	port size as reported by ETMCR bit 4-6 and 21.
+ * @arch:	ETM/PTM version number.
+ * @use_cpu14:	true if management registers need to be accessed via CP14.
+ * @enable:	is this ETM/PTM currently tracing.
+ * @sticky_enable: true if ETM base configuration has been done.
+ * @boot_enable:true if we should start tracing at boot time.
+ * @os_unlock:	true if access to management registers is allowed.
+ * @nr_addr_cmp:Number of pairs of address comparators as found in ETMCCR.
+ * @nr_cntr:	Number of counters as found in ETMCCR bit 13-15.
+ * @nr_ext_inp:	Number of external input as found in ETMCCR bit 17-19.
+ * @nr_ext_out:	Number of external output as found in ETMCCR bit 20-22.
+ * @nr_ctxid_cmp: Number of contextID comparators as found in ETMCCR bit 24-25.
+ * @etmccr:	value of register ETMCCR.
+ * @etmccer:	value of register ETMCCER.
+ * @traceid:	value of the current ID for this component.
+ * @mode:	controls various modes supported by this ETM/PTM.
+ * @ctrl:	used in conjunction with @mode.
+ * @trigger_event: setting for register ETMTRIGGER.
+ * @startstop_ctrl: setting for register ETMTSSCR.
+ * @enable_event: setting for register ETMTEEVR.
+ * @enable_ctrl1: setting for register ETMTECR1.
+ * @fifofull_level: setting for register ETMFFLR.
+ * @addr_idx:	index for the address comparator selection.
+ * @addr_val:	value for address comparator register.
+ * @addr_acctype: access type for address comparator register.
+ * @addr_type:	current status of the comparator register.
+ * @cntr_idx:	index for the counter register selection.
+ * @cntr_rld_val: reload value of a counter register.
+ * @cntr_event:	control for counter enable register.
+ * @cntr_rld_event: value for counter reload event register.
+ * @cntr_val:	counter value register.
+ * @seq_12_event: event causing the transition from 1 to 2.
+ * @seq_21_event: event causing the transition from 2 to 1.
+ * @seq_23_event: event causing the transition from 2 to 3.
+ * @seq_31_event: event causing the transition from 3 to 1.
+ * @seq_32_event: event causing the transition from 3 to 2.
+ * @seq_13_event: event causing the transition from 1 to 3.
+ * @seq_curr_state: current value of the sequencer register.
+ * @ctxid_idx: index for the context ID registers.
+ * @ctxid_val: value for the context ID to trigger on.
+ * @ctxid_mask: mask applicable to all the context IDs.
+ * @sync_freq:	Synchronisation frequency.
+ * @timestamp_event: Defines an event that requests the insertion
+		     of a timestamp into the trace stream.
+ */
+struct etm_drvdata {
+	void __iomem			*base;
+	struct device			*dev;
+	struct coresight_device		*csdev;
+	struct clk			*clk;
+	spinlock_t			spinlock;
+	int				cpu;
+	int				port_size;
+	u8				arch;
+	bool				use_cp14;
+	bool				enable;
+	bool				sticky_enable;
+	bool				boot_enable;
+	bool				os_unlock;
+	u8				nr_addr_cmp;
+	u8				nr_cntr;
+	u8				nr_ext_inp;
+	u8				nr_ext_out;
+	u8				nr_ctxid_cmp;
+	u32				etmccr;
+	u32				etmccer;
+	u32				traceid;
+	u32				mode;
+	u32				ctrl;
+	u32				trigger_event;
+	u32				startstop_ctrl;
+	u32				enable_event;
+	u32				enable_ctrl1;
+	u32				fifofull_level;
+	u8				addr_idx;
+	u32				addr_val[ETM_MAX_ADDR_CMP];
+	u32				addr_acctype[ETM_MAX_ADDR_CMP];
+	u32				addr_type[ETM_MAX_ADDR_CMP];
+	u8				cntr_idx;
+	u32				cntr_rld_val[ETM_MAX_CNTR];
+	u32				cntr_event[ETM_MAX_CNTR];
+	u32				cntr_rld_event[ETM_MAX_CNTR];
+	u32				cntr_val[ETM_MAX_CNTR];
+	u32				seq_12_event;
+	u32				seq_21_event;
+	u32				seq_23_event;
+	u32				seq_31_event;
+	u32				seq_32_event;
+	u32				seq_13_event;
+	u32				seq_curr_state;
+	u8				ctxid_idx;
+	u32				ctxid_val[ETM_MAX_CTXID_CMP];
+	u32				ctxid_mask;
+	u32				sync_freq;
+	u32				timestamp_event;
+};
+
+enum etm_addr_type {
+	ETM_ADDR_TYPE_NONE,
+	ETM_ADDR_TYPE_SINGLE,
+	ETM_ADDR_TYPE_RANGE,
+	ETM_ADDR_TYPE_START,
+	ETM_ADDR_TYPE_STOP,
+};
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
new file mode 100644
index 000000000000..c965f5724abd
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -0,0 +1,1932 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/stat.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+
+#include "coresight-etm.h"
+
+static int boot_enable;
+module_param_named(boot_enable, boot_enable, int, S_IRUGO);
+
+/* The number of ETM/PTM currently registered */
+static int etm_count;
+static struct etm_drvdata *etmdrvdata[NR_CPUS];
+
+static inline void etm_writel(struct etm_drvdata *drvdata,
+			      u32 val, u32 off)
+{
+	if (drvdata->use_cp14) {
+		if (etm_writel_cp14(off, val)) {
+			dev_err(drvdata->dev,
+				"invalid CP14 access to ETM reg: %#x", off);
+		}
+	} else {
+		writel_relaxed(val, drvdata->base + off);
+	}
+}
+
+static inline unsigned int etm_readl(struct etm_drvdata *drvdata, u32 off)
+{
+	u32 val;
+
+	if (drvdata->use_cp14) {
+		if (etm_readl_cp14(off, &val)) {
+			dev_err(drvdata->dev,
+				"invalid CP14 access to ETM reg: %#x", off);
+		}
+	} else {
+		val = readl_relaxed(drvdata->base + off);
+	}
+
+	return val;
+}
+
+/*
+ * Memory mapped writes to clear os lock are not supported on some processors
+ * and OS lock must be unlocked before any memory mapped access on such
+ * processors, otherwise memory mapped reads/writes will be invalid.
+ */
+static void etm_os_unlock(void *info)
+{
+	struct etm_drvdata *drvdata = (struct etm_drvdata *)info;
+	/* Writing any value to ETMOSLAR unlocks the trace registers */
+	etm_writel(drvdata, 0x0, ETMOSLAR);
+	isb();
+}
+
+static void etm_set_pwrdwn(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	/* Ensure pending cp14 accesses complete before setting pwrdwn */
+	mb();
+	isb();
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr |= ETMCR_PWD_DWN;
+	etm_writel(drvdata, etmcr, ETMCR);
+}
+
+static void etm_clr_pwrdwn(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= ~ETMCR_PWD_DWN;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/* Ensure pwrup completes before subsequent cp14 accesses */
+	mb();
+	isb();
+}
+
+static void etm_set_pwrup(struct etm_drvdata *drvdata)
+{
+	u32 etmpdcr;
+
+	etmpdcr = readl_relaxed(drvdata->base + ETMPDCR);
+	etmpdcr |= ETMPDCR_PWD_UP;
+	writel_relaxed(etmpdcr, drvdata->base + ETMPDCR);
+	/* Ensure pwrup completes before subsequent cp14 accesses */
+	mb();
+	isb();
+}
+
+static void etm_clr_pwrup(struct etm_drvdata *drvdata)
+{
+	u32 etmpdcr;
+
+	/* Ensure pending cp14 accesses complete before clearing pwrup */
+	mb();
+	isb();
+	etmpdcr = readl_relaxed(drvdata->base + ETMPDCR);
+	etmpdcr &= ~ETMPDCR_PWD_UP;
+	writel_relaxed(etmpdcr, drvdata->base + ETMPDCR);
+}
+
+/**
+ * coresight_timeout_etm - loop until a bit has changed to a specific state.
+ * @drvdata: etm's private data structure.
+ * @offset: address of a register, starting from @addr.
+ * @position: the position of the bit of interest.
+ * @value: the value the bit should have.
+ *
+ * Basically the same as @coresight_timeout except for the register access
+ * method where we have to account for CP14 configurations.
+
+ * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
+ * TIMEOUT_US has elapsed, which ever happens first.
+ */
+
+static int coresight_timeout_etm(struct etm_drvdata *drvdata, u32 offset,
+				  int position, int value)
+{
+	int i;
+	u32 val;
+
+	for (i = TIMEOUT_US; i > 0; i--) {
+		val = etm_readl(drvdata, offset);
+		/* Waiting on the bit to go from 0 to 1 */
+		if (value) {
+			if (val & BIT(position))
+				return 0;
+		/* Waiting on the bit to go from 1 to 0 */
+		} else {
+			if (!(val & BIT(position)))
+				return 0;
+		}
+
+		/*
+		 * Delay is arbitrary - the specification doesn't say how long
+		 * we are expected to wait.  Extra check required to make sure
+		 * we don't wait needlessly on the last iteration.
+		 */
+		if (i - 1)
+			udelay(1);
+	}
+
+	return -EAGAIN;
+}
+
+
+static void etm_set_prog(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr |= ETMCR_ETM_PRG;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/*
+	 * Recommended by spec for cp14 accesses to ensure etmcr write is
+	 * complete before polling etmsr
+	 */
+	isb();
+	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 1)) {
+		dev_err(drvdata->dev,
+			"timeout observed when probing at offset %#x\n", ETMSR);
+	}
+}
+
+static void etm_clr_prog(struct etm_drvdata *drvdata)
+{
+	u32 etmcr;
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= ~ETMCR_ETM_PRG;
+	etm_writel(drvdata, etmcr, ETMCR);
+	/*
+	 * Recommended by spec for cp14 accesses to ensure etmcr write is
+	 * complete before polling etmsr
+	 */
+	isb();
+	if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 0)) {
+		dev_err(drvdata->dev,
+			"timeout observed when probing at offset %#x\n", ETMSR);
+	}
+}
+
+static void etm_set_default(struct etm_drvdata *drvdata)
+{
+	int i;
+
+	drvdata->trigger_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->enable_event = ETM_HARD_WIRE_RES_A;
+
+	drvdata->seq_12_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->seq_21_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->seq_23_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->seq_31_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->seq_32_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->seq_13_event = ETM_DEFAULT_EVENT_VAL;
+	drvdata->timestamp_event = ETM_DEFAULT_EVENT_VAL;
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		drvdata->cntr_rld_val[i] = 0x0;
+		drvdata->cntr_event[i] = ETM_DEFAULT_EVENT_VAL;
+		drvdata->cntr_rld_event[i] = ETM_DEFAULT_EVENT_VAL;
+		drvdata->cntr_val[i] = 0x0;
+	}
+
+	drvdata->seq_curr_state = 0x0;
+	drvdata->ctxid_idx = 0x0;
+	for (i = 0; i < drvdata->nr_ctxid_cmp; i++)
+		drvdata->ctxid_val[i] = 0x0;
+	drvdata->ctxid_mask = 0x0;
+}
+
+static void etm_enable_hw(void *info)
+{
+	int i;
+	u32 etmcr;
+	struct etm_drvdata *drvdata = info;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Turn engine on */
+	etm_clr_pwrdwn(drvdata);
+	/* Apply power to trace registers */
+	etm_set_pwrup(drvdata);
+	/* Make sure all registers are accessible */
+	etm_os_unlock(drvdata);
+
+	etm_set_prog(drvdata);
+
+	etmcr = etm_readl(drvdata, ETMCR);
+	etmcr &= (ETMCR_PWD_DWN | ETMCR_ETM_PRG);
+	etmcr |= drvdata->port_size;
+	etm_writel(drvdata, drvdata->ctrl | etmcr, ETMCR);
+	etm_writel(drvdata, drvdata->trigger_event, ETMTRIGGER);
+	etm_writel(drvdata, drvdata->startstop_ctrl, ETMTSSCR);
+	etm_writel(drvdata, drvdata->enable_event, ETMTEEVR);
+	etm_writel(drvdata, drvdata->enable_ctrl1, ETMTECR1);
+	etm_writel(drvdata, drvdata->fifofull_level, ETMFFLR);
+	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+		etm_writel(drvdata, drvdata->addr_val[i], ETMACVRn(i));
+		etm_writel(drvdata, drvdata->addr_acctype[i], ETMACTRn(i));
+	}
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		etm_writel(drvdata, drvdata->cntr_rld_val[i], ETMCNTRLDVRn(i));
+		etm_writel(drvdata, drvdata->cntr_event[i], ETMCNTENRn(i));
+		etm_writel(drvdata, drvdata->cntr_rld_event[i],
+			   ETMCNTRLDEVRn(i));
+		etm_writel(drvdata, drvdata->cntr_val[i], ETMCNTVRn(i));
+	}
+	etm_writel(drvdata, drvdata->seq_12_event, ETMSQ12EVR);
+	etm_writel(drvdata, drvdata->seq_21_event, ETMSQ21EVR);
+	etm_writel(drvdata, drvdata->seq_23_event, ETMSQ23EVR);
+	etm_writel(drvdata, drvdata->seq_31_event, ETMSQ31EVR);
+	etm_writel(drvdata, drvdata->seq_32_event, ETMSQ32EVR);
+	etm_writel(drvdata, drvdata->seq_13_event, ETMSQ13EVR);
+	etm_writel(drvdata, drvdata->seq_curr_state, ETMSQR);
+	for (i = 0; i < drvdata->nr_ext_out; i++)
+		etm_writel(drvdata, ETM_DEFAULT_EVENT_VAL, ETMEXTOUTEVRn(i));
+	for (i = 0; i < drvdata->nr_ctxid_cmp; i++)
+		etm_writel(drvdata, drvdata->ctxid_val[i], ETMCIDCVRn(i));
+	etm_writel(drvdata, drvdata->ctxid_mask, ETMCIDCMR);
+	etm_writel(drvdata, drvdata->sync_freq, ETMSYNCFR);
+	/* No external input selected */
+	etm_writel(drvdata, 0x0, ETMEXTINSELR);
+	etm_writel(drvdata, drvdata->timestamp_event, ETMTSEVR);
+	/* No auxiliary control selected */
+	etm_writel(drvdata, 0x0, ETMAUXCR);
+	etm_writel(drvdata, drvdata->traceid, ETMTRACEIDR);
+	/* No VMID comparator value selected */
+	etm_writel(drvdata, 0x0, ETMVMIDCVR);
+
+	/* Ensures trace output is enabled from this ETM */
+	etm_writel(drvdata, drvdata->ctrl | ETMCR_ETM_EN | etmcr, ETMCR);
+
+	etm_clr_prog(drvdata);
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu);
+}
+
+static int etm_trace_id_simple(struct etm_drvdata *drvdata)
+{
+	if (!drvdata->enable)
+		return drvdata->traceid;
+
+	return (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK);
+}
+
+static int etm_trace_id(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	unsigned long flags;
+	int trace_id = -1;
+
+	if (!drvdata->enable)
+		return drvdata->traceid;
+
+	if (clk_prepare_enable(drvdata->clk))
+		goto out;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	CS_UNLOCK(drvdata->base);
+	trace_id = (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK);
+	CS_LOCK(drvdata->base);
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	clk_disable_unprepare(drvdata->clk);
+out:
+	return trace_id;
+}
+
+static int etm_enable(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret;
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		goto err_clk;
+
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Configure the ETM only if the CPU is online.  If it isn't online
+	 * hw configuration will take place when 'CPU_STARTING' is received
+	 * in @etm_cpu_callback.
+	 */
+	if (cpu_online(drvdata->cpu)) {
+		ret = smp_call_function_single(drvdata->cpu,
+					       etm_enable_hw, drvdata, 1);
+		if (ret)
+			goto err;
+	}
+
+	drvdata->enable = true;
+	drvdata->sticky_enable = true;
+
+	spin_unlock(&drvdata->spinlock);
+
+	dev_info(drvdata->dev, "ETM tracing enabled\n");
+	return 0;
+err:
+	spin_unlock(&drvdata->spinlock);
+	clk_disable_unprepare(drvdata->clk);
+err_clk:
+	return ret;
+}
+
+static void etm_disable_hw(void *info)
+{
+	int i;
+	struct etm_drvdata *drvdata = info;
+
+	CS_UNLOCK(drvdata->base);
+	etm_set_prog(drvdata);
+
+	/* Program trace enable to low by using always false event */
+	etm_writel(drvdata, ETM_HARD_WIRE_RES_A | ETM_EVENT_NOT_A, ETMTEEVR);
+
+	/* Read back sequencer and counters for post trace analysis */
+	drvdata->seq_curr_state = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK);
+
+	for (i = 0; i < drvdata->nr_cntr; i++)
+		drvdata->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
+
+	etm_set_pwrdwn(drvdata);
+	CS_LOCK(drvdata->base);
+
+	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
+}
+
+static void etm_disable(struct coresight_device *csdev)
+{
+	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	/*
+	 * Taking hotplug lock here protects from clocks getting disabled
+	 * with tracing being left on (crash scenario) if user disable occurs
+	 * after cpu online mask indicates the cpu is offline but before the
+	 * DYING hotplug callback is serviced by the ETM driver.
+	 */
+	get_online_cpus();
+	spin_lock(&drvdata->spinlock);
+
+	/*
+	 * Executing etm_disable_hw on the cpu whose ETM is being disabled
+	 * ensures that register writes occur when cpu is powered.
+	 */
+	smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
+	drvdata->enable = false;
+
+	spin_unlock(&drvdata->spinlock);
+	put_online_cpus();
+
+	clk_disable_unprepare(drvdata->clk);
+
+	dev_info(drvdata->dev, "ETM tracing disabled\n");
+}
+
+static const struct coresight_ops_source etm_source_ops = {
+	.trace_id	= etm_trace_id,
+	.enable		= etm_enable,
+	.disable	= etm_disable,
+};
+
+static const struct coresight_ops etm_cs_ops = {
+	.source_ops	= &etm_source_ops,
+};
+
+static ssize_t nr_addr_cmp_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_addr_cmp;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_addr_cmp);
+
+static ssize_t nr_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_cntr;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_cntr);
+
+static ssize_t nr_ctxid_cmp_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->nr_ctxid_cmp;
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(nr_ctxid_cmp);
+
+static ssize_t etmsr_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	int ret;
+	unsigned long flags, val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	CS_UNLOCK(drvdata->base);
+
+	val = etm_readl(drvdata, ETMSR);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	clk_disable_unprepare(drvdata->clk);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+static DEVICE_ATTR_RO(etmsr);
+
+static ssize_t reset_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	int i, ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		spin_lock(&drvdata->spinlock);
+		drvdata->mode = ETM_MODE_EXCLUDE;
+		drvdata->ctrl = 0x0;
+		drvdata->trigger_event = ETM_DEFAULT_EVENT_VAL;
+		drvdata->startstop_ctrl = 0x0;
+		drvdata->addr_idx = 0x0;
+		for (i = 0; i < drvdata->nr_addr_cmp; i++) {
+			drvdata->addr_val[i] = 0x0;
+			drvdata->addr_acctype[i] = 0x0;
+			drvdata->addr_type[i] = ETM_ADDR_TYPE_NONE;
+		}
+		drvdata->cntr_idx = 0x0;
+
+		etm_set_default(drvdata);
+		spin_unlock(&drvdata->spinlock);
+	}
+
+	return size;
+}
+static DEVICE_ATTR_WO(reset);
+
+static ssize_t mode_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->mode;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t mode_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->mode = val & ETM_MODE_ALL;
+
+	if (drvdata->mode & ETM_MODE_EXCLUDE)
+		drvdata->enable_ctrl1 |= ETMTECR1_INC_EXC;
+	else
+		drvdata->enable_ctrl1 &= ~ETMTECR1_INC_EXC;
+
+	if (drvdata->mode & ETM_MODE_CYCACC)
+		drvdata->ctrl |= ETMCR_CYC_ACC;
+	else
+		drvdata->ctrl &= ~ETMCR_CYC_ACC;
+
+	if (drvdata->mode & ETM_MODE_STALL) {
+		if (!(drvdata->etmccr & ETMCCR_FIFOFULL)) {
+			dev_warn(drvdata->dev, "stall mode not supported\n");
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+		drvdata->ctrl |= ETMCR_STALL_MODE;
+	 } else
+		drvdata->ctrl &= ~ETMCR_STALL_MODE;
+
+	if (drvdata->mode & ETM_MODE_TIMESTAMP) {
+		if (!(drvdata->etmccer & ETMCCER_TIMESTAMP)) {
+			dev_warn(drvdata->dev, "timestamp not supported\n");
+			ret = -EINVAL;
+			goto err_unlock;
+		}
+		drvdata->ctrl |= ETMCR_TIMESTAMP_EN;
+	} else
+		drvdata->ctrl &= ~ETMCR_TIMESTAMP_EN;
+
+	if (drvdata->mode & ETM_MODE_CTXID)
+		drvdata->ctrl |= ETMCR_CTXID_SIZE;
+	else
+		drvdata->ctrl &= ~ETMCR_CTXID_SIZE;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+
+err_unlock:
+	spin_unlock(&drvdata->spinlock);
+	return ret;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t trigger_event_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->trigger_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_event_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_event = val & ETM_EVENT_MASK;
+
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_event);
+
+static ssize_t enable_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->enable_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t enable_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->enable_event = val & ETM_EVENT_MASK;
+
+	return size;
+}
+static DEVICE_ATTR_RW(enable_event);
+
+static ssize_t fifofull_level_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->fifofull_level;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t fifofull_level_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->fifofull_level = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(fifofull_level);
+
+static ssize_t addr_idx_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->addr_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_addr_cmp)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	drvdata->addr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_idx);
+
+static ssize_t addr_single_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      drvdata->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	val = drvdata->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_single_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      drvdata->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EINVAL;
+	}
+
+	drvdata->addr_val[idx] = val;
+	drvdata->addr_type[idx] = ETM_ADDR_TYPE_SINGLE;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_single);
+
+static ssize_t addr_range_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (drvdata->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val1 = drvdata->addr_val[idx];
+	val2 = drvdata->addr_val[idx + 1];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx %#lx\n", val1, val2);
+}
+
+static ssize_t addr_range_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	u8 idx;
+	unsigned long val1, val2;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (sscanf(buf, "%lx %lx", &val1, &val2) != 2)
+		return -EINVAL;
+	/* Lower address comparator cannot have a higher address value */
+	if (val1 > val2)
+		return -EINVAL;
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (idx % 2 != 0) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+	if (!((drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE &&
+	       drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) ||
+	      (drvdata->addr_type[idx] == ETM_ADDR_TYPE_RANGE &&
+	       drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	drvdata->addr_val[idx] = val1;
+	drvdata->addr_type[idx] = ETM_ADDR_TYPE_RANGE;
+	drvdata->addr_val[idx + 1] = val2;
+	drvdata->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE;
+	drvdata->enable_ctrl1 |= (1 << (idx/2));
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_range);
+
+static ssize_t addr_start_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      drvdata->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = drvdata->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_start_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      drvdata->addr_type[idx] == ETM_ADDR_TYPE_START)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	drvdata->addr_val[idx] = val;
+	drvdata->addr_type[idx] = ETM_ADDR_TYPE_START;
+	drvdata->startstop_ctrl |= (1 << idx);
+	drvdata->enable_ctrl1 |= BIT(25);
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_start);
+
+static ssize_t addr_stop_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	u8 idx;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      drvdata->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	val = drvdata->addr_val[idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_stop_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	u8 idx;
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	idx = drvdata->addr_idx;
+	if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE ||
+	      drvdata->addr_type[idx] == ETM_ADDR_TYPE_STOP)) {
+		spin_unlock(&drvdata->spinlock);
+		return -EPERM;
+	}
+
+	drvdata->addr_val[idx] = val;
+	drvdata->addr_type[idx] = ETM_ADDR_TYPE_STOP;
+	drvdata->startstop_ctrl |= (1 << (idx + 16));
+	drvdata->enable_ctrl1 |= ETMTECR1_START_STOP;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_stop);
+
+static ssize_t addr_acctype_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->addr_acctype[drvdata->addr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t addr_acctype_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->addr_acctype[drvdata->addr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(addr_acctype);
+
+static ssize_t cntr_idx_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->cntr_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_idx_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_cntr)
+		return -EINVAL;
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	drvdata->cntr_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_idx);
+
+static ssize_t cntr_rld_val_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->cntr_rld_val[drvdata->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_rld_val_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->cntr_rld_val[drvdata->cntr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_rld_val);
+
+static ssize_t cntr_event_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->cntr_event[drvdata->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_event_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->cntr_event[drvdata->cntr_idx] = val & ETM_EVENT_MASK;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_event);
+
+static ssize_t cntr_rld_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->cntr_rld_event[drvdata->cntr_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t cntr_rld_event_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->cntr_rld_event[drvdata->cntr_idx] = val & ETM_EVENT_MASK;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_rld_event);
+
+static ssize_t cntr_val_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	int i, ret = 0;
+	u32 val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (!drvdata->enable) {
+		spin_lock(&drvdata->spinlock);
+		for (i = 0; i < drvdata->nr_cntr; i++)
+			ret += sprintf(buf, "counter %d: %x\n",
+				       i, drvdata->cntr_val[i]);
+		spin_unlock(&drvdata->spinlock);
+		return ret;
+	}
+
+	for (i = 0; i < drvdata->nr_cntr; i++) {
+		val = etm_readl(drvdata, ETMCNTVRn(i));
+		ret += sprintf(buf, "counter %d: %x\n", i, val);
+	}
+
+	return ret;
+}
+
+static ssize_t cntr_val_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->cntr_val[drvdata->cntr_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(cntr_val);
+
+static ssize_t seq_12_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->seq_12_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_12_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->seq_12_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_12_event);
+
+static ssize_t seq_21_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->seq_21_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_21_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->seq_21_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_21_event);
+
+static ssize_t seq_23_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->seq_23_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_23_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->seq_23_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_23_event);
+
+static ssize_t seq_31_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->seq_31_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_31_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->seq_31_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_31_event);
+
+static ssize_t seq_32_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->seq_32_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_32_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->seq_32_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_32_event);
+
+static ssize_t seq_13_event_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->seq_13_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_13_event_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->seq_13_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(seq_13_event);
+
+static ssize_t seq_curr_state_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int ret;
+	unsigned long val, flags;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (!drvdata->enable) {
+		val = drvdata->seq_curr_state;
+		goto out;
+	}
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	CS_UNLOCK(drvdata->base);
+	val = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK);
+	CS_LOCK(drvdata->base);
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	clk_disable_unprepare(drvdata->clk);
+out:
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t seq_curr_state_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val > ETM_SEQ_STATE_MAX_VAL)
+		return -EINVAL;
+
+	drvdata->seq_curr_state = val;
+
+	return size;
+}
+static DEVICE_ATTR_RW(seq_curr_state);
+
+static ssize_t ctxid_idx_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->ctxid_idx;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_idx_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	if (val >= drvdata->nr_ctxid_cmp)
+		return -EINVAL;
+
+	/*
+	 * Use spinlock to ensure index doesn't change while it gets
+	 * dereferenced multiple times within a spinlock block elsewhere.
+	 */
+	spin_lock(&drvdata->spinlock);
+	drvdata->ctxid_idx = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_idx);
+
+static ssize_t ctxid_val_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	spin_lock(&drvdata->spinlock);
+	val = drvdata->ctxid_val[drvdata->ctxid_idx];
+	spin_unlock(&drvdata->spinlock);
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_val_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	spin_lock(&drvdata->spinlock);
+	drvdata->ctxid_val[drvdata->ctxid_idx] = val;
+	spin_unlock(&drvdata->spinlock);
+
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_val);
+
+static ssize_t ctxid_mask_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->ctxid_mask;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t ctxid_mask_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->ctxid_mask = val;
+	return size;
+}
+static DEVICE_ATTR_RW(ctxid_mask);
+
+static ssize_t sync_freq_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->sync_freq;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t sync_freq_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->sync_freq = val & ETM_SYNC_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(sync_freq);
+
+static ssize_t timestamp_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	val = drvdata->timestamp_event;
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t timestamp_event_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->timestamp_event = val & ETM_EVENT_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(timestamp_event);
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	int ret;
+	unsigned long flags;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+
+	CS_UNLOCK(drvdata->base);
+	ret = sprintf(buf,
+		      "ETMCCR: 0x%08x\n"
+		      "ETMCCER: 0x%08x\n"
+		      "ETMSCR: 0x%08x\n"
+		      "ETMIDR: 0x%08x\n"
+		      "ETMCR: 0x%08x\n"
+		      "ETMTRACEIDR: 0x%08x\n"
+		      "Enable event: 0x%08x\n"
+		      "Enable start/stop: 0x%08x\n"
+		      "Enable control: CR1 0x%08x CR2 0x%08x\n"
+		      "CPU affinity: %d\n",
+		      drvdata->etmccr, drvdata->etmccer,
+		      etm_readl(drvdata, ETMSCR), etm_readl(drvdata, ETMIDR),
+		      etm_readl(drvdata, ETMCR), etm_trace_id_simple(drvdata),
+		      etm_readl(drvdata, ETMTEEVR),
+		      etm_readl(drvdata, ETMTSSCR),
+		      etm_readl(drvdata, ETMTECR1),
+		      etm_readl(drvdata, ETMTECR2),
+		      drvdata->cpu);
+	CS_LOCK(drvdata->base);
+
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	clk_disable_unprepare(drvdata->clk);
+
+	return ret;
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t traceid_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	int ret;
+	unsigned long val, flags;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	if (!drvdata->enable) {
+		val = drvdata->traceid;
+		goto out;
+	}
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	CS_UNLOCK(drvdata->base);
+
+	val = (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	clk_disable_unprepare(drvdata->clk);
+out:
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t traceid_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->traceid = val & ETM_TRACEID_MASK;
+	return size;
+}
+static DEVICE_ATTR_RW(traceid);
+
+static struct attribute *coresight_etm_attrs[] = {
+	&dev_attr_nr_addr_cmp.attr,
+	&dev_attr_nr_cntr.attr,
+	&dev_attr_nr_ctxid_cmp.attr,
+	&dev_attr_etmsr.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_trigger_event.attr,
+	&dev_attr_enable_event.attr,
+	&dev_attr_fifofull_level.attr,
+	&dev_attr_addr_idx.attr,
+	&dev_attr_addr_single.attr,
+	&dev_attr_addr_range.attr,
+	&dev_attr_addr_start.attr,
+	&dev_attr_addr_stop.attr,
+	&dev_attr_addr_acctype.attr,
+	&dev_attr_cntr_idx.attr,
+	&dev_attr_cntr_rld_val.attr,
+	&dev_attr_cntr_event.attr,
+	&dev_attr_cntr_rld_event.attr,
+	&dev_attr_cntr_val.attr,
+	&dev_attr_seq_12_event.attr,
+	&dev_attr_seq_21_event.attr,
+	&dev_attr_seq_23_event.attr,
+	&dev_attr_seq_31_event.attr,
+	&dev_attr_seq_32_event.attr,
+	&dev_attr_seq_13_event.attr,
+	&dev_attr_seq_curr_state.attr,
+	&dev_attr_ctxid_idx.attr,
+	&dev_attr_ctxid_val.attr,
+	&dev_attr_ctxid_mask.attr,
+	&dev_attr_sync_freq.attr,
+	&dev_attr_timestamp_event.attr,
+	&dev_attr_status.attr,
+	&dev_attr_traceid.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_etm);
+
+static int etm_cpu_callback(struct notifier_block *nfb, unsigned long action,
+			    void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	if (!etmdrvdata[cpu])
+		goto out;
+
+	switch (action & (~CPU_TASKS_FROZEN)) {
+	case CPU_STARTING:
+		spin_lock(&etmdrvdata[cpu]->spinlock);
+		if (!etmdrvdata[cpu]->os_unlock) {
+			etm_os_unlock(etmdrvdata[cpu]);
+			etmdrvdata[cpu]->os_unlock = true;
+		}
+
+		if (etmdrvdata[cpu]->enable)
+			etm_enable_hw(etmdrvdata[cpu]);
+		spin_unlock(&etmdrvdata[cpu]->spinlock);
+		break;
+
+	case CPU_ONLINE:
+		if (etmdrvdata[cpu]->boot_enable &&
+		    !etmdrvdata[cpu]->sticky_enable)
+			coresight_enable(etmdrvdata[cpu]->csdev);
+		break;
+
+	case CPU_DYING:
+		spin_lock(&etmdrvdata[cpu]->spinlock);
+		if (etmdrvdata[cpu]->enable)
+			etm_disable_hw(etmdrvdata[cpu]);
+		spin_unlock(&etmdrvdata[cpu]->spinlock);
+		break;
+	}
+out:
+	return NOTIFY_OK;
+}
+
+static struct notifier_block etm_cpu_notifier = {
+	.notifier_call = etm_cpu_callback,
+};
+
+static bool etm_arch_supported(u8 arch)
+{
+	switch (arch) {
+	case ETM_ARCH_V3_3:
+		break;
+	case ETM_ARCH_V3_5:
+		break;
+	case PFT_ARCH_V1_0:
+		break;
+	case PFT_ARCH_V1_1:
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+static void etm_init_arch_data(void *info)
+{
+	u32 etmidr;
+	u32 etmccr;
+	struct etm_drvdata *drvdata = info;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* First dummy read */
+	(void)etm_readl(drvdata, ETMPDSR);
+	/* Provide power to ETM: ETMPDCR[3] == 1 */
+	etm_set_pwrup(drvdata);
+	/*
+	 * Clear power down bit since when this bit is set writes to
+	 * certain registers might be ignored.
+	 */
+	etm_clr_pwrdwn(drvdata);
+	/*
+	 * Set prog bit. It will be set from reset but this is included to
+	 * ensure it is set
+	 */
+	etm_set_prog(drvdata);
+
+	/* Find all capabilities */
+	etmidr = etm_readl(drvdata, ETMIDR);
+	drvdata->arch = BMVAL(etmidr, 4, 11);
+	drvdata->port_size = etm_readl(drvdata, ETMCR) & PORT_SIZE_MASK;
+
+	drvdata->etmccer = etm_readl(drvdata, ETMCCER);
+	etmccr = etm_readl(drvdata, ETMCCR);
+	drvdata->etmccr = etmccr;
+	drvdata->nr_addr_cmp = BMVAL(etmccr, 0, 3) * 2;
+	drvdata->nr_cntr = BMVAL(etmccr, 13, 15);
+	drvdata->nr_ext_inp = BMVAL(etmccr, 17, 19);
+	drvdata->nr_ext_out = BMVAL(etmccr, 20, 22);
+	drvdata->nr_ctxid_cmp = BMVAL(etmccr, 24, 25);
+
+	etm_set_pwrdwn(drvdata);
+	etm_clr_pwrup(drvdata);
+	CS_LOCK(drvdata->base);
+}
+
+static void etm_init_default_data(struct etm_drvdata *drvdata)
+{
+	/*
+	 * A trace ID of value 0 is invalid, so let's start at some
+	 * random value that fits in 7 bits and will be just as good.
+	 */
+	static int etm3x_traceid = 0x10;
+
+	u32 flags = (1 << 0 | /* instruction execute*/
+		     3 << 3 | /* ARM instruction */
+		     0 << 5 | /* No data value comparison */
+		     0 << 7 | /* No exact mach */
+		     0 << 8 | /* Ignore context ID */
+		     0 << 10); /* Security ignored */
+
+	/*
+	 * Initial configuration only - guarantees sources handled by
+	 * this driver have a unique ID at startup time but not between
+	 * all other types of sources.  For that we lean on the core
+	 * framework.
+	 */
+	drvdata->traceid = etm3x_traceid++;
+	drvdata->ctrl = (ETMCR_CYC_ACC | ETMCR_TIMESTAMP_EN);
+	drvdata->enable_ctrl1 = ETMTECR1_ADDR_COMP_1;
+	if (drvdata->nr_addr_cmp >= 2) {
+		drvdata->addr_val[0] = (u32) _stext;
+		drvdata->addr_val[1] = (u32) _etext;
+		drvdata->addr_acctype[0] = flags;
+		drvdata->addr_acctype[1] = flags;
+		drvdata->addr_type[0] = ETM_ADDR_TYPE_RANGE;
+		drvdata->addr_type[1] = ETM_ADDR_TYPE_RANGE;
+	}
+
+	etm_set_default(drvdata);
+}
+
+static int etm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct etm_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc *desc;
+	struct device_node *np = adev->dev.of_node;
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+
+		adev->dev.platform_data = pdata;
+		drvdata->use_cp14 = of_property_read_bool(np, "arm,cp14");
+	}
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->clk = adev->pclk;
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	drvdata->cpu = pdata ? pdata->cpu : 0;
+
+	get_online_cpus();
+	etmdrvdata[drvdata->cpu] = drvdata;
+
+	if (!smp_call_function_single(drvdata->cpu, etm_os_unlock, drvdata, 1))
+		drvdata->os_unlock = true;
+
+	if (smp_call_function_single(drvdata->cpu,
+				     etm_init_arch_data,  drvdata, 1))
+		dev_err(dev, "ETM arch init failed\n");
+
+	if (!etm_count++)
+		register_hotcpu_notifier(&etm_cpu_notifier);
+
+	put_online_cpus();
+
+	if (etm_arch_supported(drvdata->arch) == false) {
+		ret = -EINVAL;
+		goto err_arch_supported;
+	}
+	etm_init_default_data(drvdata);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	desc->type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc->ops = &etm_cs_ops;
+	desc->pdata = pdata;
+	desc->dev = dev;
+	desc->groups = coresight_etm_groups;
+	drvdata->csdev = coresight_register(desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto err_arch_supported;
+	}
+
+	dev_info(dev, "ETM initialized\n");
+
+	if (boot_enable) {
+		coresight_enable(drvdata->csdev);
+		drvdata->boot_enable = true;
+	}
+
+	return 0;
+
+err_arch_supported:
+	clk_disable_unprepare(drvdata->clk);
+	if (--etm_count == 0)
+		unregister_hotcpu_notifier(&etm_cpu_notifier);
+	return ret;
+}
+
+static int etm_remove(struct amba_device *adev)
+{
+	struct etm_drvdata *drvdata = amba_get_drvdata(adev);
+
+	coresight_unregister(drvdata->csdev);
+	if (--etm_count == 0)
+		unregister_hotcpu_notifier(&etm_cpu_notifier);
+
+	return 0;
+}
+
+static struct amba_id etm_ids[] = {
+	{	/* ETM 3.3 */
+		.id	= 0x0003b921,
+		.mask	= 0x0003ffff,
+	},
+	{	/* ETM 3.5 */
+		.id	= 0x0003b956,
+		.mask	= 0x0003ffff,
+	},
+	{	/* PTM 1.0 */
+		.id	= 0x0003b950,
+		.mask	= 0x0003ffff,
+	},
+	{	/* PTM 1.1 */
+		.id	= 0x0003b95f,
+		.mask	= 0x0003ffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver etm_driver = {
+	.drv = {
+		.name	= "coresight-etm3x",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= etm_probe,
+	.remove		= etm_remove,
+	.id_table	= etm_ids,
+};
+
+int __init etm_init(void)
+{
+	return amba_driver_register(&etm_driver);
+}
+module_init(etm_init);
+
+void __exit etm_exit(void)
+{
+	amba_driver_unregister(&etm_driver);
+}
+module_exit(etm_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CoreSight Program Flow Trace driver");
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
new file mode 100644
index 000000000000..3db36f70b666
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+
+#include "coresight-priv.h"
+
+#define FUNNEL_FUNCTL		0x000
+#define FUNNEL_PRICTL		0x004
+
+#define FUNNEL_HOLDTIME_MASK	0xf00
+#define FUNNEL_HOLDTIME_SHFT	0x8
+#define FUNNEL_HOLDTIME		(0x7 << FUNNEL_HOLDTIME_SHFT)
+
+/**
+ * struct funnel_drvdata - specifics associated to a funnel component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @csdev:	component vitals needed by the framework.
+ * @clk:	the clock this component is associated to.
+ * @priority:	port selection order.
+ */
+struct funnel_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	struct clk		*clk;
+	unsigned long		priority;
+};
+
+static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	functl &= ~FUNNEL_HOLDTIME_MASK;
+	functl |= FUNNEL_HOLDTIME;
+	functl |= (1 << port);
+	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
+	writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int funnel_enable(struct coresight_device *csdev, int inport,
+			 int outport)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret;
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	funnel_enable_hw(drvdata, inport);
+
+	dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport);
+	return 0;
+}
+
+static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	functl &= ~(1 << inport);
+	writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void funnel_disable(struct coresight_device *csdev, int inport,
+			   int outport)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	funnel_disable_hw(drvdata, inport);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport);
+}
+
+static const struct coresight_ops_link funnel_link_ops = {
+	.enable		= funnel_enable,
+	.disable	= funnel_disable,
+};
+
+static const struct coresight_ops funnel_cs_ops = {
+	.link_ops	= &funnel_link_ops,
+};
+
+static ssize_t priority_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->priority;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t priority_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->priority = val;
+	return size;
+}
+static DEVICE_ATTR_RW(priority);
+
+static u32 get_funnel_ctrl_hw(struct funnel_drvdata *drvdata)
+{
+	u32 functl;
+
+	CS_UNLOCK(drvdata->base);
+	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
+	CS_LOCK(drvdata->base);
+
+	return functl;
+}
+
+static ssize_t funnel_ctrl_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	int ret;
+	u32 val;
+	struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	val = get_funnel_ctrl_hw(drvdata);
+	clk_disable_unprepare(drvdata->clk);
+
+	return sprintf(buf, "%#x\n", val);
+}
+static DEVICE_ATTR_RO(funnel_ctrl);
+
+static struct attribute *coresight_funnel_attrs[] = {
+	&dev_attr_funnel_ctrl.attr,
+	&dev_attr_priority.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_funnel);
+
+static int funnel_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct funnel_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc *desc;
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	drvdata->clk = adev->pclk;
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	desc->type = CORESIGHT_DEV_TYPE_LINK;
+	desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
+	desc->ops = &funnel_cs_ops;
+	desc->pdata = pdata;
+	desc->dev = dev;
+	desc->groups = coresight_funnel_groups;
+	drvdata->csdev = coresight_register(desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	dev_info(dev, "FUNNEL initialized\n");
+	return 0;
+}
+
+static int funnel_remove(struct amba_device *adev)
+{
+	struct funnel_drvdata *drvdata = amba_get_drvdata(adev);
+
+	coresight_unregister(drvdata->csdev);
+	return 0;
+}
+
+static struct amba_id funnel_ids[] = {
+	{
+		.id     = 0x0003b908,
+		.mask   = 0x0003ffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver funnel_driver = {
+	.drv = {
+		.name	= "coresight-funnel",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= funnel_probe,
+	.remove		= funnel_remove,
+	.id_table	= funnel_ids,
+};
+
+module_amba_driver(funnel_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CoreSight Funnel driver");
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
new file mode 100644
index 000000000000..62fcd98cc7cf
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -0,0 +1,63 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CORESIGHT_PRIV_H
+#define _CORESIGHT_PRIV_H
+
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/coresight.h>
+
+/*
+ * Coresight management registers (0xf00-0xfcc)
+ * 0xfa0 - 0xfa4: Management	registers in PFTv1.0
+ *		  Trace		registers in PFTv1.1
+ */
+#define CORESIGHT_ITCTRL	0xf00
+#define CORESIGHT_CLAIMSET	0xfa0
+#define CORESIGHT_CLAIMCLR	0xfa4
+#define CORESIGHT_LAR		0xfb0
+#define CORESIGHT_LSR		0xfb4
+#define CORESIGHT_AUTHSTATUS	0xfb8
+#define CORESIGHT_DEVID		0xfc8
+#define CORESIGHT_DEVTYPE	0xfcc
+
+#define TIMEOUT_US		100
+#define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
+
+static inline void CS_LOCK(void __iomem *addr)
+{
+	do {
+		/* Wait for things to settle */
+		mb();
+		writel_relaxed(0x0, addr + CORESIGHT_LAR);
+	} while (0);
+}
+
+static inline void CS_UNLOCK(void __iomem *addr)
+{
+	do {
+		writel_relaxed(CORESIGHT_UNLOCK, addr + CORESIGHT_LAR);
+		/* Make sure everyone has seen this */
+		mb();
+	} while (0);
+}
+
+#ifdef CONFIG_CORESIGHT_SOURCE_ETM3X
+extern int etm_readl_cp14(u32 off, unsigned int *val);
+extern int etm_writel_cp14(u32 off, u32 val);
+#else
+static inline int etm_readl_cp14(u32 off, unsigned int *val) { return 0; }
+static inline int etm_writel_cp14(u32 off, u32 val) { return 0; }
+#endif
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
new file mode 100644
index 000000000000..cdf05537d574
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -0,0 +1,137 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+
+#include "coresight-priv.h"
+
+/**
+ * struct replicator_drvdata - specifics associated to a replicator component
+ * @dev:	the device entity associated with this component
+ * @csdev:	component vitals needed by the framework
+ */
+struct replicator_drvdata {
+	struct device		*dev;
+	struct coresight_device	*csdev;
+};
+
+static int replicator_enable(struct coresight_device *csdev, int inport,
+			     int outport)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	dev_info(drvdata->dev, "REPLICATOR enabled\n");
+	return 0;
+}
+
+static void replicator_disable(struct coresight_device *csdev, int inport,
+			       int outport)
+{
+	struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	dev_info(drvdata->dev, "REPLICATOR disabled\n");
+}
+
+static const struct coresight_ops_link replicator_link_ops = {
+	.enable		= replicator_enable,
+	.disable	= replicator_disable,
+};
+
+static const struct coresight_ops replicator_cs_ops = {
+	.link_ops	= &replicator_link_ops,
+};
+
+static int replicator_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct replicator_drvdata *drvdata;
+	struct coresight_desc *desc;
+	struct device_node *np = pdev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		pdev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &pdev->dev;
+	platform_set_drvdata(pdev, drvdata);
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	desc->type = CORESIGHT_DEV_TYPE_LINK;
+	desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+	desc->ops = &replicator_cs_ops;
+	desc->pdata = pdev->dev.platform_data;
+	desc->dev = &pdev->dev;
+	drvdata->csdev = coresight_register(desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	dev_info(dev, "REPLICATOR initialized\n");
+	return 0;
+}
+
+static int replicator_remove(struct platform_device *pdev)
+{
+	struct replicator_drvdata *drvdata = platform_get_drvdata(pdev);
+
+	coresight_unregister(drvdata->csdev);
+	return 0;
+}
+
+static struct of_device_id replicator_match[] = {
+	{.compatible = "arm,coresight-replicator"},
+	{}
+};
+
+static struct platform_driver replicator_driver = {
+	.probe          = replicator_probe,
+	.remove         = replicator_remove,
+	.driver         = {
+		.name   = "coresight-replicator",
+		.of_match_table = replicator_match,
+	},
+};
+
+static int __init replicator_init(void)
+{
+	return platform_driver_register(&replicator_driver);
+}
+module_init(replicator_init);
+
+static void __exit replicator_exit(void)
+{
+	platform_driver_unregister(&replicator_driver);
+}
+module_exit(replicator_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CoreSight Replicator driver");
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
new file mode 100644
index 000000000000..7147f3dd363c
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -0,0 +1,822 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+
+#include "coresight-priv.h"
+
+#define TMC_RSZ			0x004
+#define TMC_STS			0x00c
+#define TMC_RRD			0x010
+#define TMC_RRP			0x014
+#define TMC_RWP			0x018
+#define TMC_TRG			0x01c
+#define TMC_CTL			0x020
+#define TMC_RWD			0x024
+#define TMC_MODE		0x028
+#define TMC_LBUFLEVEL		0x02c
+#define TMC_CBUFLEVEL		0x030
+#define TMC_BUFWM		0x034
+#define TMC_RRPHI		0x038
+#define TMC_RWPHI		0x03c
+#define TMC_AXICTL		0x110
+#define TMC_DBALO		0x118
+#define TMC_DBAHI		0x11c
+#define TMC_FFSR		0x300
+#define TMC_FFCR		0x304
+#define TMC_PSCR		0x308
+#define TMC_ITMISCOP0		0xee0
+#define TMC_ITTRFLIN		0xee8
+#define TMC_ITATBDATA0		0xeec
+#define TMC_ITATBCTR2		0xef0
+#define TMC_ITATBCTR1		0xef4
+#define TMC_ITATBCTR0		0xef8
+
+/* register description */
+/* TMC_CTL - 0x020 */
+#define TMC_CTL_CAPT_EN		BIT(0)
+/* TMC_STS - 0x00C */
+#define TMC_STS_TRIGGERED	BIT(1)
+/* TMC_AXICTL - 0x110 */
+#define TMC_AXICTL_PROT_CTL_B0	BIT(0)
+#define TMC_AXICTL_PROT_CTL_B1	BIT(1)
+#define TMC_AXICTL_SCT_GAT_MODE	BIT(7)
+#define TMC_AXICTL_WR_BURST_LEN 0xF00
+/* TMC_FFCR - 0x304 */
+#define TMC_FFCR_EN_FMT		BIT(0)
+#define TMC_FFCR_EN_TI		BIT(1)
+#define TMC_FFCR_FON_FLIN	BIT(4)
+#define TMC_FFCR_FON_TRIG_EVT	BIT(5)
+#define TMC_FFCR_FLUSHMAN	BIT(6)
+#define TMC_FFCR_TRIGON_TRIGIN	BIT(8)
+#define TMC_FFCR_STOP_ON_FLUSH	BIT(12)
+
+#define TMC_STS_TRIGGERED_BIT	2
+#define TMC_FFCR_FLUSHMAN_BIT	6
+
+enum tmc_config_type {
+	TMC_CONFIG_TYPE_ETB,
+	TMC_CONFIG_TYPE_ETR,
+	TMC_CONFIG_TYPE_ETF,
+};
+
+enum tmc_mode {
+	TMC_MODE_CIRCULAR_BUFFER,
+	TMC_MODE_SOFTWARE_FIFO,
+	TMC_MODE_HARDWARE_FIFO,
+};
+
+enum tmc_mem_intf_width {
+	TMC_MEM_INTF_WIDTH_32BITS	= 0x2,
+	TMC_MEM_INTF_WIDTH_64BITS	= 0x3,
+	TMC_MEM_INTF_WIDTH_128BITS	= 0x4,
+	TMC_MEM_INTF_WIDTH_256BITS	= 0x5,
+};
+
+/**
+ * struct tmc_drvdata - specifics associated to an TMC component
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @csdev:	component vitals needed by the framework.
+ * @miscdev:	specifics to handle "/dev/xyz.tmc" entry.
+ * @clk:	the clock this component is associated to.
+ * @spinlock:	only one at a time pls.
+ * @read_count:	manages preparation of buffer for reading.
+ * @buf:	area of memory where trace data get sent.
+ * @paddr:	DMA start location in RAM.
+ * @vaddr:	virtual representation of @paddr.
+ * @size:	@buf size.
+ * @enable:	this TMC is being used.
+ * @config_type: TMC variant, must be of type @tmc_config_type.
+ * @trigger_cntr: amount of words to store after a trigger.
+ */
+struct tmc_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	struct miscdevice	miscdev;
+	struct clk		*clk;
+	spinlock_t		spinlock;
+	int			read_count;
+	bool			reading;
+	char			*buf;
+	dma_addr_t		paddr;
+	void __iomem		*vaddr;
+	u32			size;
+	bool			enable;
+	enum tmc_config_type	config_type;
+	u32			trigger_cntr;
+};
+
+static void tmc_wait_for_ready(struct tmc_drvdata *drvdata)
+{
+	/* Ensure formatter, unformatter and hardware fifo are empty */
+	if (coresight_timeout(drvdata->base,
+			      TMC_STS, TMC_STS_TRIGGERED_BIT, 1)) {
+		dev_err(drvdata->dev,
+			"timeout observed when probing at offset %#x\n",
+			TMC_STS);
+	}
+}
+
+static void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
+{
+	u32 ffcr;
+
+	ffcr = readl_relaxed(drvdata->base + TMC_FFCR);
+	ffcr |= TMC_FFCR_STOP_ON_FLUSH;
+	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
+	ffcr |= TMC_FFCR_FLUSHMAN;
+	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
+	/* Ensure flush completes */
+	if (coresight_timeout(drvdata->base,
+			      TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
+		dev_err(drvdata->dev,
+			"timeout observed when probing at offset %#x\n",
+			TMC_FFCR);
+	}
+
+	tmc_wait_for_ready(drvdata);
+}
+
+static void tmc_enable_hw(struct tmc_drvdata *drvdata)
+{
+	writel_relaxed(TMC_CTL_CAPT_EN, drvdata->base + TMC_CTL);
+}
+
+static void tmc_disable_hw(struct tmc_drvdata *drvdata)
+{
+	writel_relaxed(0x0, drvdata->base + TMC_CTL);
+}
+
+static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+{
+	/* Zero out the memory to help with debug */
+	memset(drvdata->buf, 0, drvdata->size);
+
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
+		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
+		       TMC_FFCR_TRIGON_TRIGIN,
+		       drvdata->base + TMC_FFCR);
+
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+{
+	u32 axictl;
+
+	/* Zero out the memory to help with debug */
+	memset(drvdata->vaddr, 0, drvdata->size);
+
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(drvdata->size / 4, drvdata->base + TMC_RSZ);
+	writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE);
+
+	axictl = readl_relaxed(drvdata->base + TMC_AXICTL);
+	axictl |= TMC_AXICTL_WR_BURST_LEN;
+	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
+	axictl &= ~TMC_AXICTL_SCT_GAT_MODE;
+	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
+	axictl = (axictl &
+		  ~(TMC_AXICTL_PROT_CTL_B0 | TMC_AXICTL_PROT_CTL_B1)) |
+		  TMC_AXICTL_PROT_CTL_B1;
+	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
+
+	writel_relaxed(drvdata->paddr, drvdata->base + TMC_DBALO);
+	writel_relaxed(0x0, drvdata->base + TMC_DBAHI);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
+		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
+		       TMC_FFCR_TRIGON_TRIGIN,
+		       drvdata->base + TMC_FFCR);
+	writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	writel_relaxed(TMC_MODE_HARDWARE_FIFO, drvdata->base + TMC_MODE);
+	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI,
+		       drvdata->base + TMC_FFCR);
+	writel_relaxed(0x0, drvdata->base + TMC_BUFWM);
+	tmc_enable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static int tmc_enable(struct tmc_drvdata *drvdata, enum tmc_mode mode)
+{
+	int ret;
+	unsigned long flags;
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading) {
+		spin_unlock_irqrestore(&drvdata->spinlock, flags);
+		clk_disable_unprepare(drvdata->clk);
+		return -EBUSY;
+	}
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
+		tmc_etb_enable_hw(drvdata);
+	} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		tmc_etr_enable_hw(drvdata);
+	} else {
+		if (mode == TMC_MODE_CIRCULAR_BUFFER)
+			tmc_etb_enable_hw(drvdata);
+		else
+			tmc_etf_enable_hw(drvdata);
+	}
+	drvdata->enable = true;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC enabled\n");
+	return 0;
+}
+
+static int tmc_enable_sink(struct coresight_device *csdev)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return tmc_enable(drvdata, TMC_MODE_CIRCULAR_BUFFER);
+}
+
+static int tmc_enable_link(struct coresight_device *csdev, int inport,
+			   int outport)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	return tmc_enable(drvdata, TMC_MODE_HARDWARE_FIFO);
+}
+
+static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata)
+{
+	enum tmc_mem_intf_width memwidth;
+	u8 memwords;
+	char *bufp;
+	u32 read_data;
+	int i;
+
+	memwidth = BMVAL(readl_relaxed(drvdata->base + CORESIGHT_DEVID), 8, 10);
+	if (memwidth == TMC_MEM_INTF_WIDTH_32BITS)
+		memwords = 1;
+	else if (memwidth == TMC_MEM_INTF_WIDTH_64BITS)
+		memwords = 2;
+	else if (memwidth == TMC_MEM_INTF_WIDTH_128BITS)
+		memwords = 4;
+	else
+		memwords = 8;
+
+	bufp = drvdata->buf;
+	while (1) {
+		for (i = 0; i < memwords; i++) {
+			read_data = readl_relaxed(drvdata->base + TMC_RRD);
+			if (read_data == 0xFFFFFFFF)
+				return;
+			memcpy(bufp, &read_data, 4);
+			bufp += 4;
+		}
+	}
+}
+
+static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_etb_dump_hw(drvdata);
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata)
+{
+	u32 rwp, val;
+
+	rwp = readl_relaxed(drvdata->base + TMC_RWP);
+	val = readl_relaxed(drvdata->base + TMC_STS);
+
+	/* How much memory do we still have */
+	if (val & BIT(0))
+		drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr;
+	else
+		drvdata->buf = drvdata->vaddr;
+}
+
+static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_etr_dump_hw(drvdata);
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	tmc_flush_and_stop(drvdata);
+	tmc_disable_hw(drvdata);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tmc_disable(struct tmc_drvdata *drvdata, enum tmc_mode mode)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (drvdata->reading)
+		goto out;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
+		tmc_etb_disable_hw(drvdata);
+	} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		tmc_etr_disable_hw(drvdata);
+	} else {
+		if (mode == TMC_MODE_CIRCULAR_BUFFER)
+			tmc_etb_disable_hw(drvdata);
+		else
+			tmc_etf_disable_hw(drvdata);
+	}
+out:
+	drvdata->enable = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	dev_info(drvdata->dev, "TMC disabled\n");
+}
+
+static void tmc_disable_sink(struct coresight_device *csdev)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	tmc_disable(drvdata, TMC_MODE_CIRCULAR_BUFFER);
+}
+
+static void tmc_disable_link(struct coresight_device *csdev, int inport,
+			     int outport)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	tmc_disable(drvdata, TMC_MODE_HARDWARE_FIFO);
+}
+
+static const struct coresight_ops_sink tmc_sink_ops = {
+	.enable		= tmc_enable_sink,
+	.disable	= tmc_disable_sink,
+};
+
+static const struct coresight_ops_link tmc_link_ops = {
+	.enable		= tmc_enable_link,
+	.disable	= tmc_disable_link,
+};
+
+static const struct coresight_ops tmc_etb_cs_ops = {
+	.sink_ops	= &tmc_sink_ops,
+};
+
+static const struct coresight_ops tmc_etr_cs_ops = {
+	.sink_ops	= &tmc_sink_ops,
+};
+
+static const struct coresight_ops tmc_etf_cs_ops = {
+	.sink_ops	= &tmc_sink_ops,
+	.link_ops	= &tmc_link_ops,
+};
+
+static int tmc_read_prepare(struct tmc_drvdata *drvdata)
+{
+	int ret;
+	unsigned long flags;
+	enum tmc_mode mode;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (!drvdata->enable)
+		goto out;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
+		tmc_etb_disable_hw(drvdata);
+	} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		tmc_etr_disable_hw(drvdata);
+	} else {
+		mode = readl_relaxed(drvdata->base + TMC_MODE);
+		if (mode == TMC_MODE_CIRCULAR_BUFFER) {
+			tmc_etb_disable_hw(drvdata);
+		} else {
+			ret = -ENODEV;
+			goto err;
+		}
+	}
+out:
+	drvdata->reading = true;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC read start\n");
+	return 0;
+err:
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+	return ret;
+}
+
+static void tmc_read_unprepare(struct tmc_drvdata *drvdata)
+{
+	unsigned long flags;
+	enum tmc_mode mode;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	if (!drvdata->enable)
+		goto out;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
+		tmc_etb_enable_hw(drvdata);
+	} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		tmc_etr_enable_hw(drvdata);
+	} else {
+		mode = readl_relaxed(drvdata->base + TMC_MODE);
+		if (mode == TMC_MODE_CIRCULAR_BUFFER)
+			tmc_etb_enable_hw(drvdata);
+	}
+out:
+	drvdata->reading = false;
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	dev_info(drvdata->dev, "TMC read end\n");
+}
+
+static int tmc_open(struct inode *inode, struct file *file)
+{
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+	int ret = 0;
+
+	if (drvdata->read_count++)
+		goto out;
+
+	ret = tmc_read_prepare(drvdata);
+	if (ret)
+		return ret;
+out:
+	nonseekable_open(inode, file);
+
+	dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__);
+	return 0;
+}
+
+static ssize_t tmc_read(struct file *file, char __user *data, size_t len,
+			loff_t *ppos)
+{
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+	char *bufp = drvdata->buf + *ppos;
+
+	if (*ppos + len > drvdata->size)
+		len = drvdata->size - *ppos;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		if (bufp == (char *)(drvdata->vaddr + drvdata->size))
+			bufp = drvdata->vaddr;
+		else if (bufp > (char *)(drvdata->vaddr + drvdata->size))
+			bufp -= drvdata->size;
+		if ((bufp + len) > (char *)(drvdata->vaddr + drvdata->size))
+			len = (char *)(drvdata->vaddr + drvdata->size) - bufp;
+	}
+
+	if (copy_to_user(data, bufp, len)) {
+		dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__);
+		return -EFAULT;
+	}
+
+	*ppos += len;
+
+	dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n",
+		__func__, len, (int)(drvdata->size - *ppos));
+	return len;
+}
+
+static int tmc_release(struct inode *inode, struct file *file)
+{
+	struct tmc_drvdata *drvdata = container_of(file->private_data,
+						   struct tmc_drvdata, miscdev);
+
+	if (--drvdata->read_count) {
+		if (drvdata->read_count < 0) {
+			dev_err(drvdata->dev, "mismatched close\n");
+			drvdata->read_count = 0;
+		}
+		goto out;
+	}
+
+	tmc_read_unprepare(drvdata);
+out:
+	dev_dbg(drvdata->dev, "%s: released\n", __func__);
+	return 0;
+}
+
+static const struct file_operations tmc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= tmc_open,
+	.read		= tmc_read,
+	.release	= tmc_release,
+	.llseek		= no_llseek,
+};
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	int ret;
+	unsigned long flags;
+	u32 tmc_rsz, tmc_sts, tmc_rrp, tmc_rwp, tmc_trg;
+	u32 tmc_ctl, tmc_ffsr, tmc_ffcr, tmc_mode, tmc_pscr;
+	u32 devid;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		goto out;
+
+	spin_lock_irqsave(&drvdata->spinlock, flags);
+	CS_UNLOCK(drvdata->base);
+
+	tmc_rsz = readl_relaxed(drvdata->base + TMC_RSZ);
+	tmc_sts = readl_relaxed(drvdata->base + TMC_STS);
+	tmc_rrp = readl_relaxed(drvdata->base + TMC_RRP);
+	tmc_rwp = readl_relaxed(drvdata->base + TMC_RWP);
+	tmc_trg = readl_relaxed(drvdata->base + TMC_TRG);
+	tmc_ctl = readl_relaxed(drvdata->base + TMC_CTL);
+	tmc_ffsr = readl_relaxed(drvdata->base + TMC_FFSR);
+	tmc_ffcr = readl_relaxed(drvdata->base + TMC_FFCR);
+	tmc_mode = readl_relaxed(drvdata->base + TMC_MODE);
+	tmc_pscr = readl_relaxed(drvdata->base + TMC_PSCR);
+	devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+
+	CS_LOCK(drvdata->base);
+	spin_unlock_irqrestore(&drvdata->spinlock, flags);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	return sprintf(buf,
+		       "Depth:\t\t0x%x\n"
+		       "Status:\t\t0x%x\n"
+		       "RAM read ptr:\t0x%x\n"
+		       "RAM wrt ptr:\t0x%x\n"
+		       "Trigger cnt:\t0x%x\n"
+		       "Control:\t0x%x\n"
+		       "Flush status:\t0x%x\n"
+		       "Flush ctrl:\t0x%x\n"
+		       "Mode:\t\t0x%x\n"
+		       "PSRC:\t\t0x%x\n"
+		       "DEVID:\t\t0x%x\n",
+			tmc_rsz, tmc_sts, tmc_rrp, tmc_rwp, tmc_trg,
+			tmc_ctl, tmc_ffsr, tmc_ffcr, tmc_mode, tmc_pscr, devid);
+out:
+	return -EINVAL;
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t trigger_cntr_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	unsigned long val = drvdata->trigger_cntr;
+
+	return sprintf(buf, "%#lx\n", val);
+}
+
+static ssize_t trigger_cntr_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret)
+		return ret;
+
+	drvdata->trigger_cntr = val;
+	return size;
+}
+static DEVICE_ATTR_RW(trigger_cntr);
+
+static struct attribute *coresight_etb_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	&dev_attr_status.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_etb);
+
+static struct attribute *coresight_etr_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	&dev_attr_status.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_etr);
+
+static struct attribute *coresight_etf_attrs[] = {
+	&dev_attr_trigger_cntr.attr,
+	&dev_attr_status.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_etf);
+
+static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 devid;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct tmc_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc *desc;
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->clk = adev->pclk;
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID);
+	drvdata->config_type = BMVAL(devid, 6, 7);
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		if (np)
+			ret = of_property_read_u32(np,
+						   "arm,buffer-size",
+						   &drvdata->size);
+		if (ret)
+			drvdata->size = SZ_1M;
+	} else {
+		drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4;
+	}
+
+	clk_disable_unprepare(drvdata->clk);
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		drvdata->vaddr = dma_alloc_coherent(dev, drvdata->size,
+						&drvdata->paddr, GFP_KERNEL);
+		if (!drvdata->vaddr)
+			return -ENOMEM;
+
+		memset(drvdata->vaddr, 0, drvdata->size);
+		drvdata->buf = drvdata->vaddr;
+	} else {
+		drvdata->buf = devm_kzalloc(dev, drvdata->size, GFP_KERNEL);
+		if (!drvdata->buf)
+			return -ENOMEM;
+	}
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto err_devm_kzalloc;
+	}
+
+	desc->pdata = pdata;
+	desc->dev = dev;
+	desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
+		desc->type = CORESIGHT_DEV_TYPE_SINK;
+		desc->ops = &tmc_etb_cs_ops;
+		desc->groups = coresight_etb_groups;
+	} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
+		desc->type = CORESIGHT_DEV_TYPE_SINK;
+		desc->ops = &tmc_etr_cs_ops;
+		desc->groups = coresight_etr_groups;
+	} else {
+		desc->type = CORESIGHT_DEV_TYPE_LINKSINK;
+		desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
+		desc->ops = &tmc_etf_cs_ops;
+		desc->groups = coresight_etf_groups;
+	}
+
+	drvdata->csdev = coresight_register(desc);
+	if (IS_ERR(drvdata->csdev)) {
+		ret = PTR_ERR(drvdata->csdev);
+		goto err_devm_kzalloc;
+	}
+
+	drvdata->miscdev.name = pdata->name;
+	drvdata->miscdev.minor = MISC_DYNAMIC_MINOR;
+	drvdata->miscdev.fops = &tmc_fops;
+	ret = misc_register(&drvdata->miscdev);
+	if (ret)
+		goto err_misc_register;
+
+	dev_info(dev, "TMC initialized\n");
+	return 0;
+
+err_misc_register:
+	coresight_unregister(drvdata->csdev);
+err_devm_kzalloc:
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
+		dma_free_coherent(dev, drvdata->size,
+				&drvdata->paddr, GFP_KERNEL);
+	return ret;
+}
+
+static int tmc_remove(struct amba_device *adev)
+{
+	struct tmc_drvdata *drvdata = amba_get_drvdata(adev);
+
+	misc_deregister(&drvdata->miscdev);
+	coresight_unregister(drvdata->csdev);
+	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
+		dma_free_coherent(drvdata->dev, drvdata->size,
+				  &drvdata->paddr, GFP_KERNEL);
+
+	return 0;
+}
+
+static struct amba_id tmc_ids[] = {
+	{
+		.id     = 0x0003b961,
+		.mask   = 0x0003ffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver tmc_driver = {
+	.drv = {
+		.name   = "coresight-tmc",
+		.owner  = THIS_MODULE,
+	},
+	.probe		= tmc_probe,
+	.remove		= tmc_remove,
+	.id_table	= tmc_ids,
+};
+
+module_amba_driver(tmc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CoreSight Trace Memory Controller driver");
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
new file mode 100644
index 000000000000..3b33af2416bb
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -0,0 +1,207 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/amba/bus.h>
+
+#include "coresight-priv.h"
+
+#define TPIU_SUPP_PORTSZ	0x000
+#define TPIU_CURR_PORTSZ	0x004
+#define TPIU_SUPP_TRIGMODES	0x100
+#define TPIU_TRIG_CNTRVAL	0x104
+#define TPIU_TRIG_MULT		0x108
+#define TPIU_SUPP_TESTPATM	0x200
+#define TPIU_CURR_TESTPATM	0x204
+#define TPIU_TEST_PATREPCNTR	0x208
+#define TPIU_FFSR		0x300
+#define TPIU_FFCR		0x304
+#define TPIU_FSYNC_CNTR		0x308
+#define TPIU_EXTCTL_INPORT	0x400
+#define TPIU_EXTCTL_OUTPORT	0x404
+#define TPIU_ITTRFLINACK	0xee4
+#define TPIU_ITTRFLIN		0xee8
+#define TPIU_ITATBDATA0		0xeec
+#define TPIU_ITATBCTR2		0xef0
+#define TPIU_ITATBCTR1		0xef4
+#define TPIU_ITATBCTR0		0xef8
+
+/** register definition **/
+/* FFCR - 0x304 */
+#define FFCR_FON_MAN		BIT(6)
+
+/**
+ * @base:	memory mapped base address for this component.
+ * @dev:	the device entity associated to this component.
+ * @csdev:	component vitals needed by the framework.
+ * @clk:	the clock this component is associated to.
+ */
+struct tpiu_drvdata {
+	void __iomem		*base;
+	struct device		*dev;
+	struct coresight_device	*csdev;
+	struct clk		*clk;
+};
+
+static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* TODO: fill this up */
+
+	CS_LOCK(drvdata->base);
+}
+
+static int tpiu_enable(struct coresight_device *csdev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	int ret;
+
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	tpiu_enable_hw(drvdata);
+
+	dev_info(drvdata->dev, "TPIU enabled\n");
+	return 0;
+}
+
+static void tpiu_disable_hw(struct tpiu_drvdata *drvdata)
+{
+	CS_UNLOCK(drvdata->base);
+
+	/* Clear formatter controle reg. */
+	writel_relaxed(0x0, drvdata->base + TPIU_FFCR);
+	/* Generate manual flush */
+	writel_relaxed(FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+static void tpiu_disable(struct coresight_device *csdev)
+{
+	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
+	tpiu_disable_hw(drvdata);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	dev_info(drvdata->dev, "TPIU disabled\n");
+}
+
+static const struct coresight_ops_sink tpiu_sink_ops = {
+	.enable		= tpiu_enable,
+	.disable	= tpiu_disable,
+};
+
+static const struct coresight_ops tpiu_cs_ops = {
+	.sink_ops	= &tpiu_sink_ops,
+};
+
+static int tpiu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret;
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct coresight_platform_data *pdata = NULL;
+	struct tpiu_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct coresight_desc *desc;
+	struct device_node *np = adev->dev.of_node;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
+		adev->dev.platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->dev = &adev->dev;
+	dev_set_drvdata(dev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	drvdata->clk = adev->pclk;
+	ret = clk_prepare_enable(drvdata->clk);
+	if (ret)
+		return ret;
+
+	/* Disable tpiu to support older devices */
+	tpiu_disable_hw(drvdata);
+
+	clk_disable_unprepare(drvdata->clk);
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	desc->type = CORESIGHT_DEV_TYPE_SINK;
+	desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
+	desc->ops = &tpiu_cs_ops;
+	desc->pdata = pdata;
+	desc->dev = dev;
+	drvdata->csdev = coresight_register(desc);
+	if (IS_ERR(drvdata->csdev))
+		return PTR_ERR(drvdata->csdev);
+
+	dev_info(dev, "TPIU initialized\n");
+	return 0;
+}
+
+static int tpiu_remove(struct amba_device *adev)
+{
+	struct tpiu_drvdata *drvdata = amba_get_drvdata(adev);
+
+	coresight_unregister(drvdata->csdev);
+	return 0;
+}
+
+static struct amba_id tpiu_ids[] = {
+	{
+		.id	= 0x0003b912,
+		.mask	= 0x0003ffff,
+	},
+	{ 0, 0},
+};
+
+static struct amba_driver tpiu_driver = {
+	.drv = {
+		.name	= "coresight-tpiu",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= tpiu_probe,
+	.remove		= tpiu_remove,
+	.id_table	= tpiu_ids,
+};
+
+module_amba_driver(tpiu_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("CoreSight Trace Port Interface Unit driver");
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
new file mode 100644
index 000000000000..894531d315b8
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -0,0 +1,720 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/clk.h>
+#include <linux/coresight.h>
+#include <linux/of_platform.h>
+#include <linux/delay.h>
+
+#include "coresight-priv.h"
+
+static DEFINE_MUTEX(coresight_mutex);
+
+static int coresight_id_match(struct device *dev, void *data)
+{
+	int trace_id, i_trace_id;
+	struct coresight_device *csdev, *i_csdev;
+
+	csdev = data;
+	i_csdev = to_coresight_device(dev);
+
+	/*
+	 * No need to care about oneself and components that are not
+	 * sources or not enabled
+	 */
+	if (i_csdev == csdev || !i_csdev->enable ||
+	    i_csdev->type != CORESIGHT_DEV_TYPE_SOURCE)
+		return 0;
+
+	/* Get the source ID for both compoment */
+	trace_id = source_ops(csdev)->trace_id(csdev);
+	i_trace_id = source_ops(i_csdev)->trace_id(i_csdev);
+
+	/* All you need is one */
+	if (trace_id == i_trace_id)
+		return 1;
+
+	return 0;
+}
+
+static int coresight_source_is_unique(struct coresight_device *csdev)
+{
+	int trace_id = source_ops(csdev)->trace_id(csdev);
+
+	/* this shouldn't happen */
+	if (trace_id < 0)
+		return 0;
+
+	return !bus_for_each_dev(&coresight_bustype, NULL,
+				 csdev, coresight_id_match);
+}
+
+static int coresight_find_link_inport(struct coresight_device *csdev)
+{
+	int i;
+	struct coresight_device *parent;
+	struct coresight_connection *conn;
+
+	parent = container_of(csdev->path_link.next,
+			      struct coresight_device, path_link);
+
+	for (i = 0; i < parent->nr_outport; i++) {
+		conn = &parent->conns[i];
+		if (conn->child_dev == csdev)
+			return conn->child_port;
+	}
+
+	dev_err(&csdev->dev, "couldn't find inport, parent: %s, child: %s\n",
+		dev_name(&parent->dev), dev_name(&csdev->dev));
+
+	return 0;
+}
+
+static int coresight_find_link_outport(struct coresight_device *csdev)
+{
+	int i;
+	struct coresight_device *child;
+	struct coresight_connection *conn;
+
+	child = container_of(csdev->path_link.prev,
+			     struct coresight_device, path_link);
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		conn = &csdev->conns[i];
+		if (conn->child_dev == child)
+			return conn->outport;
+	}
+
+	dev_err(&csdev->dev, "couldn't find outport, parent: %s, child: %s\n",
+		dev_name(&csdev->dev), dev_name(&child->dev));
+
+	return 0;
+}
+
+static int coresight_enable_sink(struct coresight_device *csdev)
+{
+	int ret;
+
+	if (!csdev->enable) {
+		if (sink_ops(csdev)->enable) {
+			ret = sink_ops(csdev)->enable(csdev);
+			if (ret)
+				return ret;
+		}
+		csdev->enable = true;
+	}
+
+	atomic_inc(csdev->refcnt);
+
+	return 0;
+}
+
+static void coresight_disable_sink(struct coresight_device *csdev)
+{
+	if (atomic_dec_return(csdev->refcnt) == 0) {
+		if (sink_ops(csdev)->disable) {
+			sink_ops(csdev)->disable(csdev);
+			csdev->enable = false;
+		}
+	}
+}
+
+static int coresight_enable_link(struct coresight_device *csdev)
+{
+	int ret;
+	int link_subtype;
+	int refport, inport, outport;
+
+	inport = coresight_find_link_inport(csdev);
+	outport = coresight_find_link_outport(csdev);
+	link_subtype = csdev->subtype.link_subtype;
+
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG)
+		refport = inport;
+	else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT)
+		refport = outport;
+	else
+		refport = 0;
+
+	if (atomic_inc_return(&csdev->refcnt[refport]) == 1) {
+		if (link_ops(csdev)->enable) {
+			ret = link_ops(csdev)->enable(csdev, inport, outport);
+			if (ret)
+				return ret;
+		}
+	}
+
+	csdev->enable = true;
+
+	return 0;
+}
+
+static void coresight_disable_link(struct coresight_device *csdev)
+{
+	int i, nr_conns;
+	int link_subtype;
+	int refport, inport, outport;
+
+	inport = coresight_find_link_inport(csdev);
+	outport = coresight_find_link_outport(csdev);
+	link_subtype = csdev->subtype.link_subtype;
+
+	if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) {
+		refport = inport;
+		nr_conns = csdev->nr_inport;
+	} else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) {
+		refport = outport;
+		nr_conns = csdev->nr_outport;
+	} else {
+		refport = 0;
+		nr_conns = 1;
+	}
+
+	if (atomic_dec_return(&csdev->refcnt[refport]) == 0) {
+		if (link_ops(csdev)->disable)
+			link_ops(csdev)->disable(csdev, inport, outport);
+	}
+
+	for (i = 0; i < nr_conns; i++)
+		if (atomic_read(&csdev->refcnt[i]) != 0)
+			return;
+
+	csdev->enable = false;
+}
+
+static int coresight_enable_source(struct coresight_device *csdev)
+{
+	int ret;
+
+	if (!coresight_source_is_unique(csdev)) {
+		dev_warn(&csdev->dev, "traceID %d not unique\n",
+			 source_ops(csdev)->trace_id(csdev));
+		return -EINVAL;
+	}
+
+	if (!csdev->enable) {
+		if (source_ops(csdev)->enable) {
+			ret = source_ops(csdev)->enable(csdev);
+			if (ret)
+				return ret;
+		}
+		csdev->enable = true;
+	}
+
+	atomic_inc(csdev->refcnt);
+
+	return 0;
+}
+
+static void coresight_disable_source(struct coresight_device *csdev)
+{
+	if (atomic_dec_return(csdev->refcnt) == 0) {
+		if (source_ops(csdev)->disable) {
+			source_ops(csdev)->disable(csdev);
+			csdev->enable = false;
+		}
+	}
+}
+
+static int coresight_enable_path(struct list_head *path)
+{
+	int ret = 0;
+	struct coresight_device *cd;
+
+	list_for_each_entry(cd, path, path_link) {
+		if (cd == list_first_entry(path, struct coresight_device,
+					   path_link)) {
+			ret = coresight_enable_sink(cd);
+		} else if (list_is_last(&cd->path_link, path)) {
+			/*
+			 * Don't enable the source just yet - this needs to
+			 * happen at the very end when all links and sink
+			 * along the path have been configured properly.
+			 */
+			;
+		} else {
+			ret = coresight_enable_link(cd);
+		}
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+err:
+	list_for_each_entry_continue_reverse(cd, path, path_link) {
+		if (cd == list_first_entry(path, struct coresight_device,
+					   path_link)) {
+			coresight_disable_sink(cd);
+		} else if (list_is_last(&cd->path_link, path)) {
+			;
+		} else {
+			coresight_disable_link(cd);
+		}
+	}
+
+	return ret;
+}
+
+static int coresight_disable_path(struct list_head *path)
+{
+	struct coresight_device *cd;
+
+	list_for_each_entry_reverse(cd, path, path_link) {
+		if (cd == list_first_entry(path, struct coresight_device,
+					   path_link)) {
+			coresight_disable_sink(cd);
+		} else if (list_is_last(&cd->path_link, path)) {
+			/*
+			 * The source has already been stopped, no need
+			 * to do it again here.
+			 */
+			;
+		} else {
+			coresight_disable_link(cd);
+		}
+	}
+
+	return 0;
+}
+
+static int coresight_build_paths(struct coresight_device *csdev,
+				 struct list_head *path,
+				 bool enable)
+{
+	int i, ret = -EINVAL;
+	struct coresight_connection *conn;
+
+	list_add(&csdev->path_link, path);
+
+	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	    csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) &&
+	    csdev->activated) {
+		if (enable)
+			ret = coresight_enable_path(path);
+		else
+			ret = coresight_disable_path(path);
+	} else {
+		for (i = 0; i < csdev->nr_outport; i++) {
+			conn = &csdev->conns[i];
+			if (coresight_build_paths(conn->child_dev,
+						    path, enable) == 0)
+				ret = 0;
+		}
+	}
+
+	if (list_first_entry(path, struct coresight_device, path_link) != csdev)
+		dev_err(&csdev->dev, "wrong device in %s\n", __func__);
+
+	list_del(&csdev->path_link);
+
+	return ret;
+}
+
+int coresight_enable(struct coresight_device *csdev)
+{
+	int ret = 0;
+	LIST_HEAD(path);
+
+	mutex_lock(&coresight_mutex);
+	if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE) {
+		ret = -EINVAL;
+		dev_err(&csdev->dev, "wrong device type in %s\n", __func__);
+		goto out;
+	}
+	if (csdev->enable)
+		goto out;
+
+	if (coresight_build_paths(csdev, &path, true)) {
+		dev_err(&csdev->dev, "building path(s) failed\n");
+		goto out;
+	}
+
+	if (coresight_enable_source(csdev))
+		dev_err(&csdev->dev, "source enable failed\n");
+out:
+	mutex_unlock(&coresight_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(coresight_enable);
+
+void coresight_disable(struct coresight_device *csdev)
+{
+	LIST_HEAD(path);
+
+	mutex_lock(&coresight_mutex);
+	if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE) {
+		dev_err(&csdev->dev, "wrong device type in %s\n", __func__);
+		goto out;
+	}
+	if (!csdev->enable)
+		goto out;
+
+	coresight_disable_source(csdev);
+	if (coresight_build_paths(csdev, &path, false))
+		dev_err(&csdev->dev, "releasing path(s) failed\n");
+
+out:
+	mutex_unlock(&coresight_mutex);
+}
+EXPORT_SYMBOL_GPL(coresight_disable);
+
+static ssize_t enable_sink_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", (unsigned)csdev->activated);
+}
+
+static ssize_t enable_sink_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	int ret;
+	unsigned long val;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val)
+		csdev->activated = true;
+	else
+		csdev->activated = false;
+
+	return size;
+
+}
+static DEVICE_ATTR_RW(enable_sink);
+
+static ssize_t enable_source_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", (unsigned)csdev->enable);
+}
+
+static ssize_t enable_source_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	int ret = 0;
+	unsigned long val;
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = coresight_enable(csdev);
+		if (ret)
+			return ret;
+	} else {
+		coresight_disable(csdev);
+	}
+
+	return size;
+}
+static DEVICE_ATTR_RW(enable_source);
+
+static struct attribute *coresight_sink_attrs[] = {
+	&dev_attr_enable_sink.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_sink);
+
+static struct attribute *coresight_source_attrs[] = {
+	&dev_attr_enable_source.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(coresight_source);
+
+static struct device_type coresight_dev_type[] = {
+	{
+		.name = "none",
+	},
+	{
+		.name = "sink",
+		.groups = coresight_sink_groups,
+	},
+	{
+		.name = "link",
+	},
+	{
+		.name = "linksink",
+		.groups = coresight_sink_groups,
+	},
+	{
+		.name = "source",
+		.groups = coresight_source_groups,
+	},
+};
+
+static void coresight_device_release(struct device *dev)
+{
+	struct coresight_device *csdev = to_coresight_device(dev);
+
+	kfree(csdev);
+}
+
+static int coresight_orphan_match(struct device *dev, void *data)
+{
+	int i;
+	bool still_orphan = false;
+	struct coresight_device *csdev, *i_csdev;
+	struct coresight_connection *conn;
+
+	csdev = data;
+	i_csdev = to_coresight_device(dev);
+
+	/* No need to check oneself */
+	if (csdev == i_csdev)
+		return 0;
+
+	/* Move on to another component if no connection is orphan */
+	if (!i_csdev->orphan)
+		return 0;
+	/*
+	 * Circle throuch all the connection of that component.  If we find
+	 * an orphan connection whose name matches @csdev, link it.
+	 */
+	for (i = 0; i < i_csdev->nr_outport; i++) {
+		conn = &i_csdev->conns[i];
+
+		/* We have found at least one orphan connection */
+		if (conn->child_dev == NULL) {
+			/* Does it match this newly added device? */
+			if (!strcmp(dev_name(&csdev->dev), conn->child_name)) {
+				conn->child_dev = csdev;
+			} else {
+				/* This component still has an orphan */
+				still_orphan = true;
+			}
+		}
+	}
+
+	i_csdev->orphan = still_orphan;
+
+	/*
+	 * Returning '0' ensures that all known component on the
+	 * bus will be checked.
+	 */
+	return 0;
+}
+
+static void coresight_fixup_orphan_conns(struct coresight_device *csdev)
+{
+	/*
+	 * No need to check for a return value as orphan connection(s)
+	 * are hooked-up with each newly added component.
+	 */
+	bus_for_each_dev(&coresight_bustype, NULL,
+				 csdev, coresight_orphan_match);
+}
+
+
+static int coresight_name_match(struct device *dev, void *data)
+{
+	char *to_match;
+	struct coresight_device *i_csdev;
+
+	to_match = data;
+	i_csdev = to_coresight_device(dev);
+
+	if (!strcmp(to_match, dev_name(&i_csdev->dev)))
+		return 1;
+
+	return 0;
+}
+
+static void coresight_fixup_device_conns(struct coresight_device *csdev)
+{
+	int i;
+	struct device *dev = NULL;
+	struct coresight_connection *conn;
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		conn = &csdev->conns[i];
+		dev = bus_find_device(&coresight_bustype, NULL,
+				      (void *)conn->child_name,
+				      coresight_name_match);
+
+		if (dev) {
+			conn->child_dev = to_coresight_device(dev);
+		} else {
+			csdev->orphan = true;
+			conn->child_dev = NULL;
+		}
+	}
+}
+
+/**
+ * coresight_timeout - loop until a bit has changed to a specific state.
+ * @addr: base address of the area of interest.
+ * @offset: address of a register, starting from @addr.
+ * @position: the position of the bit of interest.
+ * @value: the value the bit should have.
+ *
+ * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
+ * TIMEOUT_US has elapsed, which ever happens first.
+ */
+
+int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
+{
+	int i;
+	u32 val;
+
+	for (i = TIMEOUT_US; i > 0; i--) {
+		val = __raw_readl(addr + offset);
+		/* waiting on the bit to go from 0 to 1 */
+		if (value) {
+			if (val & BIT(position))
+				return 0;
+		/* waiting on the bit to go from 1 to 0 */
+		} else {
+			if (!(val & BIT(position)))
+				return 0;
+		}
+
+		/*
+		 * Delay is arbitrary - the specification doesn't say how long
+		 * we are expected to wait.  Extra check required to make sure
+		 * we don't wait needlessly on the last iteration.
+		 */
+		if (i - 1)
+			udelay(1);
+	}
+
+	return -EAGAIN;
+}
+
+struct bus_type coresight_bustype = {
+	.name	= "coresight",
+};
+
+static int __init coresight_init(void)
+{
+	return bus_register(&coresight_bustype);
+}
+postcore_initcall(coresight_init);
+
+struct coresight_device *coresight_register(struct coresight_desc *desc)
+{
+	int i;
+	int ret;
+	int link_subtype;
+	int nr_refcnts = 1;
+	atomic_t *refcnts = NULL;
+	struct coresight_device *csdev;
+	struct coresight_connection *conns;
+
+	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
+	if (!csdev) {
+		ret = -ENOMEM;
+		goto err_kzalloc_csdev;
+	}
+
+	if (desc->type == CORESIGHT_DEV_TYPE_LINK ||
+	    desc->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+		link_subtype = desc->subtype.link_subtype;
+
+		if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG)
+			nr_refcnts = desc->pdata->nr_inport;
+		else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT)
+			nr_refcnts = desc->pdata->nr_outport;
+	}
+
+	refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
+	if (!refcnts) {
+		ret = -ENOMEM;
+		goto err_kzalloc_refcnts;
+	}
+
+	csdev->refcnt = refcnts;
+
+	csdev->nr_inport = desc->pdata->nr_inport;
+	csdev->nr_outport = desc->pdata->nr_outport;
+	conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
+	if (!conns) {
+		ret = -ENOMEM;
+		goto err_kzalloc_conns;
+	}
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		conns[i].outport = desc->pdata->outports[i];
+		conns[i].child_name = desc->pdata->child_names[i];
+		conns[i].child_port = desc->pdata->child_ports[i];
+	}
+
+	csdev->conns = conns;
+
+	csdev->type = desc->type;
+	csdev->subtype = desc->subtype;
+	csdev->ops = desc->ops;
+	csdev->orphan = false;
+
+	csdev->dev.type = &coresight_dev_type[desc->type];
+	csdev->dev.groups = desc->groups;
+	csdev->dev.parent = desc->dev;
+	csdev->dev.release = coresight_device_release;
+	csdev->dev.bus = &coresight_bustype;
+	dev_set_name(&csdev->dev, "%s", desc->pdata->name);
+
+	ret = device_register(&csdev->dev);
+	if (ret)
+		goto err_device_register;
+
+	mutex_lock(&coresight_mutex);
+
+	coresight_fixup_device_conns(csdev);
+	coresight_fixup_orphan_conns(csdev);
+
+	mutex_unlock(&coresight_mutex);
+
+	return csdev;
+
+err_device_register:
+	kfree(conns);
+err_kzalloc_conns:
+	kfree(refcnts);
+err_kzalloc_refcnts:
+	kfree(csdev);
+err_kzalloc_csdev:
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(coresight_register);
+
+void coresight_unregister(struct coresight_device *csdev)
+{
+	mutex_lock(&coresight_mutex);
+
+	kfree(csdev->conns);
+	device_unregister(&csdev->dev);
+
+	mutex_unlock(&coresight_mutex);
+}
+EXPORT_SYMBOL_GPL(coresight_unregister);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
new file mode 100644
index 000000000000..f3cc8e97a0f8
--- /dev/null
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -0,0 +1,200 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_graph.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpumask.h>
+#include <asm/smp_plat.h>
+
+
+static int of_dev_node_match(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static struct device *
+of_coresight_get_endpoint_device(struct device_node *endpoint)
+{
+	struct device *dev = NULL;
+
+	/*
+	 * If we have a non-configuable replicator, it will be found on the
+	 * platform bus.
+	 */
+	dev = bus_find_device(&platform_bus_type, NULL,
+			      endpoint, of_dev_node_match);
+	if (dev)
+		return dev;
+
+	/*
+	 * We have a configurable component - circle through the AMBA bus
+	 * looking for the device that matches the endpoint node.
+	 */
+	return bus_find_device(&amba_bustype, NULL,
+			       endpoint, of_dev_node_match);
+}
+
+static struct device_node *of_get_coresight_endpoint(
+		const struct device_node *parent, struct device_node *prev)
+{
+	struct device_node *node = of_graph_get_next_endpoint(parent, prev);
+
+	of_node_put(prev);
+	return node;
+}
+
+static void of_coresight_get_ports(struct device_node *node,
+				   int *nr_inport, int *nr_outport)
+{
+	struct device_node *ep = NULL;
+	int in = 0, out = 0;
+
+	do {
+		ep = of_get_coresight_endpoint(node, ep);
+		if (!ep)
+			break;
+
+		if (of_property_read_bool(ep, "slave-mode"))
+			in++;
+		else
+			out++;
+
+	} while (ep);
+
+	*nr_inport = in;
+	*nr_outport = out;
+}
+
+static int of_coresight_alloc_memory(struct device *dev,
+			struct coresight_platform_data *pdata)
+{
+	/* List of output port on this component */
+	pdata->outports = devm_kzalloc(dev, pdata->nr_outport *
+				       sizeof(*pdata->outports),
+				       GFP_KERNEL);
+	if (!pdata->outports)
+		return -ENOMEM;
+
+	/* Children connected to this component via @outports */
+	 pdata->child_names = devm_kzalloc(dev, pdata->nr_outport *
+					  sizeof(*pdata->child_names),
+					  GFP_KERNEL);
+	if (!pdata->child_names)
+		return -ENOMEM;
+
+	/* Port number on the child this component is connected to */
+	pdata->child_ports = devm_kzalloc(dev, pdata->nr_outport *
+					  sizeof(*pdata->child_ports),
+					  GFP_KERNEL);
+	if (!pdata->child_ports)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct coresight_platform_data *of_get_coresight_platform_data(
+				struct device *dev, struct device_node *node)
+{
+	int i = 0, ret = 0, cpu;
+	struct coresight_platform_data *pdata;
+	struct of_endpoint endpoint, rendpoint;
+	struct device *rdev;
+	struct device_node *dn;
+	struct device_node *ep = NULL;
+	struct device_node *rparent = NULL;
+	struct device_node *rport = NULL;
+
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	/* Use device name as sysfs handle */
+	pdata->name = dev_name(dev);
+
+	/* Get the number of input and output port for this component */
+	of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport);
+
+	if (pdata->nr_outport) {
+		ret = of_coresight_alloc_memory(dev, pdata);
+		if (ret)
+			return ERR_PTR(ret);
+
+		/* Iterate through each port to discover topology */
+		do {
+			/* Get a handle on a port */
+			ep = of_get_coresight_endpoint(node, ep);
+			if (!ep)
+				break;
+
+			/*
+			 * No need to deal with input ports, processing for as
+			 * processing for output ports will deal with them.
+			 */
+			if (of_find_property(ep, "slave-mode", NULL))
+				continue;
+
+			/* Get a handle on the local endpoint */
+			ret = of_graph_parse_endpoint(ep, &endpoint);
+
+			if (ret)
+				continue;
+
+			/* The local out port number */
+			pdata->outports[i] = endpoint.id;
+
+			/*
+			 * Get a handle on the remote port and parent
+			 * attached to it.
+			 */
+			rparent = of_graph_get_remote_port_parent(ep);
+			rport = of_graph_get_remote_port(ep);
+
+			if (!rparent || !rport)
+				continue;
+
+			if (of_graph_parse_endpoint(rport, &rendpoint))
+				continue;
+
+			rdev = of_coresight_get_endpoint_device(rparent);
+			if (!rdev)
+				continue;
+
+			pdata->child_names[i] = dev_name(rdev);
+			pdata->child_ports[i] = rendpoint.id;
+
+			i++;
+		} while (ep);
+	}
+
+	/* Affinity defaults to CPU0 */
+	pdata->cpu = 0;
+	dn = of_parse_phandle(node, "cpu", 0);
+	for (cpu = 0; dn && cpu < nr_cpu_ids; cpu++) {
+		if (dn == of_get_cpu_node(cpu, NULL)) {
+			pdata->cpu = cpu;
+			break;
+		}
+	}
+
+	return pdata;
+}
+EXPORT_SYMBOL_GPL(of_get_coresight_platform_data);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dd5112265cc9..6d13f962f156 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -15,7 +15,7 @@ if IOMMU_SUPPORT
 
 config OF_IOMMU
        def_bool y
-       depends on OF
+       depends on OF && IOMMU_API
 
 config FSL_PAMU
 	bool "Freescale IOMMU support"
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 74a1767c89b5..2c3f5ad01098 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -56,19 +56,13 @@ static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
 	unsigned int irq;
 	struct msi_desc *msidesc;
 
-	WARN_ON(!list_is_singular(&dev->msi_list));
 	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-	WARN_ON(msidesc->irq);
-	WARN_ON(msidesc->msi_attrib.multiple);
-	WARN_ON(msidesc->nvec_used);
 
 	irq = irq_alloc_hwirqs(nvec, dev_to_node(&dev->dev));
 	if (irq == 0)
 		return -ENOSPC;
 
 	nvec_pow2 = __roundup_pow_of_two(nvec);
-	msidesc->nvec_used = nvec;
-	msidesc->msi_attrib.multiple = ilog2(nvec_pow2);
 	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
 		if (!sub_handle) {
 			index = msi_alloc_remapped_irq(dev, irq, nvec_pow2);
@@ -96,8 +90,6 @@ error:
 	 * IRQs from tearing down again in default_teardown_msi_irqs()
 	 */
 	msidesc->irq = 0;
-	msidesc->nvec_used = 0;
-	msidesc->msi_attrib.multiple = 0;
 
 	return ret;
 }
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index e550ccb7634e..43429ab62228 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -18,9 +18,14 @@
  */
 
 #include <linux/export.h>
+#include <linux/iommu.h>
 #include <linux/limits.h>
 #include <linux/of.h>
 #include <linux/of_iommu.h>
+#include <linux/slab.h>
+
+static const struct of_device_id __iommu_of_table_sentinel
+	__used __section(__iommu_of_table_end);
 
 /**
  * of_get_dma_window - Parse *dma-window property and returns 0 if found.
@@ -89,3 +94,93 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_get_dma_window);
+
+struct of_iommu_node {
+	struct list_head list;
+	struct device_node *np;
+	struct iommu_ops *ops;
+};
+static LIST_HEAD(of_iommu_list);
+static DEFINE_SPINLOCK(of_iommu_lock);
+
+void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops)
+{
+	struct of_iommu_node *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+
+	if (WARN_ON(!iommu))
+		return;
+
+	INIT_LIST_HEAD(&iommu->list);
+	iommu->np = np;
+	iommu->ops = ops;
+	spin_lock(&of_iommu_lock);
+	list_add_tail(&iommu->list, &of_iommu_list);
+	spin_unlock(&of_iommu_lock);
+}
+
+struct iommu_ops *of_iommu_get_ops(struct device_node *np)
+{
+	struct of_iommu_node *node;
+	struct iommu_ops *ops = NULL;
+
+	spin_lock(&of_iommu_lock);
+	list_for_each_entry(node, &of_iommu_list, list)
+		if (node->np == np) {
+			ops = node->ops;
+			break;
+		}
+	spin_unlock(&of_iommu_lock);
+	return ops;
+}
+
+struct iommu_ops *of_iommu_configure(struct device *dev,
+				     struct device_node *master_np)
+{
+	struct of_phandle_args iommu_spec;
+	struct device_node *np;
+	struct iommu_ops *ops = NULL;
+	int idx = 0;
+
+	if (dev_is_pci(dev)) {
+		dev_err(dev, "IOMMU is currently not supported for PCI\n");
+		return NULL;
+	}
+
+	/*
+	 * We don't currently walk up the tree looking for a parent IOMMU.
+	 * See the `Notes:' section of
+	 * Documentation/devicetree/bindings/iommu/iommu.txt
+	 */
+	while (!of_parse_phandle_with_args(master_np, "iommus",
+					   "#iommu-cells", idx,
+					   &iommu_spec)) {
+		np = iommu_spec.np;
+		ops = of_iommu_get_ops(np);
+
+		if (!ops || !ops->of_xlate || ops->of_xlate(dev, &iommu_spec))
+			goto err_put_node;
+
+		of_node_put(np);
+		idx++;
+	}
+
+	return ops;
+
+err_put_node:
+	of_node_put(np);
+	return NULL;
+}
+
+void __init of_iommu_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *match, *matches = &__iommu_of_table;
+
+	for_each_matching_node_and_match(np, matches, &match) {
+		const of_iommu_init_fn init_fn = match->data;
+
+		if (init_fn(np))
+			pr_err("Failed to initialise IOMMU %s\n",
+				of_node_full_name(np));
+	}
+}
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index b21f12f1766d..e72e23960632 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -5,8 +5,15 @@ config IRQCHIP
 config ARM_GIC
 	bool
 	select IRQ_DOMAIN
+	select IRQ_DOMAIN_HIERARCHY
 	select MULTI_IRQ_HANDLER
 
+config ARM_GIC_V2M
+	bool
+	depends on ARM_GIC
+	depends on PCI && PCI_MSI
+	select PCI_MSI_IRQ_DOMAIN
+
 config GIC_NON_BANKED
 	bool
 
@@ -14,6 +21,11 @@ config ARM_GIC_V3
 	bool
 	select IRQ_DOMAIN
 	select MULTI_IRQ_HANDLER
+	select IRQ_DOMAIN_HIERARCHY
+
+config ARM_GIC_V3_ITS
+	bool
+	select PCI_MSI_IRQ_DOMAIN
 
 config ARM_NVIC
 	bool
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 173bb5fa2cc9..a64d861b4031 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -19,7 +19,9 @@ obj-$(CONFIG_ARCH_SUNXI)		+= irq-sun4i.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
 obj-$(CONFIG_ARM_GIC)			+= irq-gic.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V2M)		+= irq-gic-v2m.o
 obj-$(CONFIG_ARM_GIC_V3)		+= irq-gic-v3.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V3_ITS)		+= irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o
 obj-$(CONFIG_ARM_NVIC)			+= irq-nvic.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
 obj-$(CONFIG_ATMEL_AIC_IRQ)		+= irq-atmel-aic-common.o irq-atmel-aic.o
@@ -38,3 +40,4 @@ obj-$(CONFIG_IRQ_CROSSBAR)		+= irq-crossbar.o
 obj-$(CONFIG_BRCMSTB_L2_IRQ)		+= irq-brcmstb-l2.o \
 					   irq-bcm7120-l2.o
 obj-$(CONFIG_KEYSTONE_IRQ)		+= irq-keystone.o
+obj-$(CONFIG_ARCH_MEDIATEK)		+= irq-mtk-sysirq.o
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 41ac85af043e..615075db5fcf 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -131,7 +131,7 @@ static void armada_370_xp_free_msi(int hwirq)
 	mutex_unlock(&msi_used_lock);
 }
 
-static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
+static int armada_370_xp_setup_msi_irq(struct msi_controller *chip,
 				       struct pci_dev *pdev,
 				       struct msi_desc *desc)
 {
@@ -158,11 +158,11 @@ static int armada_370_xp_setup_msi_irq(struct msi_chip *chip,
 	msg.address_hi = 0;
 	msg.data = 0xf00 | (hwirq + 16);
 
-	write_msi_msg(virq, &msg);
+	pci_write_msi_msg(virq, &msg);
 	return 0;
 }
 
-static void armada_370_xp_teardown_msi_irq(struct msi_chip *chip,
+static void armada_370_xp_teardown_msi_irq(struct msi_controller *chip,
 					   unsigned int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
@@ -174,10 +174,10 @@ static void armada_370_xp_teardown_msi_irq(struct msi_chip *chip,
 
 static struct irq_chip armada_370_xp_msi_irq_chip = {
 	.name = "armada_370_xp_msi_irq",
-	.irq_enable = unmask_msi_irq,
-	.irq_disable = mask_msi_irq,
-	.irq_mask = mask_msi_irq,
-	.irq_unmask = unmask_msi_irq,
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
 };
 
 static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq,
@@ -197,7 +197,7 @@ static const struct irq_domain_ops armada_370_xp_msi_irq_ops = {
 static int armada_370_xp_msi_init(struct device_node *node,
 				  phys_addr_t main_int_phys_base)
 {
-	struct msi_chip *msi_chip;
+	struct msi_controller *msi_chip;
 	u32 reg;
 	int ret;
 
diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
index 9a2cf3c1a3a5..27fdd8c3e7b4 100644
--- a/drivers/irqchip/irq-atmel-aic.c
+++ b/drivers/irqchip/irq-atmel-aic.c
@@ -65,11 +65,11 @@ aic_handle(struct pt_regs *regs)
 	u32 irqnr;
 	u32 irqstat;
 
-	irqnr = irq_reg_readl(gc->reg_base + AT91_AIC_IVR);
-	irqstat = irq_reg_readl(gc->reg_base + AT91_AIC_ISR);
+	irqnr = irq_reg_readl(gc, AT91_AIC_IVR);
+	irqstat = irq_reg_readl(gc, AT91_AIC_ISR);
 
 	if (!irqstat)
-		irq_reg_writel(0, gc->reg_base + AT91_AIC_EOICR);
+		irq_reg_writel(gc, 0, AT91_AIC_EOICR);
 	else
 		handle_domain_irq(aic_domain, irqnr, regs);
 }
@@ -80,7 +80,7 @@ static int aic_retrigger(struct irq_data *d)
 
 	/* Enable interrupt on AIC5 */
 	irq_gc_lock(gc);
-	irq_reg_writel(d->mask, gc->reg_base + AT91_AIC_ISCR);
+	irq_reg_writel(gc, d->mask, AT91_AIC_ISCR);
 	irq_gc_unlock(gc);
 
 	return 0;
@@ -92,12 +92,12 @@ static int aic_set_type(struct irq_data *d, unsigned type)
 	unsigned int smr;
 	int ret;
 
-	smr = irq_reg_readl(gc->reg_base + AT91_AIC_SMR(d->hwirq));
+	smr = irq_reg_readl(gc, AT91_AIC_SMR(d->hwirq));
 	ret = aic_common_set_type(d, type, &smr);
 	if (ret)
 		return ret;
 
-	irq_reg_writel(smr, gc->reg_base + AT91_AIC_SMR(d->hwirq));
+	irq_reg_writel(gc, smr, AT91_AIC_SMR(d->hwirq));
 
 	return 0;
 }
@@ -108,8 +108,8 @@ static void aic_suspend(struct irq_data *d)
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 
 	irq_gc_lock(gc);
-	irq_reg_writel(gc->mask_cache, gc->reg_base + AT91_AIC_IDCR);
-	irq_reg_writel(gc->wake_active, gc->reg_base + AT91_AIC_IECR);
+	irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IDCR);
+	irq_reg_writel(gc, gc->wake_active, AT91_AIC_IECR);
 	irq_gc_unlock(gc);
 }
 
@@ -118,8 +118,8 @@ static void aic_resume(struct irq_data *d)
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 
 	irq_gc_lock(gc);
-	irq_reg_writel(gc->wake_active, gc->reg_base + AT91_AIC_IDCR);
-	irq_reg_writel(gc->mask_cache, gc->reg_base + AT91_AIC_IECR);
+	irq_reg_writel(gc, gc->wake_active, AT91_AIC_IDCR);
+	irq_reg_writel(gc, gc->mask_cache, AT91_AIC_IECR);
 	irq_gc_unlock(gc);
 }
 
@@ -128,8 +128,8 @@ static void aic_pm_shutdown(struct irq_data *d)
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 
 	irq_gc_lock(gc);
-	irq_reg_writel(0xffffffff, gc->reg_base + AT91_AIC_IDCR);
-	irq_reg_writel(0xffffffff, gc->reg_base + AT91_AIC_ICCR);
+	irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR);
+	irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR);
 	irq_gc_unlock(gc);
 }
 #else
@@ -148,24 +148,24 @@ static void __init aic_hw_init(struct irq_domain *domain)
 	 * will not Lock out nIRQ
 	 */
 	for (i = 0; i < 8; i++)
-		irq_reg_writel(0, gc->reg_base + AT91_AIC_EOICR);
+		irq_reg_writel(gc, 0, AT91_AIC_EOICR);
 
 	/*
 	 * Spurious Interrupt ID in Spurious Vector Register.
 	 * When there is no current interrupt, the IRQ Vector Register
 	 * reads the value stored in AIC_SPU
 	 */
-	irq_reg_writel(0xffffffff, gc->reg_base + AT91_AIC_SPU);
+	irq_reg_writel(gc, 0xffffffff, AT91_AIC_SPU);
 
 	/* No debugging in AIC: Debug (Protect) Control Register */
-	irq_reg_writel(0, gc->reg_base + AT91_AIC_DCR);
+	irq_reg_writel(gc, 0, AT91_AIC_DCR);
 
 	/* Disable and clear all interrupts initially */
-	irq_reg_writel(0xffffffff, gc->reg_base + AT91_AIC_IDCR);
-	irq_reg_writel(0xffffffff, gc->reg_base + AT91_AIC_ICCR);
+	irq_reg_writel(gc, 0xffffffff, AT91_AIC_IDCR);
+	irq_reg_writel(gc, 0xffffffff, AT91_AIC_ICCR);
 
 	for (i = 0; i < 32; i++)
-		irq_reg_writel(i, gc->reg_base + AT91_AIC_SVR(i));
+		irq_reg_writel(gc, i, AT91_AIC_SVR(i));
 }
 
 static int aic_irq_domain_xlate(struct irq_domain *d,
@@ -195,10 +195,10 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
 	gc = dgc->gc[idx];
 
 	irq_gc_lock(gc);
-	smr = irq_reg_readl(gc->reg_base + AT91_AIC_SMR(*out_hwirq));
+	smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq));
 	ret = aic_common_set_priority(intspec[2], &smr);
 	if (!ret)
-		irq_reg_writel(smr, gc->reg_base + AT91_AIC_SMR(*out_hwirq));
+		irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
 	irq_gc_unlock(gc);
 
 	return ret;
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index a11aae8fb006..a2e8c3f876cb 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c
@@ -75,11 +75,11 @@ aic5_handle(struct pt_regs *regs)
 	u32 irqnr;
 	u32 irqstat;
 
-	irqnr = irq_reg_readl(gc->reg_base + AT91_AIC5_IVR);
-	irqstat = irq_reg_readl(gc->reg_base + AT91_AIC5_ISR);
+	irqnr = irq_reg_readl(gc, AT91_AIC5_IVR);
+	irqstat = irq_reg_readl(gc, AT91_AIC5_ISR);
 
 	if (!irqstat)
-		irq_reg_writel(0, gc->reg_base + AT91_AIC5_EOICR);
+		irq_reg_writel(gc, 0, AT91_AIC5_EOICR);
 	else
 		handle_domain_irq(aic5_domain, irqnr, regs);
 }
@@ -92,8 +92,8 @@ static void aic5_mask(struct irq_data *d)
 
 	/* Disable interrupt on AIC5 */
 	irq_gc_lock(gc);
-	irq_reg_writel(d->hwirq, gc->reg_base + AT91_AIC5_SSR);
-	irq_reg_writel(1, gc->reg_base + AT91_AIC5_IDCR);
+	irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR);
+	irq_reg_writel(gc, 1, AT91_AIC5_IDCR);
 	gc->mask_cache &= ~d->mask;
 	irq_gc_unlock(gc);
 }
@@ -106,8 +106,8 @@ static void aic5_unmask(struct irq_data *d)
 
 	/* Enable interrupt on AIC5 */
 	irq_gc_lock(gc);
-	irq_reg_writel(d->hwirq, gc->reg_base + AT91_AIC5_SSR);
-	irq_reg_writel(1, gc->reg_base + AT91_AIC5_IECR);
+	irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR);
+	irq_reg_writel(gc, 1, AT91_AIC5_IECR);
 	gc->mask_cache |= d->mask;
 	irq_gc_unlock(gc);
 }
@@ -120,8 +120,8 @@ static int aic5_retrigger(struct irq_data *d)
 
 	/* Enable interrupt on AIC5 */
 	irq_gc_lock(gc);
-	irq_reg_writel(d->hwirq, gc->reg_base + AT91_AIC5_SSR);
-	irq_reg_writel(1, gc->reg_base + AT91_AIC5_ISCR);
+	irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR);
+	irq_reg_writel(gc, 1, AT91_AIC5_ISCR);
 	irq_gc_unlock(gc);
 
 	return 0;
@@ -136,11 +136,11 @@ static int aic5_set_type(struct irq_data *d, unsigned type)
 	int ret;
 
 	irq_gc_lock(gc);
-	irq_reg_writel(d->hwirq, gc->reg_base + AT91_AIC5_SSR);
-	smr = irq_reg_readl(gc->reg_base + AT91_AIC5_SMR);
+	irq_reg_writel(gc, d->hwirq, AT91_AIC5_SSR);
+	smr = irq_reg_readl(gc, AT91_AIC5_SMR);
 	ret = aic_common_set_type(d, type, &smr);
 	if (!ret)
-		irq_reg_writel(smr, gc->reg_base + AT91_AIC5_SMR);
+		irq_reg_writel(gc, smr, AT91_AIC5_SMR);
 	irq_gc_unlock(gc);
 
 	return ret;
@@ -162,12 +162,11 @@ static void aic5_suspend(struct irq_data *d)
 		if ((mask & gc->mask_cache) == (mask & gc->wake_active))
 			continue;
 
-		irq_reg_writel(i + gc->irq_base,
-			       bgc->reg_base + AT91_AIC5_SSR);
+		irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR);
 		if (mask & gc->wake_active)
-			irq_reg_writel(1, bgc->reg_base + AT91_AIC5_IECR);
+			irq_reg_writel(bgc, 1, AT91_AIC5_IECR);
 		else
-			irq_reg_writel(1, bgc->reg_base + AT91_AIC5_IDCR);
+			irq_reg_writel(bgc, 1, AT91_AIC5_IDCR);
 	}
 	irq_gc_unlock(bgc);
 }
@@ -187,12 +186,11 @@ static void aic5_resume(struct irq_data *d)
 		if ((mask & gc->mask_cache) == (mask & gc->wake_active))
 			continue;
 
-		irq_reg_writel(i + gc->irq_base,
-			       bgc->reg_base + AT91_AIC5_SSR);
+		irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR);
 		if (mask & gc->mask_cache)
-			irq_reg_writel(1, bgc->reg_base + AT91_AIC5_IECR);
+			irq_reg_writel(bgc, 1, AT91_AIC5_IECR);
 		else
-			irq_reg_writel(1, bgc->reg_base + AT91_AIC5_IDCR);
+			irq_reg_writel(bgc, 1, AT91_AIC5_IDCR);
 	}
 	irq_gc_unlock(bgc);
 }
@@ -207,10 +205,9 @@ static void aic5_pm_shutdown(struct irq_data *d)
 
 	irq_gc_lock(bgc);
 	for (i = 0; i < dgc->irqs_per_chip; i++) {
-		irq_reg_writel(i + gc->irq_base,
-			       bgc->reg_base + AT91_AIC5_SSR);
-		irq_reg_writel(1, bgc->reg_base + AT91_AIC5_IDCR);
-		irq_reg_writel(1, bgc->reg_base + AT91_AIC5_ICCR);
+		irq_reg_writel(bgc, i + gc->irq_base, AT91_AIC5_SSR);
+		irq_reg_writel(bgc, 1, AT91_AIC5_IDCR);
+		irq_reg_writel(bgc, 1, AT91_AIC5_ICCR);
 	}
 	irq_gc_unlock(bgc);
 }
@@ -230,24 +227,24 @@ static void __init aic5_hw_init(struct irq_domain *domain)
 	 * will not Lock out nIRQ
 	 */
 	for (i = 0; i < 8; i++)
-		irq_reg_writel(0, gc->reg_base + AT91_AIC5_EOICR);
+		irq_reg_writel(gc, 0, AT91_AIC5_EOICR);
 
 	/*
 	 * Spurious Interrupt ID in Spurious Vector Register.
 	 * When there is no current interrupt, the IRQ Vector Register
 	 * reads the value stored in AIC_SPU
 	 */
-	irq_reg_writel(0xffffffff, gc->reg_base + AT91_AIC5_SPU);
+	irq_reg_writel(gc, 0xffffffff, AT91_AIC5_SPU);
 
 	/* No debugging in AIC: Debug (Protect) Control Register */
-	irq_reg_writel(0, gc->reg_base + AT91_AIC5_DCR);
+	irq_reg_writel(gc, 0, AT91_AIC5_DCR);
 
 	/* Disable and clear all interrupts initially */
 	for (i = 0; i < domain->revmap_size; i++) {
-		irq_reg_writel(i, gc->reg_base + AT91_AIC5_SSR);
-		irq_reg_writel(i, gc->reg_base + AT91_AIC5_SVR);
-		irq_reg_writel(1, gc->reg_base + AT91_AIC5_IDCR);
-		irq_reg_writel(1, gc->reg_base + AT91_AIC5_ICCR);
+		irq_reg_writel(gc, i, AT91_AIC5_SSR);
+		irq_reg_writel(gc, i, AT91_AIC5_SVR);
+		irq_reg_writel(gc, 1, AT91_AIC5_IDCR);
+		irq_reg_writel(gc, 1, AT91_AIC5_ICCR);
 	}
 }
 
@@ -273,11 +270,11 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
 	gc = dgc->gc[0];
 
 	irq_gc_lock(gc);
-	irq_reg_writel(*out_hwirq, gc->reg_base + AT91_AIC5_SSR);
-	smr = irq_reg_readl(gc->reg_base + AT91_AIC5_SMR);
+	irq_reg_writel(gc, *out_hwirq, AT91_AIC5_SSR);
+	smr = irq_reg_readl(gc, AT91_AIC5_SMR);
 	ret = aic_common_set_priority(intspec[2], &smr);
 	if (!ret)
-		irq_reg_writel(intspec[2] | smr, gc->reg_base + AT91_AIC5_SMR);
+		irq_reg_writel(gc, intspec[2] | smr, AT91_AIC5_SMR);
 	irq_gc_unlock(gc);
 
 	return ret;
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 61541ff24397..ad96ebb0c7ab 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,7 +21,7 @@
 
 #include "irq-gic-common.h"
 
-void gic_configure_irq(unsigned int irq, unsigned int type,
+int gic_configure_irq(unsigned int irq, unsigned int type,
 		       void __iomem *base, void (*sync_access)(void))
 {
 	u32 enablemask = 1 << (irq % 32);
@@ -29,16 +29,17 @@ void gic_configure_irq(unsigned int irq, unsigned int type,
 	u32 confmask = 0x2 << ((irq % 16) * 2);
 	u32 confoff = (irq / 16) * 4;
 	bool enabled = false;
-	u32 val;
+	u32 val, oldval;
+	int ret = 0;
 
 	/*
 	 * Read current configuration register, and insert the config
 	 * for "irq", depending on "type".
 	 */
-	val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
-	if (type == IRQ_TYPE_LEVEL_HIGH)
+	val = oldval = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
+	if (type & IRQ_TYPE_LEVEL_MASK)
 		val &= ~confmask;
-	else if (type == IRQ_TYPE_EDGE_RISING)
+	else if (type & IRQ_TYPE_EDGE_BOTH)
 		val |= confmask;
 
 	/*
@@ -54,15 +55,20 @@ void gic_configure_irq(unsigned int irq, unsigned int type,
 
 	/*
 	 * Write back the new configuration, and possibly re-enable
-	 * the interrupt.
+	 * the interrupt. If we tried to write a new configuration and failed,
+	 * return an error.
 	 */
 	writel_relaxed(val, base + GIC_DIST_CONFIG + confoff);
+	if (readl_relaxed(base + GIC_DIST_CONFIG + confoff) != val && val != oldval)
+		ret = -EINVAL;
 
 	if (enabled)
 		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
 
 	if (sync_access)
 		sync_access();
+
+	return ret;
 }
 
 void __init gic_dist_config(void __iomem *base, int gic_irqs,
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index b41f02481c3a..35a9884778bd 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -20,7 +20,7 @@
 #include <linux/of.h>
 #include <linux/irqdomain.h>
 
-void gic_configure_irq(unsigned int irq, unsigned int type,
+int gic_configure_irq(unsigned int irq, unsigned int type,
                        void __iomem *base, void (*sync_access)(void));
 void gic_dist_config(void __iomem *base, int gic_irqs,
 		     void (*sync_access)(void));
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
new file mode 100644
index 000000000000..ec9c37674a0b
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -0,0 +1,326 @@
+/*
+ * ARM GIC v2m MSI(-X) support
+ * Support for Message Signaled Interrupts for systems that
+ * implement ARM Generic Interrupt Controller: GICv2m.
+ *
+ * Copyright (C) 2014 Advanced Micro Devices, Inc.
+ * Authors: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ *	    Harish Kasiviswanathan <harish.kasiviswanathan@amd.com>
+ *	    Brandon Anderson <brandon.anderson@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "GICv2m: " fmt
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/*
+* MSI_TYPER:
+*     [31:26] Reserved
+*     [25:16] lowest SPI assigned to MSI
+*     [15:10] Reserved
+*     [9:0]   Numer of SPIs assigned to MSI
+*/
+#define V2M_MSI_TYPER		       0x008
+#define V2M_MSI_TYPER_BASE_SHIFT       16
+#define V2M_MSI_TYPER_BASE_MASK	       0x3FF
+#define V2M_MSI_TYPER_NUM_MASK	       0x3FF
+#define V2M_MSI_SETSPI_NS	       0x040
+#define V2M_MIN_SPI		       32
+#define V2M_MAX_SPI		       1019
+
+#define V2M_MSI_TYPER_BASE_SPI(x)      \
+	       (((x) >> V2M_MSI_TYPER_BASE_SHIFT) & V2M_MSI_TYPER_BASE_MASK)
+
+#define V2M_MSI_TYPER_NUM_SPI(x)       ((x) & V2M_MSI_TYPER_NUM_MASK)
+
+struct v2m_data {
+	spinlock_t msi_cnt_lock;
+	struct resource res;	/* GICv2m resource */
+	void __iomem *base;	/* GICv2m virt address */
+	u32 spi_start;		/* The SPI number that MSIs start */
+	u32 nr_spis;		/* The number of SPIs for MSIs */
+	unsigned long *bm;	/* MSI vector bitmap */
+	struct irq_domain *domain;
+};
+
+static void gicv2m_mask_msi_irq(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void gicv2m_unmask_msi_irq(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip gicv2m_msi_irq_chip = {
+	.name			= "MSI",
+	.irq_mask		= gicv2m_mask_msi_irq,
+	.irq_unmask		= gicv2m_unmask_msi_irq,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+};
+
+static struct msi_domain_info gicv2m_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX),
+	.chip	= &gicv2m_msi_irq_chip,
+};
+
+static int gicv2m_set_affinity(struct irq_data *irq_data,
+			       const struct cpumask *mask, bool force)
+{
+	int ret;
+
+	ret = irq_chip_set_affinity_parent(irq_data, mask, force);
+	if (ret == IRQ_SET_MASK_OK)
+		ret = IRQ_SET_MASK_OK_DONE;
+
+	return ret;
+}
+
+static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct v2m_data *v2m = irq_data_get_irq_chip_data(data);
+	phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS;
+
+	msg->address_hi = (u32) (addr >> 32);
+	msg->address_lo = (u32) (addr);
+	msg->data = data->hwirq;
+}
+
+static struct irq_chip gicv2m_irq_chip = {
+	.name			= "GICv2m",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= gicv2m_set_affinity,
+	.irq_compose_msi_msg	= gicv2m_compose_msi_msg,
+};
+
+static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq,
+				       irq_hw_number_t hwirq)
+{
+	struct of_phandle_args args;
+	struct irq_data *d;
+	int err;
+
+	args.np = domain->parent->of_node;
+	args.args_count = 3;
+	args.args[0] = 0;
+	args.args[1] = hwirq - 32;
+	args.args[2] = IRQ_TYPE_EDGE_RISING;
+
+	err = irq_domain_alloc_irqs_parent(domain, virq, 1, &args);
+	if (err)
+		return err;
+
+	/* Configure the interrupt line to be edge */
+	d = irq_domain_get_irq_data(domain->parent, virq);
+	d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+	return 0;
+}
+
+static void gicv2m_unalloc_msi(struct v2m_data *v2m, unsigned int hwirq)
+{
+	int pos;
+
+	pos = hwirq - v2m->spi_start;
+	if (pos < 0 || pos >= v2m->nr_spis) {
+		pr_err("Failed to teardown msi. Invalid hwirq %d\n", hwirq);
+		return;
+	}
+
+	spin_lock(&v2m->msi_cnt_lock);
+	__clear_bit(pos, v2m->bm);
+	spin_unlock(&v2m->msi_cnt_lock);
+}
+
+static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs, void *args)
+{
+	struct v2m_data *v2m = domain->host_data;
+	int hwirq, offset, err = 0;
+
+	spin_lock(&v2m->msi_cnt_lock);
+	offset = find_first_zero_bit(v2m->bm, v2m->nr_spis);
+	if (offset < v2m->nr_spis)
+		__set_bit(offset, v2m->bm);
+	else
+		err = -ENOSPC;
+	spin_unlock(&v2m->msi_cnt_lock);
+
+	if (err)
+		return err;
+
+	hwirq = v2m->spi_start + offset;
+
+	err = gicv2m_irq_gic_domain_alloc(domain, virq, hwirq);
+	if (err) {
+		gicv2m_unalloc_msi(v2m, hwirq);
+		return err;
+	}
+
+	irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+				      &gicv2m_irq_chip, v2m);
+
+	return 0;
+}
+
+static void gicv2m_irq_domain_free(struct irq_domain *domain,
+				   unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct v2m_data *v2m = irq_data_get_irq_chip_data(d);
+
+	BUG_ON(nr_irqs != 1);
+	gicv2m_unalloc_msi(v2m, d->hwirq);
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops gicv2m_domain_ops = {
+	.alloc			= gicv2m_irq_domain_alloc,
+	.free			= gicv2m_irq_domain_free,
+};
+
+static bool is_msi_spi_valid(u32 base, u32 num)
+{
+	if (base < V2M_MIN_SPI) {
+		pr_err("Invalid MSI base SPI (base:%u)\n", base);
+		return false;
+	}
+
+	if ((num == 0) || (base + num > V2M_MAX_SPI)) {
+		pr_err("Number of SPIs (%u) exceed maximum (%u)\n",
+		       num, V2M_MAX_SPI - V2M_MIN_SPI + 1);
+		return false;
+	}
+
+	return true;
+}
+
+static int __init gicv2m_init_one(struct device_node *node,
+				  struct irq_domain *parent)
+{
+	int ret;
+	struct v2m_data *v2m;
+	struct irq_domain *inner_domain;
+
+	v2m = kzalloc(sizeof(struct v2m_data), GFP_KERNEL);
+	if (!v2m) {
+		pr_err("Failed to allocate struct v2m_data.\n");
+		return -ENOMEM;
+	}
+
+	ret = of_address_to_resource(node, 0, &v2m->res);
+	if (ret) {
+		pr_err("Failed to allocate v2m resource.\n");
+		goto err_free_v2m;
+	}
+
+	v2m->base = ioremap(v2m->res.start, resource_size(&v2m->res));
+	if (!v2m->base) {
+		pr_err("Failed to map GICv2m resource\n");
+		ret = -ENOMEM;
+		goto err_free_v2m;
+	}
+
+	if (!of_property_read_u32(node, "arm,msi-base-spi", &v2m->spi_start) &&
+	    !of_property_read_u32(node, "arm,msi-num-spis", &v2m->nr_spis)) {
+		pr_info("Overriding V2M MSI_TYPER (base:%u, num:%u)\n",
+			v2m->spi_start, v2m->nr_spis);
+	} else {
+		u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
+
+		v2m->spi_start = V2M_MSI_TYPER_BASE_SPI(typer);
+		v2m->nr_spis = V2M_MSI_TYPER_NUM_SPI(typer);
+	}
+
+	if (!is_msi_spi_valid(v2m->spi_start, v2m->nr_spis)) {
+		ret = -EINVAL;
+		goto err_iounmap;
+	}
+
+	v2m->bm = kzalloc(sizeof(long) * BITS_TO_LONGS(v2m->nr_spis),
+			  GFP_KERNEL);
+	if (!v2m->bm) {
+		ret = -ENOMEM;
+		goto err_iounmap;
+	}
+
+	inner_domain = irq_domain_add_tree(node, &gicv2m_domain_ops, v2m);
+	if (!inner_domain) {
+		pr_err("Failed to create GICv2m domain\n");
+		ret = -ENOMEM;
+		goto err_free_bm;
+	}
+
+	inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+	inner_domain->parent = parent;
+	v2m->domain = pci_msi_create_irq_domain(node, &gicv2m_msi_domain_info,
+						inner_domain);
+	if (!v2m->domain) {
+		pr_err("Failed to create MSI domain\n");
+		ret = -ENOMEM;
+		goto err_free_domains;
+	}
+
+	spin_lock_init(&v2m->msi_cnt_lock);
+
+	pr_info("Node %s: range[%#lx:%#lx], SPI[%d:%d]\n", node->name,
+		(unsigned long)v2m->res.start, (unsigned long)v2m->res.end,
+		v2m->spi_start, (v2m->spi_start + v2m->nr_spis));
+
+	return 0;
+
+err_free_domains:
+	if (v2m->domain)
+		irq_domain_remove(v2m->domain);
+	if (inner_domain)
+		irq_domain_remove(inner_domain);
+err_free_bm:
+	kfree(v2m->bm);
+err_iounmap:
+	iounmap(v2m->base);
+err_free_v2m:
+	kfree(v2m);
+	return ret;
+}
+
+static struct of_device_id gicv2m_device_id[] = {
+	{	.compatible	= "arm,gic-v2m-frame",	},
+	{},
+};
+
+int __init gicv2m_of_init(struct device_node *node, struct irq_domain *parent)
+{
+	int ret = 0;
+	struct device_node *child;
+
+	for (child = of_find_matching_node(node, gicv2m_device_id); child;
+	     child = of_find_matching_node(child, gicv2m_device_id)) {
+		if (!of_find_property(child, "msi-controller", NULL))
+			continue;
+
+		ret = gicv2m_init_one(child, parent);
+		if (ret) {
+			of_node_put(node);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
new file mode 100644
index 000000000000..cf351c637464
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+
+static void its_mask_msi_irq(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void its_unmask_msi_irq(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip its_msi_irq_chip = {
+	.name			= "ITS-MSI",
+	.irq_unmask		= its_unmask_msi_irq,
+	.irq_mask		= its_mask_msi_irq,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+};
+
+struct its_pci_alias {
+	struct pci_dev	*pdev;
+	u32		dev_id;
+	u32		count;
+};
+
+static int its_pci_msi_vec_count(struct pci_dev *pdev)
+{
+	int msi, msix;
+
+	msi = max(pci_msi_vec_count(pdev), 0);
+	msix = max(pci_msix_vec_count(pdev), 0);
+
+	return max(msi, msix);
+}
+
+static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+	struct its_pci_alias *dev_alias = data;
+
+	dev_alias->dev_id = alias;
+	if (pdev != dev_alias->pdev)
+		dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev);
+
+	return 0;
+}
+
+static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
+			       int nvec, msi_alloc_info_t *info)
+{
+	struct pci_dev *pdev;
+	struct its_pci_alias dev_alias;
+	struct msi_domain_info *msi_info;
+
+	if (!dev_is_pci(dev))
+		return -EINVAL;
+
+	msi_info = msi_get_domain_info(domain->parent);
+
+	pdev = to_pci_dev(dev);
+	dev_alias.pdev = pdev;
+	dev_alias.count = nvec;
+
+	pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
+
+	/* ITS specific DeviceID, as the core ITS ignores dev. */
+	info->scratchpad[0].ul = dev_alias.dev_id;
+
+	return msi_info->ops->msi_prepare(domain->parent,
+					  dev, dev_alias.count, info);
+}
+
+static struct msi_domain_ops its_pci_msi_ops = {
+	.msi_prepare	= its_pci_msi_prepare,
+};
+
+static struct msi_domain_info its_pci_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+	.ops	= &its_pci_msi_ops,
+	.chip	= &its_msi_irq_chip,
+};
+
+static struct of_device_id its_device_id[] = {
+	{	.compatible	= "arm,gic-v3-its",	},
+	{},
+};
+
+static int __init its_pci_msi_init(void)
+{
+	struct device_node *np;
+	struct irq_domain *parent;
+
+	for (np = of_find_matching_node(NULL, its_device_id); np;
+	     np = of_find_matching_node(np, its_device_id)) {
+		if (!of_property_read_bool(np, "msi-controller"))
+			continue;
+
+		parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
+		if (!parent || !msi_get_domain_info(parent)) {
+			pr_err("%s: unable to locate ITS domain\n",
+			       np->full_name);
+			continue;
+		}
+
+		if (!pci_msi_create_irq_domain(np, &its_pci_msi_domain_info,
+					       parent)) {
+			pr_err("%s: unable to create PCI domain\n",
+			       np->full_name);
+			continue;
+		}
+
+		pr_info("PCI/MSI: %s domain created\n", np->full_name);
+	}
+
+	return 0;
+}
+early_initcall(its_pci_msi_init);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
new file mode 100644
index 000000000000..78db25293384
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -0,0 +1,1555 @@
+/*
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/log2.h>
+#include <linux/mm.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/exception.h>
+
+#include "irqchip.h"
+
+#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING		(1 << 0)
+
+#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)
+
+/*
+ * Collection structure - just an ID, and a redistributor address to
+ * ping. We use one per CPU as a bag of interrupts assigned to this
+ * CPU.
+ */
+struct its_collection {
+	u64			target_address;
+	u16			col_id;
+};
+
+/*
+ * The ITS structure - contains most of the infrastructure, with the
+ * top-level MSI domain, the command queue, the collections, and the
+ * list of devices writing to it.
+ */
+struct its_node {
+	raw_spinlock_t		lock;
+	struct list_head	entry;
+	void __iomem		*base;
+	unsigned long		phys_base;
+	struct its_cmd_block	*cmd_base;
+	struct its_cmd_block	*cmd_write;
+	void			*tables[GITS_BASER_NR_REGS];
+	struct its_collection	*collections;
+	struct list_head	its_device_list;
+	u64			flags;
+	u32			ite_size;
+};
+
+#define ITS_ITT_ALIGN		SZ_256
+
+struct event_lpi_map {
+	unsigned long		*lpi_map;
+	u16			*col_map;
+	irq_hw_number_t		lpi_base;
+	int			nr_lpis;
+};
+
+/*
+ * The ITS view of a device - belongs to an ITS, a collection, owns an
+ * interrupt translation table, and a list of interrupts.
+ */
+struct its_device {
+	struct list_head	entry;
+	struct its_node		*its;
+	struct event_lpi_map	event_map;
+	void			*itt;
+	u32			nr_ites;
+	u32			device_id;
+};
+
+static LIST_HEAD(its_nodes);
+static DEFINE_SPINLOCK(its_lock);
+static struct device_node *gic_root_node;
+static struct rdists *gic_rdists;
+
+#define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
+#define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
+
+static struct its_collection *dev_event_to_col(struct its_device *its_dev,
+					       u32 event)
+{
+	struct its_node *its = its_dev->its;
+
+	return its->collections + its_dev->event_map.col_map[event];
+}
+
+/*
+ * ITS command descriptors - parameters to be encoded in a command
+ * block.
+ */
+struct its_cmd_desc {
+	union {
+		struct {
+			struct its_device *dev;
+			u32 event_id;
+		} its_inv_cmd;
+
+		struct {
+			struct its_device *dev;
+			u32 event_id;
+		} its_int_cmd;
+
+		struct {
+			struct its_device *dev;
+			int valid;
+		} its_mapd_cmd;
+
+		struct {
+			struct its_collection *col;
+			int valid;
+		} its_mapc_cmd;
+
+		struct {
+			struct its_device *dev;
+			u32 phys_id;
+			u32 event_id;
+		} its_mapvi_cmd;
+
+		struct {
+			struct its_device *dev;
+			struct its_collection *col;
+			u32 event_id;
+		} its_movi_cmd;
+
+		struct {
+			struct its_device *dev;
+			u32 event_id;
+		} its_discard_cmd;
+
+		struct {
+			struct its_collection *col;
+		} its_invall_cmd;
+	};
+};
+
+/*
+ * The ITS command block, which is what the ITS actually parses.
+ */
+struct its_cmd_block {
+	u64	raw_cmd[4];
+};
+
+#define ITS_CMD_QUEUE_SZ		SZ_64K
+#define ITS_CMD_QUEUE_NR_ENTRIES	(ITS_CMD_QUEUE_SZ / sizeof(struct its_cmd_block))
+
+typedef struct its_collection *(*its_cmd_builder_t)(struct its_cmd_block *,
+						    struct its_cmd_desc *);
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+	cmd->raw_cmd[0] &= ~0xffUL;
+	cmd->raw_cmd[0] |= cmd_nr;
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+	cmd->raw_cmd[0] &= BIT_ULL(32) - 1;
+	cmd->raw_cmd[0] |= ((u64)devid) << 32;
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+	cmd->raw_cmd[1] &= ~0xffffffffUL;
+	cmd->raw_cmd[1] |= id;
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+	cmd->raw_cmd[1] &= 0xffffffffUL;
+	cmd->raw_cmd[1] |= ((u64)phys_id) << 32;
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+	cmd->raw_cmd[1] &= ~0x1fUL;
+	cmd->raw_cmd[1] |= size & 0x1f;
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+	cmd->raw_cmd[2] &= ~0xffffffffffffUL;
+	cmd->raw_cmd[2] |= itt_addr & 0xffffffffff00UL;
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+	cmd->raw_cmd[2] &= ~(1UL << 63);
+	cmd->raw_cmd[2] |= ((u64)!!valid) << 63;
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+	cmd->raw_cmd[2] &= ~(0xffffffffUL << 16);
+	cmd->raw_cmd[2] |= (target_addr & (0xffffffffUL << 16));
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+	cmd->raw_cmd[2] &= ~0xffffUL;
+	cmd->raw_cmd[2] |= col;
+}
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+	/* Let's fixup BE commands */
+	cmd->raw_cmd[0] = cpu_to_le64(cmd->raw_cmd[0]);
+	cmd->raw_cmd[1] = cpu_to_le64(cmd->raw_cmd[1]);
+	cmd->raw_cmd[2] = cpu_to_le64(cmd->raw_cmd[2]);
+	cmd->raw_cmd[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static struct its_collection *its_build_mapd_cmd(struct its_cmd_block *cmd,
+						 struct its_cmd_desc *desc)
+{
+	unsigned long itt_addr;
+	u8 size = ilog2(desc->its_mapd_cmd.dev->nr_ites);
+
+	itt_addr = virt_to_phys(desc->its_mapd_cmd.dev->itt);
+	itt_addr = ALIGN(itt_addr, ITS_ITT_ALIGN);
+
+	its_encode_cmd(cmd, GITS_CMD_MAPD);
+	its_encode_devid(cmd, desc->its_mapd_cmd.dev->device_id);
+	its_encode_size(cmd, size - 1);
+	its_encode_itt(cmd, itt_addr);
+	its_encode_valid(cmd, desc->its_mapd_cmd.valid);
+
+	its_fixup_cmd(cmd);
+
+	return NULL;
+}
+
+static struct its_collection *its_build_mapc_cmd(struct its_cmd_block *cmd,
+						 struct its_cmd_desc *desc)
+{
+	its_encode_cmd(cmd, GITS_CMD_MAPC);
+	its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id);
+	its_encode_target(cmd, desc->its_mapc_cmd.col->target_address);
+	its_encode_valid(cmd, desc->its_mapc_cmd.valid);
+
+	its_fixup_cmd(cmd);
+
+	return desc->its_mapc_cmd.col;
+}
+
+static struct its_collection *its_build_mapvi_cmd(struct its_cmd_block *cmd,
+						  struct its_cmd_desc *desc)
+{
+	struct its_collection *col;
+
+	col = dev_event_to_col(desc->its_mapvi_cmd.dev,
+			       desc->its_mapvi_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_MAPVI);
+	its_encode_devid(cmd, desc->its_mapvi_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_mapvi_cmd.event_id);
+	its_encode_phys_id(cmd, desc->its_mapvi_cmd.phys_id);
+	its_encode_collection(cmd, col->col_id);
+
+	its_fixup_cmd(cmd);
+
+	return col;
+}
+
+static struct its_collection *its_build_movi_cmd(struct its_cmd_block *cmd,
+						 struct its_cmd_desc *desc)
+{
+	struct its_collection *col;
+
+	col = dev_event_to_col(desc->its_movi_cmd.dev,
+			       desc->its_movi_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_MOVI);
+	its_encode_devid(cmd, desc->its_movi_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_movi_cmd.event_id);
+	its_encode_collection(cmd, desc->its_movi_cmd.col->col_id);
+
+	its_fixup_cmd(cmd);
+
+	return col;
+}
+
+static struct its_collection *its_build_discard_cmd(struct its_cmd_block *cmd,
+						    struct its_cmd_desc *desc)
+{
+	struct its_collection *col;
+
+	col = dev_event_to_col(desc->its_discard_cmd.dev,
+			       desc->its_discard_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_DISCARD);
+	its_encode_devid(cmd, desc->its_discard_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_discard_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return col;
+}
+
+static struct its_collection *its_build_inv_cmd(struct its_cmd_block *cmd,
+						struct its_cmd_desc *desc)
+{
+	struct its_collection *col;
+
+	col = dev_event_to_col(desc->its_inv_cmd.dev,
+			       desc->its_inv_cmd.event_id);
+
+	its_encode_cmd(cmd, GITS_CMD_INV);
+	its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id);
+	its_encode_event_id(cmd, desc->its_inv_cmd.event_id);
+
+	its_fixup_cmd(cmd);
+
+	return col;
+}
+
+static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd,
+						   struct its_cmd_desc *desc)
+{
+	its_encode_cmd(cmd, GITS_CMD_INVALL);
+	its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id);
+
+	its_fixup_cmd(cmd);
+
+	return NULL;
+}
+
+static u64 its_cmd_ptr_to_offset(struct its_node *its,
+				 struct its_cmd_block *ptr)
+{
+	return (ptr - its->cmd_base) * sizeof(*ptr);
+}
+
+static int its_queue_full(struct its_node *its)
+{
+	int widx;
+	int ridx;
+
+	widx = its->cmd_write - its->cmd_base;
+	ridx = readl_relaxed(its->base + GITS_CREADR) / sizeof(struct its_cmd_block);
+
+	/* This is incredibly unlikely to happen, unless the ITS locks up. */
+	if (((widx + 1) % ITS_CMD_QUEUE_NR_ENTRIES) == ridx)
+		return 1;
+
+	return 0;
+}
+
+static struct its_cmd_block *its_allocate_entry(struct its_node *its)
+{
+	struct its_cmd_block *cmd;
+	u32 count = 1000000;	/* 1s! */
+
+	while (its_queue_full(its)) {
+		count--;
+		if (!count) {
+			pr_err_ratelimited("ITS queue not draining\n");
+			return NULL;
+		}
+		cpu_relax();
+		udelay(1);
+	}
+
+	cmd = its->cmd_write++;
+
+	/* Handle queue wrapping */
+	if (its->cmd_write == (its->cmd_base + ITS_CMD_QUEUE_NR_ENTRIES))
+		its->cmd_write = its->cmd_base;
+
+	return cmd;
+}
+
+static struct its_cmd_block *its_post_commands(struct its_node *its)
+{
+	u64 wr = its_cmd_ptr_to_offset(its, its->cmd_write);
+
+	writel_relaxed(wr, its->base + GITS_CWRITER);
+
+	return its->cmd_write;
+}
+
+static void its_flush_cmd(struct its_node *its, struct its_cmd_block *cmd)
+{
+	/*
+	 * Make sure the commands written to memory are observable by
+	 * the ITS.
+	 */
+	if (its->flags & ITS_FLAGS_CMDQ_NEEDS_FLUSHING)
+		__flush_dcache_area(cmd, sizeof(*cmd));
+	else
+		dsb(ishst);
+}
+
+static void its_wait_for_range_completion(struct its_node *its,
+					  struct its_cmd_block *from,
+					  struct its_cmd_block *to)
+{
+	u64 rd_idx, from_idx, to_idx;
+	u32 count = 1000000;	/* 1s! */
+
+	from_idx = its_cmd_ptr_to_offset(its, from);
+	to_idx = its_cmd_ptr_to_offset(its, to);
+
+	while (1) {
+		rd_idx = readl_relaxed(its->base + GITS_CREADR);
+		if (rd_idx >= to_idx || rd_idx < from_idx)
+			break;
+
+		count--;
+		if (!count) {
+			pr_err_ratelimited("ITS queue timeout\n");
+			return;
+		}
+		cpu_relax();
+		udelay(1);
+	}
+}
+
+static void its_send_single_command(struct its_node *its,
+				    its_cmd_builder_t builder,
+				    struct its_cmd_desc *desc)
+{
+	struct its_cmd_block *cmd, *sync_cmd, *next_cmd;
+	struct its_collection *sync_col;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&its->lock, flags);
+
+	cmd = its_allocate_entry(its);
+	if (!cmd) {		/* We're soooooo screewed... */
+		pr_err_ratelimited("ITS can't allocate, dropping command\n");
+		raw_spin_unlock_irqrestore(&its->lock, flags);
+		return;
+	}
+	sync_col = builder(cmd, desc);
+	its_flush_cmd(its, cmd);
+
+	if (sync_col) {
+		sync_cmd = its_allocate_entry(its);
+		if (!sync_cmd) {
+			pr_err_ratelimited("ITS can't SYNC, skipping\n");
+			goto post;
+		}
+		its_encode_cmd(sync_cmd, GITS_CMD_SYNC);
+		its_encode_target(sync_cmd, sync_col->target_address);
+		its_fixup_cmd(sync_cmd);
+		its_flush_cmd(its, sync_cmd);
+	}
+
+post:
+	next_cmd = its_post_commands(its);
+	raw_spin_unlock_irqrestore(&its->lock, flags);
+
+	its_wait_for_range_completion(its, cmd, next_cmd);
+}
+
+static void its_send_inv(struct its_device *dev, u32 event_id)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_inv_cmd.dev = dev;
+	desc.its_inv_cmd.event_id = event_id;
+
+	its_send_single_command(dev->its, its_build_inv_cmd, &desc);
+}
+
+static void its_send_mapd(struct its_device *dev, int valid)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_mapd_cmd.dev = dev;
+	desc.its_mapd_cmd.valid = !!valid;
+
+	its_send_single_command(dev->its, its_build_mapd_cmd, &desc);
+}
+
+static void its_send_mapc(struct its_node *its, struct its_collection *col,
+			  int valid)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_mapc_cmd.col = col;
+	desc.its_mapc_cmd.valid = !!valid;
+
+	its_send_single_command(its, its_build_mapc_cmd, &desc);
+}
+
+static void its_send_mapvi(struct its_device *dev, u32 irq_id, u32 id)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_mapvi_cmd.dev = dev;
+	desc.its_mapvi_cmd.phys_id = irq_id;
+	desc.its_mapvi_cmd.event_id = id;
+
+	its_send_single_command(dev->its, its_build_mapvi_cmd, &desc);
+}
+
+static void its_send_movi(struct its_device *dev,
+			  struct its_collection *col, u32 id)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_movi_cmd.dev = dev;
+	desc.its_movi_cmd.col = col;
+	desc.its_movi_cmd.event_id = id;
+
+	its_send_single_command(dev->its, its_build_movi_cmd, &desc);
+}
+
+static void its_send_discard(struct its_device *dev, u32 id)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_discard_cmd.dev = dev;
+	desc.its_discard_cmd.event_id = id;
+
+	its_send_single_command(dev->its, its_build_discard_cmd, &desc);
+}
+
+static void its_send_invall(struct its_node *its, struct its_collection *col)
+{
+	struct its_cmd_desc desc;
+
+	desc.its_invall_cmd.col = col;
+
+	its_send_single_command(its, its_build_invall_cmd, &desc);
+}
+
+/*
+ * irqchip functions - assumes MSI, mostly.
+ */
+
+static inline u32 its_get_event_id(struct irq_data *d)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	return d->hwirq - its_dev->event_map.lpi_base;
+}
+
+static void lpi_set_config(struct irq_data *d, bool enable)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	irq_hw_number_t hwirq = d->hwirq;
+	u32 id = its_get_event_id(d);
+	u8 *cfg = page_address(gic_rdists->prop_page) + hwirq - 8192;
+
+	if (enable)
+		*cfg |= LPI_PROP_ENABLED;
+	else
+		*cfg &= ~LPI_PROP_ENABLED;
+
+	/*
+	 * Make the above write visible to the redistributors.
+	 * And yes, we're flushing exactly: One. Single. Byte.
+	 * Humpf...
+	 */
+	if (gic_rdists->flags & RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING)
+		__flush_dcache_area(cfg, sizeof(*cfg));
+	else
+		dsb(ishst);
+	its_send_inv(its_dev, id);
+}
+
+static void its_mask_irq(struct irq_data *d)
+{
+	lpi_set_config(d, false);
+}
+
+static void its_unmask_irq(struct irq_data *d)
+{
+	lpi_set_config(d, true);
+}
+
+static void its_eoi_irq(struct irq_data *d)
+{
+	gic_write_eoir(d->hwirq);
+}
+
+static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
+			    bool force)
+{
+	unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	struct its_collection *target_col;
+	u32 id = its_get_event_id(d);
+
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	target_col = &its_dev->its->collections[cpu];
+	its_send_movi(its_dev, target_col, id);
+	its_dev->event_map.col_map[id] = cpu;
+
+	return IRQ_SET_MASK_OK_DONE;
+}
+
+static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	struct its_node *its;
+	u64 addr;
+
+	its = its_dev->its;
+	addr = its->phys_base + GITS_TRANSLATER;
+
+	msg->address_lo		= addr & ((1UL << 32) - 1);
+	msg->address_hi		= addr >> 32;
+	msg->data		= its_get_event_id(d);
+}
+
+static struct irq_chip its_irq_chip = {
+	.name			= "ITS",
+	.irq_mask		= its_mask_irq,
+	.irq_unmask		= its_unmask_irq,
+	.irq_eoi		= its_eoi_irq,
+	.irq_set_affinity	= its_set_affinity,
+	.irq_compose_msi_msg	= its_irq_compose_msi_msg,
+};
+
+/*
+ * How we allocate LPIs:
+ *
+ * The GIC has id_bits bits for interrupt identifiers. From there, we
+ * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as
+ * we allocate LPIs by chunks of 32, we can shift the whole thing by 5
+ * bits to the right.
+ *
+ * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations.
+ */
+#define IRQS_PER_CHUNK_SHIFT	5
+#define IRQS_PER_CHUNK		(1 << IRQS_PER_CHUNK_SHIFT)
+
+static unsigned long *lpi_bitmap;
+static u32 lpi_chunks;
+static DEFINE_SPINLOCK(lpi_lock);
+
+static int its_lpi_to_chunk(int lpi)
+{
+	return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT;
+}
+
+static int its_chunk_to_lpi(int chunk)
+{
+	return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
+}
+
+static int its_lpi_init(u32 id_bits)
+{
+	lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+
+	lpi_bitmap = kzalloc(BITS_TO_LONGS(lpi_chunks) * sizeof(long),
+			     GFP_KERNEL);
+	if (!lpi_bitmap) {
+		lpi_chunks = 0;
+		return -ENOMEM;
+	}
+
+	pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks);
+	return 0;
+}
+
+static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
+{
+	unsigned long *bitmap = NULL;
+	int chunk_id;
+	int nr_chunks;
+	int i;
+
+	nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK);
+
+	spin_lock(&lpi_lock);
+
+	do {
+		chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks,
+						      0, nr_chunks, 0);
+		if (chunk_id < lpi_chunks)
+			break;
+
+		nr_chunks--;
+	} while (nr_chunks > 0);
+
+	if (!nr_chunks)
+		goto out;
+
+	bitmap = kzalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK) * sizeof (long),
+			 GFP_ATOMIC);
+	if (!bitmap)
+		goto out;
+
+	for (i = 0; i < nr_chunks; i++)
+		set_bit(chunk_id + i, lpi_bitmap);
+
+	*base = its_chunk_to_lpi(chunk_id);
+	*nr_ids = nr_chunks * IRQS_PER_CHUNK;
+
+out:
+	spin_unlock(&lpi_lock);
+
+	if (!bitmap)
+		*base = *nr_ids = 0;
+
+	return bitmap;
+}
+
+static void its_lpi_free(struct event_lpi_map *map)
+{
+	int base = map->lpi_base;
+	int nr_ids = map->nr_lpis;
+	int lpi;
+
+	spin_lock(&lpi_lock);
+
+	for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
+		int chunk = its_lpi_to_chunk(lpi);
+		BUG_ON(chunk > lpi_chunks);
+		if (test_bit(chunk, lpi_bitmap)) {
+			clear_bit(chunk, lpi_bitmap);
+		} else {
+			pr_err("Bad LPI chunk %d\n", chunk);
+		}
+	}
+
+	spin_unlock(&lpi_lock);
+
+	kfree(map->lpi_map);
+	kfree(map->col_map);
+}
+
+/*
+ * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
+ * deal with (one configuration byte per interrupt). PENDBASE has to
+ * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
+ */
+#define LPI_PROPBASE_SZ		SZ_64K
+#define LPI_PENDBASE_SZ		(LPI_PROPBASE_SZ / 8 + SZ_1K)
+
+/*
+ * This is how many bits of ID we need, including the useless ones.
+ */
+#define LPI_NRBITS		ilog2(LPI_PROPBASE_SZ + SZ_8K)
+
+#define LPI_PROP_DEFAULT_PRIO	0xa0
+
+static int __init its_alloc_lpi_tables(void)
+{
+	phys_addr_t paddr;
+
+	gic_rdists->prop_page = alloc_pages(GFP_NOWAIT,
+					   get_order(LPI_PROPBASE_SZ));
+	if (!gic_rdists->prop_page) {
+		pr_err("Failed to allocate PROPBASE\n");
+		return -ENOMEM;
+	}
+
+	paddr = page_to_phys(gic_rdists->prop_page);
+	pr_info("GIC: using LPI property table @%pa\n", &paddr);
+
+	/* Priority 0xa0, Group-1, disabled */
+	memset(page_address(gic_rdists->prop_page),
+	       LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
+	       LPI_PROPBASE_SZ);
+
+	/* Make sure the GIC will observe the written configuration */
+	__flush_dcache_area(page_address(gic_rdists->prop_page), LPI_PROPBASE_SZ);
+
+	return 0;
+}
+
+static const char *its_base_type_string[] = {
+	[GITS_BASER_TYPE_DEVICE]	= "Devices",
+	[GITS_BASER_TYPE_VCPU]		= "Virtual CPUs",
+	[GITS_BASER_TYPE_CPU]		= "Physical CPUs",
+	[GITS_BASER_TYPE_COLLECTION]	= "Interrupt Collections",
+	[GITS_BASER_TYPE_RESERVED5] 	= "Reserved (5)",
+	[GITS_BASER_TYPE_RESERVED6] 	= "Reserved (6)",
+	[GITS_BASER_TYPE_RESERVED7] 	= "Reserved (7)",
+};
+
+static void its_free_tables(struct its_node *its)
+{
+	int i;
+
+	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+		if (its->tables[i]) {
+			free_page((unsigned long)its->tables[i]);
+			its->tables[i] = NULL;
+		}
+	}
+}
+
+static int its_alloc_tables(const char *node_name, struct its_node *its)
+{
+	int err;
+	int i;
+	int psz = SZ_64K;
+	u64 shr = GITS_BASER_InnerShareable;
+	u64 cache = GITS_BASER_WaWb;
+
+	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+		u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
+		u64 type = GITS_BASER_TYPE(val);
+		u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+		int order = get_order(psz);
+		int alloc_size;
+		u64 tmp;
+		void *base;
+
+		if (type == GITS_BASER_TYPE_NONE)
+			continue;
+
+		/*
+		 * Allocate as many entries as required to fit the
+		 * range of device IDs that the ITS can grok... The ID
+		 * space being incredibly sparse, this results in a
+		 * massive waste of memory.
+		 *
+		 * For other tables, only allocate a single page.
+		 */
+		if (type == GITS_BASER_TYPE_DEVICE) {
+			u64 typer = readq_relaxed(its->base + GITS_TYPER);
+			u32 ids = GITS_TYPER_DEVBITS(typer);
+
+			/*
+			 * 'order' was initialized earlier to the default page
+			 * granule of the the ITS.  We can't have an allocation
+			 * smaller than that.  If the requested allocation
+			 * is smaller, round up to the default page granule.
+			 */
+			order = max(get_order((1UL << ids) * entry_size),
+				    order);
+			if (order >= MAX_ORDER) {
+				order = MAX_ORDER - 1;
+				pr_warn("%s: Device Table too large, reduce its page order to %u\n",
+					node_name, order);
+			}
+		}
+
+		alloc_size = (1 << order) * PAGE_SIZE;
+		base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+		if (!base) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		its->tables[i] = base;
+
+retry_baser:
+		val = (virt_to_phys(base) 				 |
+		       (type << GITS_BASER_TYPE_SHIFT)			 |
+		       ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
+		       cache						 |
+		       shr						 |
+		       GITS_BASER_VALID);
+
+		switch (psz) {
+		case SZ_4K:
+			val |= GITS_BASER_PAGE_SIZE_4K;
+			break;
+		case SZ_16K:
+			val |= GITS_BASER_PAGE_SIZE_16K;
+			break;
+		case SZ_64K:
+			val |= GITS_BASER_PAGE_SIZE_64K;
+			break;
+		}
+
+		val |= (alloc_size / psz) - 1;
+
+		writeq_relaxed(val, its->base + GITS_BASER + i * 8);
+		tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
+
+		if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
+			/*
+			 * Shareability didn't stick. Just use
+			 * whatever the read reported, which is likely
+			 * to be the only thing this redistributor
+			 * supports. If that's zero, make it
+			 * non-cacheable as well.
+			 */
+			shr = tmp & GITS_BASER_SHAREABILITY_MASK;
+			if (!shr) {
+				cache = GITS_BASER_nC;
+				__flush_dcache_area(base, alloc_size);
+			}
+			goto retry_baser;
+		}
+
+		if ((val ^ tmp) & GITS_BASER_PAGE_SIZE_MASK) {
+			/*
+			 * Page size didn't stick. Let's try a smaller
+			 * size and retry. If we reach 4K, then
+			 * something is horribly wrong...
+			 */
+			switch (psz) {
+			case SZ_16K:
+				psz = SZ_4K;
+				goto retry_baser;
+			case SZ_64K:
+				psz = SZ_16K;
+				goto retry_baser;
+			}
+		}
+
+		if (val != tmp) {
+			pr_err("ITS: %s: GITS_BASER%d doesn't stick: %lx %lx\n",
+			       node_name, i,
+			       (unsigned long) val, (unsigned long) tmp);
+			err = -ENXIO;
+			goto out_free;
+		}
+
+		pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
+			(int)(alloc_size / entry_size),
+			its_base_type_string[type],
+			(unsigned long)virt_to_phys(base),
+			psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
+	}
+
+	return 0;
+
+out_free:
+	its_free_tables(its);
+
+	return err;
+}
+
+static int its_alloc_collections(struct its_node *its)
+{
+	its->collections = kzalloc(nr_cpu_ids * sizeof(*its->collections),
+				   GFP_KERNEL);
+	if (!its->collections)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void its_cpu_init_lpis(void)
+{
+	void __iomem *rbase = gic_data_rdist_rd_base();
+	struct page *pend_page;
+	u64 val, tmp;
+
+	/* If we didn't allocate the pending table yet, do it now */
+	pend_page = gic_data_rdist()->pend_page;
+	if (!pend_page) {
+		phys_addr_t paddr;
+		/*
+		 * The pending pages have to be at least 64kB aligned,
+		 * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
+		 */
+		pend_page = alloc_pages(GFP_NOWAIT | __GFP_ZERO,
+					get_order(max(LPI_PENDBASE_SZ, SZ_64K)));
+		if (!pend_page) {
+			pr_err("Failed to allocate PENDBASE for CPU%d\n",
+			       smp_processor_id());
+			return;
+		}
+
+		/* Make sure the GIC will observe the zero-ed page */
+		__flush_dcache_area(page_address(pend_page), LPI_PENDBASE_SZ);
+
+		paddr = page_to_phys(pend_page);
+		pr_info("CPU%d: using LPI pending table @%pa\n",
+			smp_processor_id(), &paddr);
+		gic_data_rdist()->pend_page = pend_page;
+	}
+
+	/* Disable LPIs */
+	val = readl_relaxed(rbase + GICR_CTLR);
+	val &= ~GICR_CTLR_ENABLE_LPIS;
+	writel_relaxed(val, rbase + GICR_CTLR);
+
+	/*
+	 * Make sure any change to the table is observable by the GIC.
+	 */
+	dsb(sy);
+
+	/* set PROPBASE */
+	val = (page_to_phys(gic_rdists->prop_page) |
+	       GICR_PROPBASER_InnerShareable |
+	       GICR_PROPBASER_WaWb |
+	       ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
+
+	writeq_relaxed(val, rbase + GICR_PROPBASER);
+	tmp = readq_relaxed(rbase + GICR_PROPBASER);
+
+	if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
+		if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
+			/*
+			 * The HW reports non-shareable, we must
+			 * remove the cacheability attributes as
+			 * well.
+			 */
+			val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
+				 GICR_PROPBASER_CACHEABILITY_MASK);
+			val |= GICR_PROPBASER_nC;
+			writeq_relaxed(val, rbase + GICR_PROPBASER);
+		}
+		pr_info_once("GIC: using cache flushing for LPI property table\n");
+		gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING;
+	}
+
+	/* set PENDBASE */
+	val = (page_to_phys(pend_page) |
+	       GICR_PENDBASER_InnerShareable |
+	       GICR_PENDBASER_WaWb);
+
+	writeq_relaxed(val, rbase + GICR_PENDBASER);
+	tmp = readq_relaxed(rbase + GICR_PENDBASER);
+
+	if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
+		/*
+		 * The HW reports non-shareable, we must remove the
+		 * cacheability attributes as well.
+		 */
+		val &= ~(GICR_PENDBASER_SHAREABILITY_MASK |
+			 GICR_PENDBASER_CACHEABILITY_MASK);
+		val |= GICR_PENDBASER_nC;
+		writeq_relaxed(val, rbase + GICR_PENDBASER);
+	}
+
+	/* Enable LPIs */
+	val = readl_relaxed(rbase + GICR_CTLR);
+	val |= GICR_CTLR_ENABLE_LPIS;
+	writel_relaxed(val, rbase + GICR_CTLR);
+
+	/* Make sure the GIC has seen the above */
+	dsb(sy);
+}
+
+static void its_cpu_init_collection(void)
+{
+	struct its_node *its;
+	int cpu;
+
+	spin_lock(&its_lock);
+	cpu = smp_processor_id();
+
+	list_for_each_entry(its, &its_nodes, entry) {
+		u64 target;
+
+		/*
+		 * We now have to bind each collection to its target
+		 * redistributor.
+		 */
+		if (readq_relaxed(its->base + GITS_TYPER) & GITS_TYPER_PTA) {
+			/*
+			 * This ITS wants the physical address of the
+			 * redistributor.
+			 */
+			target = gic_data_rdist()->phys_base;
+		} else {
+			/*
+			 * This ITS wants a linear CPU number.
+			 */
+			target = readq_relaxed(gic_data_rdist_rd_base() + GICR_TYPER);
+			target = GICR_TYPER_CPU_NUMBER(target) << 16;
+		}
+
+		/* Perform collection mapping */
+		its->collections[cpu].target_address = target;
+		its->collections[cpu].col_id = cpu;
+
+		its_send_mapc(its, &its->collections[cpu], 1);
+		its_send_invall(its, &its->collections[cpu]);
+	}
+
+	spin_unlock(&its_lock);
+}
+
+static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
+{
+	struct its_device *its_dev = NULL, *tmp;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&its->lock, flags);
+
+	list_for_each_entry(tmp, &its->its_device_list, entry) {
+		if (tmp->device_id == dev_id) {
+			its_dev = tmp;
+			break;
+		}
+	}
+
+	raw_spin_unlock_irqrestore(&its->lock, flags);
+
+	return its_dev;
+}
+
+static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
+					    int nvecs)
+{
+	struct its_device *dev;
+	unsigned long *lpi_map;
+	unsigned long flags;
+	u16 *col_map = NULL;
+	void *itt;
+	int lpi_base;
+	int nr_lpis;
+	int nr_ites;
+	int sz;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	/*
+	 * At least one bit of EventID is being used, hence a minimum
+	 * of two entries. No, the architecture doesn't let you
+	 * express an ITT with a single entry.
+	 */
+	nr_ites = max(2UL, roundup_pow_of_two(nvecs));
+	sz = nr_ites * its->ite_size;
+	sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
+	itt = kzalloc(sz, GFP_KERNEL);
+	lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis);
+	if (lpi_map)
+		col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL);
+
+	if (!dev || !itt || !lpi_map || !col_map) {
+		kfree(dev);
+		kfree(itt);
+		kfree(lpi_map);
+		kfree(col_map);
+		return NULL;
+	}
+
+	__flush_dcache_area(itt, sz);
+
+	dev->its = its;
+	dev->itt = itt;
+	dev->nr_ites = nr_ites;
+	dev->event_map.lpi_map = lpi_map;
+	dev->event_map.col_map = col_map;
+	dev->event_map.lpi_base = lpi_base;
+	dev->event_map.nr_lpis = nr_lpis;
+	dev->device_id = dev_id;
+	INIT_LIST_HEAD(&dev->entry);
+
+	raw_spin_lock_irqsave(&its->lock, flags);
+	list_add(&dev->entry, &its->its_device_list);
+	raw_spin_unlock_irqrestore(&its->lock, flags);
+
+	/* Map device to its ITT */
+	its_send_mapd(dev, 1);
+
+	return dev;
+}
+
+static void its_free_device(struct its_device *its_dev)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&its_dev->its->lock, flags);
+	list_del(&its_dev->entry);
+	raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
+	kfree(its_dev->itt);
+	kfree(its_dev);
+}
+
+static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq)
+{
+	int idx;
+
+	idx = find_first_zero_bit(dev->event_map.lpi_map,
+				  dev->event_map.nr_lpis);
+	if (idx == dev->event_map.nr_lpis)
+		return -ENOSPC;
+
+	*hwirq = dev->event_map.lpi_base + idx;
+	set_bit(idx, dev->event_map.lpi_map);
+
+	return 0;
+}
+
+static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
+			   int nvec, msi_alloc_info_t *info)
+{
+	struct its_node *its;
+	struct its_device *its_dev;
+	struct msi_domain_info *msi_info;
+	u32 dev_id;
+
+	/*
+	 * We ignore "dev" entierely, and rely on the dev_id that has
+	 * been passed via the scratchpad. This limits this domain's
+	 * usefulness to upper layers that definitely know that they
+	 * are built on top of the ITS.
+	 */
+	dev_id = info->scratchpad[0].ul;
+
+	msi_info = msi_get_domain_info(domain);
+	its = msi_info->data;
+
+	its_dev = its_find_device(its, dev_id);
+	if (its_dev) {
+		/*
+		 * We already have seen this ID, probably through
+		 * another alias (PCI bridge of some sort). No need to
+		 * create the device.
+		 */
+		pr_debug("Reusing ITT for devID %x\n", dev_id);
+		goto out;
+	}
+
+	its_dev = its_create_device(its, dev_id, nvec);
+	if (!its_dev)
+		return -ENOMEM;
+
+	pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec));
+out:
+	info->scratchpad[0].ptr = its_dev;
+	return 0;
+}
+
+static struct msi_domain_ops its_msi_domain_ops = {
+	.msi_prepare	= its_msi_prepare,
+};
+
+static int its_irq_gic_domain_alloc(struct irq_domain *domain,
+				    unsigned int virq,
+				    irq_hw_number_t hwirq)
+{
+	struct of_phandle_args args;
+
+	args.np = domain->parent->of_node;
+	args.args_count = 3;
+	args.args[0] = GIC_IRQ_TYPE_LPI;
+	args.args[1] = hwirq;
+	args.args[2] = IRQ_TYPE_EDGE_RISING;
+
+	return irq_domain_alloc_irqs_parent(domain, virq, 1, &args);
+}
+
+static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *args)
+{
+	msi_alloc_info_t *info = args;
+	struct its_device *its_dev = info->scratchpad[0].ptr;
+	irq_hw_number_t hwirq;
+	int err;
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		err = its_alloc_device_irq(its_dev, &hwirq);
+		if (err)
+			return err;
+
+		err = its_irq_gic_domain_alloc(domain, virq + i, hwirq);
+		if (err)
+			return err;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i,
+					      hwirq, &its_irq_chip, its_dev);
+		pr_debug("ID:%d pID:%d vID:%d\n",
+			 (int)(hwirq - its_dev->event_map.lpi_base),
+			 (int) hwirq, virq + i);
+	}
+
+	return 0;
+}
+
+static void its_irq_domain_activate(struct irq_domain *domain,
+				    struct irq_data *d)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	u32 event = its_get_event_id(d);
+
+	/* Bind the LPI to the first possible CPU */
+	its_dev->event_map.col_map[event] = cpumask_first(cpu_online_mask);
+
+	/* Map the GIC IRQ and event to the device */
+	its_send_mapvi(its_dev, d->hwirq, event);
+}
+
+static void its_irq_domain_deactivate(struct irq_domain *domain,
+				      struct irq_data *d)
+{
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	u32 event = its_get_event_id(d);
+
+	/* Stop the delivery of interrupts */
+	its_send_discard(its_dev, event);
+}
+
+static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *data = irq_domain_get_irq_data(domain,
+								virq + i);
+		u32 event = its_get_event_id(data);
+
+		/* Mark interrupt index as unused */
+		clear_bit(event, its_dev->event_map.lpi_map);
+
+		/* Nuke the entry in the domain */
+		irq_domain_reset_irq_data(data);
+	}
+
+	/* If all interrupts have been freed, start mopping the floor */
+	if (bitmap_empty(its_dev->event_map.lpi_map,
+			 its_dev->event_map.nr_lpis)) {
+		its_lpi_free(&its_dev->event_map);
+
+		/* Unmap device/itt */
+		its_send_mapd(its_dev, 0);
+		its_free_device(its_dev);
+	}
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops its_domain_ops = {
+	.alloc			= its_irq_domain_alloc,
+	.free			= its_irq_domain_free,
+	.activate		= its_irq_domain_activate,
+	.deactivate		= its_irq_domain_deactivate,
+};
+
+static int its_force_quiescent(void __iomem *base)
+{
+	u32 count = 1000000;	/* 1s */
+	u32 val;
+
+	val = readl_relaxed(base + GITS_CTLR);
+	if (val & GITS_CTLR_QUIESCENT)
+		return 0;
+
+	/* Disable the generation of all interrupts to this ITS */
+	val &= ~GITS_CTLR_ENABLE;
+	writel_relaxed(val, base + GITS_CTLR);
+
+	/* Poll GITS_CTLR and wait until ITS becomes quiescent */
+	while (1) {
+		val = readl_relaxed(base + GITS_CTLR);
+		if (val & GITS_CTLR_QUIESCENT)
+			return 0;
+
+		count--;
+		if (!count)
+			return -EBUSY;
+
+		cpu_relax();
+		udelay(1);
+	}
+}
+
+static int its_probe(struct device_node *node, struct irq_domain *parent)
+{
+	struct resource res;
+	struct its_node *its;
+	void __iomem *its_base;
+	struct irq_domain *inner_domain;
+	u32 val;
+	u64 baser, tmp;
+	int err;
+
+	err = of_address_to_resource(node, 0, &res);
+	if (err) {
+		pr_warn("%s: no regs?\n", node->full_name);
+		return -ENXIO;
+	}
+
+	its_base = ioremap(res.start, resource_size(&res));
+	if (!its_base) {
+		pr_warn("%s: unable to map registers\n", node->full_name);
+		return -ENOMEM;
+	}
+
+	val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK;
+	if (val != 0x30 && val != 0x40) {
+		pr_warn("%s: no ITS detected, giving up\n", node->full_name);
+		err = -ENODEV;
+		goto out_unmap;
+	}
+
+	err = its_force_quiescent(its_base);
+	if (err) {
+		pr_warn("%s: failed to quiesce, giving up\n",
+			node->full_name);
+		goto out_unmap;
+	}
+
+	pr_info("ITS: %s\n", node->full_name);
+
+	its = kzalloc(sizeof(*its), GFP_KERNEL);
+	if (!its) {
+		err = -ENOMEM;
+		goto out_unmap;
+	}
+
+	raw_spin_lock_init(&its->lock);
+	INIT_LIST_HEAD(&its->entry);
+	INIT_LIST_HEAD(&its->its_device_list);
+	its->base = its_base;
+	its->phys_base = res.start;
+	its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1;
+
+	its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL);
+	if (!its->cmd_base) {
+		err = -ENOMEM;
+		goto out_free_its;
+	}
+	its->cmd_write = its->cmd_base;
+
+	err = its_alloc_tables(node->full_name, its);
+	if (err)
+		goto out_free_cmd;
+
+	err = its_alloc_collections(its);
+	if (err)
+		goto out_free_tables;
+
+	baser = (virt_to_phys(its->cmd_base)	|
+		 GITS_CBASER_WaWb		|
+		 GITS_CBASER_InnerShareable	|
+		 (ITS_CMD_QUEUE_SZ / SZ_4K - 1)	|
+		 GITS_CBASER_VALID);
+
+	writeq_relaxed(baser, its->base + GITS_CBASER);
+	tmp = readq_relaxed(its->base + GITS_CBASER);
+
+	if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
+		if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) {
+			/*
+			 * The HW reports non-shareable, we must
+			 * remove the cacheability attributes as
+			 * well.
+			 */
+			baser &= ~(GITS_CBASER_SHAREABILITY_MASK |
+				   GITS_CBASER_CACHEABILITY_MASK);
+			baser |= GITS_CBASER_nC;
+			writeq_relaxed(baser, its->base + GITS_CBASER);
+		}
+		pr_info("ITS: using cache flushing for cmd queue\n");
+		its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING;
+	}
+
+	writeq_relaxed(0, its->base + GITS_CWRITER);
+	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
+
+	if (of_property_read_bool(node, "msi-controller")) {
+		struct msi_domain_info *info;
+
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
+		if (!info) {
+			err = -ENOMEM;
+			goto out_free_tables;
+		}
+
+		inner_domain = irq_domain_add_tree(node, &its_domain_ops, its);
+		if (!inner_domain) {
+			err = -ENOMEM;
+			kfree(info);
+			goto out_free_tables;
+		}
+
+		inner_domain->parent = parent;
+		inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+		info->ops = &its_msi_domain_ops;
+		info->data = its;
+		inner_domain->host_data = info;
+	}
+
+	spin_lock(&its_lock);
+	list_add(&its->entry, &its_nodes);
+	spin_unlock(&its_lock);
+
+	return 0;
+
+out_free_tables:
+	its_free_tables(its);
+out_free_cmd:
+	kfree(its->cmd_base);
+out_free_its:
+	kfree(its);
+out_unmap:
+	iounmap(its_base);
+	pr_err("ITS: failed probing %s (%d)\n", node->full_name, err);
+	return err;
+}
+
+static bool gic_rdists_supports_plpis(void)
+{
+	return !!(readl_relaxed(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
+}
+
+int its_cpu_init(void)
+{
+	if (!list_empty(&its_nodes)) {
+		if (!gic_rdists_supports_plpis()) {
+			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
+			return -ENXIO;
+		}
+		its_cpu_init_lpis();
+		its_cpu_init_collection();
+	}
+
+	return 0;
+}
+
+static struct of_device_id its_device_id[] = {
+	{	.compatible	= "arm,gic-v3-its",	},
+	{},
+};
+
+int its_init(struct device_node *node, struct rdists *rdists,
+	     struct irq_domain *parent_domain)
+{
+	struct device_node *np;
+
+	for (np = of_find_matching_node(node, its_device_id); np;
+	     np = of_find_matching_node(np, its_device_id)) {
+		its_probe(np, parent_domain);
+	}
+
+	if (list_empty(&its_nodes)) {
+		pr_warn("ITS: No ITS available, not enabling LPIs\n");
+		return -ENXIO;
+	}
+
+	gic_rdists = rdists;
+	gic_root_node = node;
+
+	its_alloc_lpi_tables();
+	its_lpi_init(rdists->id_bits);
+
+	return 0;
+}
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a94342f4a3ef..62acb0ced45a 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -34,20 +34,25 @@
 #include "irq-gic-common.h"
 #include "irqchip.h"
 
+struct redist_region {
+	void __iomem		*redist_base;
+	phys_addr_t		phys_base;
+};
+
 struct gic_chip_data {
 	void __iomem		*dist_base;
-	void __iomem		**redist_base;
-	void __iomem * __percpu	*rdist;
+	struct redist_region	*redist_regions;
+	struct rdists		rdists;
 	struct irq_domain	*domain;
 	u64			redist_stride;
-	u32			redist_regions;
+	u32			nr_redist_regions;
 	unsigned int		irq_nr;
 };
 
 static struct gic_chip_data gic_data __read_mostly;
 
-#define gic_data_rdist()		(this_cpu_ptr(gic_data.rdist))
-#define gic_data_rdist_rd_base()	(*gic_data_rdist())
+#define gic_data_rdist()		(this_cpu_ptr(gic_data.rdists.rdist))
+#define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
 #define gic_data_rdist_sgi_base()	(gic_data_rdist_rd_base() + SZ_64K)
 
 /* Our default, arbitrary priority value. Linux only uses one anyway. */
@@ -71,9 +76,6 @@ static inline void __iomem *gic_dist_base(struct irq_data *d)
 	if (d->hwirq <= 1023)		/* SPI -> dist_base */
 		return gic_data.dist_base;
 
-	if (d->hwirq >= 8192)
-		BUG();		/* LPI Detected!!! */
-
 	return NULL;
 }
 
@@ -236,7 +238,9 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 	if (irq < 16)
 		return -EINVAL;
 
-	if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+	/* SPIs have restrictions on the supported types */
+	if (irq >= 32 && type != IRQ_TYPE_LEVEL_HIGH &&
+			 type != IRQ_TYPE_EDGE_RISING)
 		return -EINVAL;
 
 	if (gic_irq_in_rdist(d)) {
@@ -247,9 +251,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 		rwp_wait = gic_dist_wait_for_rwp;
 	}
 
-	gic_configure_irq(irq, type, base, rwp_wait);
-
-	return 0;
+	return gic_configure_irq(irq, type, base, rwp_wait);
 }
 
 static u64 gic_mpidr_to_affinity(u64 mpidr)
@@ -271,11 +273,11 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 	do {
 		irqnr = gic_read_iar();
 
-		if (likely(irqnr > 15 && irqnr < 1020)) {
+		if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
 			int err;
 			err = handle_domain_irq(gic_data.domain, irqnr, regs);
 			if (err) {
-				WARN_ONCE(true, "Unexpected SPI received!\n");
+				WARN_ONCE(true, "Unexpected interrupt received!\n");
 				gic_write_eoir(irqnr);
 			}
 			continue;
@@ -340,8 +342,8 @@ static int gic_populate_rdist(void)
 	       MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
 	       MPIDR_AFFINITY_LEVEL(mpidr, 0));
 
-	for (i = 0; i < gic_data.redist_regions; i++) {
-		void __iomem *ptr = gic_data.redist_base[i];
+	for (i = 0; i < gic_data.nr_redist_regions; i++) {
+		void __iomem *ptr = gic_data.redist_regions[i].redist_base;
 		u32 reg;
 
 		reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK;
@@ -354,10 +356,13 @@ static int gic_populate_rdist(void)
 		do {
 			typer = readq_relaxed(ptr + GICR_TYPER);
 			if ((typer >> 32) == aff) {
+				u64 offset = ptr - gic_data.redist_regions[i].redist_base;
 				gic_data_rdist_rd_base() = ptr;
-				pr_info("CPU%d: found redistributor %llx @%p\n",
+				gic_data_rdist()->phys_base = gic_data.redist_regions[i].phys_base + offset;
+				pr_info("CPU%d: found redistributor %llx region %d:%pa\n",
 					smp_processor_id(),
-					(unsigned long long)mpidr, ptr);
+					(unsigned long long)mpidr,
+					i, &gic_data_rdist()->phys_base);
 				return 0;
 			}
 
@@ -392,6 +397,11 @@ static void gic_cpu_sys_reg_init(void)
 	gic_write_grpen1(1);
 }
 
+static int gic_dist_supports_lpis(void)
+{
+	return !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS);
+}
+
 static void gic_cpu_init(void)
 {
 	void __iomem *rbase;
@@ -406,6 +416,10 @@ static void gic_cpu_init(void)
 
 	gic_cpu_config(rbase, gic_redist_wait_for_rwp);
 
+	/* Give LPIs a spin */
+	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
+		its_cpu_init();
+
 	/* initialise system registers */
 	gic_cpu_sys_reg_init();
 }
@@ -459,7 +473,7 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
 		tlist |= 1 << (mpidr & 0xf);
 
 		cpu = cpumask_next(cpu, mask);
-		if (cpu == nr_cpu_ids)
+		if (cpu >= nr_cpu_ids)
 			goto out;
 
 		mpidr = cpu_logical_map(cpu);
@@ -592,26 +606,43 @@ static struct irq_chip gic_chip = {
 	.irq_set_affinity	= gic_set_affinity,
 };
 
+#define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
+
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			      irq_hw_number_t hw)
 {
 	/* SGIs are private to the core kernel */
 	if (hw < 16)
 		return -EPERM;
+	/* Nothing here */
+	if (hw >= gic_data.irq_nr && hw < 8192)
+		return -EPERM;
+	/* Off limits */
+	if (hw >= GIC_ID_NR)
+		return -EPERM;
+
 	/* PPIs */
 	if (hw < 32) {
 		irq_set_percpu_devid(irq);
-		irq_set_chip_and_handler(irq, &gic_chip,
-					 handle_percpu_devid_irq);
+		irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+				    handle_percpu_devid_irq, NULL, NULL);
 		set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
 	}
 	/* SPIs */
 	if (hw >= 32 && hw < gic_data.irq_nr) {
-		irq_set_chip_and_handler(irq, &gic_chip,
-					 handle_fasteoi_irq);
+		irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+				    handle_fasteoi_irq, NULL, NULL);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 	}
-	irq_set_chip_data(irq, d->host_data);
+	/* LPIs */
+	if (hw >= 8192 && hw < GIC_ID_NR) {
+		if (!gic_dist_supports_lpis())
+			return -EPERM;
+		irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+				    handle_fasteoi_irq, NULL, NULL);
+		set_irq_flags(irq, IRQF_VALID);
+	}
+
 	return 0;
 }
 
@@ -632,6 +663,9 @@ static int gic_irq_domain_xlate(struct irq_domain *d,
 	case 1:			/* PPI */
 		*out_hwirq = intspec[1] + 16;
 		break;
+	case GIC_IRQ_TYPE_LPI:	/* LPI */
+		*out_hwirq = intspec[1];
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -640,17 +674,50 @@ static int gic_irq_domain_xlate(struct irq_domain *d,
 	return 0;
 }
 
+static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	int i, ret;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	struct of_phandle_args *irq_data = arg;
+
+	ret = gic_irq_domain_xlate(domain, irq_data->np, irq_data->args,
+				   irq_data->args_count, &hwirq, &type);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_irqs; i++)
+		gic_irq_domain_map(domain, virq + i, hwirq + i);
+
+	return 0;
+}
+
+static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+		irq_set_handler(virq + i, NULL);
+		irq_domain_reset_irq_data(d);
+	}
+}
+
 static const struct irq_domain_ops gic_irq_domain_ops = {
-	.map = gic_irq_domain_map,
 	.xlate = gic_irq_domain_xlate,
+	.alloc = gic_irq_domain_alloc,
+	.free = gic_irq_domain_free,
 };
 
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
 {
 	void __iomem *dist_base;
-	void __iomem **redist_base;
+	struct redist_region *rdist_regs;
 	u64 redist_stride;
-	u32 redist_regions;
+	u32 nr_redist_regions;
+	u32 typer;
 	u32 reg;
 	int gic_irqs;
 	int err;
@@ -671,54 +738,63 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
 		goto out_unmap_dist;
 	}
 
-	if (of_property_read_u32(node, "#redistributor-regions", &redist_regions))
-		redist_regions = 1;
+	if (of_property_read_u32(node, "#redistributor-regions", &nr_redist_regions))
+		nr_redist_regions = 1;
 
-	redist_base = kzalloc(sizeof(*redist_base) * redist_regions, GFP_KERNEL);
-	if (!redist_base) {
+	rdist_regs = kzalloc(sizeof(*rdist_regs) * nr_redist_regions, GFP_KERNEL);
+	if (!rdist_regs) {
 		err = -ENOMEM;
 		goto out_unmap_dist;
 	}
 
-	for (i = 0; i < redist_regions; i++) {
-		redist_base[i] = of_iomap(node, 1 + i);
-		if (!redist_base[i]) {
+	for (i = 0; i < nr_redist_regions; i++) {
+		struct resource res;
+		int ret;
+
+		ret = of_address_to_resource(node, 1 + i, &res);
+		rdist_regs[i].redist_base = of_iomap(node, 1 + i);
+		if (ret || !rdist_regs[i].redist_base) {
 			pr_err("%s: couldn't map region %d\n",
 			       node->full_name, i);
 			err = -ENODEV;
 			goto out_unmap_rdist;
 		}
+		rdist_regs[i].phys_base = res.start;
 	}
 
 	if (of_property_read_u64(node, "redistributor-stride", &redist_stride))
 		redist_stride = 0;
 
 	gic_data.dist_base = dist_base;
-	gic_data.redist_base = redist_base;
-	gic_data.redist_regions = redist_regions;
+	gic_data.redist_regions = rdist_regs;
+	gic_data.nr_redist_regions = nr_redist_regions;
 	gic_data.redist_stride = redist_stride;
 
 	/*
 	 * Find out how many interrupts are supported.
 	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
 	 */
-	gic_irqs = readl_relaxed(gic_data.dist_base + GICD_TYPER) & 0x1f;
-	gic_irqs = (gic_irqs + 1) * 32;
+	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
+	gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer);
+	gic_irqs = GICD_TYPER_IRQS(typer);
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
 	gic_data.irq_nr = gic_irqs;
 
 	gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops,
 					      &gic_data);
-	gic_data.rdist = alloc_percpu(typeof(*gic_data.rdist));
+	gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));
 
-	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdist)) {
+	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) {
 		err = -ENOMEM;
 		goto out_free;
 	}
 
 	set_handle_irq(gic_handle_irq);
 
+	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
+		its_init(node, &gic_data.rdists, gic_data.domain);
+
 	gic_smp_init();
 	gic_dist_init();
 	gic_cpu_init();
@@ -729,12 +805,12 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
 out_free:
 	if (gic_data.domain)
 		irq_domain_remove(gic_data.domain);
-	free_percpu(gic_data.rdist);
+	free_percpu(gic_data.rdists.rdist);
 out_unmap_rdist:
-	for (i = 0; i < redist_regions; i++)
-		if (redist_base[i])
-			iounmap(redist_base[i]);
-	kfree(redist_base);
+	for (i = 0; i < nr_redist_regions; i++)
+		if (rdist_regs[i].redist_base)
+			iounmap(rdist_regs[i].redist_base);
+	kfree(rdist_regs);
 out_unmap_dist:
 	iounmap(dist_base);
 	return err;
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 1107051df379..d67507911f9f 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -154,23 +154,25 @@ static inline unsigned int gic_irq(struct irq_data *d)
 static void gic_mask_irq(struct irq_data *d)
 {
 	u32 mask = 1 << (gic_irq(d) % 32);
+	unsigned long flags;
 
-	raw_spin_lock(&irq_controller_lock);
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 	writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4);
 	if (gic_arch_extn.irq_mask)
 		gic_arch_extn.irq_mask(d);
-	raw_spin_unlock(&irq_controller_lock);
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 static void gic_unmask_irq(struct irq_data *d)
 {
 	u32 mask = 1 << (gic_irq(d) % 32);
+	unsigned long flags;
 
-	raw_spin_lock(&irq_controller_lock);
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 	if (gic_arch_extn.irq_unmask)
 		gic_arch_extn.irq_unmask(d);
 	writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4);
-	raw_spin_unlock(&irq_controller_lock);
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 static void gic_eoi_irq(struct irq_data *d)
@@ -188,24 +190,28 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 {
 	void __iomem *base = gic_dist_base(d);
 	unsigned int gicirq = gic_irq(d);
+	unsigned long flags;
+	int ret;
 
 	/* Interrupt configuration for SGIs can't be changed */
 	if (gicirq < 16)
 		return -EINVAL;
 
-	if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+	/* SPIs have restrictions on the supported types */
+	if (gicirq >= 32 && type != IRQ_TYPE_LEVEL_HIGH &&
+			    type != IRQ_TYPE_EDGE_RISING)
 		return -EINVAL;
 
-	raw_spin_lock(&irq_controller_lock);
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
 	if (gic_arch_extn.irq_set_type)
 		gic_arch_extn.irq_set_type(d, type);
 
-	gic_configure_irq(gicirq, type, base, NULL);
+	ret = gic_configure_irq(gicirq, type, base, NULL);
 
-	raw_spin_unlock(&irq_controller_lock);
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 
-	return 0;
+	return ret;
 }
 
 static int gic_retrigger(struct irq_data *d)
@@ -224,6 +230,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
 	unsigned int cpu, shift = (gic_irq(d) % 4) * 8;
 	u32 val, mask, bit;
+	unsigned long flags;
 
 	if (!force)
 		cpu = cpumask_any_and(mask_val, cpu_online_mask);
@@ -233,12 +240,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
-	raw_spin_lock(&irq_controller_lock);
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
-	raw_spin_unlock(&irq_controller_lock);
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 
 	return IRQ_SET_MASK_OK;
 }
@@ -796,17 +803,16 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 {
 	if (hw < 32) {
 		irq_set_percpu_devid(irq);
-		irq_set_chip_and_handler(irq, &gic_chip,
-					 handle_percpu_devid_irq);
+		irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+				    handle_percpu_devid_irq, NULL, NULL);
 		set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
 	} else {
-		irq_set_chip_and_handler(irq, &gic_chip,
-					 handle_fasteoi_irq);
+		irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
+				    handle_fasteoi_irq, NULL, NULL);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 
 		gic_routable_irq_domain_ops->map(d, irq, hw);
 	}
-	irq_set_chip_data(irq, d->host_data);
 	return 0;
 }
 
@@ -866,6 +872,31 @@ static struct notifier_block gic_cpu_notifier = {
 };
 #endif
 
+static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				unsigned int nr_irqs, void *arg)
+{
+	int i, ret;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	struct of_phandle_args *irq_data = arg;
+
+	ret = gic_irq_domain_xlate(domain, irq_data->np, irq_data->args,
+				   irq_data->args_count, &hwirq, &type);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_irqs; i++)
+		gic_irq_domain_map(domain, virq + i, hwirq + i);
+
+	return 0;
+}
+
+static const struct irq_domain_ops gic_irq_domain_hierarchy_ops = {
+	.xlate = gic_irq_domain_xlate,
+	.alloc = gic_irq_domain_alloc,
+	.free = irq_domain_free_irqs_top,
+};
+
 static const struct irq_domain_ops gic_irq_domain_ops = {
 	.map = gic_irq_domain_map,
 	.unmap = gic_irq_domain_unmap,
@@ -956,18 +987,6 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 		gic_cpu_map[i] = 0xff;
 
 	/*
-	 * For primary GICs, skip over SGIs.
-	 * For secondary GICs, skip over PPIs, too.
-	 */
-	if (gic_nr == 0 && (irq_start & 31) > 0) {
-		hwirq_base = 16;
-		if (irq_start != -1)
-			irq_start = (irq_start & ~31) + 16;
-	} else {
-		hwirq_base = 32;
-	}
-
-	/*
 	 * Find out how many interrupts are supported.
 	 * The GIC only supports up to 1020 interrupt sources.
 	 */
@@ -977,10 +996,31 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 		gic_irqs = 1020;
 	gic->gic_irqs = gic_irqs;
 
-	gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
+	if (node) {		/* DT case */
+		const struct irq_domain_ops *ops = &gic_irq_domain_hierarchy_ops;
+
+		if (!of_property_read_u32(node, "arm,routable-irqs",
+					  &nr_routable_irqs)) {
+			ops = &gic_irq_domain_ops;
+			gic_irqs = nr_routable_irqs;
+		}
+
+		gic->domain = irq_domain_add_linear(node, gic_irqs, ops, gic);
+	} else {		/* Non-DT case */
+		/*
+		 * For primary GICs, skip over SGIs.
+		 * For secondary GICs, skip over PPIs, too.
+		 */
+		if (gic_nr == 0 && (irq_start & 31) > 0) {
+			hwirq_base = 16;
+			if (irq_start != -1)
+				irq_start = (irq_start & ~31) + 16;
+		} else {
+			hwirq_base = 32;
+		}
+
+		gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
 
-	if (of_property_read_u32(node, "arm,routable-irqs",
-				 &nr_routable_irqs)) {
 		irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
 					   numa_node_id());
 		if (IS_ERR_VALUE(irq_base)) {
@@ -991,10 +1031,6 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 
 		gic->domain = irq_domain_add_legacy(node, gic_irqs, irq_base,
 					hwirq_base, &gic_irq_domain_ops, gic);
-	} else {
-		gic->domain = irq_domain_add_linear(node, nr_routable_irqs,
-						    &gic_irq_domain_ops,
-						    gic);
 	}
 
 	if (WARN_ON(!gic->domain))
@@ -1045,6 +1081,10 @@ gic_of_init(struct device_node *node, struct device_node *parent)
 		irq = irq_of_parse_and_map(node, 0);
 		gic_cascade_irq(gic_cnt, irq);
 	}
+
+	if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
+		gicv2m_of_init(node, gic_data[gic_cnt].domain);
+
 	gic_cnt++;
 	return 0;
 }
diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c
index 9c8f833522e6..5507a0c9a61d 100644
--- a/drivers/irqchip/irq-hip04.c
+++ b/drivers/irqchip/irq-hip04.c
@@ -120,21 +120,24 @@ static int hip04_irq_set_type(struct irq_data *d, unsigned int type)
 {
 	void __iomem *base = hip04_dist_base(d);
 	unsigned int irq = hip04_irq(d);
+	int ret;
 
 	/* Interrupt configuration for SGIs can't be changed */
 	if (irq < 16)
 		return -EINVAL;
 
-	if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+	/* SPIs have restrictions on the supported types */
+	if (irq >= 32 && type != IRQ_TYPE_LEVEL_HIGH &&
+			 type != IRQ_TYPE_EDGE_RISING)
 		return -EINVAL;
 
 	raw_spin_lock(&irq_controller_lock);
 
-	gic_configure_irq(irq, type, base, NULL);
+	ret = gic_configure_irq(irq, type, base, NULL);
 
 	raw_spin_unlock(&irq_controller_lock);
 
-	return 0;
+	return ret;
 }
 
 #ifdef CONFIG_SMP
diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c
new file mode 100644
index 000000000000..7e342df6a62f
--- /dev/null
+++ b/drivers/irqchip/irq-mtk-sysirq.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2014 MediaTek Inc.
+ * Author: Joe.C <yingjoe.chen@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "irqchip.h"
+
+#define MT6577_SYS_INTPOL_NUM	(224)
+
+struct mtk_sysirq_chip_data {
+	spinlock_t lock;
+	void __iomem *intpol_base;
+};
+
+static int mtk_sysirq_set_type(struct irq_data *data, unsigned int type)
+{
+	irq_hw_number_t hwirq = data->hwirq;
+	struct mtk_sysirq_chip_data *chip_data = data->chip_data;
+	u32 offset, reg_index, value;
+	unsigned long flags;
+	int ret;
+
+	offset = hwirq & 0x1f;
+	reg_index = hwirq >> 5;
+
+	spin_lock_irqsave(&chip_data->lock, flags);
+	value = readl_relaxed(chip_data->intpol_base + reg_index * 4);
+	if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_EDGE_FALLING) {
+		if (type == IRQ_TYPE_LEVEL_LOW)
+			type = IRQ_TYPE_LEVEL_HIGH;
+		else
+			type = IRQ_TYPE_EDGE_RISING;
+		value |= (1 << offset);
+	} else {
+		value &= ~(1 << offset);
+	}
+	writel(value, chip_data->intpol_base + reg_index * 4);
+
+	data = data->parent_data;
+	ret = data->chip->irq_set_type(data, type);
+	spin_unlock_irqrestore(&chip_data->lock, flags);
+	return ret;
+}
+
+static struct irq_chip mtk_sysirq_chip = {
+	.name			= "MT_SYSIRQ",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_type		= mtk_sysirq_set_type,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
+static int mtk_sysirq_domain_xlate(struct irq_domain *d,
+				   struct device_node *controller,
+				   const u32 *intspec, unsigned int intsize,
+				   unsigned long *out_hwirq,
+				   unsigned int *out_type)
+{
+	if (intsize != 3)
+		return -EINVAL;
+
+	/* sysirq doesn't support PPI */
+	if (intspec[0])
+		return -EINVAL;
+
+	*out_hwirq = intspec[1];
+	*out_type = intspec[2] & IRQ_TYPE_SENSE_MASK;
+	return 0;
+}
+
+static int mtk_sysirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs, void *arg)
+{
+	int i;
+	irq_hw_number_t hwirq;
+	struct of_phandle_args *irq_data = arg;
+	struct of_phandle_args gic_data = *irq_data;
+
+	if (irq_data->args_count != 3)
+		return -EINVAL;
+
+	/* sysirq doesn't support PPI */
+	if (irq_data->args[0])
+		return -EINVAL;
+
+	hwirq = irq_data->args[1];
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &mtk_sysirq_chip,
+					      domain->host_data);
+
+	gic_data.np = domain->parent->of_node;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &gic_data);
+}
+
+static struct irq_domain_ops sysirq_domain_ops = {
+	.xlate = mtk_sysirq_domain_xlate,
+	.alloc = mtk_sysirq_domain_alloc,
+	.free = irq_domain_free_irqs_common,
+};
+
+static int __init mtk_sysirq_of_init(struct device_node *node,
+				     struct device_node *parent)
+{
+	struct irq_domain *domain, *domain_parent;
+	struct mtk_sysirq_chip_data *chip_data;
+	int ret = 0;
+
+	domain_parent = irq_find_host(parent);
+	if (!domain_parent) {
+		pr_err("mtk_sysirq: interrupt-parent not found\n");
+		return -EINVAL;
+	}
+
+	chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL);
+	if (!chip_data)
+		return -ENOMEM;
+
+	chip_data->intpol_base = of_io_request_and_map(node, 0, "intpol");
+	if (!chip_data->intpol_base) {
+		pr_err("mtk_sysirq: unable to map sysirq register\n");
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	domain = irq_domain_add_hierarchy(domain_parent, 0,
+					  MT6577_SYS_INTPOL_NUM, node,
+					  &sysirq_domain_ops, chip_data);
+	if (!domain) {
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+	spin_lock_init(&chip_data->lock);
+
+	return 0;
+
+out_unmap:
+	iounmap(chip_data->intpol_base);
+out_free:
+	kfree(chip_data);
+	return ret;
+}
+IRQCHIP_DECLARE(mtk_sysirq, "mediatek,mt6577-sysirq", mtk_sysirq_of_init);
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index eb9b59e8f122..6b2b582433bd 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -50,12 +50,12 @@ static struct sunxi_sc_nmi_reg_offs sun6i_reg_offs = {
 static inline void sunxi_sc_nmi_write(struct irq_chip_generic *gc, u32 off,
 				      u32 val)
 {
-	irq_reg_writel(val, gc->reg_base + off);
+	irq_reg_writel(gc, val, off);
 }
 
 static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic *gc, u32 off)
 {
-	return irq_reg_readl(gc->reg_base + off);
+	return irq_reg_readl(gc, off);
 }
 
 static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc)
diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c
index 7c44c99bf1f2..accc20036a3c 100644
--- a/drivers/irqchip/irq-tb10x.c
+++ b/drivers/irqchip/irq-tb10x.c
@@ -43,12 +43,12 @@
 static inline void ab_irqctl_writereg(struct irq_chip_generic *gc, u32 reg,
 	u32 val)
 {
-	irq_reg_writel(val, gc->reg_base + reg);
+	irq_reg_writel(gc, val, reg);
 }
 
 static inline u32 ab_irqctl_readreg(struct irq_chip_generic *gc, u32 reg)
 {
-	return irq_reg_readl(gc->reg_base + reg);
+	return irq_reg_readl(gc, reg);
 }
 
 static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08981be7baa1..5503e43e5f28 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,9 +18,11 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/backing-dev.h>
 #include <linux/atomic.h>
 #include <linux/scatterlist.h>
+#include <linux/rbtree.h>
 #include <asm/page.h>
 #include <asm/unaligned.h>
 #include <crypto/hash.h>
@@ -58,7 +60,8 @@ struct dm_crypt_io {
 	atomic_t io_pending;
 	int error;
 	sector_t sector;
-	struct dm_crypt_io *base_io;
+
+	struct rb_node rb_node;
 } CRYPTO_MINALIGN_ATTR;
 
 struct dm_crypt_request {
@@ -108,7 +111,8 @@ struct iv_tcw_private {
  * Crypt: maps a linear range of a block device
  * and encrypts / decrypts at the same time.
  */
-enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
+	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
 
 /*
  * The fields in here must be read only after initialization.
@@ -121,14 +125,18 @@ struct crypt_config {
 	 * pool for per bio private data, crypto requests and
 	 * encryption requeusts/buffer pages
 	 */
-	mempool_t *io_pool;
 	mempool_t *req_pool;
 	mempool_t *page_pool;
 	struct bio_set *bs;
+	struct mutex bio_alloc_lock;
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
 
+	struct task_struct *write_thread;
+	wait_queue_head_t write_thread_wait;
+	struct rb_root write_tree;
+
 	char *cipher;
 	char *cipher_string;
 
@@ -172,9 +180,6 @@ struct crypt_config {
 };
 
 #define MIN_IOS        16
-#define MIN_POOL_PAGES 32
-
-static struct kmem_cache *_crypt_io_pool;
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -223,7 +228,7 @@ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc)
  *
  * tcw:  Compatible implementation of the block chaining mode used
  *       by the TrueCrypt device encryption system (prior to version 4.1).
- *       For more info see: http://www.truecrypt.org
+ *       For more info see: https://gitlab.com/cryptsetup/cryptsetup/wikis/TrueCryptOnDiskFormat
  *       It operates on full 512 byte sectors and uses CBC
  *       with an IV derived from initial key and the sector number.
  *       In addition, whitening value is applied on every sector, whitening
@@ -946,57 +951,70 @@ static int crypt_convert(struct crypt_config *cc,
 	return 0;
 }
 
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
 /*
  * Generate a new unfragmented bio with the given size
  * This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
+ *
+ * This function may be called concurrently. If we allocate from the mempool
+ * concurrently, there is a possibility of deadlock. For example, if we have
+ * mempool of 256 pages, two processes, each wanting 256, pages allocate from
+ * the mempool concurrently, it may deadlock in a situation where both processes
+ * have allocated 128 pages and the mempool is exhausted.
+ *
+ * In order to avoid this scenario we allocate the pages under a mutex.
+ *
+ * In order to not degrade performance with excessive locking, we try
+ * non-blocking allocations without a mutex first but on failure we fallback
+ * to blocking allocations with a mutex.
  */
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
-				      unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
-	unsigned i, len;
+	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+	unsigned i, len, remaining_size;
 	struct page *page;
+	struct bio_vec *bvec;
+
+retry:
+	if (unlikely(gfp_mask & __GFP_WAIT))
+		mutex_lock(&cc->bio_alloc_lock);
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
 	if (!clone)
-		return NULL;
+		goto return_clone;
 
 	clone_init(io, clone);
-	*out_of_pages = 0;
+
+	remaining_size = size;
 
 	for (i = 0; i < nr_iovecs; i++) {
 		page = mempool_alloc(cc->page_pool, gfp_mask);
 		if (!page) {
-			*out_of_pages = 1;
-			break;
+			crypt_free_buffer_pages(cc, clone);
+			bio_put(clone);
+			gfp_mask |= __GFP_WAIT;
+			goto retry;
 		}
 
-		/*
-		 * If additional pages cannot be allocated without waiting,
-		 * return a partially-allocated bio.  The caller will then try
-		 * to allocate more bios while submitting this partial bio.
-		 */
-		gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
 
-		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+		bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+		bvec->bv_page = page;
+		bvec->bv_len = len;
+		bvec->bv_offset = 0;
 
-		if (!bio_add_page(clone, page, len, 0)) {
-			mempool_free(page, cc->page_pool);
-			break;
-		}
+		clone->bi_iter.bi_size += len;
 
-		size -= len;
+		remaining_size -= len;
 	}
 
-	if (!clone->bi_iter.bi_size) {
-		bio_put(clone);
-		return NULL;
-	}
+return_clone:
+	if (unlikely(gfp_mask & __GFP_WAIT))
+		mutex_unlock(&cc->bio_alloc_lock);
 
 	return clone;
 }
@@ -1020,7 +1038,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
 	io->base_bio = bio;
 	io->sector = sector;
 	io->error = 0;
-	io->base_io = NULL;
 	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
 }
@@ -1033,13 +1050,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	struct dm_crypt_io *base_io = io->base_io;
 	int error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
@@ -1047,16 +1062,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 
 	if (io->ctx.req)
 		crypt_free_req(cc, io->ctx.req, base_bio);
-	if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
-		mempool_free(io, cc->io_pool);
-
-	if (likely(!base_io))
-		bio_endio(base_bio, error);
-	else {
-		if (error && !base_io->error)
-			base_io->error = error;
-		crypt_dec_pending(base_io);
-	}
+
+	bio_endio(base_bio, error);
 }
 
 /*
@@ -1117,15 +1124,15 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
 static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 {
 	struct crypt_config *cc = io->cc;
-	struct bio *base_bio = io->base_bio;
 	struct bio *clone;
 
 	/*
-	 * The block layer might modify the bvec array, so always
-	 * copy the required bvecs because we need the original
-	 * one in order to decrypt the whole bio data *afterwards*.
+	 * We need the original biovec array in order to decrypt
+	 * the whole bio data *afterwards* -- thanks to immutable
+	 * biovecs we don't need to worry about the block layer
+	 * modifying the biovec array; so leverage bio_clone_fast().
 	 */
-	clone = bio_clone_bioset(base_bio, gfp, cc->bs);
+	clone = bio_clone_fast(io->base_bio, gfp, cc->bs);
 	if (!clone)
 		return 1;
 
@@ -1138,37 +1145,97 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	return 0;
 }
 
+static void kcryptd_io_read_work(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+	crypt_inc_pending(io);
+	if (kcryptd_io_read(io, GFP_NOIO))
+		io->error = -ENOMEM;
+	crypt_dec_pending(io);
+}
+
+static void kcryptd_queue_read(struct dm_crypt_io *io)
+{
+	struct crypt_config *cc = io->cc;
+
+	INIT_WORK(&io->work, kcryptd_io_read_work);
+	queue_work(cc->io_queue, &io->work);
+}
+
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
+
 	generic_make_request(clone);
 }
 
-static void kcryptd_io(struct work_struct *work)
+#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
+
+static int dmcrypt_write(void *data)
 {
-	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	struct crypt_config *cc = data;
+	struct dm_crypt_io *io;
 
-	if (bio_data_dir(io->base_bio) == READ) {
-		crypt_inc_pending(io);
-		if (kcryptd_io_read(io, GFP_NOIO))
-			io->error = -ENOMEM;
-		crypt_dec_pending(io);
-	} else
-		kcryptd_io_write(io);
-}
+	while (1) {
+		struct rb_root write_tree;
+		struct blk_plug plug;
 
-static void kcryptd_queue_io(struct dm_crypt_io *io)
-{
-	struct crypt_config *cc = io->cc;
+		DECLARE_WAITQUEUE(wait, current);
 
-	INIT_WORK(&io->work, kcryptd_io);
-	queue_work(cc->io_queue, &io->work);
+		spin_lock_irq(&cc->write_thread_wait.lock);
+continue_locked:
+
+		if (!RB_EMPTY_ROOT(&cc->write_tree))
+			goto pop_from_list;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		__add_wait_queue(&cc->write_thread_wait, &wait);
+
+		spin_unlock_irq(&cc->write_thread_wait.lock);
+
+		if (unlikely(kthread_should_stop())) {
+			set_task_state(current, TASK_RUNNING);
+			remove_wait_queue(&cc->write_thread_wait, &wait);
+			break;
+		}
+
+		schedule();
+
+		set_task_state(current, TASK_RUNNING);
+		spin_lock_irq(&cc->write_thread_wait.lock);
+		__remove_wait_queue(&cc->write_thread_wait, &wait);
+		goto continue_locked;
+
+pop_from_list:
+		write_tree = cc->write_tree;
+		cc->write_tree = RB_ROOT;
+		spin_unlock_irq(&cc->write_thread_wait.lock);
+
+		BUG_ON(rb_parent(write_tree.rb_node));
+
+		/*
+		 * Note: we cannot walk the tree here with rb_next because
+		 * the structures may be freed when kcryptd_io_write is called.
+		 */
+		blk_start_plug(&plug);
+		do {
+			io = crypt_io_from_node(rb_first(&write_tree));
+			rb_erase(&io->rb_node, &write_tree);
+			kcryptd_io_write(io);
+		} while (!RB_EMPTY_ROOT(&write_tree));
+		blk_finish_plug(&plug);
+	}
+	return 0;
 }
 
 static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 {
 	struct bio *clone = io->ctx.bio_out;
 	struct crypt_config *cc = io->cc;
+	unsigned long flags;
+	sector_t sector;
+	struct rb_node **rbp, *parent;
 
 	if (unlikely(io->error < 0)) {
 		crypt_free_buffer_pages(cc, clone);
@@ -1182,20 +1249,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 
 	clone->bi_iter.bi_sector = cc->start + io->sector;
 
-	if (async)
-		kcryptd_queue_io(io);
-	else
+	if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
 		generic_make_request(clone);
+		return;
+	}
+
+	spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+	rbp = &cc->write_tree.rb_node;
+	parent = NULL;
+	sector = io->sector;
+	while (*rbp) {
+		parent = *rbp;
+		if (sector < crypt_io_from_node(parent)->sector)
+			rbp = &(*rbp)->rb_left;
+		else
+			rbp = &(*rbp)->rb_right;
+	}
+	rb_link_node(&io->rb_node, parent, rbp);
+	rb_insert_color(&io->rb_node, &cc->write_tree);
+
+	wake_up_locked(&cc->write_thread_wait);
+	spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
 }
 
 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
-	struct dm_crypt_io *new_io;
 	int crypt_finished;
-	unsigned out_of_pages = 0;
-	unsigned remaining = io->base_bio->bi_iter.bi_size;
 	sector_t sector = io->sector;
 	int r;
 
@@ -1205,80 +1286,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	crypt_inc_pending(io);
 	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
-	/*
-	 * The allocated buffers can be smaller than the whole bio,
-	 * so repeat the whole process until all the data can be handled.
-	 */
-	while (remaining) {
-		clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
-		if (unlikely(!clone)) {
-			io->error = -ENOMEM;
-			break;
-		}
-
-		io->ctx.bio_out = clone;
-		io->ctx.iter_out = clone->bi_iter;
-
-		remaining -= clone->bi_iter.bi_size;
-		sector += bio_sectors(clone);
-
-		crypt_inc_pending(io);
-
-		r = crypt_convert(cc, &io->ctx);
-		if (r < 0)
-			io->error = -EIO;
-
-		crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
-		/* Encryption was already finished, submit io now */
-		if (crypt_finished) {
-			kcryptd_crypt_write_io_submit(io, 0);
+	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+	if (unlikely(!clone)) {
+		io->error = -EIO;
+		goto dec;
+	}
 
-			/*
-			 * If there was an error, do not try next fragments.
-			 * For async, error is processed in async handler.
-			 */
-			if (unlikely(r < 0))
-				break;
+	io->ctx.bio_out = clone;
+	io->ctx.iter_out = clone->bi_iter;
 
-			io->sector = sector;
-		}
+	sector += bio_sectors(clone);
 
-		/*
-		 * Out of memory -> run queues
-		 * But don't wait if split was due to the io size restriction
-		 */
-		if (unlikely(out_of_pages))
-			congestion_wait(BLK_RW_ASYNC, HZ/100);
-
-		/*
-		 * With async crypto it is unsafe to share the crypto context
-		 * between fragments, so switch to a new dm_crypt_io structure.
-		 */
-		if (unlikely(!crypt_finished && remaining)) {
-			new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
-			crypt_io_init(new_io, io->cc, io->base_bio, sector);
-			crypt_inc_pending(new_io);
-			crypt_convert_init(cc, &new_io->ctx, NULL,
-					   io->base_bio, sector);
-			new_io->ctx.iter_in = io->ctx.iter_in;
-
-			/*
-			 * Fragments after the first use the base_io
-			 * pending count.
-			 */
-			if (!io->base_io)
-				new_io->base_io = io;
-			else {
-				new_io->base_io = io->base_io;
-				crypt_inc_pending(io->base_io);
-				crypt_dec_pending(io);
-			}
+	crypt_inc_pending(io);
+	r = crypt_convert(cc, &io->ctx);
+	if (r)
+		io->error = -EIO;
+	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
-			io = new_io;
-		}
+	/* Encryption was already finished, submit io now */
+	if (crypt_finished) {
+		kcryptd_crypt_write_io_submit(io, 0);
+		io->sector = sector;
 	}
 
+dec:
 	crypt_dec_pending(io);
 }
 
@@ -1481,6 +1512,9 @@ static void crypt_dtr(struct dm_target *ti)
 	if (!cc)
 		return;
 
+	if (cc->write_thread)
+		kthread_stop(cc->write_thread);
+
 	if (cc->io_queue)
 		destroy_workqueue(cc->io_queue);
 	if (cc->crypt_queue)
@@ -1495,8 +1529,6 @@ static void crypt_dtr(struct dm_target *ti)
 		mempool_destroy(cc->page_pool);
 	if (cc->req_pool)
 		mempool_destroy(cc->req_pool);
-	if (cc->io_pool)
-		mempool_destroy(cc->io_pool);
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
@@ -1688,7 +1720,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	char dummy;
 
 	static struct dm_arg _args[] = {
-		{0, 1, "Invalid number of feature args"},
+		{0, 3, "Invalid number of feature args"},
 	};
 
 	if (argc < 5) {
@@ -1710,13 +1742,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (ret < 0)
 		goto bad;
 
-	ret = -ENOMEM;
-	cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
-	if (!cc->io_pool) {
-		ti->error = "Cannot allocate crypt io mempool";
-		goto bad;
-	}
-
 	cc->dmreq_start = sizeof(struct ablkcipher_request);
 	cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
 	cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request));
@@ -1734,6 +1759,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc));
 	}
 
+	ret = -ENOMEM;
 	cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
 			sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size);
 	if (!cc->req_pool) {
@@ -1746,7 +1772,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		      sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size,
 		      ARCH_KMALLOC_MINALIGN);
 
-	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+	cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
 		goto bad;
@@ -1758,6 +1784,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
+	mutex_init(&cc->bio_alloc_lock);
+
 	ret = -EINVAL;
 	if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid iv_offset sector";
@@ -1788,15 +1816,27 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		if (ret)
 			goto bad;
 
-		opt_string = dm_shift_arg(&as);
+		ret = -EINVAL;
+		while (opt_params--) {
+			opt_string = dm_shift_arg(&as);
+			if (!opt_string) {
+				ti->error = "Not enough feature arguments";
+				goto bad;
+			}
 
-		if (opt_params == 1 && opt_string &&
-		    !strcasecmp(opt_string, "allow_discards"))
-			ti->num_discard_bios = 1;
-		else if (opt_params) {
-			ret = -EINVAL;
-			ti->error = "Invalid feature arguments";
-			goto bad;
+			if (!strcasecmp(opt_string, "allow_discards"))
+				ti->num_discard_bios = 1;
+
+			else if (!strcasecmp(opt_string, "same_cpu_crypt"))
+				set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+
+			else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
+				set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+
+			else {
+				ti->error = "Invalid feature arguments";
+				goto bad;
+			}
 		}
 	}
 
@@ -1807,13 +1847,28 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->crypt_queue = alloc_workqueue("kcryptd",
-					  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+	else
+		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+						  num_online_cpus());
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
 		goto bad;
 	}
 
+	init_waitqueue_head(&cc->write_thread_wait);
+	cc->write_tree = RB_ROOT;
+
+	cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+	if (IS_ERR(cc->write_thread)) {
+		ret = PTR_ERR(cc->write_thread);
+		cc->write_thread = NULL;
+		ti->error = "Couldn't spawn write thread";
+		goto bad;
+	}
+	wake_up_process(cc->write_thread);
+
 	ti->num_flush_bios = 1;
 	ti->discard_zeroes_data_unsupported = true;
 
@@ -1848,7 +1903,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 
 	if (bio_data_dir(io->base_bio) == READ) {
 		if (kcryptd_io_read(io, GFP_NOWAIT))
-			kcryptd_queue_io(io);
+			kcryptd_queue_read(io);
 	} else
 		kcryptd_queue_crypt(io);
 
@@ -1860,6 +1915,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 {
 	struct crypt_config *cc = ti->private;
 	unsigned i, sz = 0;
+	int num_feature_args = 0;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -1878,8 +1934,18 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
 
-		if (ti->num_discard_bios)
-			DMEMIT(" 1 allow_discards");
+		num_feature_args += !!ti->num_discard_bios;
+		num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+		num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+		if (num_feature_args) {
+			DMEMIT(" %d", num_feature_args);
+			if (ti->num_discard_bios)
+				DMEMIT(" allow_discards");
+			if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+				DMEMIT(" same_cpu_crypt");
+			if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
+				DMEMIT(" submit_from_crypt_cpus");
+		}
 
 		break;
 	}
@@ -1976,7 +2042,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 13, 0},
+	.version = {1, 14, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
@@ -1994,15 +2060,9 @@ static int __init dm_crypt_init(void)
 {
 	int r;
 
-	_crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
-	if (!_crypt_io_pool)
-		return -ENOMEM;
-
 	r = dm_register_target(&crypt_target);
-	if (r < 0) {
+	if (r < 0)
 		DMERR("register failed %d", r);
-		kmem_cache_destroy(_crypt_io_pool);
-	}
 
 	return r;
 }
@@ -2010,7 +2070,6 @@ static int __init dm_crypt_init(void)
 static void __exit dm_crypt_exit(void)
 {
 	dm_unregister_target(&crypt_target);
-	kmem_cache_destroy(_crypt_io_pool);
 }
 
 module_init(dm_crypt_init);
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 1d1c4d35a1a6..1c2cc6fee351 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -3221,7 +3221,6 @@ EXPORT_SYMBOL_GPL(vb2_thread_start);
 int vb2_thread_stop(struct vb2_queue *q)
 {
 	struct vb2_threadio_data *threadio = q->threadio;
-	struct vb2_fileio_data *fileio = q->fileio;
 	int err;
 
 	if (threadio == NULL)
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index 9e21e4fc9599..8f43ab8fd2d6 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -223,7 +223,7 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 	vexpress_config_set_master(vexpress_sysreg_get_master());
 
 	/* Confirm board type against DT property, if available */
-	if (of_property_read_u32(of_allnodes, "arm,hbi", &dt_hbi) == 0) {
+	if (of_property_read_u32(of_root, "arm,hbi", &dt_hbi) == 0) {
 		u32 id = vexpress_get_procid(VEXPRESS_SITE_MASTER);
 		u32 hbi = (id >> SYS_PROCIDx_HBI_SHIFT) & SYS_HBI_MASK;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index fdd36794c536..1f65817d4674 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -26,7 +26,6 @@
 #include <net/vxlan.h>
 
 MODULE_VERSION(DRV_VER);
-MODULE_DEVICE_TABLE(pci, be_dev_ids);
 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
 MODULE_AUTHOR("Emulex Corporation");
 MODULE_LICENSE("GPL");
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 1a13f5b722c5..7bcaeec876c0 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -7,10 +7,9 @@ config OF
 menu "Device Tree and Open Firmware support"
 	depends on OF
 
-config OF_SELFTEST
-	bool "Device Tree Runtime self tests"
+config OF_UNITTEST
+	bool "Device Tree runtime unit tests"
 	depends on OF_IRQ && OF_EARLY_FLATTREE
-	select OF_DYNAMIC
 	select OF_RESOLVE
 	help
 	  This option builds in test cases for the device tree infrastructure
@@ -23,6 +22,7 @@ config OF_FLATTREE
 	bool
 	select DTC
 	select LIBFDT
+	select CRC32
 
 config OF_EARLY_FLATTREE
 	bool
@@ -83,4 +83,9 @@ config OF_RESERVED_MEM
 config OF_RESOLVE
 	bool
 
+config OF_OVERLAY
+	bool "Device Tree overlays"
+	select OF_DYNAMIC
+	select OF_RESOLVE
+
 endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index ca9209ce50cd..7563f36c71db 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -6,14 +6,15 @@ obj-$(CONFIG_OF_PROMTREE) += pdt.o
 obj-$(CONFIG_OF_ADDRESS)  += address.o
 obj-$(CONFIG_OF_IRQ)    += irq.o
 obj-$(CONFIG_OF_NET)	+= of_net.o
-obj-$(CONFIG_OF_SELFTEST) += of_selftest.o
-of_selftest-objs := selftest.o testcase-data/testcases.dtb.o
+obj-$(CONFIG_OF_UNITTEST) += of_unittest.o
+of_unittest-objs := unittest.o unittest-data/testcases.dtb.o
 obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
 obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
 obj-$(CONFIG_OF_MTD)	+= of_mtd.o
 obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
 obj-$(CONFIG_OF_RESOLVE)  += resolver.o
+obj-$(CONFIG_OF_OVERLAY) += overlay.o
 
 CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt
 CFLAGS_fdt_address.o = -I$(src)/../../scripts/dtc/libfdt
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 9e77614391a0..d5f3b07177f9 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -889,7 +889,7 @@ EXPORT_SYMBOL(of_iomap);
  *		return PTR_ERR(base);
  */
 void __iomem *of_io_request_and_map(struct device_node *np, int index,
-					char *name)
+					const char *name)
 {
 	struct resource res;
 	void __iomem *mem;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 469d2b7f47eb..df9626747afb 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -32,11 +32,12 @@
 
 LIST_HEAD(aliases_lookup);
 
-struct device_node *of_allnodes;
-EXPORT_SYMBOL(of_allnodes);
+struct device_node *of_root;
+EXPORT_SYMBOL(of_root);
 struct device_node *of_chosen;
 struct device_node *of_aliases;
 struct device_node *of_stdout;
+static const char *of_stdout_options;
 
 struct kset *of_kset;
 
@@ -48,7 +49,7 @@ struct kset *of_kset;
  */
 DEFINE_MUTEX(of_mutex);
 
-/* use when traversing tree through the allnext, child, sibling,
+/* use when traversing tree through the child, sibling,
  * or parent members of struct device_node.
  */
 DEFINE_RAW_SPINLOCK(devtree_lock);
@@ -204,7 +205,7 @@ static int __init of_init(void)
 	mutex_unlock(&of_mutex);
 
 	/* Symlink in /proc as required by userspace ABI */
-	if (of_allnodes)
+	if (of_root)
 		proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base");
 
 	return 0;
@@ -245,6 +246,23 @@ struct property *of_find_property(const struct device_node *np,
 }
 EXPORT_SYMBOL(of_find_property);
 
+struct device_node *__of_find_all_nodes(struct device_node *prev)
+{
+	struct device_node *np;
+	if (!prev) {
+		np = of_root;
+	} else if (prev->child) {
+		np = prev->child;
+	} else {
+		/* Walk back up looking for a sibling, or the end of the structure */
+		np = prev;
+		while (np->parent && !np->sibling)
+			np = np->parent;
+		np = np->sibling; /* Might be null at the end of the tree */
+	}
+	return np;
+}
+
 /**
  * of_find_all_nodes - Get next node in global list
  * @prev:	Previous node or NULL to start iteration
@@ -259,10 +277,8 @@ struct device_node *of_find_all_nodes(struct device_node *prev)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np = prev ? prev->allnext : of_allnodes;
-	for (; np != NULL; np = np->allnext)
-		if (of_node_get(np))
-			break;
+	np = __of_find_all_nodes(prev);
+	of_node_get(np);
 	of_node_put(prev);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
@@ -507,27 +523,27 @@ EXPORT_SYMBOL(of_machine_is_compatible);
  *
  *  @device: Node to check for availability, with locks already held
  *
- *  Returns 1 if the status property is absent or set to "okay" or "ok",
- *  0 otherwise
+ *  Returns true if the status property is absent or set to "okay" or "ok",
+ *  false otherwise
  */
-static int __of_device_is_available(const struct device_node *device)
+static bool __of_device_is_available(const struct device_node *device)
 {
 	const char *status;
 	int statlen;
 
 	if (!device)
-		return 0;
+		return false;
 
 	status = __of_get_property(device, "status", &statlen);
 	if (status == NULL)
-		return 1;
+		return true;
 
 	if (statlen > 0) {
 		if (!strcmp(status, "okay") || !strcmp(status, "ok"))
-			return 1;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 /**
@@ -535,13 +551,13 @@ static int __of_device_is_available(const struct device_node *device)
  *
  *  @device: Node to check for availability
  *
- *  Returns 1 if the status property is absent or set to "okay" or "ok",
- *  0 otherwise
+ *  Returns true if the status property is absent or set to "okay" or "ok",
+ *  false otherwise
  */
-int of_device_is_available(const struct device_node *device)
+bool of_device_is_available(const struct device_node *device)
 {
 	unsigned long flags;
-	int res;
+	bool res;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	res = __of_device_is_available(device);
@@ -698,8 +714,9 @@ static struct device_node *__of_find_node_by_path(struct device_node *parent,
 						const char *path)
 {
 	struct device_node *child;
-	int len = strchrnul(path, '/') - path;
+	int len;
 
+	len = strcspn(path, "/:");
 	if (!len)
 		return NULL;
 
@@ -715,11 +732,14 @@ static struct device_node *__of_find_node_by_path(struct device_node *parent,
 }
 
 /**
- *	of_find_node_by_path - Find a node matching a full OF path
+ *	of_find_node_opts_by_path - Find a node matching a full OF path
  *	@path: Either the full path to match, or if the path does not
  *	       start with '/', the name of a property of the /aliases
  *	       node (an alias).  In the case of an alias, the node
  *	       matching the alias' value will be returned.
+ *	@opts: Address of a pointer into which to store the start of
+ *	       an options string appended to the end of the path with
+ *	       a ':' separator.
  *
  *	Valid paths:
  *		/foo/bar	Full path
@@ -729,19 +749,27 @@ static struct device_node *__of_find_node_by_path(struct device_node *parent,
  *	Returns a node pointer with refcount incremented, use
  *	of_node_put() on it when done.
  */
-struct device_node *of_find_node_by_path(const char *path)
+struct device_node *of_find_node_opts_by_path(const char *path, const char **opts)
 {
 	struct device_node *np = NULL;
 	struct property *pp;
 	unsigned long flags;
+	const char *separator = strchr(path, ':');
+
+	if (opts)
+		*opts = separator ? separator + 1 : NULL;
 
 	if (strcmp(path, "/") == 0)
-		return of_node_get(of_allnodes);
+		return of_node_get(of_root);
 
 	/* The path could begin with an alias */
 	if (*path != '/') {
-		char *p = strchrnul(path, '/');
-		int len = p - path;
+		int len;
+		const char *p = separator;
+
+		if (!p)
+			p = strchrnul(path, '/');
+		len = p - path;
 
 		/* of_aliases must not be NULL */
 		if (!of_aliases)
@@ -761,16 +789,18 @@ struct device_node *of_find_node_by_path(const char *path)
 	/* Step down the tree matching path components */
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	if (!np)
-		np = of_node_get(of_allnodes);
+		np = of_node_get(of_root);
 	while (np && *path == '/') {
 		path++; /* Increment past '/' delimiter */
 		np = __of_find_node_by_path(np, path);
 		path = strchrnul(path, '/');
+		if (separator && separator < path)
+			break;
 	}
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
-EXPORT_SYMBOL(of_find_node_by_path);
+EXPORT_SYMBOL(of_find_node_opts_by_path);
 
 /**
  *	of_find_node_by_name - Find a node by its "name" property
@@ -790,8 +820,7 @@ struct device_node *of_find_node_by_name(struct device_node *from,
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np = from ? from->allnext : of_allnodes;
-	for (; np; np = np->allnext)
+	for_each_of_allnodes_from(from, np)
 		if (np->name && (of_node_cmp(np->name, name) == 0)
 		    && of_node_get(np))
 			break;
@@ -820,8 +849,7 @@ struct device_node *of_find_node_by_type(struct device_node *from,
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np = from ? from->allnext : of_allnodes;
-	for (; np; np = np->allnext)
+	for_each_of_allnodes_from(from, np)
 		if (np->type && (of_node_cmp(np->type, type) == 0)
 		    && of_node_get(np))
 			break;
@@ -852,12 +880,10 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np = from ? from->allnext : of_allnodes;
-	for (; np; np = np->allnext) {
+	for_each_of_allnodes_from(from, np)
 		if (__of_device_is_compatible(np, compatible, type, NULL) &&
 		    of_node_get(np))
 			break;
-	}
 	of_node_put(from);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
@@ -884,8 +910,7 @@ struct device_node *of_find_node_with_property(struct device_node *from,
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np = from ? from->allnext : of_allnodes;
-	for (; np; np = np->allnext) {
+	for_each_of_allnodes_from(from, np) {
 		for (pp = np->properties; pp; pp = pp->next) {
 			if (of_prop_cmp(pp->name, prop_name) == 0) {
 				of_node_get(np);
@@ -967,8 +992,7 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
 		*match = NULL;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np = from ? from->allnext : of_allnodes;
-	for (; np; np = np->allnext) {
+	for_each_of_allnodes_from(from, np) {
 		m = __of_match_node(matches, np);
 		if (m && of_node_get(np)) {
 			if (match)
@@ -1025,7 +1049,7 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 		return NULL;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	for (np = of_allnodes; np; np = np->allnext)
+	for_each_of_allnodes(np)
 		if (np->phandle == handle)
 			break;
 	of_node_get(np);
@@ -1281,6 +1305,7 @@ int of_property_read_u64_array(const struct device_node *np,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(of_property_read_u64_array);
 
 /**
  * of_property_read_string - Find and read a string from a property
@@ -1350,7 +1375,7 @@ int of_property_match_string(struct device_node *np, const char *propname,
 EXPORT_SYMBOL_GPL(of_property_match_string);
 
 /**
- * of_property_read_string_util() - Utility helper for parsing string properties
+ * of_property_read_string_helper() - Utility helper for parsing string properties
  * @np:		device node from which the property value is to be read.
  * @propname:	name of the property to be searched.
  * @out_strs:	output array of string pointers.
@@ -1864,7 +1889,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
 		if (IS_ENABLED(CONFIG_PPC) && !name)
 			name = of_get_property(of_aliases, "stdout", NULL);
 		if (name)
-			of_stdout = of_find_node_by_path(name);
+			of_stdout = of_find_node_opts_by_path(name, &of_stdout_options);
 	}
 
 	if (!of_aliases)
@@ -1990,7 +2015,8 @@ bool of_console_check(struct device_node *dn, char *name, int index)
 {
 	if (!dn || dn != of_stdout || console_set_on_cmdline)
 		return false;
-	return !add_preferred_console(name, index, NULL);
+	return !add_preferred_console(name, index,
+				      kstrdup(of_stdout_options, GFP_KERNEL));
 }
 EXPORT_SYMBOL_GPL(of_console_check);
 
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 46d6c75c1404..20c1332a0018 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -2,6 +2,9 @@
 #include <linux/kernel.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
@@ -66,6 +69,87 @@ int of_device_add(struct platform_device *ofdev)
 	return device_add(&ofdev->dev);
 }
 
+/**
+ * of_dma_configure - Setup DMA configuration
+ * @dev:	Device to apply DMA configuration
+ * @np:		Pointer to OF node having DMA configuration
+ *
+ * Try to get devices's DMA configuration from DT and update it
+ * accordingly.
+ *
+ * If platform code needs to use its own special DMA configuration, it
+ * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
+ * to fix up DMA configuration.
+ */
+void of_dma_configure(struct device *dev, struct device_node *np)
+{
+	u64 dma_addr, paddr, size;
+	int ret;
+	bool coherent;
+	unsigned long offset;
+	struct iommu_ops *iommu;
+
+	/*
+	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
+	 * setup the correct supported mask.
+	 */
+	if (!dev->coherent_dma_mask)
+		dev->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	/*
+	 * Set it to coherent_dma_mask by default if the architecture
+	 * code has not set it.
+	 */
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+
+	ret = of_dma_get_range(np, &dma_addr, &paddr, &size);
+	if (ret < 0) {
+		dma_addr = offset = 0;
+		size = dev->coherent_dma_mask + 1;
+	} else {
+		offset = PFN_DOWN(paddr - dma_addr);
+
+		/*
+		 * Add a work around to treat the size as mask + 1 in case
+		 * it is defined in DT as a mask.
+		 */
+		if (size & 1) {
+			dev_warn(dev, "Invalid size 0x%llx for dma-range\n",
+				 size);
+			size = size + 1;
+		}
+
+		if (!size) {
+			dev_err(dev, "Adjusted size 0x%llx invalid\n", size);
+			return;
+		}
+		dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset);
+	}
+
+	dev->dma_pfn_offset = offset;
+
+	/*
+	 * Limit coherent and dma mask based on size and default mask
+	 * set by the driver.
+	 */
+	dev->coherent_dma_mask = min(dev->coherent_dma_mask,
+				     DMA_BIT_MASK(ilog2(dma_addr + size)));
+	*dev->dma_mask = min((*dev->dma_mask),
+			     DMA_BIT_MASK(ilog2(dma_addr + size)));
+
+	coherent = of_dma_is_coherent(np);
+	dev_dbg(dev, "device is%sdma coherent\n",
+		coherent ? " " : " not ");
+
+	iommu = of_iommu_configure(dev, np);
+	dev_dbg(dev, "device is%sbehind an iommu\n",
+		iommu ? " " : " not ");
+
+	arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);
+}
+EXPORT_SYMBOL_GPL(of_dma_configure);
+
 int of_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index d4994177dec2..3351ef408125 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -77,18 +77,132 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
 
-int of_reconfig_notify(unsigned long action, void *p)
+#ifdef DEBUG
+const char *action_names[] = {
+	[OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE",
+	[OF_RECONFIG_DETACH_NODE] = "DETACH_NODE",
+	[OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY",
+	[OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY",
+	[OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY",
+};
+#endif
+
+int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p)
 {
 	int rc;
+#ifdef DEBUG
+	struct of_reconfig_data *pr = p;
+
+	switch (action) {
+	case OF_RECONFIG_ATTACH_NODE:
+	case OF_RECONFIG_DETACH_NODE:
+		pr_debug("of/notify %-15s %s\n", action_names[action],
+			pr->dn->full_name);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+	case OF_RECONFIG_REMOVE_PROPERTY:
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		pr_debug("of/notify %-15s %s:%s\n", action_names[action],
+			pr->dn->full_name, pr->prop->name);
+		break;
 
+	}
+#endif
 	rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
 	return notifier_to_errno(rc);
 }
 
+/*
+ * of_reconfig_get_state_change()	- Returns new state of device
+ * @action	- action of the of notifier
+ * @arg		- argument of the of notifier
+ *
+ * Returns the new state of a device based on the notifier used.
+ * Returns 0 on device going from enabled to disabled, 1 on device
+ * going from disabled to enabled and -1 on no change.
+ */
+int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr)
+{
+	struct property *prop, *old_prop = NULL;
+	int is_status, status_state, old_status_state, prev_state, new_state;
+
+	/* figure out if a device should be created or destroyed */
+	switch (action) {
+	case OF_RECONFIG_ATTACH_NODE:
+	case OF_RECONFIG_DETACH_NODE:
+		prop = of_find_property(pr->dn, "status", NULL);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		prop = pr->prop;
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		prop = pr->prop;
+		old_prop = pr->old_prop;
+		break;
+	default:
+		return OF_RECONFIG_NO_CHANGE;
+	}
+
+	is_status = 0;
+	status_state = -1;
+	old_status_state = -1;
+	prev_state = -1;
+	new_state = -1;
+
+	if (prop && !strcmp(prop->name, "status")) {
+		is_status = 1;
+		status_state = !strcmp(prop->value, "okay") ||
+			       !strcmp(prop->value, "ok");
+		if (old_prop)
+			old_status_state = !strcmp(old_prop->value, "okay") ||
+					   !strcmp(old_prop->value, "ok");
+	}
+
+	switch (action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		prev_state = 0;
+		/* -1 & 0 status either missing or okay */
+		new_state = status_state != 0;
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		/* -1 & 0 status either missing or okay */
+		prev_state = status_state != 0;
+		new_state = 0;
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		if (is_status) {
+			/* no status property -> enabled (legacy) */
+			prev_state = 1;
+			new_state = status_state;
+		}
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		if (is_status) {
+			prev_state = status_state;
+			/* no status property -> enabled (legacy) */
+			new_state = 1;
+		}
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		if (is_status) {
+			prev_state = old_status_state != 0;
+			new_state = status_state != 0;
+		}
+		break;
+	}
+
+	if (prev_state == new_state)
+		return OF_RECONFIG_NO_CHANGE;
+
+	return new_state ? OF_RECONFIG_CHANGE_ADD : OF_RECONFIG_CHANGE_REMOVE;
+}
+EXPORT_SYMBOL_GPL(of_reconfig_get_state_change);
+
 int of_property_notify(int action, struct device_node *np,
 		       struct property *prop, struct property *oldprop)
 {
-	struct of_prop_reconfig pr;
+	struct of_reconfig_data pr;
 
 	/* only call notifiers if the node is attached */
 	if (!of_node_is_attached(np))
@@ -117,8 +231,6 @@ void __of_attach_node(struct device_node *np)
 
 	np->child = NULL;
 	np->sibling = np->parent->child;
-	np->allnext = np->parent->allnext;
-	np->parent->allnext = np;
 	np->parent->child = np;
 	of_node_clear_flag(np, OF_DETACHED);
 }
@@ -128,8 +240,12 @@ void __of_attach_node(struct device_node *np)
  */
 int of_attach_node(struct device_node *np)
 {
+	struct of_reconfig_data rd;
 	unsigned long flags;
 
+	memset(&rd, 0, sizeof(rd));
+	rd.dn = np;
+
 	mutex_lock(&of_mutex);
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	__of_attach_node(np);
@@ -138,7 +254,7 @@ int of_attach_node(struct device_node *np)
 	__of_attach_node_sysfs(np);
 	mutex_unlock(&of_mutex);
 
-	of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+	of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, &rd);
 
 	return 0;
 }
@@ -154,17 +270,6 @@ void __of_detach_node(struct device_node *np)
 	if (WARN_ON(!parent))
 		return;
 
-	if (of_allnodes == np)
-		of_allnodes = np->allnext;
-	else {
-		struct device_node *prev;
-		for (prev = of_allnodes;
-		     prev->allnext != np;
-		     prev = prev->allnext)
-			;
-		prev->allnext = np->allnext;
-	}
-
 	if (parent->child == np)
 		parent->child = np->sibling;
 	else {
@@ -187,9 +292,13 @@ void __of_detach_node(struct device_node *np)
  */
 int of_detach_node(struct device_node *np)
 {
+	struct of_reconfig_data rd;
 	unsigned long flags;
 	int rc = 0;
 
+	memset(&rd, 0, sizeof(rd));
+	rd.dn = np;
+
 	mutex_lock(&of_mutex);
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	__of_detach_node(np);
@@ -198,7 +307,7 @@ int of_detach_node(struct device_node *np)
 	__of_detach_node_sysfs(np);
 	mutex_unlock(&of_mutex);
 
-	of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+	of_reconfig_notify(OF_RECONFIG_DETACH_NODE, &rd);
 
 	return rc;
 }
@@ -285,36 +394,54 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
 }
 
 /**
- * __of_node_alloc() - Create an empty device node dynamically.
- * @full_name:	Full name of the new device node
- * @allocflags:	Allocation flags (typically pass GFP_KERNEL)
+ * __of_node_dup() - Duplicate or create an empty device node dynamically.
+ * @fmt: Format string (plus vargs) for new full name of the device node
  *
- * Create an empty device tree node, suitable for further modification.
- * The node data are dynamically allocated and all the node flags
- * have the OF_DYNAMIC & OF_DETACHED bits set.
- * Returns the newly allocated node or NULL on out of memory error.
+ * Create an device tree node, either by duplicating an empty node or by allocating
+ * an empty one suitable for further modification.  The node data are
+ * dynamically allocated and all the node flags have the OF_DYNAMIC &
+ * OF_DETACHED bits set. Returns the newly allocated node or NULL on out of
+ * memory error.
  */
-struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags)
+struct device_node *__of_node_dup(const struct device_node *np, const char *fmt, ...)
 {
+	va_list vargs;
 	struct device_node *node;
 
-	node = kzalloc(sizeof(*node), allocflags);
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return NULL;
+	va_start(vargs, fmt);
+	node->full_name = kvasprintf(GFP_KERNEL, fmt, vargs);
+	va_end(vargs);
+	if (!node->full_name) {
+		kfree(node);
+		return NULL;
+	}
 
-	node->full_name = kstrdup(full_name, allocflags);
 	of_node_set_flag(node, OF_DYNAMIC);
 	of_node_set_flag(node, OF_DETACHED);
-	if (!node->full_name)
-		goto err_free;
-
 	of_node_init(node);
 
+	/* Iterate over and duplicate all properties */
+	if (np) {
+		struct property *pp, *new_pp;
+		for_each_property_of_node(np, pp) {
+			new_pp = __of_prop_dup(pp, GFP_KERNEL);
+			if (!new_pp)
+				goto err_prop;
+			if (__of_add_property(node, new_pp)) {
+				kfree(new_pp->name);
+				kfree(new_pp->value);
+				kfree(new_pp);
+				goto err_prop;
+			}
+		}
+	}
 	return node;
 
- err_free:
-	kfree(node->full_name);
-	kfree(node);
+ err_prop:
+	of_node_put(node); /* Frees the node and properties */
 	return NULL;
 }
 
@@ -330,27 +457,15 @@ static void __of_changeset_entry_dump(struct of_changeset_entry *ce)
 {
 	switch (ce->action) {
 	case OF_RECONFIG_ADD_PROPERTY:
-		pr_debug("%p: %s %s/%s\n",
-			ce, "ADD_PROPERTY   ", ce->np->full_name,
-			ce->prop->name);
-		break;
 	case OF_RECONFIG_REMOVE_PROPERTY:
-		pr_debug("%p: %s %s/%s\n",
-			ce, "REMOVE_PROPERTY", ce->np->full_name,
-			ce->prop->name);
-		break;
 	case OF_RECONFIG_UPDATE_PROPERTY:
-		pr_debug("%p: %s %s/%s\n",
-			ce, "UPDATE_PROPERTY", ce->np->full_name,
-			ce->prop->name);
+		pr_debug("of/cset<%p> %-15s %s/%s\n", ce, action_names[ce->action],
+			ce->np->full_name, ce->prop->name);
 		break;
 	case OF_RECONFIG_ATTACH_NODE:
-		pr_debug("%p: %s %s\n",
-			ce, "ATTACH_NODE    ", ce->np->full_name);
-		break;
 	case OF_RECONFIG_DETACH_NODE:
-		pr_debug("%p: %s %s\n",
-			ce, "DETACH_NODE    ", ce->np->full_name);
+		pr_debug("of/cset<%p> %-15s %s\n", ce, action_names[ce->action],
+			ce->np->full_name);
 		break;
 	}
 }
@@ -388,6 +503,7 @@ static void __of_changeset_entry_invert(struct of_changeset_entry *ce,
 
 static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert)
 {
+	struct of_reconfig_data rd;
 	struct of_changeset_entry ce_inverted;
 	int ret;
 
@@ -399,7 +515,9 @@ static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool reve
 	switch (ce->action) {
 	case OF_RECONFIG_ATTACH_NODE:
 	case OF_RECONFIG_DETACH_NODE:
-		ret = of_reconfig_notify(ce->action, ce->np);
+		memset(&rd, 0, sizeof(rd));
+		rd.dn = ce->np;
+		ret = of_reconfig_notify(ce->action, &rd);
 		break;
 	case OF_RECONFIG_ADD_PROPERTY:
 	case OF_RECONFIG_REMOVE_PROPERTY:
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index d134710de96d..3a896c9aeb74 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -9,6 +9,7 @@
  * version 2 as published by the Free Software Foundation.
  */
 
+#include <linux/crc32.h>
 #include <linux/kernel.h>
 #include <linux/initrd.h>
 #include <linux/memblock.h>
@@ -22,6 +23,7 @@
 #include <linux/libfdt.h>
 #include <linux/debugfs.h>
 #include <linux/serial_core.h>
+#include <linux/sysfs.h>
 
 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
 #include <asm/page.h>
@@ -145,15 +147,15 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size,
  * @mem: Memory chunk to use for allocating device nodes and properties
  * @p: pointer to node in flat tree
  * @dad: Parent struct device_node
- * @allnextpp: pointer to ->allnext from last allocated device_node
  * @fpsize: Size of the node path up at the current depth.
  */
 static void * unflatten_dt_node(void *blob,
 				void *mem,
 				int *poffset,
 				struct device_node *dad,
-				struct device_node ***allnextpp,
-				unsigned long fpsize)
+				struct device_node **nodepp,
+				unsigned long fpsize,
+				bool dryrun)
 {
 	const __be32 *p;
 	struct device_node *np;
@@ -200,7 +202,7 @@ static void * unflatten_dt_node(void *blob,
 
 	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
 				__alignof__(struct device_node));
-	if (allnextpp) {
+	if (!dryrun) {
 		char *fn;
 		of_node_init(np);
 		np->full_name = fn = ((char *)np) + sizeof(*np);
@@ -222,16 +224,10 @@ static void * unflatten_dt_node(void *blob,
 		memcpy(fn, pathp, l);
 
 		prev_pp = &np->properties;
-		**allnextpp = np;
-		*allnextpp = &np->allnext;
 		if (dad != NULL) {
 			np->parent = dad;
-			/* we temporarily use the next field as `last_child'*/
-			if (dad->next == NULL)
-				dad->child = np;
-			else
-				dad->next->sibling = np;
-			dad->next = np;
+			np->sibling = dad->child;
+			dad->child = np;
 		}
 	}
 	/* process properties */
@@ -254,7 +250,7 @@ static void * unflatten_dt_node(void *blob,
 			has_name = 1;
 		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
 					__alignof__(struct property));
-		if (allnextpp) {
+		if (!dryrun) {
 			/* We accept flattened tree phandles either in
 			 * ePAPR-style "phandle" properties, or the
 			 * legacy "linux,phandle" properties.  If both
@@ -296,7 +292,7 @@ static void * unflatten_dt_node(void *blob,
 		sz = (pa - ps) + 1;
 		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
 					__alignof__(struct property));
-		if (allnextpp) {
+		if (!dryrun) {
 			pp->name = "name";
 			pp->length = sz;
 			pp->value = pp + 1;
@@ -308,7 +304,7 @@ static void * unflatten_dt_node(void *blob,
 				(char *)pp->value);
 		}
 	}
-	if (allnextpp) {
+	if (!dryrun) {
 		*prev_pp = NULL;
 		np->name = of_get_property(np, "name", NULL);
 		np->type = of_get_property(np, "device_type", NULL);
@@ -324,12 +320,30 @@ static void * unflatten_dt_node(void *blob,
 	if (depth < 0)
 		depth = 0;
 	while (*poffset > 0 && depth > old_depth)
-		mem = unflatten_dt_node(blob, mem, poffset, np, allnextpp,
-					fpsize);
+		mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
+					fpsize, dryrun);
 
 	if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
 		pr_err("unflatten: error %d processing FDT\n", *poffset);
 
+	/*
+	 * Reverse the child list. Some drivers assumes node order matches .dts
+	 * node order
+	 */
+	if (!dryrun && np->child) {
+		struct device_node *child = np->child;
+		np->child = NULL;
+		while (child) {
+			struct device_node *next = child->sibling;
+			child->sibling = np->child;
+			np->child = child;
+			child = next;
+		}
+	}
+
+	if (nodepp)
+		*nodepp = np;
+
 	return mem;
 }
 
@@ -352,7 +366,6 @@ static void __unflatten_device_tree(void *blob,
 	unsigned long size;
 	int start;
 	void *mem;
-	struct device_node **allnextp = mynodes;
 
 	pr_debug(" -> unflatten_device_tree()\n");
 
@@ -373,7 +386,7 @@ static void __unflatten_device_tree(void *blob,
 
 	/* First pass, scan for size */
 	start = 0;
-	size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0);
+	size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
 	size = ALIGN(size, 4);
 
 	pr_debug("  size is %lx, allocating...\n", size);
@@ -388,11 +401,10 @@ static void __unflatten_device_tree(void *blob,
 
 	/* Second pass, do actual unflattening */
 	start = 0;
-	unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0);
+	unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
 	if (be32_to_cpup(mem + size) != 0xdeadbeef)
 		pr_warning("End of tree marker overwritten: %08x\n",
 			   be32_to_cpup(mem + size));
-	*allnextp = NULL;
 
 	pr_debug(" <- unflatten_device_tree()\n");
 }
@@ -425,6 +437,8 @@ void *initial_boot_params;
 
 #ifdef CONFIG_OF_EARLY_FLATTREE
 
+static u32 of_fdt_crc32;
+
 /**
  * res_mem_reserve_reg() - reserve all memory described in 'reg' property
  */
@@ -748,7 +762,7 @@ static inline void early_init_dt_check_for_initrd(unsigned long node)
 #ifdef CONFIG_SERIAL_EARLYCON
 extern struct of_device_id __earlycon_of_table[];
 
-int __init early_init_dt_scan_chosen_serial(void)
+static int __init early_init_dt_scan_chosen_serial(void)
 {
 	int offset;
 	const char *p;
@@ -930,6 +944,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 	const u64 phys_offset = __pa(PAGE_OFFSET);
 
 	if (!PAGE_ALIGNED(base)) {
+		if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {
+			pr_warn("Ignoring memory block 0x%llx - 0x%llx\n",
+				base, base + size);
+			return;
+		}
 		size -= PAGE_SIZE - (base & ~PAGE_MASK);
 		base = PAGE_ALIGN(base);
 	}
@@ -992,15 +1011,14 @@ bool __init early_init_dt_verify(void *params)
 	if (!params)
 		return false;
 
-	/* Setup flat device-tree pointer */
-	initial_boot_params = params;
-
 	/* check device tree validity */
-	if (fdt_check_header(params)) {
-		initial_boot_params = NULL;
+	if (fdt_check_header(params))
 		return false;
-	}
 
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+	of_fdt_crc32 = crc32_be(~0, initial_boot_params,
+				fdt_totalsize(initial_boot_params));
 	return true;
 }
 
@@ -1039,7 +1057,7 @@ bool __init early_init_dt_scan(void *params)
  */
 void __init unflatten_device_tree(void)
 {
-	__unflatten_device_tree(initial_boot_params, &of_allnodes,
+	__unflatten_device_tree(initial_boot_params, &of_root,
 				early_init_dt_alloc_memory_arch);
 
 	/* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
@@ -1078,27 +1096,32 @@ void __init unflatten_and_copy_device_tree(void)
 	unflatten_device_tree();
 }
 
-#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
-static struct debugfs_blob_wrapper flat_dt_blob;
-
-static int __init of_flat_dt_debugfs_export_fdt(void)
+#ifdef CONFIG_SYSFS
+static ssize_t of_fdt_raw_read(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buf, loff_t off, size_t count)
 {
-	struct dentry *d = debugfs_create_dir("device-tree", NULL);
-
-	if (!d)
-		return -ENOENT;
+	memcpy(buf, initial_boot_params + off, count);
+	return count;
+}
 
-	flat_dt_blob.data = initial_boot_params;
-	flat_dt_blob.size = fdt_totalsize(initial_boot_params);
+static int __init of_fdt_raw_init(void)
+{
+	static struct bin_attribute of_fdt_raw_attr =
+		__BIN_ATTR(fdt, S_IRUSR, of_fdt_raw_read, NULL, 0);
 
-	d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR,
-				d, &flat_dt_blob);
-	if (!d)
-		return -ENOENT;
+	if (!initial_boot_params)
+		return 0;
 
-	return 0;
+	if (of_fdt_crc32 != crc32_be(~0, initial_boot_params,
+				     fdt_totalsize(initial_boot_params))) {
+		pr_warn("fdt: not creating '/sys/firmware/fdt': CRC check failed\n");
+		return 0;
+	}
+	of_fdt_raw_attr.size = fdt_totalsize(initial_boot_params);
+	return sysfs_create_bin_file(firmware_kobj, &of_fdt_raw_attr);
 }
-module_init(of_flat_dt_debugfs_export_fdt);
+late_initcall(of_fdt_raw_init);
 #endif
 
 #endif /* CONFIG_OF_EARLY_FLATTREE */
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index b97363adca0b..c21e364dcd96 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -18,6 +18,7 @@
  * driver.
  */
 
+#include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -409,6 +410,7 @@ int of_irq_get(struct device_node *dev, int index)
 
 	return irq_create_of_mapping(&oirq);
 }
+EXPORT_SYMBOL_GPL(of_irq_get);
 
 /**
  * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number
@@ -576,3 +578,23 @@ err:
 		kfree(desc);
 	}
 }
+
+/**
+ * of_msi_configure - Set the msi_domain field of a device
+ * @dev: device structure to associate with an MSI irq domain
+ * @np: device node for that device
+ */
+void of_msi_configure(struct device *dev, struct device_node *np)
+{
+	struct device_node *msi_np;
+	struct irq_domain *d;
+
+	msi_np = of_parse_phandle(np, "msi-parent", 0);
+	if (!msi_np)
+		return;
+
+	d = irq_find_matching_host(msi_np, DOMAIN_BUS_PLATFORM_MSI);
+	if (!d)
+		d = irq_find_host(msi_np);
+	dev_set_msi_domain(dev, d);
+}
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index ecc5fa5640d2..b249fdf5ecf8 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/of_pci.h>
 #include <linux/slab.h>
 
@@ -116,6 +117,26 @@ int of_get_pci_domain_nr(struct device_node *node)
 }
 EXPORT_SYMBOL_GPL(of_get_pci_domain_nr);
 
+/**
+ * of_pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to pci_dev struct of the PCI device
+ *
+ * Function to update PCI devices's DMA configuration using the same
+ * info from the OF node of host bridge's parent (if any).
+ */
+void of_pci_dma_configure(struct pci_dev *pci_dev)
+{
+	struct device *dev = &pci_dev->dev;
+	struct device *bridge = pci_get_host_bridge_device(pci_dev);
+
+	if (!bridge->parent)
+		return;
+
+	of_dma_configure(dev, bridge->parent->of_node);
+	pci_put_host_bridge_device(bridge);
+}
+EXPORT_SYMBOL_GPL(of_pci_dma_configure);
+
 #if defined(CONFIG_OF_ADDRESS)
 /**
  * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT
@@ -229,7 +250,6 @@ parse_failed:
 	list_for_each_entry(window, resources, list)
 		kfree(window->res);
 	pci_free_resource_list(resources);
-	kfree(bus_range);
 	return err;
 }
 EXPORT_SYMBOL_GPL(of_pci_get_host_bridge_resources);
@@ -240,7 +260,7 @@ EXPORT_SYMBOL_GPL(of_pci_get_host_bridge_resources);
 static LIST_HEAD(of_pci_msi_chip_list);
 static DEFINE_MUTEX(of_pci_msi_chip_mutex);
 
-int of_pci_msi_chip_add(struct msi_chip *chip)
+int of_pci_msi_chip_add(struct msi_controller *chip)
 {
 	if (!of_property_read_bool(chip->of_node, "msi-controller"))
 		return -EINVAL;
@@ -253,7 +273,7 @@ int of_pci_msi_chip_add(struct msi_chip *chip)
 }
 EXPORT_SYMBOL_GPL(of_pci_msi_chip_add);
 
-void of_pci_msi_chip_remove(struct msi_chip *chip)
+void of_pci_msi_chip_remove(struct msi_controller *chip)
 {
 	mutex_lock(&of_pci_msi_chip_mutex);
 	list_del(&chip->list);
@@ -261,9 +281,9 @@ void of_pci_msi_chip_remove(struct msi_chip *chip)
 }
 EXPORT_SYMBOL_GPL(of_pci_msi_chip_remove);
 
-struct msi_chip *of_pci_find_msi_chip_by_node(struct device_node *of_node)
+struct msi_controller *of_pci_find_msi_chip_by_node(struct device_node *of_node)
 {
-	struct msi_chip *c;
+	struct msi_controller *c;
 
 	mutex_lock(&of_pci_msi_chip_mutex);
 	list_for_each_entry(c, &of_pci_msi_chip_list, list) {
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 858e0a5d9a11..8e882e706cd8 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -61,7 +61,7 @@ static inline int of_property_notify(int action, struct device_node *np,
  * own the devtree lock or work on detached trees only.
  */
 struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags);
-struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags);
+__printf(2, 3) struct device_node *__of_node_dup(const struct device_node *np, const char *fmt, ...);
 
 extern const void *__of_get_property(const struct device_node *np,
 				     const char *name, int *lenp);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 2ede604ff17a..26fb7b0b856c 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -267,6 +267,7 @@ int of_reserved_mem_device_init(struct device *dev)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(of_reserved_mem_device_init);
 
 /**
  * of_reserved_mem_device_release() - release reserved memory device structures
@@ -291,3 +292,4 @@ void of_reserved_mem_device_release(struct device *dev)
 
 	rmem->ops->device_release(rmem, dev);
 }
+EXPORT_SYMBOL_GPL(of_reserved_mem_device_release);
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
new file mode 100644
index 000000000000..dee9270ba547
--- /dev/null
+++ b/drivers/of/overlay.c
@@ -0,0 +1,552 @@
+/*
+ * Functions for working with device tree overlays
+ *
+ * Copyright (C) 2012 Pantelis Antoniou <panto@antoniou-consulting.com>
+ * Copyright (C) 2012 Texas Instruments Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#undef DEBUG
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+
+#include "of_private.h"
+
+/**
+ * struct of_overlay_info - Holds a single overlay info
+ * @target:	target of the overlay operation
+ * @overlay:	pointer to the overlay contents node
+ *
+ * Holds a single overlay state, including all the overlay logs &
+ * records.
+ */
+struct of_overlay_info {
+	struct device_node *target;
+	struct device_node *overlay;
+};
+
+/**
+ * struct of_overlay - Holds a complete overlay transaction
+ * @node:	List on which we are located
+ * @count:	Count of ovinfo structures
+ * @ovinfo_tab:	Overlay info table (count sized)
+ * @cset:	Changeset to be used
+ *
+ * Holds a complete overlay transaction
+ */
+struct of_overlay {
+	int id;
+	struct list_head node;
+	int count;
+	struct of_overlay_info *ovinfo_tab;
+	struct of_changeset cset;
+};
+
+static int of_overlay_apply_one(struct of_overlay *ov,
+		struct device_node *target, const struct device_node *overlay);
+
+static int of_overlay_apply_single_property(struct of_overlay *ov,
+		struct device_node *target, struct property *prop)
+{
+	struct property *propn, *tprop;
+
+	/* NOTE: Multiple changes of single properties not supported */
+	tprop = of_find_property(target, prop->name, NULL);
+
+	/* special properties are not meant to be updated (silent NOP) */
+	if (of_prop_cmp(prop->name, "name") == 0 ||
+	    of_prop_cmp(prop->name, "phandle") == 0 ||
+	    of_prop_cmp(prop->name, "linux,phandle") == 0)
+		return 0;
+
+	propn = __of_prop_dup(prop, GFP_KERNEL);
+	if (propn == NULL)
+		return -ENOMEM;
+
+	/* not found? add */
+	if (tprop == NULL)
+		return of_changeset_add_property(&ov->cset, target, propn);
+
+	/* found? update */
+	return of_changeset_update_property(&ov->cset, target, propn);
+}
+
+static int of_overlay_apply_single_device_node(struct of_overlay *ov,
+		struct device_node *target, struct device_node *child)
+{
+	const char *cname;
+	struct device_node *tchild;
+	int ret = 0;
+
+	cname = kbasename(child->full_name);
+	if (cname == NULL)
+		return -ENOMEM;
+
+	/* NOTE: Multiple mods of created nodes not supported */
+	tchild = of_get_child_by_name(target, cname);
+	if (tchild != NULL) {
+		/* apply overlay recursively */
+		ret = of_overlay_apply_one(ov, tchild, child);
+		of_node_put(tchild);
+	} else {
+		/* create empty tree as a target */
+		tchild = __of_node_dup(child, "%s/%s", target->full_name, cname);
+		if (!tchild)
+			return -ENOMEM;
+
+		/* point to parent */
+		tchild->parent = target;
+
+		ret = of_changeset_attach_node(&ov->cset, tchild);
+		if (ret)
+			return ret;
+
+		ret = of_overlay_apply_one(ov, tchild, child);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Apply a single overlay node recursively.
+ *
+ * Note that the in case of an error the target node is left
+ * in a inconsistent state. Error recovery should be performed
+ * by using the changeset.
+ */
+static int of_overlay_apply_one(struct of_overlay *ov,
+		struct device_node *target, const struct device_node *overlay)
+{
+	struct device_node *child;
+	struct property *prop;
+	int ret;
+
+	for_each_property_of_node(overlay, prop) {
+		ret = of_overlay_apply_single_property(ov, target, prop);
+		if (ret) {
+			pr_err("%s: Failed to apply prop @%s/%s\n",
+				__func__, target->full_name, prop->name);
+			return ret;
+		}
+	}
+
+	for_each_child_of_node(overlay, child) {
+		ret = of_overlay_apply_single_device_node(ov, target, child);
+		if (ret != 0) {
+			pr_err("%s: Failed to apply single node @%s/%s\n",
+					__func__, target->full_name,
+					child->name);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * of_overlay_apply() - Apply @count overlays pointed at by @ovinfo_tab
+ * @ov:		Overlay to apply
+ *
+ * Applies the overlays given, while handling all error conditions
+ * appropriately. Either the operation succeeds, or if it fails the
+ * live tree is reverted to the state before the attempt.
+ * Returns 0, or an error if the overlay attempt failed.
+ */
+static int of_overlay_apply(struct of_overlay *ov)
+{
+	int i, err;
+
+	/* first we apply the overlays atomically */
+	for (i = 0; i < ov->count; i++) {
+		struct of_overlay_info *ovinfo = &ov->ovinfo_tab[i];
+
+		err = of_overlay_apply_one(ov, ovinfo->target, ovinfo->overlay);
+		if (err != 0) {
+			pr_err("%s: overlay failed '%s'\n",
+				__func__, ovinfo->target->full_name);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Find the target node using a number of different strategies
+ * in order of preference
+ *
+ * "target" property containing the phandle of the target
+ * "target-path" property containing the path of the target
+ */
+static struct device_node *find_target_node(struct device_node *info_node)
+{
+	const char *path;
+	u32 val;
+	int ret;
+
+	/* first try to go by using the target as a phandle */
+	ret = of_property_read_u32(info_node, "target", &val);
+	if (ret == 0)
+		return of_find_node_by_phandle(val);
+
+	/* now try to locate by path */
+	ret = of_property_read_string(info_node, "target-path", &path);
+	if (ret == 0)
+		return of_find_node_by_path(path);
+
+	pr_err("%s: Failed to find target for node %p (%s)\n", __func__,
+		info_node, info_node->name);
+
+	return NULL;
+}
+
+/**
+ * of_fill_overlay_info() - Fill an overlay info structure
+ * @ov		Overlay to fill
+ * @info_node:	Device node containing the overlay
+ * @ovinfo:	Pointer to the overlay info structure to fill
+ *
+ * Fills an overlay info structure with the overlay information
+ * from a device node. This device node must have a target property
+ * which contains a phandle of the overlay target node, and an
+ * __overlay__ child node which has the overlay contents.
+ * Both ovinfo->target & ovinfo->overlay have their references taken.
+ *
+ * Returns 0 on success, or a negative error value.
+ */
+static int of_fill_overlay_info(struct of_overlay *ov,
+		struct device_node *info_node, struct of_overlay_info *ovinfo)
+{
+	ovinfo->overlay = of_get_child_by_name(info_node, "__overlay__");
+	if (ovinfo->overlay == NULL)
+		goto err_fail;
+
+	ovinfo->target = find_target_node(info_node);
+	if (ovinfo->target == NULL)
+		goto err_fail;
+
+	return 0;
+
+err_fail:
+	of_node_put(ovinfo->target);
+	of_node_put(ovinfo->overlay);
+
+	memset(ovinfo, 0, sizeof(*ovinfo));
+	return -EINVAL;
+}
+
+/**
+ * of_build_overlay_info() - Build an overlay info array
+ * @ov		Overlay to build
+ * @tree:	Device node containing all the overlays
+ *
+ * Helper function that given a tree containing overlay information,
+ * allocates and builds an overlay info array containing it, ready
+ * for use using of_overlay_apply.
+ *
+ * Returns 0 on success with the @cntp @ovinfop pointers valid,
+ * while on error a negative error value is returned.
+ */
+static int of_build_overlay_info(struct of_overlay *ov,
+		struct device_node *tree)
+{
+	struct device_node *node;
+	struct of_overlay_info *ovinfo;
+	int cnt, err;
+
+	/* worst case; every child is a node */
+	cnt = 0;
+	for_each_child_of_node(tree, node)
+		cnt++;
+
+	ovinfo = kcalloc(cnt, sizeof(*ovinfo), GFP_KERNEL);
+	if (ovinfo == NULL)
+		return -ENOMEM;
+
+	cnt = 0;
+	for_each_child_of_node(tree, node) {
+		memset(&ovinfo[cnt], 0, sizeof(*ovinfo));
+		err = of_fill_overlay_info(ov, node, &ovinfo[cnt]);
+		if (err == 0)
+			cnt++;
+	}
+
+	/* if nothing filled, return error */
+	if (cnt == 0) {
+		kfree(ovinfo);
+		return -ENODEV;
+	}
+
+	ov->count = cnt;
+	ov->ovinfo_tab = ovinfo;
+
+	return 0;
+}
+
+/**
+ * of_free_overlay_info() - Free an overlay info array
+ * @ov		Overlay to free the overlay info from
+ * @ovinfo_tab:	Array of overlay_info's to free
+ *
+ * Releases the memory of a previously allocated ovinfo array
+ * by of_build_overlay_info.
+ * Returns 0, or an error if the arguments are bogus.
+ */
+static int of_free_overlay_info(struct of_overlay *ov)
+{
+	struct of_overlay_info *ovinfo;
+	int i;
+
+	/* do it in reverse */
+	for (i = ov->count - 1; i >= 0; i--) {
+		ovinfo = &ov->ovinfo_tab[i];
+
+		of_node_put(ovinfo->target);
+		of_node_put(ovinfo->overlay);
+	}
+	kfree(ov->ovinfo_tab);
+
+	return 0;
+}
+
+static LIST_HEAD(ov_list);
+static DEFINE_IDR(ov_idr);
+
+/**
+ * of_overlay_create() - Create and apply an overlay
+ * @tree:	Device node containing all the overlays
+ *
+ * Creates and applies an overlay while also keeping track
+ * of the overlay in a list. This list can be used to prevent
+ * illegal overlay removals.
+ *
+ * Returns the id of the created overlay, or an negative error number
+ */
+int of_overlay_create(struct device_node *tree)
+{
+	struct of_overlay *ov;
+	int err, id;
+
+	/* allocate the overlay structure */
+	ov = kzalloc(sizeof(*ov), GFP_KERNEL);
+	if (ov == NULL)
+		return -ENOMEM;
+	ov->id = -1;
+
+	INIT_LIST_HEAD(&ov->node);
+
+	of_changeset_init(&ov->cset);
+
+	mutex_lock(&of_mutex);
+
+	id = idr_alloc(&ov_idr, ov, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		pr_err("%s: idr_alloc() failed for tree@%s\n",
+				__func__, tree->full_name);
+		err = id;
+		goto err_destroy_trans;
+	}
+	ov->id = id;
+
+	/* build the overlay info structures */
+	err = of_build_overlay_info(ov, tree);
+	if (err) {
+		pr_err("%s: of_build_overlay_info() failed for tree@%s\n",
+				__func__, tree->full_name);
+		goto err_free_idr;
+	}
+
+	/* apply the overlay */
+	err = of_overlay_apply(ov);
+	if (err) {
+		pr_err("%s: of_overlay_apply() failed for tree@%s\n",
+				__func__, tree->full_name);
+		goto err_abort_trans;
+	}
+
+	/* apply the changeset */
+	err = of_changeset_apply(&ov->cset);
+	if (err) {
+		pr_err("%s: of_changeset_apply() failed for tree@%s\n",
+				__func__, tree->full_name);
+		goto err_revert_overlay;
+	}
+
+	/* add to the tail of the overlay list */
+	list_add_tail(&ov->node, &ov_list);
+
+	mutex_unlock(&of_mutex);
+
+	return id;
+
+err_revert_overlay:
+err_abort_trans:
+	of_free_overlay_info(ov);
+err_free_idr:
+	idr_remove(&ov_idr, ov->id);
+err_destroy_trans:
+	of_changeset_destroy(&ov->cset);
+	kfree(ov);
+	mutex_unlock(&of_mutex);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(of_overlay_create);
+
+/* check whether the given node, lies under the given tree */
+static int overlay_subtree_check(struct device_node *tree,
+		struct device_node *dn)
+{
+	struct device_node *child;
+
+	/* match? */
+	if (tree == dn)
+		return 1;
+
+	for_each_child_of_node(tree, child) {
+		if (overlay_subtree_check(child, dn))
+			return 1;
+	}
+
+	return 0;
+}
+
+/* check whether this overlay is the topmost */
+static int overlay_is_topmost(struct of_overlay *ov, struct device_node *dn)
+{
+	struct of_overlay *ovt;
+	struct of_changeset_entry *ce;
+
+	list_for_each_entry_reverse(ovt, &ov_list, node) {
+		/* if we hit ourselves, we're done */
+		if (ovt == ov)
+			break;
+
+		/* check against each subtree affected by this overlay */
+		list_for_each_entry(ce, &ovt->cset.entries, node) {
+			if (overlay_subtree_check(ce->np, dn)) {
+				pr_err("%s: #%d clashes #%d @%s\n",
+					__func__, ov->id, ovt->id,
+					dn->full_name);
+				return 0;
+			}
+		}
+	}
+
+	/* overlay is topmost */
+	return 1;
+}
+
+/*
+ * We can safely remove the overlay only if it's the top-most one.
+ * Newly applied overlays are inserted at the tail of the overlay list,
+ * so a top most overlay is the one that is closest to the tail.
+ *
+ * The topmost check is done by exploiting this property. For each
+ * affected device node in the log list we check if this overlay is
+ * the one closest to the tail. If another overlay has affected this
+ * device node and is closest to the tail, then removal is not permited.
+ */
+static int overlay_removal_is_ok(struct of_overlay *ov)
+{
+	struct of_changeset_entry *ce;
+
+	list_for_each_entry(ce, &ov->cset.entries, node) {
+		if (!overlay_is_topmost(ov, ce->np)) {
+			pr_err("%s: overlay #%d is not topmost\n",
+					__func__, ov->id);
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/**
+ * of_overlay_destroy() - Removes an overlay
+ * @id:	Overlay id number returned by a previous call to of_overlay_create
+ *
+ * Removes an overlay if it is permissible.
+ *
+ * Returns 0 on success, or an negative error number
+ */
+int of_overlay_destroy(int id)
+{
+	struct of_overlay *ov;
+	int err;
+
+	mutex_lock(&of_mutex);
+
+	ov = idr_find(&ov_idr, id);
+	if (ov == NULL) {
+		err = -ENODEV;
+		pr_err("%s: Could not find overlay #%d\n",
+				__func__, id);
+		goto out;
+	}
+
+	/* check whether the overlay is safe to remove */
+	if (!overlay_removal_is_ok(ov)) {
+		err = -EBUSY;
+		pr_err("%s: removal check failed for overlay #%d\n",
+				__func__, id);
+		goto out;
+	}
+
+
+	list_del(&ov->node);
+	of_changeset_revert(&ov->cset);
+	of_free_overlay_info(ov);
+	idr_remove(&ov_idr, id);
+	of_changeset_destroy(&ov->cset);
+	kfree(ov);
+
+	err = 0;
+
+out:
+	mutex_unlock(&of_mutex);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(of_overlay_destroy);
+
+/**
+ * of_overlay_destroy_all() - Removes all overlays from the system
+ *
+ * Removes all overlays from the system in the correct order.
+ *
+ * Returns 0 on success, or an negative error number
+ */
+int of_overlay_destroy_all(void)
+{
+	struct of_overlay *ov, *ovn;
+
+	mutex_lock(&of_mutex);
+
+	/* the tail of list is guaranteed to be safe to remove */
+	list_for_each_entry_safe_reverse(ov, ovn, &ov_list, node) {
+		list_del(&ov->node);
+		of_changeset_revert(&ov->cset);
+		of_free_overlay_info(ov);
+		idr_remove(&ov_idr, ov->id);
+		kfree(ov);
+	}
+
+	mutex_unlock(&of_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_overlay_destroy_all);
diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c
index 36b4035881b0..d2acae825af9 100644
--- a/drivers/of/pdt.c
+++ b/drivers/of/pdt.c
@@ -25,8 +25,7 @@
 
 static struct of_pdt_ops *of_pdt_prom_ops __initdata;
 
-void __initdata (*of_pdt_build_more)(struct device_node *dp,
-		struct device_node ***nextp);
+void __initdata (*of_pdt_build_more)(struct device_node *dp);
 
 #if defined(CONFIG_SPARC)
 unsigned int of_pdt_unique_id __initdata;
@@ -192,8 +191,7 @@ static struct device_node * __init of_pdt_create_node(phandle node,
 }
 
 static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
-						   phandle node,
-						   struct device_node ***nextp)
+						   phandle node)
 {
 	struct device_node *ret = NULL, *prev_sibling = NULL;
 	struct device_node *dp;
@@ -210,16 +208,12 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
 			ret = dp;
 		prev_sibling = dp;
 
-		*(*nextp) = dp;
-		*nextp = &dp->allnext;
-
 		dp->full_name = of_pdt_build_full_name(dp);
 
-		dp->child = of_pdt_build_tree(dp,
-				of_pdt_prom_ops->getchild(node), nextp);
+		dp->child = of_pdt_build_tree(dp, of_pdt_prom_ops->getchild(node));
 
 		if (of_pdt_build_more)
-			of_pdt_build_more(dp, nextp);
+			of_pdt_build_more(dp);
 
 		node = of_pdt_prom_ops->getsibling(node);
 	}
@@ -234,20 +228,17 @@ static void * __init kernel_tree_alloc(u64 size, u64 align)
 
 void __init of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops)
 {
-	struct device_node **nextp;
-
 	BUG_ON(!ops);
 	of_pdt_prom_ops = ops;
 
-	of_allnodes = of_pdt_create_node(root_node, NULL);
+	of_root = of_pdt_create_node(root_node, NULL);
 #if defined(CONFIG_SPARC)
-	of_allnodes->path_component_name = "";
+	of_root->path_component_name = "";
 #endif
-	of_allnodes->full_name = "/";
+	of_root->full_name = "/";
 
-	nextp = &of_allnodes->allnext;
-	of_allnodes->child = of_pdt_build_tree(of_allnodes,
-			of_pdt_prom_ops->getchild(of_allnodes->phandle), &nextp);
+	of_root->child = of_pdt_build_tree(of_root,
+				of_pdt_prom_ops->getchild(of_root->phandle));
 
 	/* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
 	of_alias_scan(kernel_tree_alloc);
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 3b64d0bf5bba..8a002d6151f2 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -25,6 +25,7 @@
 
 const struct of_device_id of_default_bus_match_table[] = {
 	{ .compatible = "simple-bus", },
+	{ .compatible = "simple-mfd", },
 #ifdef CONFIG_ARM_AMBA
 	{ .compatible = "arm,amba-bus", },
 #endif /* CONFIG_ARM_AMBA */
@@ -138,7 +139,7 @@ struct platform_device *of_device_alloc(struct device_node *np,
 	}
 
 	dev->dev.of_node = of_node_get(np);
-	dev->dev.parent = parent;
+	dev->dev.parent = parent ? : &platform_bus;
 
 	if (bus_id)
 		dev_set_name(&dev->dev, "%s", bus_id);
@@ -149,57 +150,9 @@ struct platform_device *of_device_alloc(struct device_node *np,
 }
 EXPORT_SYMBOL(of_device_alloc);
 
-/**
- * of_dma_configure - Setup DMA configuration
- * @dev:	Device to apply DMA configuration
- *
- * Try to get devices's DMA configuration from DT and update it
- * accordingly.
- *
- * In case if platform code need to use own special DMA configuration,it
- * can use Platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE event
- * to fix up DMA configuration.
- */
-static void of_dma_configure(struct device *dev)
+static void of_dma_deconfigure(struct device *dev)
 {
-	u64 dma_addr, paddr, size;
-	int ret;
-
-	/*
-	 * Set default dma-mask to 32 bit. Drivers are expected to setup
-	 * the correct supported dma_mask.
-	 */
-	dev->coherent_dma_mask = DMA_BIT_MASK(32);
-
-	/*
-	 * Set it to coherent_dma_mask by default if the architecture
-	 * code has not set it.
-	 */
-	if (!dev->dma_mask)
-		dev->dma_mask = &dev->coherent_dma_mask;
-
-	/*
-	 * if dma-coherent property exist, call arch hook to setup
-	 * dma coherent operations.
-	 */
-	if (of_dma_is_coherent(dev->of_node)) {
-		set_arch_dma_coherent_ops(dev);
-		dev_dbg(dev, "device is dma coherent\n");
-	}
-
-	/*
-	 * if dma-ranges property doesn't exist - just return else
-	 * setup the dma offset
-	 */
-	ret = of_dma_get_range(dev->of_node, &dma_addr, &paddr, &size);
-	if (ret < 0) {
-		dev_dbg(dev, "no dma range information to setup\n");
-		return;
-	}
-
-	/* DMA ranges found. Calculate and set dma_pfn_offset */
-	dev->dma_pfn_offset = PFN_DOWN(paddr - dma_addr);
-	dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", dev->dma_pfn_offset);
+	arch_teardown_dma_ops(dev);
 }
 
 /**
@@ -228,16 +181,13 @@ static struct platform_device *of_platform_device_create_pdata(
 	if (!dev)
 		goto err_clear_flag;
 
-	of_dma_configure(&dev->dev);
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
-
-	/* We do not fill the DMA ops for platform devices by default.
-	 * This is currently the responsibility of the platform code
-	 * to do such, possibly using a device notifier
-	 */
+	of_dma_configure(&dev->dev, dev->dev.of_node);
+	of_msi_configure(&dev->dev, dev->dev.of_node);
 
 	if (of_device_add(dev) != 0) {
+		of_dma_deconfigure(&dev->dev);
 		platform_device_put(dev);
 		goto err_clear_flag;
 	}
@@ -291,13 +241,13 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 
 	/* setup generic device info */
 	dev->dev.of_node = of_node_get(node);
-	dev->dev.parent = parent;
+	dev->dev.parent = parent ? : &platform_bus;
 	dev->dev.platform_data = platform_data;
 	if (bus_id)
 		dev_set_name(&dev->dev, "%s", bus_id);
 	else
 		of_device_make_bus_id(&dev->dev);
-	of_dma_configure(&dev->dev);
+	of_dma_configure(&dev->dev, dev->dev.of_node);
 
 	/* Allow the HW Peripheral ID to be overridden */
 	prop = of_get_property(node, "arm,primecell-periphid", NULL);
@@ -500,6 +450,7 @@ int of_platform_populate(struct device_node *root,
 		if (rc)
 			break;
 	}
+	of_node_set_flag(root, OF_POPULATED_BUS);
 
 	of_node_put(root);
 	return rc;
@@ -523,6 +474,7 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 		amba_device_unregister(to_amba_device(dev));
 #endif
 
+	of_dma_deconfigure(dev);
 	of_node_clear_flag(dev->of_node, OF_POPULATED);
 	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
@@ -542,8 +494,75 @@ static int of_platform_device_destroy(struct device *dev, void *data)
  */
 void of_platform_depopulate(struct device *parent)
 {
-	device_for_each_child(parent, NULL, of_platform_device_destroy);
+	if (parent->of_node && of_node_check_flag(parent->of_node, OF_POPULATED_BUS)) {
+		device_for_each_child(parent, NULL, of_platform_device_destroy);
+		of_node_clear_flag(parent->of_node, OF_POPULATED_BUS);
+	}
 }
 EXPORT_SYMBOL_GPL(of_platform_depopulate);
 
+#ifdef CONFIG_OF_DYNAMIC
+static int of_platform_notify(struct notifier_block *nb,
+				unsigned long action, void *arg)
+{
+	struct of_reconfig_data *rd = arg;
+	struct platform_device *pdev_parent, *pdev;
+	bool children_left;
+
+	switch (of_reconfig_get_state_change(action, rd)) {
+	case OF_RECONFIG_CHANGE_ADD:
+		/* verify that the parent is a bus */
+		if (!of_node_check_flag(rd->dn->parent, OF_POPULATED_BUS))
+			return NOTIFY_OK;	/* not for us */
+
+		/* already populated? (driver using of_populate manually) */
+		if (of_node_check_flag(rd->dn, OF_POPULATED))
+			return NOTIFY_OK;
+
+		/* pdev_parent may be NULL when no bus platform device */
+		pdev_parent = of_find_device_by_node(rd->dn->parent);
+		pdev = of_platform_device_create(rd->dn, NULL,
+				pdev_parent ? &pdev_parent->dev : NULL);
+		of_dev_put(pdev_parent);
+
+		if (pdev == NULL) {
+			pr_err("%s: failed to create for '%s'\n",
+					__func__, rd->dn->full_name);
+			/* of_platform_device_create tosses the error code */
+			return notifier_from_errno(-EINVAL);
+		}
+		break;
+
+	case OF_RECONFIG_CHANGE_REMOVE:
+
+		/* already depopulated? */
+		if (!of_node_check_flag(rd->dn, OF_POPULATED))
+			return NOTIFY_OK;
+
+		/* find our device by node */
+		pdev = of_find_device_by_node(rd->dn);
+		if (pdev == NULL)
+			return NOTIFY_OK;	/* no? not meant for us */
+
+		/* unregister takes one ref away */
+		of_platform_device_destroy(&pdev->dev, &children_left);
+
+		/* and put the reference of the find */
+		of_dev_put(pdev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block platform_of_notifier = {
+	.notifier_call = of_platform_notify,
+};
+
+void of_platform_register_reconfig_notifier(void)
+{
+	WARN_ON(of_reconfig_notifier_register(&platform_of_notifier));
+}
+#endif /* CONFIG_OF_DYNAMIC */
+
 #endif /* CONFIG_OF_ADDRESS */
diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index aed7959f800d..640eb4cb46e3 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -111,7 +111,8 @@ static void __of_adjust_tree_phandles(struct device_node *node,
 		__of_adjust_tree_phandles(child, phandle_delta);
 }
 
-static int __of_adjust_phandle_ref(struct device_node *node, struct property *rprop, int value, bool is_delta)
+static int __of_adjust_phandle_ref(struct device_node *node,
+		struct property *rprop, int value)
 {
 	phandle phandle;
 	struct device_node *refnode;
@@ -181,7 +182,7 @@ static int __of_adjust_phandle_ref(struct device_node *node, struct property *rp
 			goto err_fail;
 		}
 
-		phandle = is_delta ? be32_to_cpup(sprop->value + offset) + value : value;
+		phandle = value;
 		*(__be32 *)(sprop->value + offset) = cpu_to_be32(phandle);
 	}
 
@@ -190,36 +191,97 @@ err_fail:
 	return err;
 }
 
+/* compare nodes taking into account that 'name' strips out the @ part */
+static int __of_node_name_cmp(const struct device_node *dn1,
+		const struct device_node *dn2)
+{
+	const char *n1 = strrchr(dn1->full_name, '/') ? : "/";
+	const char *n2 = strrchr(dn2->full_name, '/') ? : "/";
+
+	return of_node_cmp(n1, n2);
+}
+
 /*
  * Adjust the local phandle references by the given phandle delta.
- * Assumes the existances of a __local_fixups__ node at the root
- * of the tree. Does not take any devtree locks so make sure you
- * call this on a tree which is at the detached state.
+ * Assumes the existances of a __local_fixups__ node at the root.
+ * Assumes that __of_verify_tree_phandle_references has been called.
+ * Does not take any devtree locks so make sure you call this on a tree
+ * which is at the detached state.
  */
 static int __of_adjust_tree_phandle_references(struct device_node *node,
-		int phandle_delta)
+		struct device_node *target, int phandle_delta)
 {
-	struct device_node *child;
-	struct property *rprop;
-	int err;
-
-	/* locate the symbols & fixups nodes on resolve */
-	for_each_child_of_node(node, child)
-		if (of_node_cmp(child->name, "__local_fixups__") == 0)
-			break;
+	struct device_node *child, *childtarget;
+	struct property *rprop, *sprop;
+	int err, i, count;
+	unsigned int off;
+	phandle phandle;
 
-	/* no local fixups */
-	if (!child)
+	if (node == NULL)
 		return 0;
 
-	/* find the local fixups property */
-	for_each_property_of_node(child, rprop) {
+	for_each_property_of_node(node, rprop) {
+
 		/* skip properties added automatically */
-		if (of_prop_cmp(rprop->name, "name") == 0)
+		if (of_prop_cmp(rprop->name, "name") == 0 ||
+		    of_prop_cmp(rprop->name, "phandle") == 0 ||
+		    of_prop_cmp(rprop->name, "linux,phandle") == 0)
 			continue;
 
-		err = __of_adjust_phandle_ref(node, rprop, phandle_delta, true);
-		if (err)
+		if ((rprop->length % 4) != 0 || rprop->length == 0) {
+			pr_err("%s: Illegal property (size) '%s' @%s\n",
+					__func__, rprop->name, node->full_name);
+			return -EINVAL;
+		}
+		count = rprop->length / sizeof(__be32);
+
+		/* now find the target property */
+		for_each_property_of_node(target, sprop) {
+			if (of_prop_cmp(sprop->name, rprop->name) == 0)
+				break;
+		}
+
+		if (sprop == NULL) {
+			pr_err("%s: Could not find target property '%s' @%s\n",
+					__func__, rprop->name, node->full_name);
+			return -EINVAL;
+		}
+
+		for (i = 0; i < count; i++) {
+			off = be32_to_cpu(((__be32 *)rprop->value)[i]);
+			/* make sure the offset doesn't overstep (even wrap) */
+			if (off >= sprop->length ||
+					(off + 4) > sprop->length) {
+				pr_err("%s: Illegal property '%s' @%s\n",
+						__func__, rprop->name,
+						node->full_name);
+				return -EINVAL;
+			}
+
+			if (phandle_delta) {
+				/* adjust */
+				phandle = be32_to_cpu(*(__be32 *)(sprop->value + off));
+				phandle += phandle_delta;
+				*(__be32 *)(sprop->value + off) = cpu_to_be32(phandle);
+			}
+		}
+	}
+
+	for_each_child_of_node(node, child) {
+
+		for_each_child_of_node(target, childtarget)
+			if (__of_node_name_cmp(child, childtarget) == 0)
+				break;
+
+		if (!childtarget) {
+			pr_err("%s: Could not find target child '%s' @%s\n",
+					__func__, child->name, node->full_name);
+			return -EINVAL;
+		}
+
+		err = __of_adjust_tree_phandle_references(child, childtarget,
+				phandle_delta);
+		if (err != 0)
 			return err;
 	}
 
@@ -241,7 +303,7 @@ static int __of_adjust_tree_phandle_references(struct device_node *node,
  */
 int of_resolve_phandles(struct device_node *resolve)
 {
-	struct device_node *child, *refnode;
+	struct device_node *child, *childroot, *refnode;
 	struct device_node *root_sym, *resolve_sym, *resolve_fix;
 	struct property *rprop;
 	const char *refpath;
@@ -255,9 +317,23 @@ int of_resolve_phandles(struct device_node *resolve)
 	/* first we need to adjust the phandles */
 	phandle_delta = of_get_tree_max_phandle() + 1;
 	__of_adjust_tree_phandles(resolve, phandle_delta);
-	err = __of_adjust_tree_phandle_references(resolve, phandle_delta);
-	if (err != 0)
-		return err;
+
+	/* locate the local fixups */
+	childroot = NULL;
+	for_each_child_of_node(resolve, childroot)
+		if (of_node_cmp(childroot->name, "__local_fixups__") == 0)
+			break;
+
+	if (childroot != NULL) {
+		/* resolve root is guaranteed to be the '/' */
+		err = __of_adjust_tree_phandle_references(childroot,
+				resolve, 0);
+		if (err != 0)
+			return err;
+
+		BUG_ON(__of_adjust_tree_phandle_references(childroot,
+				resolve, phandle_delta));
+	}
 
 	root_sym = NULL;
 	resolve_sym = NULL;
@@ -322,7 +398,7 @@ int of_resolve_phandles(struct device_node *resolve)
 		pr_debug("%s: %s phandle is 0x%08x\n",
 				__func__, rprop->name, phandle);
 
-		err = __of_adjust_phandle_ref(resolve, rprop, phandle, false);
+		err = __of_adjust_phandle_ref(resolve, rprop, phandle);
 		if (err)
 			break;
 	}
diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts
deleted file mode 100644
index 6994e15c24bf..000000000000
--- a/drivers/of/testcase-data/testcases.dts
+++ /dev/null
@@ -1,50 +0,0 @@
-/dts-v1/;
-/ {
-	testcase-data {
-		changeset {
-			prop-update = "hello";
-			prop-remove = "world";
-			node-remove {
-			};
-		};
-	};
-};
-#include "tests-phandle.dtsi"
-#include "tests-interrupts.dtsi"
-#include "tests-match.dtsi"
-#include "tests-platform.dtsi"
-
-/*
- * phandle fixup data - generated by dtc patches that aren't upstream.
- * This data must be regenerated whenever phandle references are modified in
- * the testdata tree.
- *
- * The format of this data may be subject to change. For the time being consider
- * this a kernel-internal data format.
- */
-/ { __local_fixups__ {
-	fixup = "/testcase-data/testcase-device2:interrupt-parent:0",
-		"/testcase-data/testcase-device1:interrupt-parent:0",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:60",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:52",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:44",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:36",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:24",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:8",
-		"/testcase-data/interrupts/interrupts-extended0:interrupts-extended:0",
-		"/testcase-data/interrupts/interrupts1:interrupt-parent:0",
-		"/testcase-data/interrupts/interrupts0:interrupt-parent:0",
-		"/testcase-data/interrupts/intmap1:interrupt-map:12",
-		"/testcase-data/interrupts/intmap0:interrupt-map:52",
-		"/testcase-data/interrupts/intmap0:interrupt-map:36",
-		"/testcase-data/interrupts/intmap0:interrupt-map:16",
-		"/testcase-data/interrupts/intmap0:interrupt-map:4",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list-bad-args:12",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list-bad-args:0",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list:56",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list:52",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list:40",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list:24",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list:8",
-		"/testcase-data/phandle-tests/consumer-a:phandle-list:0";
-}; };
diff --git a/drivers/of/unittest-data/testcases.dts b/drivers/of/unittest-data/testcases.dts
new file mode 100644
index 000000000000..12f7c3d649c8
--- /dev/null
+++ b/drivers/of/unittest-data/testcases.dts
@@ -0,0 +1,79 @@
+/dts-v1/;
+/ {
+	testcase-data {
+		changeset {
+			prop-update = "hello";
+			prop-remove = "world";
+			node-remove {
+			};
+		};
+	};
+};
+#include "tests-phandle.dtsi"
+#include "tests-interrupts.dtsi"
+#include "tests-match.dtsi"
+#include "tests-platform.dtsi"
+#include "tests-overlay.dtsi"
+
+/*
+ * phandle fixup data - generated by dtc patches that aren't upstream.
+ * This data must be regenerated whenever phandle references are modified in
+ * the testdata tree.
+ *
+ * The format of this data may be subject to change. For the time being consider
+ * this a kernel-internal data format.
+ */
+/ { __local_fixups__ {
+	testcase-data {
+		phandle-tests {
+			consumer-a {
+				phandle-list = <0x00000000 0x00000008
+						0x00000018 0x00000028
+						0x00000034 0x00000038>;
+				phandle-list-bad-args = <0x00000000 0x0000000c>;
+			};
+		};
+		interrupts {
+			intmap0 {
+				interrupt-map = <0x00000004 0x00000010
+						 0x00000024 0x00000034>;
+			};
+			intmap1 {
+				interrupt-map = <0x0000000c>;
+			};
+			interrupts0 {
+				interrupt-parent = <0x00000000>;
+			};
+			interrupts1 {
+				interrupt-parent = <0x00000000>;
+			};
+			interrupts-extended0 {
+				interrupts-extended = <0x00000000 0x00000008
+						       0x00000018 0x00000024
+						       0x0000002c 0x00000034
+						       0x0000003c>;
+			};
+		};
+		testcase-device1 {
+			interrupt-parent = <0x00000000>;
+		};
+		testcase-device2 {
+			interrupt-parent = <0x00000000>;
+		};
+		overlay2 {
+			fragment@0 {
+				target = <0x00000000>;
+			};
+		};
+		overlay3 {
+			fragment@0 {
+				target = <0x00000000>;
+			};
+		};
+		overlay4 {
+			fragment@0 {
+				target = <0x00000000>;
+			};
+		};
+	};
+}; };
diff --git a/drivers/of/testcase-data/tests-interrupts.dtsi b/drivers/of/unittest-data/tests-interrupts.dtsi
index da4695f60351..da4695f60351 100644
--- a/drivers/of/testcase-data/tests-interrupts.dtsi
+++ b/drivers/of/unittest-data/tests-interrupts.dtsi
diff --git a/drivers/of/testcase-data/tests-match.dtsi b/drivers/of/unittest-data/tests-match.dtsi
index c9e541129534..c9e541129534 100644
--- a/drivers/of/testcase-data/tests-match.dtsi
+++ b/drivers/of/unittest-data/tests-match.dtsi
diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi
new file mode 100644
index 000000000000..a2b687d5f324
--- /dev/null
+++ b/drivers/of/unittest-data/tests-overlay.dtsi
@@ -0,0 +1,235 @@
+
+/ {
+	testcase-data {
+		overlay-node {
+
+			/* test bus */
+			selftestbus: test-bus {
+				compatible = "simple-bus";
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				selftest100: test-selftest100 {
+					compatible = "selftest";
+					status = "okay";
+					reg = <100>;
+				};
+
+				selftest101: test-selftest101 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <101>;
+				};
+
+				selftest0: test-selftest0 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <0>;
+				};
+
+				selftest1: test-selftest1 {
+					compatible = "selftest";
+					status = "okay";
+					reg = <1>;
+				};
+
+				selftest2: test-selftest2 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <2>;
+				};
+
+				selftest3: test-selftest3 {
+					compatible = "selftest";
+					status = "okay";
+					reg = <3>;
+				};
+
+				selftest5: test-selftest5 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <5>;
+				};
+
+				selftest6: test-selftest6 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <6>;
+				};
+
+				selftest7: test-selftest7 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <7>;
+				};
+
+				selftest8: test-selftest8 {
+					compatible = "selftest";
+					status = "disabled";
+					reg = <8>;
+				};
+			};
+		};
+
+		/* test enable using absolute target path */
+		overlay0 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest0";
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test disable using absolute target path */
+		overlay1 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest1";
+				__overlay__ {
+					status = "disabled";
+				};
+			};
+		};
+
+		/* test enable using label */
+		overlay2 {
+			fragment@0 {
+				target = <&selftest2>;
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test disable using label */
+		overlay3 {
+			fragment@0 {
+				target = <&selftest3>;
+				__overlay__ {
+					status = "disabled";
+				};
+			};
+		};
+
+		/* test insertion of a full node */
+		overlay4 {
+			fragment@0 {
+				target = <&selftestbus>;
+				__overlay__ {
+
+					/* suppress DTC warning */
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					test-selftest4 {
+						compatible = "selftest";
+						status = "okay";
+						reg = <4>;
+					};
+				};
+			};
+		};
+
+		/* test overlay apply revert */
+		overlay5 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest5";
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test overlays application and removal in sequence */
+		overlay6 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest6";
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+		overlay7 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest7";
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+
+		/* test overlays application and removal in bad sequence */
+		overlay8 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest8";
+				__overlay__ {
+					status = "okay";
+				};
+			};
+		};
+		overlay9 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus/test-selftest8";
+				__overlay__ {
+					property-foo = "bar";
+				};
+			};
+		};
+
+		overlay10 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus";
+				__overlay__ {
+
+					/* suppress DTC warning */
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					test-selftest10 {
+						compatible = "selftest";
+						status = "okay";
+						reg = <10>;
+
+						#address-cells = <1>;
+						#size-cells = <0>;
+
+						test-selftest101 {
+							compatible = "selftest";
+							status = "okay";
+							reg = <1>;
+						};
+
+					};
+				};
+			};
+		};
+
+		overlay11 {
+			fragment@0 {
+				target-path = "/testcase-data/overlay-node/test-bus";
+				__overlay__ {
+
+					/* suppress DTC warning */
+					#address-cells = <1>;
+					#size-cells = <0>;
+
+					test-selftest11 {
+						compatible = "selftest";
+						status = "okay";
+						reg = <11>;
+
+						#address-cells = <1>;
+						#size-cells = <0>;
+
+						test-selftest111 {
+							compatible = "selftest";
+							status = "okay";
+							reg = <1>;
+						};
+
+					};
+				};
+			};
+		};
+	};
+};
diff --git a/drivers/of/testcase-data/tests-phandle.dtsi b/drivers/of/unittest-data/tests-phandle.dtsi
index 5b1527e8a7fb..5b1527e8a7fb 100644
--- a/drivers/of/testcase-data/tests-phandle.dtsi
+++ b/drivers/of/unittest-data/tests-phandle.dtsi
diff --git a/drivers/of/testcase-data/tests-platform.dtsi b/drivers/of/unittest-data/tests-platform.dtsi
index eb20eeb2b062..eb20eeb2b062 100644
--- a/drivers/of/testcase-data/tests-platform.dtsi
+++ b/drivers/of/unittest-data/tests-platform.dtsi
diff --git a/drivers/of/selftest.c b/drivers/of/unittest.c
index e2d79afa9dc6..e5fa3e91f82f 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/unittest.c
@@ -17,6 +17,8 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
 
 #include "of_private.h"
 
@@ -25,24 +27,22 @@ static struct selftest_results {
 	int failed;
 } selftest_results;
 
-#define NO_OF_NODES 3
-static struct device_node *nodes[NO_OF_NODES];
-static int last_node_index;
-static bool selftest_live_tree;
-
-#define selftest(result, fmt, ...) { \
-	if (!(result)) { \
+#define selftest(result, fmt, ...) ({ \
+	bool failed = !(result); \
+	if (failed) { \
 		selftest_results.failed++; \
 		pr_err("FAIL %s():%i " fmt, __func__, __LINE__, ##__VA_ARGS__); \
 	} else { \
 		selftest_results.passed++; \
 		pr_debug("pass %s():%i\n", __func__, __LINE__); \
 	} \
-}
+	failed; \
+})
 
 static void __init of_selftest_find_node_by_name(void)
 {
 	struct device_node *np;
+	const char *options;
 
 	np = of_find_node_by_path("/testcase-data");
 	selftest(np && !strcmp("/testcase-data", np->full_name),
@@ -83,6 +83,51 @@ static void __init of_selftest_find_node_by_name(void)
 	np = of_find_node_by_path("testcase-alias/missing-path");
 	selftest(!np, "non-existent alias with relative path returned node %s\n", np->full_name);
 	of_node_put(np);
+
+	np = of_find_node_opts_by_path("/testcase-data:testoption", &options);
+	selftest(np && !strcmp("testoption", options),
+		 "option path test failed\n");
+	of_node_put(np);
+
+	np = of_find_node_opts_by_path("/testcase-data:test/option", &options);
+	selftest(np && !strcmp("test/option", options),
+		 "option path test, subcase #1 failed\n");
+	of_node_put(np);
+
+	np = of_find_node_opts_by_path("/testcase-data/testcase-device1:test/option", &options);
+	selftest(np && !strcmp("test/option", options),
+		 "option path test, subcase #2 failed\n");
+	of_node_put(np);
+
+	np = of_find_node_opts_by_path("/testcase-data:testoption", NULL);
+	selftest(np, "NULL option path test failed\n");
+	of_node_put(np);
+
+	np = of_find_node_opts_by_path("testcase-alias:testaliasoption",
+				       &options);
+	selftest(np && !strcmp("testaliasoption", options),
+		 "option alias path test failed\n");
+	of_node_put(np);
+
+	np = of_find_node_opts_by_path("testcase-alias:test/alias/option",
+				       &options);
+	selftest(np && !strcmp("test/alias/option", options),
+		 "option alias path test, subcase #1 failed\n");
+	of_node_put(np);
+
+	np = of_find_node_opts_by_path("testcase-alias:testaliasoption", NULL);
+	selftest(np, "NULL option alias path test failed\n");
+	of_node_put(np);
+
+	options = "testoption";
+	np = of_find_node_opts_by_path("testcase-alias", &options);
+	selftest(np && !options, "option clearing test failed\n");
+	of_node_put(np);
+
+	options = "testoption";
+	np = of_find_node_opts_by_path("/", &options);
+	selftest(np && !options, "option clearing root node test failed\n");
+	of_node_put(np);
 }
 
 static void __init of_selftest_dynamic(void)
@@ -148,7 +193,7 @@ static void __init of_selftest_dynamic(void)
 
 static int __init of_selftest_check_node_linkage(struct device_node *np)
 {
-	struct device_node *child, *allnext_index = np;
+	struct device_node *child;
 	int count = 0, rc;
 
 	for_each_child_of_node(np, child) {
@@ -158,14 +203,6 @@ static int __init of_selftest_check_node_linkage(struct device_node *np)
 			return -EINVAL;
 		}
 
-		while (allnext_index && allnext_index != child)
-			allnext_index = allnext_index->allnext;
-		if (allnext_index != child) {
-			pr_err("Node %s is ordered differently in sibling and allnode lists\n",
-				 child->name);
-			return -EINVAL;
-		}
-
 		rc = of_selftest_check_node_linkage(child);
 		if (rc < 0)
 			return rc;
@@ -180,12 +217,12 @@ static void __init of_selftest_check_tree_linkage(void)
 	struct device_node *np;
 	int allnode_count = 0, child_count;
 
-	if (!of_allnodes)
+	if (!of_root)
 		return;
 
 	for_each_of_allnodes(np)
 		allnode_count++;
-	child_count = of_selftest_check_node_linkage(of_allnodes);
+	child_count = of_selftest_check_node_linkage(of_root);
 
 	selftest(child_count > 0, "Device node data structure is corrupted\n");
 	selftest(child_count == allnode_count, "allnodes list size (%i) doesn't match"
@@ -354,9 +391,9 @@ static void __init of_selftest_property_string(void)
 	rc = of_property_match_string(np, "phandle-list-names", "first");
 	selftest(rc == 0, "first expected:0 got:%i\n", rc);
 	rc = of_property_match_string(np, "phandle-list-names", "second");
-	selftest(rc == 1, "second expected:0 got:%i\n", rc);
+	selftest(rc == 1, "second expected:1 got:%i\n", rc);
 	rc = of_property_match_string(np, "phandle-list-names", "third");
-	selftest(rc == 2, "third expected:0 got:%i\n", rc);
+	selftest(rc == 2, "third expected:2 got:%i\n", rc);
 	rc = of_property_match_string(np, "phandle-list-names", "fourth");
 	selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc);
 	rc = of_property_match_string(np, "missing-property", "blah");
@@ -451,15 +488,14 @@ static void __init of_selftest_changeset(void)
 	struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" };
 	struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
 	struct property *ppremove;
-	struct device_node *n1, *n2, *n21, *nremove, *parent;
+	struct device_node *n1, *n2, *n21, *nremove, *parent, *np;
 	struct of_changeset chgset;
 
-	of_changeset_init(&chgset);
-	n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL);
+	n1 = __of_node_dup(NULL, "/testcase-data/changeset/n1");
 	selftest(n1, "testcase setup failure\n");
-	n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL);
+	n2 = __of_node_dup(NULL, "/testcase-data/changeset/n2");
 	selftest(n2, "testcase setup failure\n");
-	n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL);
+	n21 = __of_node_dup(NULL, "%s/%s", "/testcase-data/changeset/n2", "n21");
 	selftest(n21, "testcase setup failure %p\n", n21);
 	nremove = of_find_node_by_path("/testcase-data/changeset/node-remove");
 	selftest(nremove, "testcase setup failure\n");
@@ -487,6 +523,11 @@ static void __init of_selftest_changeset(void)
 	selftest(!of_changeset_apply(&chgset), "apply failed\n");
 	mutex_unlock(&of_mutex);
 
+	/* Make sure node names are constructed correctly */
+	selftest((np = of_find_node_by_path("/testcase-data/changeset/n2/n21")),
+		 "'%s' not added\n", n21->full_name);
+	of_node_put(np);
+
 	mutex_lock(&of_mutex);
 	selftest(!of_changeset_revert(&chgset), "revert failed\n");
 	mutex_unlock(&of_mutex);
@@ -702,10 +743,13 @@ static void __init of_selftest_match_node(void)
 	}
 }
 
+struct device test_bus = {
+	.init_name = "unittest-bus",
+};
 static void __init of_selftest_platform_populate(void)
 {
-	int irq;
-	struct device_node *np, *child;
+	int irq, rc;
+	struct device_node *np, *child, *grandchild;
 	struct platform_device *pdev;
 	struct of_device_id match[] = {
 		{ .compatible = "test-device", },
@@ -730,20 +774,32 @@ static void __init of_selftest_platform_populate(void)
 	irq = platform_get_irq(pdev, 0);
 	selftest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq);
 
-	np = of_find_node_by_path("/testcase-data/platform-tests");
-	if (!np) {
-		pr_err("No testcase data in device tree\n");
+	if (selftest(np = of_find_node_by_path("/testcase-data/platform-tests"),
+		     "No testcase data in device tree\n"));
+		return;
+
+	if (selftest(!(rc = device_register(&test_bus)),
+		     "testbus registration failed; rc=%i\n", rc));
 		return;
-	}
 
 	for_each_child_of_node(np, child) {
-		struct device_node *grandchild;
-		of_platform_populate(child, match, NULL, NULL);
+		of_platform_populate(child, match, NULL, &test_bus);
 		for_each_child_of_node(child, grandchild)
 			selftest(of_find_device_by_node(grandchild),
 				 "Could not create device for node '%s'\n",
 				 grandchild->name);
 	}
+
+	of_platform_depopulate(&test_bus);
+	for_each_child_of_node(np, child) {
+		for_each_child_of_node(child, grandchild)
+			selftest(!of_find_device_by_node(grandchild),
+				 "device didn't get destroyed '%s'\n",
+				 grandchild->name);
+	}
+
+	device_unregister(&test_bus);
+	of_node_put(np);
 }
 
 /**
@@ -775,33 +831,32 @@ static void update_node_properties(struct device_node *np,
  */
 static int attach_node_and_children(struct device_node *np)
 {
-	struct device_node *next, *root = np, *dup;
+	struct device_node *next, *dup, *child;
+	unsigned long flags;
 
-	/* skip root node */
-	np = np->child;
-	/* storing a copy in temporary node */
-	dup = np;
-
-	while (dup) {
-		if (WARN_ON(last_node_index >= NO_OF_NODES))
-			return -EINVAL;
-		nodes[last_node_index++] = dup;
-		dup = dup->sibling;
+	dup = of_find_node_by_path(np->full_name);
+	if (dup) {
+		update_node_properties(np, dup);
+		return 0;
 	}
-	dup = NULL;
 
-	while (np) {
-		next = np->allnext;
-		dup = of_find_node_by_path(np->full_name);
-		if (dup)
-			update_node_properties(np, dup);
-		else {
-			np->child = NULL;
-			if (np->parent == root)
-				np->parent = of_allnodes;
-			of_attach_node(np);
-		}
-		np = next;
+	child = np->child;
+	np->child = NULL;
+
+	mutex_lock(&of_mutex);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	np->sibling = np->parent->child;
+	np->parent->child = np;
+	of_node_clear_flag(np, OF_DETACHED);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+	__of_attach_node_sysfs(np);
+	mutex_unlock(&of_mutex);
+
+	while (child) {
+		next = child->sibling;
+		attach_node_and_children(child);
+		child = next;
 	}
 
 	return 0;
@@ -846,11 +901,8 @@ static int __init selftest_data_add(void)
 		return -EINVAL;
 	}
 
-	if (!of_allnodes) {
-		/* enabling flag for removing nodes */
-		selftest_live_tree = true;
-		of_allnodes = selftest_data_node;
-
+	if (!of_root) {
+		of_root = selftest_data_node;
 		for_each_of_allnodes(np)
 			__of_attach_node_sysfs(np);
 		of_aliases = of_find_node_by_path("/aliases");
@@ -859,62 +911,532 @@ static int __init selftest_data_add(void)
 	}
 
 	/* attach the sub-tree to live tree */
-	return attach_node_and_children(selftest_data_node);
+	np = selftest_data_node->child;
+	while (np) {
+		struct device_node *next = np->sibling;
+		np->parent = of_root;
+		attach_node_and_children(np);
+		np = next;
+	}
+	return 0;
 }
 
-/**
- *	detach_node_and_children - detaches node
- *	and its children from live tree
- *
- *	@np:	Node to detach from live tree
- */
-static void detach_node_and_children(struct device_node *np)
+#ifdef CONFIG_OF_OVERLAY
+
+static int selftest_probe(struct platform_device *pdev)
 {
-	while (np->child)
-		detach_node_and_children(np->child);
-	of_detach_node(np);
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+
+	if (np == NULL) {
+		dev_err(dev, "No OF data for device\n");
+		return -EINVAL;
+
+	}
+
+	dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name);
+
+	of_platform_populate(np, NULL, NULL, &pdev->dev);
+
+	return 0;
 }
 
-/**
- *	selftest_data_remove - removes the selftest data
- *	nodes from the live tree
- */
-static void selftest_data_remove(void)
+static int selftest_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+
+	dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name);
+	return 0;
+}
+
+static struct of_device_id selftest_match[] = {
+	{ .compatible = "selftest", },
+	{},
+};
+
+static struct platform_driver selftest_driver = {
+	.probe			= selftest_probe,
+	.remove			= selftest_remove,
+	.driver = {
+		.name		= "selftest",
+		.owner		= THIS_MODULE,
+		.of_match_table	= of_match_ptr(selftest_match),
+	},
+};
+
+/* get the platform device instantiated at the path */
+static struct platform_device *of_path_to_platform_device(const char *path)
 {
 	struct device_node *np;
-	struct property *prop;
+	struct platform_device *pdev;
+
+	np = of_find_node_by_path(path);
+	if (np == NULL)
+		return NULL;
+
+	pdev = of_find_device_by_node(np);
+	of_node_put(np);
+
+	return pdev;
+}
+
+/* find out if a platform device exists at that path */
+static int of_path_platform_device_exists(const char *path)
+{
+	struct platform_device *pdev;
+
+	pdev = of_path_to_platform_device(path);
+	platform_device_put(pdev);
+	return pdev != NULL;
+}
+
+static const char *selftest_path(int nr)
+{
+	static char buf[256];
+
+	snprintf(buf, sizeof(buf) - 1,
+		"/testcase-data/overlay-node/test-bus/test-selftest%d", nr);
+	buf[sizeof(buf) - 1] = '\0';
+
+	return buf;
+}
+
+static const char *overlay_path(int nr)
+{
+	static char buf[256];
+
+	snprintf(buf, sizeof(buf) - 1,
+		"/testcase-data/overlay%d", nr);
+	buf[sizeof(buf) - 1] = '\0';
+
+	return buf;
+}
+
+static const char *bus_path = "/testcase-data/overlay-node/test-bus";
+
+static int of_selftest_apply_overlay(int selftest_nr, int overlay_nr,
+		int *overlay_id)
+{
+	struct device_node *np = NULL;
+	int ret, id = -1;
+
+	np = of_find_node_by_path(overlay_path(overlay_nr));
+	if (np == NULL) {
+		selftest(0, "could not find overlay node @\"%s\"\n",
+				overlay_path(overlay_nr));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = of_overlay_create(np);
+	if (ret < 0) {
+		selftest(0, "could not create overlay from \"%s\"\n",
+				overlay_path(overlay_nr));
+		goto out;
+	}
+	id = ret;
+
+	ret = 0;
+
+out:
+	of_node_put(np);
+
+	if (overlay_id)
+		*overlay_id = id;
 
-	if (selftest_live_tree) {
-		of_node_put(of_aliases);
-		of_node_put(of_chosen);
-		of_aliases = NULL;
-		of_chosen = NULL;
-		for_each_child_of_node(of_allnodes, np)
-			detach_node_and_children(np);
-		__of_detach_node_sysfs(of_allnodes);
-		of_allnodes = NULL;
+	return ret;
+}
+
+/* apply an overlay while checking before and after states */
+static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr,
+		int before, int after)
+{
+	int ret;
+
+	/* selftest device must not be in before state */
+	if (of_path_platform_device_exists(selftest_path(selftest_nr))
+			!= before) {
+		selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				selftest_path(selftest_nr),
+				!before ? "enabled" : "disabled");
+		return -EINVAL;
+	}
+
+	ret = of_selftest_apply_overlay(overlay_nr, selftest_nr, NULL);
+	if (ret != 0) {
+		/* of_selftest_apply_overlay already called selftest() */
+		return ret;
+	}
+
+	/* selftest device must be to set to after state */
+	if (of_path_platform_device_exists(selftest_path(selftest_nr))
+			!= after) {
+		selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				selftest_path(selftest_nr),
+				!after ? "enabled" : "disabled");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* apply an overlay and then revert it while checking before, after states */
+static int of_selftest_apply_revert_overlay_check(int overlay_nr,
+		int selftest_nr, int before, int after)
+{
+	int ret, ov_id;
+
+	/* selftest device must be in before state */
+	if (of_path_platform_device_exists(selftest_path(selftest_nr))
+			!= before) {
+		selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				selftest_path(selftest_nr),
+				!before ? "enabled" : "disabled");
+		return -EINVAL;
+	}
+
+	/* apply the overlay */
+	ret = of_selftest_apply_overlay(overlay_nr, selftest_nr, &ov_id);
+	if (ret != 0) {
+		/* of_selftest_apply_overlay already called selftest() */
+		return ret;
+	}
+
+	/* selftest device must be in after state */
+	if (of_path_platform_device_exists(selftest_path(selftest_nr))
+			!= after) {
+		selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				selftest_path(selftest_nr),
+				!after ? "enabled" : "disabled");
+		return -EINVAL;
+	}
+
+	ret = of_overlay_destroy(ov_id);
+	if (ret != 0) {
+		selftest(0, "overlay @\"%s\" failed to be destroyed @\"%s\"\n",
+				overlay_path(overlay_nr),
+				selftest_path(selftest_nr));
+		return ret;
+	}
+
+	/* selftest device must be again in before state */
+	if (of_path_platform_device_exists(selftest_path(selftest_nr))
+			!= before) {
+		selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+				overlay_path(overlay_nr),
+				selftest_path(selftest_nr),
+				!before ? "enabled" : "disabled");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* test activation of device */
+static void of_selftest_overlay_0(void)
+{
+	int ret;
+
+	/* device should enable */
+	ret = of_selftest_apply_overlay_check(0, 0, 0, 1);
+	if (ret != 0)
+		return;
+
+	selftest(1, "overlay test %d passed\n", 0);
+}
+
+/* test deactivation of device */
+static void of_selftest_overlay_1(void)
+{
+	int ret;
+
+	/* device should disable */
+	ret = of_selftest_apply_overlay_check(1, 1, 1, 0);
+	if (ret != 0)
+		return;
+
+	selftest(1, "overlay test %d passed\n", 1);
+}
+
+/* test activation of device */
+static void of_selftest_overlay_2(void)
+{
+	int ret;
+
+	/* device should enable */
+	ret = of_selftest_apply_overlay_check(2, 2, 0, 1);
+	if (ret != 0)
+		return;
+
+	selftest(1, "overlay test %d passed\n", 2);
+}
+
+/* test deactivation of device */
+static void of_selftest_overlay_3(void)
+{
+	int ret;
+
+	/* device should disable */
+	ret = of_selftest_apply_overlay_check(3, 3, 1, 0);
+	if (ret != 0)
+		return;
+
+	selftest(1, "overlay test %d passed\n", 3);
+}
+
+/* test activation of a full device node */
+static void of_selftest_overlay_4(void)
+{
+	int ret;
+
+	/* device should disable */
+	ret = of_selftest_apply_overlay_check(4, 4, 0, 1);
+	if (ret != 0)
+		return;
+
+	selftest(1, "overlay test %d passed\n", 4);
+}
+
+/* test overlay apply/revert sequence */
+static void of_selftest_overlay_5(void)
+{
+	int ret;
+
+	/* device should disable */
+	ret = of_selftest_apply_revert_overlay_check(5, 5, 0, 1);
+	if (ret != 0)
 		return;
+
+	selftest(1, "overlay test %d passed\n", 5);
+}
+
+/* test overlay application in sequence */
+static void of_selftest_overlay_6(void)
+{
+	struct device_node *np;
+	int ret, i, ov_id[2];
+	int overlay_nr = 6, selftest_nr = 6;
+	int before = 0, after = 1;
+
+	/* selftest device must be in before state */
+	for (i = 0; i < 2; i++) {
+		if (of_path_platform_device_exists(
+					selftest_path(selftest_nr + i))
+				!= before) {
+			selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+					overlay_path(overlay_nr + i),
+					selftest_path(selftest_nr + i),
+					!before ? "enabled" : "disabled");
+			return;
+		}
 	}
 
-	while (last_node_index-- > 0) {
-		if (nodes[last_node_index]) {
-			np = of_find_node_by_path(nodes[last_node_index]->full_name);
-			if (np == nodes[last_node_index]) {
-				if (of_aliases == np) {
-					of_node_put(of_aliases);
-					of_aliases = NULL;
-				}
-				detach_node_and_children(np);
-			} else {
-				for_each_property_of_node(np, prop) {
-					if (strcmp(prop->name, "testcase-alias") == 0)
-						of_remove_property(np, prop);
-				}
-			}
+	/* apply the overlays */
+	for (i = 0; i < 2; i++) {
+
+		np = of_find_node_by_path(overlay_path(overlay_nr + i));
+		if (np == NULL) {
+			selftest(0, "could not find overlay node @\"%s\"\n",
+					overlay_path(overlay_nr + i));
+			return;
+		}
+
+		ret = of_overlay_create(np);
+		if (ret < 0)  {
+			selftest(0, "could not create overlay from \"%s\"\n",
+					overlay_path(overlay_nr + i));
+			return;
+		}
+		ov_id[i] = ret;
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* selftest device must be in after state */
+		if (of_path_platform_device_exists(
+					selftest_path(selftest_nr + i))
+				!= after) {
+			selftest(0, "overlay @\"%s\" failed @\"%s\" %s\n",
+					overlay_path(overlay_nr + i),
+					selftest_path(selftest_nr + i),
+					!after ? "enabled" : "disabled");
+			return;
 		}
 	}
+
+	for (i = 1; i >= 0; i--) {
+		ret = of_overlay_destroy(ov_id[i]);
+		if (ret != 0) {
+			selftest(0, "overlay @\"%s\" failed destroy @\"%s\"\n",
+					overlay_path(overlay_nr + i),
+					selftest_path(selftest_nr + i));
+			return;
+		}
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* selftest device must be again in before state */
+		if (of_path_platform_device_exists(
+					selftest_path(selftest_nr + i))
+				!= before) {
+			selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n",
+					overlay_path(overlay_nr + i),
+					selftest_path(selftest_nr + i),
+					!before ? "enabled" : "disabled");
+			return;
+		}
+	}
+
+	selftest(1, "overlay test %d passed\n", 6);
+}
+
+/* test overlay application in sequence */
+static void of_selftest_overlay_8(void)
+{
+	struct device_node *np;
+	int ret, i, ov_id[2];
+	int overlay_nr = 8, selftest_nr = 8;
+
+	/* we don't care about device state in this test */
+
+	/* apply the overlays */
+	for (i = 0; i < 2; i++) {
+
+		np = of_find_node_by_path(overlay_path(overlay_nr + i));
+		if (np == NULL) {
+			selftest(0, "could not find overlay node @\"%s\"\n",
+					overlay_path(overlay_nr + i));
+			return;
+		}
+
+		ret = of_overlay_create(np);
+		if (ret < 0)  {
+			selftest(0, "could not create overlay from \"%s\"\n",
+					overlay_path(overlay_nr + i));
+			return;
+		}
+		ov_id[i] = ret;
+	}
+
+	/* now try to remove first overlay (it should fail) */
+	ret = of_overlay_destroy(ov_id[0]);
+	if (ret == 0) {
+		selftest(0, "overlay @\"%s\" was destroyed @\"%s\"\n",
+				overlay_path(overlay_nr + 0),
+				selftest_path(selftest_nr));
+		return;
+	}
+
+	/* removing them in order should work */
+	for (i = 1; i >= 0; i--) {
+		ret = of_overlay_destroy(ov_id[i]);
+		if (ret != 0) {
+			selftest(0, "overlay @\"%s\" not destroyed @\"%s\"\n",
+					overlay_path(overlay_nr + i),
+					selftest_path(selftest_nr));
+			return;
+		}
+	}
+
+	selftest(1, "overlay test %d passed\n", 8);
+}
+
+/* test insertion of a bus with parent devices */
+static void of_selftest_overlay_10(void)
+{
+	int ret;
+	char *child_path;
+
+	/* device should disable */
+	ret = of_selftest_apply_overlay_check(10, 10, 0, 1);
+	if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 10))
+		return;
+
+	child_path = kasprintf(GFP_KERNEL, "%s/test-selftest101",
+			selftest_path(10));
+	if (selftest(child_path, "overlay test %d failed; kasprintf\n", 10))
+		return;
+
+	ret = of_path_platform_device_exists(child_path);
+	kfree(child_path);
+	if (selftest(ret, "overlay test %d failed; no child device\n", 10))
+		return;
+}
+
+/* test insertion of a bus with parent devices (and revert) */
+static void of_selftest_overlay_11(void)
+{
+	int ret;
+
+	/* device should disable */
+	ret = of_selftest_apply_revert_overlay_check(11, 11, 0, 1);
+	if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 11))
+		return;
+}
+
+static void __init of_selftest_overlay(void)
+{
+	struct device_node *bus_np = NULL;
+	int ret;
+
+	ret = platform_driver_register(&selftest_driver);
+	if (ret != 0) {
+		selftest(0, "could not register selftest driver\n");
+		goto out;
+	}
+
+	bus_np = of_find_node_by_path(bus_path);
+	if (bus_np == NULL) {
+		selftest(0, "could not find bus_path \"%s\"\n", bus_path);
+		goto out;
+	}
+
+	ret = of_platform_populate(bus_np, of_default_bus_match_table,
+			NULL, NULL);
+	if (ret != 0) {
+		selftest(0, "could not populate bus @ \"%s\"\n", bus_path);
+		goto out;
+	}
+
+	if (!of_path_platform_device_exists(selftest_path(100))) {
+		selftest(0, "could not find selftest0 @ \"%s\"\n",
+				selftest_path(100));
+		goto out;
+	}
+
+	if (of_path_platform_device_exists(selftest_path(101))) {
+		selftest(0, "selftest1 @ \"%s\" should not exist\n",
+				selftest_path(101));
+		goto out;
+	}
+
+	selftest(1, "basic infrastructure of overlays passed");
+
+	/* tests in sequence */
+	of_selftest_overlay_0();
+	of_selftest_overlay_1();
+	of_selftest_overlay_2();
+	of_selftest_overlay_3();
+	of_selftest_overlay_4();
+	of_selftest_overlay_5();
+	of_selftest_overlay_6();
+	of_selftest_overlay_8();
+
+	of_selftest_overlay_10();
+	of_selftest_overlay_11();
+
+out:
+	of_node_put(bus_np);
 }
 
+#else
+static inline void __init of_selftest_overlay(void) { }
+#endif
+
 static int __init of_selftest(void)
 {
 	struct device_node *np;
@@ -947,9 +1469,7 @@ static int __init of_selftest(void)
 	of_selftest_parse_interrupts_extended();
 	of_selftest_match_node();
 	of_selftest_platform_populate();
-
-	/* removing selftest data from live tree */
-	selftest_data_remove();
+	of_selftest_overlay();
 
 	/* Double check linkage after removing testcase data */
 	of_selftest_check_tree_linkage();
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 893503fa1782..cced84233ac0 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -4,6 +4,7 @@
 config PCI_MSI
 	bool "Message Signaled Interrupts (MSI and MSI-X)"
 	depends on PCI
+	select GENERIC_MSI_IRQ
 	help
 	   This allows device drivers to enable MSI (Message Signaled
 	   Interrupts).  Message Signaled Interrupts enable a device to
@@ -16,6 +17,11 @@ config PCI_MSI
 
 	   If you don't know what to do here, say Y.
 
+config PCI_MSI_IRQ_DOMAIN
+	bool
+	depends on PCI_MSI
+	select GENERIC_MSI_IRQ_DOMAIN
+
 config PCI_DEBUG
 	bool "PCI Debugging"
 	depends on PCI && DEBUG_KERNEL
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index e04fe2d9df3b..e9815acec4a3 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_PCI_IOV) += iov.o
 #
 obj-$(CONFIG_ALPHA) += setup-irq.o
 obj-$(CONFIG_ARM) += setup-irq.o
+obj-$(CONFIG_ARM64) += setup-irq.o
 obj-$(CONFIG_UNICORE32) += setup-irq.o
 obj-$(CONFIG_SUPERH) += setup-irq.o
 obj-$(CONFIG_MIPS) += setup-irq.o
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 7f249b9ab2ce..b965c12168b7 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -67,6 +67,93 @@ EXPORT_SYMBOL(pci_bus_write_config_byte);
 EXPORT_SYMBOL(pci_bus_write_config_word);
 EXPORT_SYMBOL(pci_bus_write_config_dword);
 
+int pci_generic_config_read(struct pci_bus *bus, unsigned int devfn,
+			    int where, int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = bus->ops->map_bus(bus, devfn, where);
+	if (!addr) {
+		*val = ~0;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (size == 1)
+		*val = readb(addr);
+	else if (size == 2)
+		*val = readw(addr);
+	else
+		*val = readl(addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_read);
+
+int pci_generic_config_write(struct pci_bus *bus, unsigned int devfn,
+			     int where, int size, u32 val)
+{
+	void __iomem *addr;
+
+	addr = bus->ops->map_bus(bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 1)
+		writeb(val, addr);
+	else if (size == 2)
+		writew(val, addr);
+	else
+		writel(val, addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_write);
+
+int pci_generic_config_read32(struct pci_bus *bus, unsigned int devfn,
+			      int where, int size, u32 *val)
+{
+	void __iomem *addr;
+
+	addr = bus->ops->map_bus(bus, devfn, where & ~0x3);
+	if (!addr) {
+		*val = ~0;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	*val = readl(addr);
+
+	if (size <= 2)
+		*val = (*val >> (8 * (where & 3))) & ((1 << (size * 8)) - 1);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_read32);
+
+int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 val)
+{
+	void __iomem *addr;
+	u32 mask, tmp;
+
+	addr = bus->ops->map_bus(bus, devfn, where & ~0x3);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 4) {
+		writel(val, addr);
+		return PCIBIOS_SUCCESSFUL;
+	} else {
+		mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
+	}
+
+	tmp = readl(addr) & mask;
+	tmp |= val << ((where & 0x3) * 8);
+	writel(tmp, addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_generic_config_write32);
+
 /**
  * pci_bus_set_ops - Set raw operations of pci bus
  * @bus:	pci bus struct
diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index 0e5f3c95af5b..f58e05b0f8d5 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -23,6 +23,20 @@ static struct pci_host_bridge *find_pci_host_bridge(struct pci_bus *bus)
 	return to_pci_host_bridge(root_bus->bridge);
 }
 
+struct device *pci_get_host_bridge_device(struct pci_dev *dev)
+{
+	struct pci_bus *root_bus = find_pci_root_bus(dev->bus);
+	struct device *bridge = root_bus->bridge;
+
+	kobject_get(&bridge->kobj);
+	return bridge;
+}
+
+void  pci_put_host_bridge_device(struct device *dev)
+{
+	kobject_put(&dev->kobj);
+}
+
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 				 void (*release_fn)(struct pci_host_bridge *),
 				 void *release_data)
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 3dc25fad490c..5aacabb59402 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -50,7 +50,7 @@ config PCI_RCAR_GEN2_PCIE
 
 config PCI_HOST_GENERIC
 	bool "Generic PCI host controller"
-	depends on ARM && OF
+	depends on (ARM || ARM64) && OF
 	help
 	  Say Y here if you want to support a simple generic PCI host
 	  controller, such as the one emulated by kvmtool.
@@ -86,9 +86,18 @@ config PCI_XGENE
 	depends on ARCH_XGENE
 	depends on OF
 	select PCIEPORTBUS
+	select PCI_MSI_IRQ_DOMAIN if PCI_MSI
 	help
 	  Say Y here if you want internal PCI support on APM X-Gene SoC.
 	  There are 5 internal PCIe ports available. Each port is GEN3 capable
 	  and have varied lanes from x1 to x8.
 
+config PCI_XGENE_MSI
+	bool "X-Gene v1 PCIe MSI feature"
+	depends on PCI_XGENE && PCI_MSI
+	default y
+	help
+	  Say Y here if you want PCIe MSI support for the APM X-Gene v1 SoC.
+	  This MSI driver supports 5 PCIe ports on the APM X-Gene v1 SoC.
+
 endmenu
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 26b3461d68d7..852ba92f2067 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
 obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone-dw.o pci-keystone.o
 obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
 obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
+obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c
index b2d093ea5ff2..6be9485c8245 100644
--- a/drivers/pci/host/pci-dra7xx.c
+++ b/drivers/pci/host/pci-dra7xx.c
@@ -447,7 +447,6 @@ static struct platform_driver dra7xx_pcie_driver = {
 	.remove		= __exit_p(dra7xx_pcie_remove),
 	.driver = {
 		.name	= "dra7-pcie",
-		.owner	= THIS_MODULE,
 		.of_match_table = of_dra7xx_pcie_match,
 	},
 };
diff --git a/drivers/pci/host/pci-exynos.c b/drivers/pci/host/pci-exynos.c
index ab8bcaabaf21..3be96171842f 100644
--- a/drivers/pci/host/pci-exynos.c
+++ b/drivers/pci/host/pci-exynos.c
@@ -398,7 +398,7 @@ static void exynos_pcie_enable_irq_pulse(struct pcie_port *pp)
 
 	/* enable INTX interrupt */
 	val = IRQ_INTA_ASSERT | IRQ_INTB_ASSERT |
-		IRQ_INTC_ASSERT | IRQ_INTD_ASSERT,
+		IRQ_INTC_ASSERT | IRQ_INTD_ASSERT;
 	exynos_elb_writel(exynos_pcie, val, PCIE_IRQ_EN_PULSE);
 	return;
 }
@@ -650,7 +650,6 @@ static struct platform_driver exynos_pcie_driver = {
 	.remove		= __exit_p(exynos_pcie_remove),
 	.driver = {
 		.name	= "exynos-pcie",
-		.owner	= THIS_MODULE,
 		.of_match_table = exynos_pcie_of_match,
 	},
 };
diff --git a/drivers/pci/host/pci-host-generic.c b/drivers/pci/host/pci-host-generic.c
index 3d2076f59911..7f8b9ebc5453 100644
--- a/drivers/pci/host/pci-host-generic.c
+++ b/drivers/pci/host/pci-host-generic.c
@@ -32,13 +32,22 @@ struct gen_pci_cfg_bus_ops {
 
 struct gen_pci_cfg_windows {
 	struct resource				res;
-	struct resource				bus_range;
+	struct resource				*bus_range;
 	void __iomem				**win;
 
 	const struct gen_pci_cfg_bus_ops	*ops;
 };
 
+/*
+ * ARM pcibios functions expect the ARM struct pci_sys_data as the PCI
+ * sysdata.  Add pci_sys_data as the first element in struct gen_pci so
+ * that when we use a gen_pci pointer as sysdata, it is also a pointer to
+ * a struct pci_sys_data.
+ */
 struct gen_pci {
+#ifdef CONFIG_ARM
+	struct pci_sys_data			sys;
+#endif
 	struct pci_host_bridge			host;
 	struct gen_pci_cfg_windows		cfg;
 	struct list_head			resources;
@@ -48,9 +57,8 @@ static void __iomem *gen_pci_map_cfg_bus_cam(struct pci_bus *bus,
 					     unsigned int devfn,
 					     int where)
 {
-	struct pci_sys_data *sys = bus->sysdata;
-	struct gen_pci *pci = sys->private_data;
-	resource_size_t idx = bus->number - pci->cfg.bus_range.start;
+	struct gen_pci *pci = bus->sysdata;
+	resource_size_t idx = bus->number - pci->cfg.bus_range->start;
 
 	return pci->cfg.win[idx] + ((devfn << 8) | where);
 }
@@ -64,9 +72,8 @@ static void __iomem *gen_pci_map_cfg_bus_ecam(struct pci_bus *bus,
 					      unsigned int devfn,
 					      int where)
 {
-	struct pci_sys_data *sys = bus->sysdata;
-	struct gen_pci *pci = sys->private_data;
-	resource_size_t idx = bus->number - pci->cfg.bus_range.start;
+	struct gen_pci *pci = bus->sysdata;
+	resource_size_t idx = bus->number - pci->cfg.bus_range->start;
 
 	return pci->cfg.win[idx] + ((devfn << 12) | where);
 }
@@ -76,55 +83,9 @@ static struct gen_pci_cfg_bus_ops gen_pci_cfg_ecam_bus_ops = {
 	.map_bus	= gen_pci_map_cfg_bus_ecam,
 };
 
-static int gen_pci_config_read(struct pci_bus *bus, unsigned int devfn,
-				int where, int size, u32 *val)
-{
-	void __iomem *addr;
-	struct pci_sys_data *sys = bus->sysdata;
-	struct gen_pci *pci = sys->private_data;
-
-	addr = pci->cfg.ops->map_bus(bus, devfn, where);
-
-	switch (size) {
-	case 1:
-		*val = readb(addr);
-		break;
-	case 2:
-		*val = readw(addr);
-		break;
-	default:
-		*val = readl(addr);
-	}
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int gen_pci_config_write(struct pci_bus *bus, unsigned int devfn,
-				 int where, int size, u32 val)
-{
-	void __iomem *addr;
-	struct pci_sys_data *sys = bus->sysdata;
-	struct gen_pci *pci = sys->private_data;
-
-	addr = pci->cfg.ops->map_bus(bus, devfn, where);
-
-	switch (size) {
-	case 1:
-		writeb(val, addr);
-		break;
-	case 2:
-		writew(val, addr);
-		break;
-	default:
-		writel(val, addr);
-	}
-
-	return PCIBIOS_SUCCESSFUL;
-}
-
 static struct pci_ops gen_pci_ops = {
-	.read	= gen_pci_config_read,
-	.write	= gen_pci_config_write,
+	.read	= pci_generic_config_read,
+	.write	= pci_generic_config_write,
 };
 
 static const struct of_device_id gen_pci_of_match[] = {
@@ -138,106 +99,50 @@ static const struct of_device_id gen_pci_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, gen_pci_of_match);
 
-static int gen_pci_calc_io_offset(struct device *dev,
-				  struct of_pci_range *range,
-				  struct resource *res,
-				  resource_size_t *offset)
-{
-	static atomic_t wins = ATOMIC_INIT(0);
-	int err, idx, max_win;
-	unsigned int window;
-
-	if (!PAGE_ALIGNED(range->cpu_addr))
-		return -EINVAL;
-
-	max_win = (IO_SPACE_LIMIT + 1) / SZ_64K;
-	idx = atomic_inc_return(&wins);
-	if (idx > max_win)
-		return -ENOSPC;
-
-	window = (idx - 1) * SZ_64K;
-	err = pci_ioremap_io(window, range->cpu_addr);
-	if (err)
-		return err;
-
-	of_pci_range_to_resource(range, dev->of_node, res);
-	res->start = window;
-	res->end = res->start + range->size - 1;
-	*offset = window - range->pci_addr;
-	return 0;
-}
-
-static int gen_pci_calc_mem_offset(struct device *dev,
-				   struct of_pci_range *range,
-				   struct resource *res,
-				   resource_size_t *offset)
-{
-	of_pci_range_to_resource(range, dev->of_node, res);
-	*offset = range->cpu_addr - range->pci_addr;
-	return 0;
-}
-
 static void gen_pci_release_of_pci_ranges(struct gen_pci *pci)
 {
-	struct pci_host_bridge_window *win;
-
-	list_for_each_entry(win, &pci->resources, list)
-		release_resource(win->res);
-
 	pci_free_resource_list(&pci->resources);
 }
 
 static int gen_pci_parse_request_of_pci_ranges(struct gen_pci *pci)
 {
-	struct of_pci_range range;
-	struct of_pci_range_parser parser;
 	int err, res_valid = 0;
 	struct device *dev = pci->host.dev.parent;
 	struct device_node *np = dev->of_node;
+	resource_size_t iobase;
+	struct pci_host_bridge_window *win;
 
-	if (of_pci_range_parser_init(&parser, np)) {
-		dev_err(dev, "missing \"ranges\" property\n");
-		return -EINVAL;
-	}
-
-	for_each_of_pci_range(&parser, &range) {
-		struct resource *parent, *res;
-		resource_size_t offset;
-		u32 restype = range.flags & IORESOURCE_TYPE_BITS;
+	err = of_pci_get_host_bridge_resources(np, 0, 0xff, &pci->resources,
+					       &iobase);
+	if (err)
+		return err;
 
-		res = devm_kmalloc(dev, sizeof(*res), GFP_KERNEL);
-		if (!res) {
-			err = -ENOMEM;
-			goto out_release_res;
-		}
+	list_for_each_entry(win, &pci->resources, list) {
+		struct resource *parent, *res = win->res;
 
-		switch (restype) {
+		switch (resource_type(res)) {
 		case IORESOURCE_IO:
 			parent = &ioport_resource;
-			err = gen_pci_calc_io_offset(dev, &range, res, &offset);
+			err = pci_remap_iospace(res, iobase);
+			if (err) {
+				dev_warn(dev, "error %d: failed to map resource %pR\n",
+					 err, res);
+				continue;
+			}
 			break;
 		case IORESOURCE_MEM:
 			parent = &iomem_resource;
-			err = gen_pci_calc_mem_offset(dev, &range, res, &offset);
-			res_valid |= !(res->flags & IORESOURCE_PREFETCH || err);
+			res_valid |= !(res->flags & IORESOURCE_PREFETCH);
 			break;
+		case IORESOURCE_BUS:
+			pci->cfg.bus_range = res;
 		default:
-			err = -EINVAL;
 			continue;
 		}
 
-		if (err) {
-			dev_warn(dev,
-				 "error %d: failed to add resource [type 0x%x, %lld bytes]\n",
-				 err, restype, range.size);
-			continue;
-		}
-
-		err = request_resource(parent, res);
+		err = devm_request_resource(dev, parent, res);
 		if (err)
 			goto out_release_res;
-
-		pci_add_resource_offset(&pci->resources, res, offset);
 	}
 
 	if (!res_valid) {
@@ -262,38 +167,30 @@ static int gen_pci_parse_map_cfg_windows(struct gen_pci *pci)
 	struct device *dev = pci->host.dev.parent;
 	struct device_node *np = dev->of_node;
 
-	if (of_pci_parse_bus_range(np, &pci->cfg.bus_range))
-		pci->cfg.bus_range = (struct resource) {
-			.name	= np->name,
-			.start	= 0,
-			.end	= 0xff,
-			.flags	= IORESOURCE_BUS,
-		};
-
 	err = of_address_to_resource(np, 0, &pci->cfg.res);
 	if (err) {
 		dev_err(dev, "missing \"reg\" property\n");
 		return err;
 	}
 
-	pci->cfg.win = devm_kcalloc(dev, resource_size(&pci->cfg.bus_range),
+	/* Limit the bus-range to fit within reg */
+	bus_max = pci->cfg.bus_range->start +
+		  (resource_size(&pci->cfg.res) >> pci->cfg.ops->bus_shift) - 1;
+	pci->cfg.bus_range->end = min_t(resource_size_t,
+					pci->cfg.bus_range->end, bus_max);
+
+	pci->cfg.win = devm_kcalloc(dev, resource_size(pci->cfg.bus_range),
 				    sizeof(*pci->cfg.win), GFP_KERNEL);
 	if (!pci->cfg.win)
 		return -ENOMEM;
 
-	/* Limit the bus-range to fit within reg */
-	bus_max = pci->cfg.bus_range.start +
-		  (resource_size(&pci->cfg.res) >> pci->cfg.ops->bus_shift) - 1;
-	pci->cfg.bus_range.end = min_t(resource_size_t, pci->cfg.bus_range.end,
-				       bus_max);
-
 	/* Map our Configuration Space windows */
 	if (!devm_request_mem_region(dev, pci->cfg.res.start,
 				     resource_size(&pci->cfg.res),
 				     "Configuration Space"))
 		return -ENOMEM;
 
-	bus_range = &pci->cfg.bus_range;
+	bus_range = pci->cfg.bus_range;
 	for (busn = bus_range->start; busn <= bus_range->end; ++busn) {
 		u32 idx = busn - bus_range->start;
 		u32 sz = 1 << pci->cfg.ops->bus_shift;
@@ -305,18 +202,9 @@ static int gen_pci_parse_map_cfg_windows(struct gen_pci *pci)
 			return -ENOMEM;
 	}
 
-	/* Register bus resource */
-	pci_add_resource(&pci->resources, bus_range);
 	return 0;
 }
 
-static int gen_pci_setup(int nr, struct pci_sys_data *sys)
-{
-	struct gen_pci *pci = sys->private_data;
-	list_splice_init(&pci->resources, &sys->resources);
-	return 1;
-}
-
 static int gen_pci_probe(struct platform_device *pdev)
 {
 	int err;
@@ -326,13 +214,7 @@ static int gen_pci_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
 	struct gen_pci *pci = devm_kzalloc(dev, sizeof(*pci), GFP_KERNEL);
-	struct hw_pci hw = {
-		.nr_controllers	= 1,
-		.private_data	= (void **)&pci,
-		.setup		= gen_pci_setup,
-		.map_irq	= of_irq_parse_and_map_pci,
-		.ops		= &gen_pci_ops,
-	};
+	struct pci_bus *bus, *child;
 
 	if (!pci)
 		return -ENOMEM;
@@ -353,6 +235,7 @@ static int gen_pci_probe(struct platform_device *pdev)
 
 	of_id = of_match_node(gen_pci_of_match, np);
 	pci->cfg.ops = of_id->data;
+	gen_pci_ops.map_bus = pci->cfg.ops->map_bus;
 	pci->host.dev.parent = dev;
 	INIT_LIST_HEAD(&pci->host.windows);
 	INIT_LIST_HEAD(&pci->resources);
@@ -369,14 +252,33 @@ static int gen_pci_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	pci_common_init_dev(dev, &hw);
+	/* Do not reassign resources if probe only */
+	if (!pci_has_flag(PCI_PROBE_ONLY))
+		pci_add_flags(PCI_REASSIGN_ALL_RSRC | PCI_REASSIGN_ALL_BUS);
+
+	bus = pci_scan_root_bus(dev, 0, &gen_pci_ops, pci, &pci->resources);
+	if (!bus) {
+		dev_err(dev, "Scanning rootbus failed");
+		return -ENODEV;
+	}
+
+	pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci);
+
+	if (!pci_has_flag(PCI_PROBE_ONLY)) {
+		pci_bus_size_bridges(bus);
+		pci_bus_assign_resources(bus);
+
+		list_for_each_entry(child, &bus->children, node)
+			pcie_bus_configure_settings(child);
+	}
+
+	pci_bus_add_devices(bus);
 	return 0;
 }
 
 static struct platform_driver gen_pci_driver = {
 	.driver = {
 		.name = "pci-host-generic",
-		.owner = THIS_MODULE,
 		.of_match_table = gen_pci_of_match,
 	},
 	.probe = gen_pci_probe,
diff --git a/drivers/pci/host/pci-imx6.c b/drivers/pci/host/pci-imx6.c
index f1f239f60685..ebcb0ac8512b 100644
--- a/drivers/pci/host/pci-imx6.c
+++ b/drivers/pci/host/pci-imx6.c
@@ -644,7 +644,6 @@ MODULE_DEVICE_TABLE(of, imx6_pcie_of_match);
 static struct platform_driver imx6_pcie_driver = {
 	.driver = {
 		.name	= "imx6q-pcie",
-		.owner	= THIS_MODULE,
 		.of_match_table = imx6_pcie_of_match,
 	},
 	.shutdown = imx6_pcie_shutdown,
diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c
index 34086ce88e8e..1d7b0ba76d1e 100644
--- a/drivers/pci/host/pci-keystone-dw.c
+++ b/drivers/pci/host/pci-keystone-dw.c
@@ -104,14 +104,13 @@ static void ks_dw_pcie_msi_irq_ack(struct irq_data *d)
 {
 	u32 offset, reg_offset, bit_pos;
 	struct keystone_pcie *ks_pcie;
-	unsigned int irq = d->irq;
 	struct msi_desc *msi;
 	struct pcie_port *pp;
 
-	msi = irq_get_msi_desc(irq);
-	pp = sys_to_pcie(msi->dev->bus->sysdata);
+	msi = irq_data_get_msi_desc(d);
+	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
 	ks_pcie = to_keystone_pcie(pp);
-	offset = irq - irq_linear_revmap(pp->irq_domain, 0);
+	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 	update_reg_offset_bit_pos(offset, &reg_offset, &bit_pos);
 
 	writel(BIT(bit_pos),
@@ -142,20 +141,19 @@ void ks_dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq)
 static void ks_dw_pcie_msi_irq_mask(struct irq_data *d)
 {
 	struct keystone_pcie *ks_pcie;
-	unsigned int irq = d->irq;
 	struct msi_desc *msi;
 	struct pcie_port *pp;
 	u32 offset;
 
-	msi = irq_get_msi_desc(irq);
-	pp = sys_to_pcie(msi->dev->bus->sysdata);
+	msi = irq_data_get_msi_desc(d);
+	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
 	ks_pcie = to_keystone_pcie(pp);
-	offset = irq - irq_linear_revmap(pp->irq_domain, 0);
+	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
 	/* Mask the end point if PVM implemented */
 	if (IS_ENABLED(CONFIG_PCI_MSI)) {
 		if (msi->msi_attrib.maskbit)
-			mask_msi_irq(d);
+			pci_msi_mask_irq(d);
 	}
 
 	ks_dw_pcie_msi_clear_irq(pp, offset);
@@ -164,20 +162,19 @@ static void ks_dw_pcie_msi_irq_mask(struct irq_data *d)
 static void ks_dw_pcie_msi_irq_unmask(struct irq_data *d)
 {
 	struct keystone_pcie *ks_pcie;
-	unsigned int irq = d->irq;
 	struct msi_desc *msi;
 	struct pcie_port *pp;
 	u32 offset;
 
-	msi = irq_get_msi_desc(irq);
-	pp = sys_to_pcie(msi->dev->bus->sysdata);
+	msi = irq_data_get_msi_desc(d);
+	pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
 	ks_pcie = to_keystone_pcie(pp);
-	offset = irq - irq_linear_revmap(pp->irq_domain, 0);
+	offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
 
 	/* Mask the end point if PVM implemented */
 	if (IS_ENABLED(CONFIG_PCI_MSI)) {
 		if (msi->msi_attrib.maskbit)
-			unmask_msi_irq(d);
+			pci_msi_unmask_irq(d);
 	}
 
 	ks_dw_pcie_msi_set_irq(pp, offset);
@@ -205,7 +202,7 @@ const struct irq_domain_ops ks_dw_pcie_msi_domain_ops = {
 	.map = ks_dw_pcie_msi_map,
 };
 
-int ks_dw_pcie_msi_host_init(struct pcie_port *pp, struct msi_chip *chip)
+int ks_dw_pcie_msi_host_init(struct pcie_port *pp, struct msi_controller *chip)
 {
 	struct keystone_pcie *ks_pcie = to_keystone_pcie(pp);
 	int i;
diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c
index 1b893bc8b842..8a2707885735 100644
--- a/drivers/pci/host/pci-keystone.c
+++ b/drivers/pci/host/pci-keystone.c
@@ -403,7 +403,6 @@ static struct platform_driver ks_pcie_driver __refdata = {
 	.remove = __exit_p(ks_pcie_remove),
 	.driver = {
 		.name	= "keystone-pcie",
-		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(ks_pcie_of_match),
 	},
 };
diff --git a/drivers/pci/host/pci-keystone.h b/drivers/pci/host/pci-keystone.h
index 1fc1fceede9e..478d932b602d 100644
--- a/drivers/pci/host/pci-keystone.h
+++ b/drivers/pci/host/pci-keystone.h
@@ -55,4 +55,4 @@ void ks_dw_pcie_msi_set_irq(struct pcie_port *pp, int irq);
 void ks_dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq);
 void ks_dw_pcie_v3_65_scan_bus(struct pcie_port *pp);
 int ks_dw_pcie_msi_host_init(struct pcie_port *pp,
-		struct msi_chip *chip);
+		struct msi_controller *chip);
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index b1315e197ffb..b0b181690055 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -99,7 +99,7 @@ struct mvebu_pcie_port;
 struct mvebu_pcie {
 	struct platform_device *pdev;
 	struct mvebu_pcie_port *ports;
-	struct msi_chip *msi;
+	struct msi_controller *msi;
 	struct resource io;
 	char io_name[30];
 	struct resource realio;
@@ -774,12 +774,6 @@ static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 	return bus;
 }
 
-static void mvebu_pcie_add_bus(struct pci_bus *bus)
-{
-	struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
-	bus->msi = pcie->msi;
-}
-
 static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
 						 const struct resource *res,
 						 resource_size_t start,
@@ -816,6 +810,10 @@ static void mvebu_pcie_enable(struct mvebu_pcie *pcie)
 
 	memset(&hw, 0, sizeof(hw));
 
+#ifdef CONFIG_PCI_MSI
+	hw.msi_ctrl = pcie->msi;
+#endif
+
 	hw.nr_controllers = 1;
 	hw.private_data   = (void **)&pcie;
 	hw.setup          = mvebu_pcie_setup;
@@ -823,7 +821,6 @@ static void mvebu_pcie_enable(struct mvebu_pcie *pcie)
 	hw.map_irq        = of_irq_parse_and_map_pci;
 	hw.ops            = &mvebu_pcie_ops;
 	hw.align_resource = mvebu_pcie_align_resource;
-	hw.add_bus        = mvebu_pcie_add_bus;
 
 	pci_common_init(&hw);
 }
@@ -1082,7 +1079,6 @@ MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table);
 
 static struct platform_driver mvebu_pcie_driver = {
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "mvebu-pcie",
 		.of_match_table = mvebu_pcie_of_match_table,
 		/* driver unloading/unbinding currently not supported */
diff --git a/drivers/pci/host/pci-rcar-gen2.c b/drivers/pci/host/pci-rcar-gen2.c
index 3ef854f5a5b5..d9c042febb1a 100644
--- a/drivers/pci/host/pci-rcar-gen2.c
+++ b/drivers/pci/host/pci-rcar-gen2.c
@@ -412,7 +412,6 @@ MODULE_DEVICE_TABLE(of, rcar_pci_of_match);
 static struct platform_driver rcar_pci_driver = {
 	.driver = {
 		.name = "pci-rcar-gen2",
-		.owner = THIS_MODULE,
 		.suppress_bind_attrs = true,
 		.of_match_table = rcar_pci_of_match,
 	},
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 2898f1597c84..07019ff0ad7c 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -238,7 +238,7 @@
 	)
 
 struct tegra_msi {
-	struct msi_chip chip;
+	struct msi_controller chip;
 	DECLARE_BITMAP(used, INT_PCI_MSI_NR);
 	struct irq_domain *domain;
 	unsigned long pages;
@@ -259,7 +259,7 @@ struct tegra_pcie_soc_data {
 	bool has_gen2;
 };
 
-static inline struct tegra_msi *to_tegra_msi(struct msi_chip *chip)
+static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip)
 {
 	return container_of(chip, struct tegra_msi, chip);
 }
@@ -692,15 +692,6 @@ static int tegra_pcie_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
 	return irq;
 }
 
-static void tegra_pcie_add_bus(struct pci_bus *bus)
-{
-	if (IS_ENABLED(CONFIG_PCI_MSI)) {
-		struct tegra_pcie *pcie = sys_to_pcie(bus->sysdata);
-
-		bus->msi = &pcie->msi.chip;
-	}
-}
-
 static struct pci_bus *tegra_pcie_scan_bus(int nr, struct pci_sys_data *sys)
 {
 	struct tegra_pcie *pcie = sys_to_pcie(sys);
@@ -1280,8 +1271,8 @@ static irqreturn_t tegra_pcie_msi_irq(int irq, void *data)
 	return processed > 0 ? IRQ_HANDLED : IRQ_NONE;
 }
 
-static int tegra_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
-			       struct msi_desc *desc)
+static int tegra_msi_setup_irq(struct msi_controller *chip,
+			       struct pci_dev *pdev, struct msi_desc *desc)
 {
 	struct tegra_msi *msi = to_tegra_msi(chip);
 	struct msi_msg msg;
@@ -1305,12 +1296,13 @@ static int tegra_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
 	msg.address_hi = 0;
 	msg.data = hwirq;
 
-	write_msi_msg(irq, &msg);
+	pci_write_msi_msg(irq, &msg);
 
 	return 0;
 }
 
-static void tegra_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void tegra_msi_teardown_irq(struct msi_controller *chip,
+				   unsigned int irq)
 {
 	struct tegra_msi *msi = to_tegra_msi(chip);
 	struct irq_data *d = irq_get_irq_data(irq);
@@ -1322,10 +1314,10 @@ static void tegra_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
 
 static struct irq_chip tegra_msi_irq_chip = {
 	.name = "Tegra PCIe MSI",
-	.irq_enable = unmask_msi_irq,
-	.irq_disable = mask_msi_irq,
-	.irq_mask = mask_msi_irq,
-	.irq_unmask = unmask_msi_irq,
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
 };
 
 static int tegra_msi_map(struct irq_domain *domain, unsigned int irq,
@@ -1893,11 +1885,14 @@ static int tegra_pcie_enable(struct tegra_pcie *pcie)
 
 	memset(&hw, 0, sizeof(hw));
 
+#ifdef CONFIG_PCI_MSI
+	hw.msi_ctrl = &pcie->msi.chip;
+#endif
+
 	hw.nr_controllers = 1;
 	hw.private_data = (void **)&pcie;
 	hw.setup = tegra_pcie_setup;
 	hw.map_irq = tegra_pcie_map_irq;
-	hw.add_bus = tegra_pcie_add_bus;
 	hw.scan = tegra_pcie_scan_bus;
 	hw.ops = &tegra_pcie_ops;
 
@@ -2134,7 +2129,6 @@ put_resources:
 static struct platform_driver tegra_pcie_driver = {
 	.driver = {
 		.name = "tegra-pcie",
-		.owner = THIS_MODULE,
 		.of_match_table = tegra_pcie_of_match,
 		.suppress_bind_attrs = true,
 	},
diff --git a/drivers/pci/host/pci-versatile.c b/drivers/pci/host/pci-versatile.c
new file mode 100644
index 000000000000..e3a2450db2b8
--- /dev/null
+++ b/drivers/pci/host/pci-versatile.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright 2004 Koninklijke Philips Electronics NV
+ *
+ * Conversion to platform driver and DT:
+ * Copyright 2014 Linaro Ltd.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * 14/04/2005 Initial version, colin.king@philips.com
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+static void __iomem *versatile_pci_base;
+static void __iomem *versatile_cfg_base[2];
+
+#define PCI_IMAP(m)		(versatile_pci_base + ((m) * 4))
+#define PCI_SMAP(m)		(versatile_pci_base + 0x14 + ((m) * 4))
+#define PCI_SELFID		(versatile_pci_base + 0xc)
+
+#define VP_PCI_DEVICE_ID		0x030010ee
+#define VP_PCI_CLASS_ID			0x0b400000
+
+static u32 pci_slot_ignore;
+
+static int __init versatile_pci_slot_ignore(char *str)
+{
+	int retval;
+	int slot;
+
+	while ((retval = get_option(&str, &slot))) {
+		if ((slot < 0) || (slot > 31))
+			pr_err("Illegal slot value: %d\n", slot);
+		else
+			pci_slot_ignore |= (1 << slot);
+	}
+	return 1;
+}
+__setup("pci_slot_ignore=", versatile_pci_slot_ignore);
+
+
+static void __iomem *versatile_map_bus(struct pci_bus *bus,
+				       unsigned int devfn, int offset)
+{
+	unsigned int busnr = bus->number;
+
+	if (pci_slot_ignore & (1 << PCI_SLOT(devfn)))
+		return NULL;
+
+	return versatile_cfg_base[1] + ((busnr << 16) | (devfn << 8) | offset);
+}
+
+static struct pci_ops pci_versatile_ops = {
+	.map_bus = versatile_map_bus,
+	.read	= pci_generic_config_read32,
+	.write	= pci_generic_config_write,
+};
+
+static int versatile_pci_parse_request_of_pci_ranges(struct device *dev,
+						     struct list_head *res)
+{
+	int err, mem = 1, res_valid = 0;
+	struct device_node *np = dev->of_node;
+	resource_size_t iobase;
+	struct resource_entry *win;
+
+	err = of_pci_get_host_bridge_resources(np, 0, 0xff, res, &iobase);
+	if (err)
+		return err;
+
+	resource_list_for_each_entry(win, res, list) {
+		struct resource *parent, *res = win->res;
+
+		switch (resource_type(res)) {
+		case IORESOURCE_IO:
+			parent = &ioport_resource;
+			err = pci_remap_iospace(res, iobase);
+			if (err) {
+				dev_warn(dev, "error %d: failed to map resource %pR\n",
+					 err, res);
+				continue;
+			}
+			break;
+		case IORESOURCE_MEM:
+			parent = &iomem_resource;
+			res_valid |= !(res->flags & IORESOURCE_PREFETCH);
+
+			writel(res->start >> 28, PCI_IMAP(mem));
+			writel(PHYS_OFFSET >> 28, PCI_SMAP(mem));
+			mem++;
+
+			break;
+		case IORESOURCE_BUS:
+		default:
+			continue;
+		}
+
+		err = devm_request_resource(dev, parent, res);
+		if (err)
+			goto out_release_res;
+	}
+
+	if (!res_valid) {
+		dev_err(dev, "non-prefetchable memory resource required\n");
+		err = -EINVAL;
+		goto out_release_res;
+	}
+
+	return 0;
+
+out_release_res:
+	pci_free_resource_list(res);
+	return err;
+}
+
+/* Unused, temporary to satisfy ARM arch code */
+struct pci_sys_data sys;
+
+static int versatile_pci_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int ret, i, myslot = -1;
+	u32 val;
+	void __iomem *local_pci_cfg_base;
+	struct pci_bus *bus;
+	LIST_HEAD(pci_res);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+	versatile_pci_base = devm_ioremap_resource(&pdev->dev, res);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+	versatile_cfg_base[0] = devm_ioremap_resource(&pdev->dev, res);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	if (!res)
+		return -ENODEV;
+	versatile_cfg_base[1] = devm_ioremap_resource(&pdev->dev, res);
+
+	ret = versatile_pci_parse_request_of_pci_ranges(&pdev->dev, &pci_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * We need to discover the PCI core first to configure itself
+	 * before the main PCI probing is performed
+	 */
+	for (i = 0; i < 32; i++) {
+		if ((readl(versatile_cfg_base[0] + (i << 11) + PCI_VENDOR_ID) == VP_PCI_DEVICE_ID) &&
+		    (readl(versatile_cfg_base[0] + (i << 11) + PCI_CLASS_REVISION) == VP_PCI_CLASS_ID)) {
+			myslot = i;
+			break;
+		}
+	}
+	if (myslot == -1) {
+		dev_err(&pdev->dev, "Cannot find PCI core!\n");
+		return -EIO;
+	}
+	/*
+	 * Do not to map Versatile FPGA PCI device into memory space
+	 */
+	pci_slot_ignore |= (1 << myslot);
+
+	dev_info(&pdev->dev, "PCI core found (slot %d)\n", myslot);
+
+	writel(myslot, PCI_SELFID);
+	local_pci_cfg_base = versatile_cfg_base[1] + (myslot << 11);
+
+	val = readl(local_pci_cfg_base + PCI_COMMAND);
+	val |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE;
+	writel(val, local_pci_cfg_base + PCI_COMMAND);
+
+	/*
+	 * Configure the PCI inbound memory windows to be 1:1 mapped to SDRAM
+	 */
+	writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_0);
+	writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_1);
+	writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2);
+
+	/*
+	 * For many years the kernel and QEMU were symbiotically buggy
+	 * in that they both assumed the same broken IRQ mapping.
+	 * QEMU therefore attempts to auto-detect old broken kernels
+	 * so that they still work on newer QEMU as they did on old
+	 * QEMU. Since we now use the correct (ie matching-hardware)
+	 * IRQ mapping we write a definitely different value to a
+	 * PCI_INTERRUPT_LINE register to tell QEMU that we expect
+	 * real hardware behaviour and it need not be backwards
+	 * compatible for us. This write is harmless on real hardware.
+	 */
+	writel(0, versatile_cfg_base[0] + PCI_INTERRUPT_LINE);
+
+	pci_add_flags(PCI_ENABLE_PROC_DOMAINS);
+	pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC);
+
+	bus = pci_scan_root_bus(&pdev->dev, 0, &pci_versatile_ops, &sys, &pci_res);
+	if (!bus)
+		return -ENOMEM;
+
+	pci_fixup_irqs(pci_common_swizzle, of_irq_parse_and_map_pci);
+	pci_assign_unassigned_bus_resources(bus);
+	pci_bus_add_devices(bus);
+
+	return 0;
+}
+
+static const struct of_device_id versatile_pci_of_match[] = {
+	{ .compatible = "arm,versatile-pci", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, versatile_pci_of_match);
+
+static struct platform_driver versatile_pci_driver = {
+	.driver = {
+		.name = "versatile-pci",
+		.of_match_table = versatile_pci_of_match,
+	},
+	.probe = versatile_pci_probe,
+};
+module_platform_driver(versatile_pci_driver);
+
+MODULE_DESCRIPTION("Versatile PCI driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c
new file mode 100644
index 000000000000..ec5a14b3189b
--- /dev/null
+++ b/drivers/pci/host/pci-xgene-msi.c
@@ -0,0 +1,587 @@
+/*
+ * APM X-Gene MSI Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Author: Tanmay Inamdar <tinamdar@apm.com>
+ *	   Duc Dang <dhdang@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/of_pci.h>
+
+#define MSI_IR0			0x000000
+#define MSI_INT0		0x800000
+#define IDX_PER_GROUP		8
+#define IRQS_PER_IDX		16
+#define NR_HW_IRQS		16
+#define NR_MSI_VEC		(IDX_PER_GROUP * IRQS_PER_IDX * NR_HW_IRQS)
+
+struct xgene_msi_group {
+	struct xgene_msi	*msi;
+	int			gic_irq;
+	u32			msi_grp;
+};
+
+struct xgene_msi {
+	struct device_node	*node;
+	struct irq_domain	*inner_domain;
+	struct irq_domain	*msi_domain;
+	u64			msi_addr;
+	void __iomem		*msi_regs;
+	unsigned long		*bitmap;
+	struct mutex		bitmap_lock;
+	struct xgene_msi_group	*msi_groups;
+	int			num_cpus;
+};
+
+/* Global data */
+static struct xgene_msi xgene_msi_ctrl;
+
+static struct irq_chip xgene_msi_top_irq_chip = {
+	.name		= "X-Gene1 MSI",
+	.irq_enable	= pci_msi_unmask_irq,
+	.irq_disable	= pci_msi_mask_irq,
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+};
+
+static struct  msi_domain_info xgene_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		  MSI_FLAG_PCI_MSIX),
+	.chip	= &xgene_msi_top_irq_chip,
+};
+
+/*
+ * X-Gene v1 has 16 groups of MSI termination registers MSInIRx, where
+ * n is group number (0..F), x is index of registers in each group (0..7)
+ * The register layout is as follows:
+ * MSI0IR0			base_addr
+ * MSI0IR1			base_addr +  0x10000
+ * ...				...
+ * MSI0IR6			base_addr +  0x60000
+ * MSI0IR7			base_addr +  0x70000
+ * MSI1IR0			base_addr +  0x80000
+ * MSI1IR1			base_addr +  0x90000
+ * ...				...
+ * MSI1IR7			base_addr +  0xF0000
+ * MSI2IR0			base_addr + 0x100000
+ * ...				...
+ * MSIFIR0			base_addr + 0x780000
+ * MSIFIR1			base_addr + 0x790000
+ * ...				...
+ * MSIFIR7			base_addr + 0x7F0000
+ * MSIINT0			base_addr + 0x800000
+ * MSIINT1			base_addr + 0x810000
+ * ...				...
+ * MSIINTF			base_addr + 0x8F0000
+ *
+ * Each index register supports 16 MSI vectors (0..15) to generate interrupt.
+ * There are total 16 GIC IRQs assigned for these 16 groups of MSI termination
+ * registers.
+ *
+ * Each MSI termination group has 1 MSIINTn register (n is 0..15) to indicate
+ * the MSI pending status caused by 1 of its 8 index registers.
+ */
+
+/* MSInIRx read helper */
+static u32 xgene_msi_ir_read(struct xgene_msi *msi,
+				    u32 msi_grp, u32 msir_idx)
+{
+	return readl_relaxed(msi->msi_regs + MSI_IR0 +
+			      (msi_grp << 19) + (msir_idx << 16));
+}
+
+/* MSIINTn read helper */
+static u32 xgene_msi_int_read(struct xgene_msi *msi, u32 msi_grp)
+{
+	return readl_relaxed(msi->msi_regs + MSI_INT0 + (msi_grp << 16));
+}
+
+/*
+ * With 2048 MSI vectors supported, the MSI message can be constructed using
+ * following scheme:
+ * - Divide into 8 256-vector groups
+ *		Group 0: 0-255
+ *		Group 1: 256-511
+ *		Group 2: 512-767
+ *		...
+ *		Group 7: 1792-2047
+ * - Each 256-vector group is divided into 16 16-vector groups
+ *	As an example: 16 16-vector groups for 256-vector group 0-255 is
+ *		Group 0: 0-15
+ *		Group 1: 16-32
+ *		...
+ *		Group 15: 240-255
+ * - The termination address of MSI vector in 256-vector group n and 16-vector
+ *   group x is the address of MSIxIRn
+ * - The data for MSI vector in 16-vector group x is x
+ */
+static u32 hwirq_to_reg_set(unsigned long hwirq)
+{
+	return (hwirq / (NR_HW_IRQS * IRQS_PER_IDX));
+}
+
+static u32 hwirq_to_group(unsigned long hwirq)
+{
+	return (hwirq % NR_HW_IRQS);
+}
+
+static u32 hwirq_to_msi_data(unsigned long hwirq)
+{
+	return ((hwirq / NR_HW_IRQS) % IRQS_PER_IDX);
+}
+
+static void xgene_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(data);
+	u32 reg_set = hwirq_to_reg_set(data->hwirq);
+	u32 group = hwirq_to_group(data->hwirq);
+	u64 target_addr = msi->msi_addr + (((8 * group) + reg_set) << 16);
+
+	msg->address_hi = upper_32_bits(target_addr);
+	msg->address_lo = lower_32_bits(target_addr);
+	msg->data = hwirq_to_msi_data(data->hwirq);
+}
+
+/*
+ * X-Gene v1 only has 16 MSI GIC IRQs for 2048 MSI vectors.  To maintain
+ * the expected behaviour of .set_affinity for each MSI interrupt, the 16
+ * MSI GIC IRQs are statically allocated to 8 X-Gene v1 cores (2 GIC IRQs
+ * for each core).  The MSI vector is moved fom 1 MSI GIC IRQ to another
+ * MSI GIC IRQ to steer its MSI interrupt to correct X-Gene v1 core.  As a
+ * consequence, the total MSI vectors that X-Gene v1 supports will be
+ * reduced to 256 (2048/8) vectors.
+ */
+static int hwirq_to_cpu(unsigned long hwirq)
+{
+	return (hwirq % xgene_msi_ctrl.num_cpus);
+}
+
+static unsigned long hwirq_to_canonical_hwirq(unsigned long hwirq)
+{
+	return (hwirq - hwirq_to_cpu(hwirq));
+}
+
+static int xgene_msi_set_affinity(struct irq_data *irqdata,
+				  const struct cpumask *mask, bool force)
+{
+	int target_cpu = cpumask_first(mask);
+	int curr_cpu;
+
+	curr_cpu = hwirq_to_cpu(irqdata->hwirq);
+	if (curr_cpu == target_cpu)
+		return IRQ_SET_MASK_OK_DONE;
+
+	/* Update MSI number to target the new CPU */
+	irqdata->hwirq = hwirq_to_canonical_hwirq(irqdata->hwirq) + target_cpu;
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip xgene_msi_bottom_irq_chip = {
+	.name			= "MSI",
+	.irq_set_affinity       = xgene_msi_set_affinity,
+	.irq_compose_msi_msg	= xgene_compose_msi_msg,
+};
+
+static int xgene_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *args)
+{
+	struct xgene_msi *msi = domain->host_data;
+	int msi_irq;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	msi_irq = bitmap_find_next_zero_area(msi->bitmap, NR_MSI_VEC, 0,
+					     msi->num_cpus, 0);
+	if (msi_irq < NR_MSI_VEC)
+		bitmap_set(msi->bitmap, msi_irq, msi->num_cpus);
+	else
+		msi_irq = -ENOSPC;
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	if (msi_irq < 0)
+		return msi_irq;
+
+	irq_domain_set_info(domain, virq, msi_irq,
+			    &xgene_msi_bottom_irq_chip, domain->host_data,
+			    handle_simple_irq, NULL, NULL);
+	set_irq_flags(virq, IRQF_VALID);
+
+	return 0;
+}
+
+static void xgene_irq_domain_free(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct xgene_msi *msi = irq_data_get_irq_chip_data(d);
+	u32 hwirq;
+
+	mutex_lock(&msi->bitmap_lock);
+
+	hwirq = hwirq_to_canonical_hwirq(d->hwirq);
+	bitmap_clear(msi->bitmap, hwirq, msi->num_cpus);
+
+	mutex_unlock(&msi->bitmap_lock);
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+	.alloc  = xgene_irq_domain_alloc,
+	.free   = xgene_irq_domain_free,
+};
+
+static int xgene_allocate_domains(struct xgene_msi *msi)
+{
+	msi->inner_domain = irq_domain_add_linear(NULL, NR_MSI_VEC,
+						  &msi_domain_ops, msi);
+	if (!msi->inner_domain)
+		return -ENOMEM;
+
+	msi->msi_domain = pci_msi_create_irq_domain(msi->node,
+						    &xgene_msi_domain_info,
+						    msi->inner_domain);
+
+	if (!msi->msi_domain) {
+		irq_domain_remove(msi->inner_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void xgene_free_domains(struct xgene_msi *msi)
+{
+	if (msi->msi_domain)
+		irq_domain_remove(msi->msi_domain);
+	if (msi->inner_domain)
+		irq_domain_remove(msi->inner_domain);
+}
+
+static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi)
+{
+	int size = BITS_TO_LONGS(NR_MSI_VEC) * sizeof(long);
+
+	xgene_msi->bitmap = kzalloc(size, GFP_KERNEL);
+	if (!xgene_msi->bitmap)
+		return -ENOMEM;
+
+	mutex_init(&xgene_msi->bitmap_lock);
+
+	xgene_msi->msi_groups = kcalloc(NR_HW_IRQS,
+					sizeof(struct xgene_msi_group),
+					GFP_KERNEL);
+	if (!xgene_msi->msi_groups)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void xgene_msi_isr(unsigned int irq, struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct xgene_msi_group *msi_groups;
+	struct xgene_msi *xgene_msi;
+	unsigned int virq;
+	int msir_index, msir_val, hw_irq;
+	u32 intr_index, grp_select, msi_grp;
+
+	chained_irq_enter(chip, desc);
+
+	msi_groups = irq_desc_get_handler_data(desc);
+	xgene_msi = msi_groups->msi;
+	msi_grp = msi_groups->msi_grp;
+
+	/*
+	 * MSIINTn (n is 0..F) indicates if there is a pending MSI interrupt
+	 * If bit x of this register is set (x is 0..7), one or more interupts
+	 * corresponding to MSInIRx is set.
+	 */
+	grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
+	while (grp_select) {
+		msir_index = ffs(grp_select) - 1;
+		/*
+		 * Calculate MSInIRx address to read to check for interrupts
+		 * (refer to termination address and data assignment
+		 * described in xgene_compose_msi_msg() )
+		 */
+		msir_val = xgene_msi_ir_read(xgene_msi, msi_grp, msir_index);
+		while (msir_val) {
+			intr_index = ffs(msir_val) - 1;
+			/*
+			 * Calculate MSI vector number (refer to the termination
+			 * address and data assignment described in
+			 * xgene_compose_msi_msg function)
+			 */
+			hw_irq = (((msir_index * IRQS_PER_IDX) + intr_index) *
+				 NR_HW_IRQS) + msi_grp;
+			/*
+			 * As we have multiple hw_irq that maps to single MSI,
+			 * always look up the virq using the hw_irq as seen from
+			 * CPU0
+			 */
+			hw_irq = hwirq_to_canonical_hwirq(hw_irq);
+			virq = irq_find_mapping(xgene_msi->inner_domain, hw_irq);
+			WARN_ON(!virq);
+			if (virq != 0)
+				generic_handle_irq(virq);
+			msir_val &= ~(1 << intr_index);
+		}
+		grp_select &= ~(1 << msir_index);
+
+		if (!grp_select) {
+			/*
+			 * We handled all interrupts happened in this group,
+			 * resample this group MSI_INTx register in case
+			 * something else has been made pending in the meantime
+			 */
+			grp_select = xgene_msi_int_read(xgene_msi, msi_grp);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int xgene_msi_remove(struct platform_device *pdev)
+{
+	int virq, i;
+	struct xgene_msi *msi = platform_get_drvdata(pdev);
+
+	for (i = 0; i < NR_HW_IRQS; i++) {
+		virq = msi->msi_groups[i].gic_irq;
+		if (virq != 0) {
+			irq_set_chained_handler(virq, NULL);
+			irq_set_handler_data(virq, NULL);
+		}
+	}
+	kfree(msi->msi_groups);
+
+	kfree(msi->bitmap);
+	msi->bitmap = NULL;
+
+	xgene_free_domains(msi);
+
+	return 0;
+}
+
+static int xgene_msi_hwirq_alloc(unsigned int cpu)
+{
+	struct xgene_msi *msi = &xgene_msi_ctrl;
+	struct xgene_msi_group *msi_group;
+	cpumask_var_t mask;
+	int i;
+	int err;
+
+	for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) {
+		msi_group = &msi->msi_groups[i];
+		if (!msi_group->gic_irq)
+			continue;
+
+		irq_set_chained_handler(msi_group->gic_irq,
+					xgene_msi_isr);
+		err = irq_set_handler_data(msi_group->gic_irq, msi_group);
+		if (err) {
+			pr_err("failed to register GIC IRQ handler\n");
+			return -EINVAL;
+		}
+		/*
+		 * Statically allocate MSI GIC IRQs to each CPU core.
+		 * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated
+		 * to each core.
+		 */
+		if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
+			cpumask_clear(mask);
+			cpumask_set_cpu(cpu, mask);
+			err = irq_set_affinity(msi_group->gic_irq, mask);
+			if (err)
+				pr_err("failed to set affinity for GIC IRQ");
+			free_cpumask_var(mask);
+		} else {
+			pr_err("failed to alloc CPU mask for affinity\n");
+			err = -EINVAL;
+		}
+
+		if (err) {
+			irq_set_chained_handler(msi_group->gic_irq, NULL);
+			irq_set_handler_data(msi_group->gic_irq, NULL);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void xgene_msi_hwirq_free(unsigned int cpu)
+{
+	struct xgene_msi *msi = &xgene_msi_ctrl;
+	struct xgene_msi_group *msi_group;
+	int i;
+
+	for (i = cpu; i < NR_HW_IRQS; i += msi->num_cpus) {
+		msi_group = &msi->msi_groups[i];
+		if (!msi_group->gic_irq)
+			continue;
+
+		irq_set_chained_handler(msi_group->gic_irq, NULL);
+		irq_set_handler_data(msi_group->gic_irq, NULL);
+	}
+}
+
+static int xgene_msi_cpu_callback(struct notifier_block *nfb,
+				  unsigned long action, void *hcpu)
+{
+	unsigned cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		xgene_msi_hwirq_alloc(cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		xgene_msi_hwirq_free(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block xgene_msi_cpu_notifier = {
+	.notifier_call = xgene_msi_cpu_callback,
+};
+
+static const struct of_device_id xgene_msi_match_table[] = {
+	{.compatible = "apm,xgene1-msi"},
+	{},
+};
+
+static int xgene_msi_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int rc, irq_index;
+	struct xgene_msi *xgene_msi;
+	unsigned int cpu;
+	int virt_msir;
+	u32 msi_val, msi_idx;
+
+	xgene_msi = &xgene_msi_ctrl;
+
+	platform_set_drvdata(pdev, xgene_msi);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xgene_msi->msi_regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xgene_msi->msi_regs)) {
+		dev_err(&pdev->dev, "no reg space\n");
+		rc = -EINVAL;
+		goto error;
+	}
+	xgene_msi->msi_addr = res->start;
+	xgene_msi->node = pdev->dev.of_node;
+	xgene_msi->num_cpus = num_possible_cpus();
+
+	rc = xgene_msi_init_allocator(xgene_msi);
+	if (rc) {
+		dev_err(&pdev->dev, "Error allocating MSI bitmap\n");
+		goto error;
+	}
+
+	rc = xgene_allocate_domains(xgene_msi);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to allocate MSI domain\n");
+		goto error;
+	}
+
+	for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) {
+		virt_msir = platform_get_irq(pdev, irq_index);
+		if (virt_msir < 0) {
+			dev_err(&pdev->dev, "Cannot translate IRQ index %d\n",
+				irq_index);
+			rc = -EINVAL;
+			goto error;
+		}
+		xgene_msi->msi_groups[irq_index].gic_irq = virt_msir;
+		xgene_msi->msi_groups[irq_index].msi_grp = irq_index;
+		xgene_msi->msi_groups[irq_index].msi = xgene_msi;
+	}
+
+	/*
+	 * MSInIRx registers are read-to-clear; before registering
+	 * interrupt handlers, read all of them to clear spurious
+	 * interrupts that may occur before the driver is probed.
+	 */
+	for (irq_index = 0; irq_index < NR_HW_IRQS; irq_index++) {
+		for (msi_idx = 0; msi_idx < IDX_PER_GROUP; msi_idx++)
+			msi_val = xgene_msi_ir_read(xgene_msi, irq_index,
+						    msi_idx);
+		/* Read MSIINTn to confirm */
+		msi_val = xgene_msi_int_read(xgene_msi, irq_index);
+		if (msi_val) {
+			dev_err(&pdev->dev, "Failed to clear spurious IRQ\n");
+			rc = -EINVAL;
+			goto error;
+		}
+	}
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu)
+		if (xgene_msi_hwirq_alloc(cpu)) {
+			dev_err(&pdev->dev, "failed to register MSI handlers\n");
+			cpu_notifier_register_done();
+			goto error;
+		}
+
+	rc = __register_hotcpu_notifier(&xgene_msi_cpu_notifier);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to add CPU MSI notifier\n");
+		cpu_notifier_register_done();
+		goto error;
+	}
+
+	cpu_notifier_register_done();
+
+	dev_info(&pdev->dev, "APM X-Gene PCIe MSI driver loaded\n");
+
+	return 0;
+
+error:
+	xgene_msi_remove(pdev);
+	return rc;
+}
+
+static struct platform_driver xgene_msi_driver = {
+	.driver = {
+		.name = "xgene-msi",
+		.owner = THIS_MODULE,
+		.of_match_table = xgene_msi_match_table,
+	},
+	.probe = xgene_msi_probe,
+	.remove = xgene_msi_remove,
+};
+
+static int __init xgene_pcie_msi_init(void)
+{
+	return platform_driver_register(&xgene_msi_driver);
+}
+subsys_initcall(xgene_pcie_msi_init);
diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c
index 2988fe136c1e..9d2fd461e9c7 100644
--- a/drivers/pci/host/pci-xgene.c
+++ b/drivers/pci/host/pci-xgene.c
@@ -600,6 +600,23 @@ static int xgene_pcie_setup(struct xgene_pcie_port *port,
 	return 0;
 }
 
+static int xgene_pcie_msi_enable(struct pci_bus *bus)
+{
+	struct device_node *msi_node;
+
+	msi_node = of_parse_phandle(bus->dev.of_node,
+					"msi-parent", 0);
+	if (!msi_node)
+		return -ENODEV;
+
+	bus->msi = of_pci_find_msi_chip_by_node(msi_node);
+	if (!bus->msi)
+		return -ENODEV;
+
+	bus->msi->dev = &bus->dev;
+	return 0;
+}
+
 static int xgene_pcie_probe_bridge(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
@@ -636,6 +653,10 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev)
 	if (!bus)
 		return -ENOMEM;
 
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		if (xgene_pcie_msi_enable(bus))
+			dev_info(port->dev, "failed to enable MSI\n");
+
 	pci_scan_child_bus(bus);
 	pci_assign_unassigned_bus_resources(bus);
 	pci_bus_add_devices(bus);
@@ -652,7 +673,6 @@ static const struct of_device_id xgene_pcie_match_table[] = {
 static struct platform_driver xgene_pcie_driver = {
 	.driver = {
 		   .name = "xgene-pcie",
-		   .owner = THIS_MODULE,
 		   .of_match_table = of_match_ptr(xgene_pcie_match_table),
 	},
 	.probe = xgene_pcie_probe_bridge,
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index f69b0d0a5ee1..a36a157dd9c3 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -152,10 +152,10 @@ static int dw_pcie_wr_own_conf(struct pcie_port *pp, int where, int size,
 
 static struct irq_chip dw_msi_irq_chip = {
 	.name = "PCI-MSI",
-	.irq_enable = unmask_msi_irq,
-	.irq_disable = mask_msi_irq,
-	.irq_mask = mask_msi_irq,
-	.irq_unmask = unmask_msi_irq,
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
 };
 
 /* MSI int handler */
@@ -238,7 +238,7 @@ static void dw_pcie_msi_set_irq(struct pcie_port *pp, int irq)
 static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
 {
 	int irq, pos0, i;
-	struct pcie_port *pp = sys_to_pcie(desc->dev->bus->sysdata);
+	struct pcie_port *pp = sys_to_pcie(msi_desc_to_pci_sysdata(desc));
 
 	pos0 = bitmap_find_free_region(pp->msi_irq_in_use, MAX_MSI_IRQS,
 				       order_base_2(no_irqs));
@@ -276,7 +276,7 @@ no_valid_irq:
 	return -ENOSPC;
 }
 
-static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
+static int dw_msi_setup_irq(struct msi_controller *chip, struct pci_dev *pdev,
 			struct msi_desc *desc)
 {
 	int irq, pos;
@@ -301,21 +301,21 @@ static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
 	else
 		msg.data = pos;
 
-	write_msi_msg(irq, &msg);
+	pci_write_msi_msg(irq, &msg);
 
 	return 0;
 }
 
-static void dw_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void dw_msi_teardown_irq(struct msi_controller *chip, unsigned int irq)
 {
 	struct irq_data *data = irq_get_irq_data(irq);
-	struct msi_desc *msi = irq_data_get_msi(data);
-	struct pcie_port *pp = sys_to_pcie(msi->dev->bus->sysdata);
+	struct msi_desc *msi = irq_data_get_msi_desc(data);
+	struct pcie_port *pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
 
 	clear_irq_range(pp, irq, 1, data->hwirq);
 }
 
-static struct msi_chip dw_pcie_msi_chip = {
+static struct msi_controller dw_pcie_msi_chip = {
 	.setup_irq = dw_msi_setup_irq,
 	.teardown_irq = dw_msi_teardown_irq,
 };
@@ -501,6 +501,11 @@ int dw_pcie_host_init(struct pcie_port *pp)
 	val |= PORT_LOGIC_SPEED_CHANGE;
 	dw_pcie_wr_own_conf(pp, PCIE_LINK_WIDTH_SPEED_CONTROL, 4, val);
 
+#ifdef CONFIG_PCI_MSI
+	dw_pcie_msi_chip.dev = pp->dev;
+	dw_pci.msi_ctrl = &dw_pcie_msi_chip;
+#endif
+
 	dw_pci.nr_controllers = 1;
 	dw_pci.private_data = (void **)&pp;
 
@@ -750,21 +755,10 @@ static int dw_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	return irq;
 }
 
-static void dw_pcie_add_bus(struct pci_bus *bus)
-{
-	if (IS_ENABLED(CONFIG_PCI_MSI)) {
-		struct pcie_port *pp = sys_to_pcie(bus->sysdata);
-
-		dw_pcie_msi_chip.dev = pp->dev;
-		bus->msi = &dw_pcie_msi_chip;
-	}
-}
-
 static struct hw_pci dw_pci = {
 	.setup		= dw_pcie_setup,
 	.scan		= dw_pcie_scan_bus,
 	.map_irq	= dw_pcie_map_irq,
-	.add_bus	= dw_pcie_add_bus,
 };
 
 void dw_pcie_setup_rc(struct pcie_port *pp)
diff --git a/drivers/pci/host/pcie-designware.h b/drivers/pci/host/pcie-designware.h
index c6256751daff..d0bbd276840d 100644
--- a/drivers/pci/host/pcie-designware.h
+++ b/drivers/pci/host/pcie-designware.h
@@ -73,7 +73,7 @@ struct pcie_host_ops {
 	u32 (*get_msi_addr)(struct pcie_port *pp);
 	u32 (*get_msi_data)(struct pcie_port *pp, int pos);
 	void (*scan_bus)(struct pcie_port *pp);
-	int (*msi_host_init)(struct pcie_port *pp, struct msi_chip *chip);
+	int (*msi_host_init)(struct pcie_port *pp, struct msi_controller *chip);
 };
 
 int dw_pcie_cfg_read(void __iomem *addr, int where, int size, u32 *val);
diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c
index bfab844f772d..28b387e6fc03 100644
--- a/drivers/pci/host/pcie-rcar.c
+++ b/drivers/pci/host/pcie-rcar.c
@@ -111,14 +111,14 @@
 struct rcar_msi {
 	DECLARE_BITMAP(used, INT_PCI_MSI_NR);
 	struct irq_domain *domain;
-	struct msi_chip chip;
+	struct msi_controller chip;
 	unsigned long pages;
 	struct mutex lock;
 	int irq1;
 	int irq2;
 };
 
-static inline struct rcar_msi *to_rcar_msi(struct msi_chip *chip)
+static inline struct rcar_msi *to_rcar_msi(struct msi_controller *chip)
 {
 	return container_of(chip, struct rcar_msi, chip);
 }
@@ -380,20 +380,10 @@ static int rcar_pcie_setup(int nr, struct pci_sys_data *sys)
 	return 1;
 }
 
-static void rcar_pcie_add_bus(struct pci_bus *bus)
-{
-	if (IS_ENABLED(CONFIG_PCI_MSI)) {
-		struct rcar_pcie *pcie = sys_to_pcie(bus->sysdata);
-
-		bus->msi = &pcie->msi.chip;
-	}
-}
-
 struct hw_pci rcar_pci = {
 	.setup          = rcar_pcie_setup,
 	.map_irq        = of_irq_parse_and_map_pci,
 	.ops            = &rcar_pcie_ops,
-	.add_bus        = rcar_pcie_add_bus,
 };
 
 static void rcar_pcie_enable(struct rcar_pcie *pcie)
@@ -402,6 +392,9 @@ static void rcar_pcie_enable(struct rcar_pcie *pcie)
 
 	rcar_pci.nr_controllers = 1;
 	rcar_pci.private_data = (void **)&pcie;
+#ifdef CONFIG_PCI_MSI
+	rcar_pci.msi_ctrl = &pcie->msi.chip;
+#endif
 
 	pci_common_init_dev(&pdev->dev, &rcar_pci);
 #ifdef CONFIG_PCI_DOMAINS
@@ -622,7 +615,7 @@ static irqreturn_t rcar_pcie_msi_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int rcar_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
+static int rcar_msi_setup_irq(struct msi_controller *chip, struct pci_dev *pdev,
 			      struct msi_desc *desc)
 {
 	struct rcar_msi *msi = to_rcar_msi(chip);
@@ -647,12 +640,12 @@ static int rcar_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
 	msg.address_hi = rcar_pci_read_reg(pcie, PCIEMSIAUR);
 	msg.data = hwirq;
 
-	write_msi_msg(irq, &msg);
+	pci_write_msi_msg(irq, &msg);
 
 	return 0;
 }
 
-static void rcar_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void rcar_msi_teardown_irq(struct msi_controller *chip, unsigned int irq)
 {
 	struct rcar_msi *msi = to_rcar_msi(chip);
 	struct irq_data *d = irq_get_irq_data(irq);
@@ -662,10 +655,10 @@ static void rcar_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
 
 static struct irq_chip rcar_msi_irq_chip = {
 	.name = "R-Car PCIe MSI",
-	.irq_enable = unmask_msi_irq,
-	.irq_disable = mask_msi_irq,
-	.irq_mask = mask_msi_irq,
-	.irq_unmask = unmask_msi_irq,
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
 };
 
 static int rcar_msi_map(struct irq_domain *domain, unsigned int irq,
@@ -990,7 +983,6 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 static struct platform_driver rcar_pcie_driver = {
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table = rcar_pcie_of_match,
 		.suppress_bind_attrs = true,
 	},
diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c
index 925cfbad5be6..109dfadd314c 100644
--- a/drivers/pci/host/pcie-spear13xx.c
+++ b/drivers/pci/host/pcie-spear13xx.c
@@ -376,7 +376,6 @@ static struct platform_driver spear13xx_pcie_driver = {
 	.probe		= spear13xx_pcie_probe,
 	.driver = {
 		.name	= "spear-pcie",
-		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(spear13xx_pcie_of_match),
 	},
 };
diff --git a/drivers/pci/host/pcie-xilinx.c b/drivers/pci/host/pcie-xilinx.c
index 0b68a45a8d97..c81d0cefe42d 100644
--- a/drivers/pci/host/pcie-xilinx.c
+++ b/drivers/pci/host/pcie-xilinx.c
@@ -297,18 +297,16 @@ static struct pci_ops xilinx_pcie_ops = {
  */
 static void xilinx_pcie_destroy_msi(unsigned int irq)
 {
-	struct irq_desc *desc;
 	struct msi_desc *msi;
 	struct xilinx_pcie_port *port;
 
-	desc = irq_to_desc(irq);
-	msi = irq_desc_get_msi_desc(desc);
-	port = sys_to_pcie(msi->dev->bus->sysdata);
-
-	if (!test_bit(irq, msi_irq_in_use))
+	if (!test_bit(irq, msi_irq_in_use)) {
+		msi = irq_get_msi_desc(irq);
+		port = sys_to_pcie(msi_desc_to_pci_sys_data(msi));
 		dev_err(port->dev, "Trying to free unused MSI#%d\n", irq);
-	else
+	} else {
 		clear_bit(irq, msi_irq_in_use);
+	}
 }
 
 /**
@@ -335,7 +333,8 @@ static int xilinx_pcie_assign_msi(struct xilinx_pcie_port *port)
  * @chip: MSI Chip descriptor
  * @irq: MSI IRQ to destroy
  */
-static void xilinx_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
+static void xilinx_msi_teardown_irq(struct msi_controller *chip,
+				    unsigned int irq)
 {
 	xilinx_pcie_destroy_msi(irq);
 }
@@ -348,7 +347,7 @@ static void xilinx_msi_teardown_irq(struct msi_chip *chip, unsigned int irq)
  *
  * Return: '0' on success and error value on failure
  */
-static int xilinx_pcie_msi_setup_irq(struct msi_chip *chip,
+static int xilinx_pcie_msi_setup_irq(struct msi_controller *chip,
 				     struct pci_dev *pdev,
 				     struct msi_desc *desc)
 {
@@ -374,13 +373,13 @@ static int xilinx_pcie_msi_setup_irq(struct msi_chip *chip,
 	msg.address_lo = msg_addr;
 	msg.data = irq;
 
-	write_msi_msg(irq, &msg);
+	pci_write_msi_msg(irq, &msg);
 
 	return 0;
 }
 
 /* MSI Chip Descriptor */
-static struct msi_chip xilinx_pcie_msi_chip = {
+static struct msi_controller xilinx_pcie_msi_chip = {
 	.setup_irq = xilinx_pcie_msi_setup_irq,
 	.teardown_irq = xilinx_msi_teardown_irq,
 };
@@ -388,10 +387,10 @@ static struct msi_chip xilinx_pcie_msi_chip = {
 /* HW Interrupt Chip Descriptor */
 static struct irq_chip xilinx_msi_irq_chip = {
 	.name = "Xilinx PCIe MSI",
-	.irq_enable = unmask_msi_irq,
-	.irq_disable = mask_msi_irq,
-	.irq_mask = mask_msi_irq,
-	.irq_unmask = unmask_msi_irq,
+	.irq_enable = pci_msi_unmask_irq,
+	.irq_disable = pci_msi_mask_irq,
+	.irq_mask = pci_msi_mask_irq,
+	.irq_unmask = pci_msi_unmask_irq,
 };
 
 /**
@@ -431,20 +430,6 @@ static void xilinx_pcie_enable_msi(struct xilinx_pcie_port *port)
 	pcie_write(port, msg_addr, XILINX_PCIE_REG_MSIBASE2);
 }
 
-/**
- * xilinx_pcie_add_bus - Add MSI chip info to PCIe bus
- * @bus: PCIe bus
- */
-static void xilinx_pcie_add_bus(struct pci_bus *bus)
-{
-	if (IS_ENABLED(CONFIG_PCI_MSI)) {
-		struct xilinx_pcie_port *port = sys_to_pcie(bus->sysdata);
-
-		xilinx_pcie_msi_chip.dev = port->dev;
-		bus->msi = &xilinx_pcie_msi_chip;
-	}
-}
-
 /* INTx Functions */
 
 /**
@@ -925,10 +910,14 @@ static int xilinx_pcie_probe(struct platform_device *pdev)
 		.private_data	= (void **)&port,
 		.setup		= xilinx_pcie_setup,
 		.map_irq	= of_irq_parse_and_map_pci,
-		.add_bus	= xilinx_pcie_add_bus,
 		.scan		= xilinx_pcie_scan_bus,
 		.ops		= &xilinx_pcie_ops,
 	};
+
+#ifdef CONFIG_PCI_MSI
+	xilinx_pcie_msi_chip.dev = port->dev;
+	hw.msi_ctrl = &xilinx_pcie_msi_chip;
+#endif
 	pci_common_init_dev(dev, &hw);
 
 	return 0;
@@ -957,7 +946,6 @@ static struct of_device_id xilinx_pcie_of_match[] = {
 static struct platform_driver xilinx_pcie_driver = {
 	.driver = {
 		.name = "xilinx-pcie",
-		.owner = THIS_MODULE,
 		.of_match_table = xilinx_pcie_of_match,
 		.suppress_bind_attrs = true,
 	},
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 3efaf4c38528..1d5b646e3db8 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -740,7 +740,7 @@ static void ibm_unconfigure_device(struct pci_func *func)
  */
 static u8 bus_structure_fixup(u8 busno)
 {
-	struct pci_bus *bus;
+	struct pci_bus *bus, *b;
 	struct pci_dev *dev;
 	u16 l;
 
@@ -767,7 +767,11 @@ static u8 bus_structure_fixup(u8 busno)
 					(l != 0x0000) && (l != 0xffff)) {
 			debug("%s - Inside bus_structure_fixup()\n",
 							__func__);
-			pci_scan_bus(busno, ibmphp_pci_bus->ops, NULL);
+			b = pci_scan_bus(busno, ibmphp_pci_bus->ops, NULL);
+			if (!b)
+				continue;
+
+			pci_bus_add_devices(b);
 			break;
 		}
 	}
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 084587d7cd13..1c7f6a8e69d1 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -19,19 +19,81 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/irqdomain.h>
 
 #include "pci.h"
 
 static int pci_msi_enable = 1;
+int pci_msi_ignore_mask;
 
 #define msix_table_size(flags)	((flags & PCI_MSIX_FLAGS_QSIZE) + 1)
 
+#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+static struct irq_domain *pci_msi_default_domain;
+static DEFINE_MUTEX(pci_msi_domain_lock);
+
+struct irq_domain * __weak arch_get_pci_msi_domain(struct pci_dev *dev)
+{
+	return pci_msi_default_domain;
+}
+
+static struct irq_domain *pci_msi_get_domain(struct pci_dev *dev)
+{
+	struct irq_domain *domain;
+
+	domain = dev_get_msi_domain(&dev->dev);
+	if (domain)
+		return domain;
+
+	return arch_get_pci_msi_domain(dev);
+}
+
+static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct irq_domain *domain;
+
+	domain = pci_msi_get_domain(dev);
+	if (domain)
+		return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
+
+	return arch_setup_msi_irqs(dev, nvec, type);
+}
+
+static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct irq_domain *domain;
+
+	domain = pci_msi_get_domain(dev);
+	if (domain)
+		pci_msi_domain_free_irqs(domain, dev);
+	else
+		arch_teardown_msi_irqs(dev);
+}
+#else
+#define pci_msi_setup_msi_irqs		arch_setup_msi_irqs
+#define pci_msi_teardown_msi_irqs	arch_teardown_msi_irqs
+#endif
 
 /* Arch hooks */
 
+struct msi_controller * __weak pcibios_msi_controller(struct pci_dev *dev)
+{
+	return NULL;
+}
+
+static struct msi_controller *pci_msi_controller(struct pci_dev *dev)
+{
+	struct msi_controller *msi_ctrl = dev->bus->msi;
+
+	if (msi_ctrl)
+		return msi_ctrl;
+
+	return pcibios_msi_controller(dev);
+}
+
 int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-	struct msi_chip *chip = dev->bus->msi;
+	struct msi_controller *chip = pci_msi_controller(dev);
 	int err;
 
 	if (!chip || !chip->setup_irq)
@@ -48,7 +110,7 @@ int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 
 void __weak arch_teardown_msi_irq(unsigned int irq)
 {
-	struct msi_chip *chip = irq_get_chip_data(irq);
+	struct msi_controller *chip = irq_get_chip_data(irq);
 
 	if (!chip || !chip->teardown_irq)
 		return;
@@ -85,19 +147,13 @@ int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
  */
 void default_teardown_msi_irqs(struct pci_dev *dev)
 {
+	int i;
 	struct msi_desc *entry;
 
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		int i, nvec;
-		if (entry->irq == 0)
-			continue;
-		if (entry->nvec_used)
-			nvec = entry->nvec_used;
-		else
-			nvec = 1 << entry->msi_attrib.multiple;
-		for (i = 0; i < nvec; i++)
-			arch_teardown_msi_irq(entry->irq + i);
-	}
+	list_for_each_entry(entry, &dev->msi_list, list)
+		if (entry->irq)
+			for (i = 0; i < entry->nvec_used; i++)
+				arch_teardown_msi_irq(entry->irq + i);
 }
 
 void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
@@ -120,7 +176,7 @@ static void default_restore_msi_irq(struct pci_dev *dev, int irq)
 	}
 
 	if (entry)
-		__write_msi_msg(entry, &entry->msg);
+		__pci_write_msi_msg(entry, &entry->msg);
 }
 
 void __weak arch_restore_msi_irqs(struct pci_dev *dev)
@@ -128,27 +184,6 @@ void __weak arch_restore_msi_irqs(struct pci_dev *dev)
 	return default_restore_msi_irqs(dev);
 }
 
-static void msi_set_enable(struct pci_dev *dev, int enable)
-{
-	u16 control;
-
-	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
-	control &= ~PCI_MSI_FLAGS_ENABLE;
-	if (enable)
-		control |= PCI_MSI_FLAGS_ENABLE;
-	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
-}
-
-static void msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
-{
-	u16 ctrl;
-
-	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
-	ctrl &= ~clear;
-	ctrl |= set;
-	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
-}
-
 static inline __attribute_const__ u32 msi_mask(unsigned x)
 {
 	/* Don't shift by >= width of type */
@@ -163,28 +198,24 @@ static inline __attribute_const__ u32 msi_mask(unsigned x)
  * reliably as devices without an INTx disable bit will then generate a
  * level IRQ which will never be cleared.
  */
-u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
+u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 {
 	u32 mask_bits = desc->masked;
 
-	if (!desc->msi_attrib.maskbit)
+	if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit)
 		return 0;
 
 	mask_bits &= ~mask;
 	mask_bits |= flag;
-	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
+	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos,
+			       mask_bits);
 
 	return mask_bits;
 }
 
-__weak u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
-{
-	return default_msi_mask_irq(desc, mask, flag);
-}
-
 static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 {
-	desc->masked = arch_msi_mask_irq(desc, mask, flag);
+	desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag);
 }
 
 /*
@@ -194,11 +225,15 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
  * file.  This saves a few milliseconds when initialising devices with lots
  * of MSI-X interrupts.
  */
-u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
+u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
 {
 	u32 mask_bits = desc->masked;
 	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
 						PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+	if (pci_msi_ignore_mask)
+		return 0;
+
 	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
 	if (flag)
 		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
@@ -207,19 +242,14 @@ u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
 	return mask_bits;
 }
 
-__weak u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
-{
-	return default_msix_mask_irq(desc, flag);
-}
-
 static void msix_mask_irq(struct msi_desc *desc, u32 flag)
 {
-	desc->masked = arch_msix_mask_irq(desc, flag);
+	desc->masked = __pci_msix_desc_mask_irq(desc, flag);
 }
 
 static void msi_set_mask_bit(struct irq_data *data, u32 flag)
 {
-	struct msi_desc *desc = irq_data_get_msi(data);
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
 
 	if (desc->msi_attrib.is_msix) {
 		msix_mask_irq(desc, flag);
@@ -230,12 +260,20 @@ static void msi_set_mask_bit(struct irq_data *data, u32 flag)
 	}
 }
 
-void mask_msi_irq(struct irq_data *data)
+/**
+ * pci_msi_mask_irq - Generic irq chip callback to mask PCI/MSI interrupts
+ * @data:	pointer to irqdata associated to that interrupt
+ */
+void pci_msi_mask_irq(struct irq_data *data)
 {
 	msi_set_mask_bit(data, 1);
 }
 
-void unmask_msi_irq(struct irq_data *data)
+/**
+ * pci_msi_unmask_irq - Generic irq chip callback to unmask PCI/MSI interrupts
+ * @data:	pointer to irqdata associated to that interrupt
+ */
+void pci_msi_unmask_irq(struct irq_data *data)
 {
 	msi_set_mask_bit(data, 0);
 }
@@ -244,14 +282,15 @@ void default_restore_msi_irqs(struct pci_dev *dev)
 {
 	struct msi_desc *entry;
 
-	list_for_each_entry(entry, &dev->msi_list, list) {
+	list_for_each_entry(entry, &dev->msi_list, list)
 		default_restore_msi_irq(dev, entry->irq);
-	}
 }
 
-void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-	BUG_ON(entry->dev->current_state != PCI_D0);
+	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
+
+	BUG_ON(dev->current_state != PCI_D0);
 
 	if (entry->msi_attrib.is_msix) {
 		void __iomem *base = entry->mask_base +
@@ -261,7 +300,6 @@ void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
 		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
 	} else {
-		struct pci_dev *dev = entry->dev;
 		int pos = dev->msi_cap;
 		u16 data;
 
@@ -279,34 +317,11 @@ void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 	}
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
-	struct msi_desc *entry = irq_get_msi_desc(irq);
-
-	__read_msi_msg(entry, msg);
-}
-
-void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
-	/* Assert that the cache is valid, assuming that
-	 * valid messages are not all-zeroes. */
-	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
-		 entry->msg.data));
+	struct pci_dev *dev = msi_desc_to_pci_dev(entry);
 
-	*msg = entry->msg;
-}
-
-void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
-{
-	struct msi_desc *entry = irq_get_msi_desc(irq);
-
-	__get_cached_msi_msg(entry, msg);
-}
-EXPORT_SYMBOL_GPL(get_cached_msi_msg);
-
-void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
-{
-	if (entry->dev->current_state != PCI_D0) {
+	if (dev->current_state != PCI_D0) {
 		/* Don't touch the hardware now */
 	} else if (entry->msi_attrib.is_msix) {
 		void __iomem *base;
@@ -317,7 +332,6 @@ void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
 		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
 	} else {
-		struct pci_dev *dev = entry->dev;
 		int pos = dev->msi_cap;
 		u16 msgctl;
 
@@ -341,34 +355,27 @@ void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 	entry->msg = *msg;
 }
 
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
 	struct msi_desc *entry = irq_get_msi_desc(irq);
 
-	__write_msi_msg(entry, msg);
+	__pci_write_msi_msg(entry, msg);
 }
-EXPORT_SYMBOL_GPL(write_msi_msg);
+EXPORT_SYMBOL_GPL(pci_write_msi_msg);
 
 static void free_msi_irqs(struct pci_dev *dev)
 {
 	struct msi_desc *entry, *tmp;
 	struct attribute **msi_attrs;
 	struct device_attribute *dev_attr;
-	int count = 0;
+	int i, count = 0;
 
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		int i, nvec;
-		if (!entry->irq)
-			continue;
-		if (entry->nvec_used)
-			nvec = entry->nvec_used;
-		else
-			nvec = 1 << entry->msi_attrib.multiple;
-		for (i = 0; i < nvec; i++)
-			BUG_ON(irq_has_action(entry->irq + i));
-	}
+	list_for_each_entry(entry, &dev->msi_list, list)
+		if (entry->irq)
+			for (i = 0; i < entry->nvec_used; i++)
+				BUG_ON(irq_has_action(entry->irq + i));
 
-	arch_teardown_msi_irqs(dev);
+	pci_msi_teardown_msi_irqs(dev);
 
 	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
 		if (entry->msi_attrib.is_msix) {
@@ -426,7 +433,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
 	entry = irq_get_msi_desc(dev->irq);
 
 	pci_intx_for_msi(dev, 0);
-	msi_set_enable(dev, 0);
+	pci_msi_set_enable(dev, 0);
 	arch_restore_msi_irqs(dev);
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
@@ -447,15 +454,14 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
 
 	/* route the table */
 	pci_intx_for_msi(dev, 0);
-	msix_clear_and_set_ctrl(dev, 0,
+	pci_msix_clear_and_set_ctrl(dev, 0,
 				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
 
 	arch_restore_msi_irqs(dev);
-	list_for_each_entry(entry, &dev->msi_list, list) {
+	list_for_each_entry(entry, &dev->msi_list, list)
 		msix_mask_irq(entry, entry->masked);
-	}
 
-	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
 }
 
 void pci_restore_msi_state(struct pci_dev *dev)
@@ -497,9 +503,8 @@ static int populate_msi_sysfs(struct pci_dev *pdev)
 	int count = 0;
 
 	/* Determine how many msi entries we have */
-	list_for_each_entry(entry, &pdev->msi_list, list) {
+	list_for_each_entry(entry, &pdev->msi_list, list)
 		++num_msi;
-	}
 	if (!num_msi)
 		return 0;
 
@@ -559,7 +564,7 @@ error_attrs:
 	return ret;
 }
 
-static struct msi_desc *msi_setup_entry(struct pci_dev *dev)
+static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 {
 	u16 control;
 	struct msi_desc *entry;
@@ -577,6 +582,8 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev)
 	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
+	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
+	entry->nvec_used		= nvec;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -621,9 +628,9 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 	int ret;
 	unsigned mask;
 
-	msi_set_enable(dev, 0);	/* Disable MSI during set up */
+	pci_msi_set_enable(dev, 0);	/* Disable MSI during set up */
 
-	entry = msi_setup_entry(dev);
+	entry = msi_setup_entry(dev, nvec);
 	if (!entry)
 		return -ENOMEM;
 
@@ -634,7 +641,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 	list_add_tail(&entry->list, &dev->msi_list);
 
 	/* Configure MSI capability structure */
-	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
+	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
 	if (ret) {
 		msi_mask_irq(entry, mask, ~mask);
 		free_msi_irqs(dev);
@@ -657,7 +664,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
-	msi_set_enable(dev, 1);
+	pci_msi_set_enable(dev, 1);
 	dev->msi_enabled = 1;
 
 	dev->irq = entry->irq;
@@ -701,6 +708,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 		entry->msi_attrib.entry_nr	= entries[i].entry;
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
+		entry->nvec_used		= 1;
 
 		list_add_tail(&entry->list, &dev->msi_list);
 	}
@@ -719,7 +727,6 @@ static void msix_program_entries(struct pci_dev *dev,
 						PCI_MSIX_ENTRY_VECTOR_CTRL;
 
 		entries[i].vector = entry->irq;
-		irq_set_msi_desc(entry->irq, entry);
 		entry->masked = readl(entry->mask_base + offset);
 		msix_mask_irq(entry, 1);
 		i++;
@@ -744,7 +751,7 @@ static int msix_capability_init(struct pci_dev *dev,
 	void __iomem *base;
 
 	/* Ensure MSI-X is disabled while it is set up */
-	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
 
 	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
 	/* Request & Map MSI-X table region */
@@ -756,7 +763,7 @@ static int msix_capability_init(struct pci_dev *dev,
 	if (ret)
 		return ret;
 
-	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
+	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
 	if (ret)
 		goto out_avail;
 
@@ -770,7 +777,7 @@ static int msix_capability_init(struct pci_dev *dev,
 	 * MSI-X registers.  We need to mask all the vectors to prevent
 	 * interrupts coming in before they're fully set up.
 	 */
-	msix_clear_and_set_ctrl(dev, 0,
+	pci_msix_clear_and_set_ctrl(dev, 0,
 				PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE);
 
 	msix_program_entries(dev, entries);
@@ -783,7 +790,7 @@ static int msix_capability_init(struct pci_dev *dev,
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
 
-	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
 
 	return 0;
 
@@ -888,14 +895,14 @@ void pci_msi_shutdown(struct pci_dev *dev)
 	BUG_ON(list_empty(&dev->msi_list));
 	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
 
-	msi_set_enable(dev, 0);
+	pci_msi_set_enable(dev, 0);
 	pci_intx_for_msi(dev, 1);
 	dev->msi_enabled = 0;
 
 	/* Return the device with MSI unmasked as initial states */
 	mask = msi_mask(desc->msi_attrib.multi_cap);
 	/* Keep cached state to be restored */
-	arch_msi_mask_irq(desc, mask, ~mask);
+	__pci_msi_desc_mask_irq(desc, mask, ~mask);
 
 	/* Restore dev->irq to its default pin-assertion irq */
 	dev->irq = desc->msi_attrib.default_irq;
@@ -993,10 +1000,10 @@ void pci_msix_shutdown(struct pci_dev *dev)
 	/* Return the device with MSI-X masked as initial states */
 	list_for_each_entry(entry, &dev->msi_list, list) {
 		/* Keep cached states to be restored */
-		arch_msix_mask_irq(entry, 1);
+		__pci_msix_desc_mask_irq(entry, 1);
 	}
 
-	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
+	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
 	pci_intx_for_msi(dev, 1);
 	dev->msix_enabled = 0;
 }
@@ -1031,18 +1038,6 @@ EXPORT_SYMBOL(pci_msi_enabled);
 void pci_msi_init_pci_dev(struct pci_dev *dev)
 {
 	INIT_LIST_HEAD(&dev->msi_list);
-
-	/* Disable the msi hardware to avoid screaming interrupts
-	 * during boot.  This is the power on reset default so
-	 * usually this should be a noop.
-	 */
-	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
-	if (dev->msi_cap)
-		msi_set_enable(dev, 0);
-
-	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-	if (dev->msix_cap)
-		msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
 }
 
 /**
@@ -1138,3 +1133,212 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 	return nvec;
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
+
+void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
+{
+	struct pci_dev *dev = msi_desc_to_pci_dev(desc);
+
+	return dev->bus->sysdata;
+}
+EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdata);
+
+#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
+/**
+ * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
+ * @irq_data:	Pointer to interrupt data of the MSI interrupt
+ * @msg:	Pointer to the message
+ */
+void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data->msi_desc;
+
+	/*
+	 * For MSI-X desc->irq is always equal to irq_data->irq. For
+	 * MSI only the first interrupt of MULTI MSI passes the test.
+	 */
+	if (desc->irq == irq_data->irq)
+		__pci_write_msi_msg(desc, msg);
+}
+
+/**
+ * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
+ * @dev:	Pointer to the PCI device
+ * @desc:	Pointer to the msi descriptor
+ *
+ * The ID number is only used within the irqdomain.
+ */
+irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
+					  struct msi_desc *desc)
+{
+	return (irq_hw_number_t)desc->msi_attrib.entry_nr |
+		PCI_DEVID(dev->bus->number, dev->devfn) << 11 |
+		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
+}
+
+static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
+{
+	return !desc->msi_attrib.is_msix && desc->nvec_used > 1;
+}
+
+/**
+ * pci_msi_domain_check_cap - Verify that @domain supports the capabilities for @dev
+ * @domain:	The interrupt domain to check
+ * @info:	The domain info for verification
+ * @dev:	The device to check
+ *
+ * Returns:
+ *  0 if the functionality is supported
+ *  1 if Multi MSI is requested, but the domain does not support it
+ *  -ENOTSUPP otherwise
+ */
+int pci_msi_domain_check_cap(struct irq_domain *domain,
+			     struct msi_domain_info *info, struct device *dev)
+{
+	struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));
+
+	/* Special handling to support pci_enable_msi_range() */
+	if (pci_msi_desc_is_multi_msi(desc) &&
+	    !(info->flags & MSI_FLAG_MULTI_PCI_MSI))
+		return 1;
+	else if (desc->msi_attrib.is_msix && !(info->flags & MSI_FLAG_PCI_MSIX))
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+static int pci_msi_domain_handle_error(struct irq_domain *domain,
+				       struct msi_desc *desc, int error)
+{
+	/* Special handling to support pci_enable_msi_range() */
+	if (pci_msi_desc_is_multi_msi(desc) && error == -ENOSPC)
+		return 1;
+
+	return error;
+}
+
+#ifdef GENERIC_MSI_DOMAIN_OPS
+static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
+				    struct msi_desc *desc)
+{
+	arg->desc = desc;
+	arg->hwirq = pci_msi_domain_calc_hwirq(msi_desc_to_pci_dev(desc),
+					       desc);
+}
+#else
+#define pci_msi_domain_set_desc		NULL
+#endif
+
+static struct msi_domain_ops pci_msi_domain_ops_default = {
+	.set_desc	= pci_msi_domain_set_desc,
+	.msi_check	= pci_msi_domain_check_cap,
+	.handle_error	= pci_msi_domain_handle_error,
+};
+
+static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info)
+{
+	struct msi_domain_ops *ops = info->ops;
+
+	if (ops == NULL) {
+		info->ops = &pci_msi_domain_ops_default;
+	} else {
+		if (ops->set_desc == NULL)
+			ops->set_desc = pci_msi_domain_set_desc;
+		if (ops->msi_check == NULL)
+			ops->msi_check = pci_msi_domain_check_cap;
+		if (ops->handle_error == NULL)
+			ops->handle_error = pci_msi_domain_handle_error;
+	}
+}
+
+static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info)
+{
+	struct irq_chip *chip = info->chip;
+
+	BUG_ON(!chip);
+	if (!chip->irq_write_msi_msg)
+		chip->irq_write_msi_msg = pci_msi_domain_write_msg;
+}
+
+/**
+ * pci_msi_create_irq_domain - Creat a MSI interrupt domain
+ * @node:	Optional device-tree node of the interrupt controller
+ * @info:	MSI domain info
+ * @parent:	Parent irq domain
+ *
+ * Updates the domain and chip ops and creates a MSI interrupt domain.
+ *
+ * Returns:
+ * A domain pointer or NULL in case of failure.
+ */
+struct irq_domain *pci_msi_create_irq_domain(struct device_node *node,
+					     struct msi_domain_info *info,
+					     struct irq_domain *parent)
+{
+	struct irq_domain *domain;
+
+	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
+		pci_msi_domain_update_dom_ops(info);
+	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
+		pci_msi_domain_update_chip_ops(info);
+
+	domain = msi_create_irq_domain(node, info, parent);
+	if (!domain)
+		return NULL;
+
+	domain->bus_token = DOMAIN_BUS_PCI_MSI;
+	return domain;
+}
+
+/**
+ * pci_msi_domain_alloc_irqs - Allocate interrupts for @dev in @domain
+ * @domain:	The interrupt domain to allocate from
+ * @dev:	The device for which to allocate
+ * @nvec:	The number of interrupts to allocate
+ * @type:	Unused to allow simpler migration from the arch_XXX interfaces
+ *
+ * Returns:
+ * A virtual interrupt number or an error code in case of failure
+ */
+int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev,
+			      int nvec, int type)
+{
+	return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
+}
+
+/**
+ * pci_msi_domain_free_irqs - Free interrupts for @dev in @domain
+ * @domain:	The interrupt domain
+ * @dev:	The device for which to free interrupts
+ */
+void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev)
+{
+	msi_domain_free_irqs(domain, &dev->dev);
+}
+
+/**
+ * pci_msi_create_default_irq_domain - Create a default MSI interrupt domain
+ * @node:	Optional device-tree node of the interrupt controller
+ * @info:	MSI domain info
+ * @parent:	Parent irq domain
+ *
+ * Returns: A domain pointer or NULL in case of failure. If successful
+ * the default PCI/MSI irqdomain pointer is updated.
+ */
+struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node,
+		struct msi_domain_info *info, struct irq_domain *parent)
+{
+	struct irq_domain *domain;
+
+	mutex_lock(&pci_msi_domain_lock);
+	if (pci_msi_default_domain) {
+		pr_err("PCI: default irq domain for PCI MSI has already been created.\n");
+		domain = NULL;
+	} else {
+		domain = pci_msi_create_irq_domain(node, info, parent);
+		pci_msi_default_domain = domain;
+	}
+	mutex_unlock(&pci_msi_domain_lock);
+
+	return domain;
+}
+#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index f0929934bb7a..2e99a500cb83 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/of.h>
@@ -59,3 +60,32 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
 		return of_node_get(bus->bridge->parent->of_node);
 	return NULL;
 }
+
+struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
+{
+#ifdef CONFIG_IRQ_DOMAIN
+	struct device_node *np;
+	struct irq_domain *d;
+
+	if (!bus->dev.of_node)
+		return NULL;
+
+	/* Start looking for a phandle to an MSI controller. */
+	np = of_parse_phandle(bus->dev.of_node, "msi-parent", 0);
+
+	/*
+	 * If we don't have an msi-parent property, look for a domain
+	 * directly attached to the host bridge.
+	 */
+	if (!np)
+		np = bus->dev.of_node;
+
+	d = irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI);
+	if (d)
+		return d;
+
+	return irq_find_host(np);
+#else
+	return NULL;
+#endif
+}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce0aa47222f6..7371cc01d4cb 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -124,15 +124,16 @@ EXPORT_SYMBOL_GPL(pci_bus_max_busnr);
 #ifdef CONFIG_HAS_IOMEM
 void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
+	struct resource *res = &pdev->resource[bar];
+
 	/*
 	 * Make sure the BAR is actually a memory resource, not an IO resource
 	 */
-	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
-		WARN_ON(1);
+	if (res->flags & IORESOURCE_UNSET || !(res->flags & IORESOURCE_MEM)) {
+		dev_warn(&pdev->dev, "can't ioremap BAR %d: %pR\n", bar, res);
 		return NULL;
 	}
-	return ioremap_nocache(pci_resource_start(pdev, bar),
-				     pci_resource_len(pdev, bar));
+	return ioremap_nocache(res->start, resource_size(res));
 }
 EXPORT_SYMBOL_GPL(pci_ioremap_bar);
 #endif
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b5defca86795..df2169edd23b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -140,6 +140,27 @@ static inline void pci_no_msi(void) { }
 static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { }
 #endif
 
+static inline void pci_msi_set_enable(struct pci_dev *dev, int enable)
+{
+	u16 control;
+
+	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+	control &= ~PCI_MSI_FLAGS_ENABLE;
+	if (enable)
+		control |= PCI_MSI_FLAGS_ENABLE;
+	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
+}
+
+static inline void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
+{
+	u16 ctrl;
+
+	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+	ctrl &= ~clear;
+	ctrl |= set;
+	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+}
+
 void pci_realloc_get_opt(char *);
 
 static inline int pci_no_d1d2(struct pci_dev *dev)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7de026897f1d..d5c10c5cadb4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -6,6 +6,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/of_pci.h>
 #include <linux/pci_hotplug.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -676,6 +677,35 @@ static void pci_set_bus_speed(struct pci_bus *bus)
 	}
 }
 
+static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
+{
+	struct irq_domain *d;
+
+	/*
+	 * Any firmware interface that can resolve the msi_domain
+	 * should be called from here.
+	 */
+	d = pci_host_bridge_of_msi_domain(bus);
+
+	return d;
+}
+
+static void pci_set_bus_msi_domain(struct pci_bus *bus)
+{
+	struct irq_domain *d;
+
+	/*
+	 * Either bus is the root, and we must obtain it from the
+	 * firmware, or we inherit it from the bridge device.
+	 */
+	if (pci_is_root_bus(bus))
+		d = pci_host_bridge_msi_domain(bus);
+	else
+		d = dev_get_msi_domain(&bus->self->dev);
+
+	dev_set_msi_domain(&bus->dev, d);
+}
+
 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 					   struct pci_dev *bridge, int busnr)
 {
@@ -729,6 +759,7 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 	bridge->subordinate = child;
 
 add_dev:
+	pci_set_bus_msi_domain(child);
 	ret = device_register(&child->dev);
 	WARN_ON(ret < 0);
 
@@ -1118,6 +1149,22 @@ int pci_cfg_space_size(struct pci_dev *dev)
 
 #define LEGACY_IO_RESOURCE	(IORESOURCE_IO | IORESOURCE_PCI_FIXED)
 
+static void pci_msi_setup_pci_dev(struct pci_dev *dev)
+{
+	/*
+	 * Disable the MSI hardware to avoid screaming interrupts
+	 * during boot.  This is the power on reset default so
+	 * usually this should be a noop.
+	 */
+	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	if (dev->msi_cap)
+		pci_msi_set_enable(dev, 0);
+
+	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	if (dev->msix_cap)
+		pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
+}
+
 /**
  * pci_setup_device - fill in class and map information of a device
  * @dev: the device structure to fill
@@ -1174,6 +1221,8 @@ int pci_setup_device(struct pci_dev *dev)
 	/* "Unknown power state" */
 	dev->current_state = PCI_UNKNOWN;
 
+	pci_msi_setup_pci_dev(dev);
+
 	/* Early fixups, before probing the BARs */
 	pci_fixup_device(pci_fixup_early, dev);
 	/* device class may be changed after fixup */
@@ -1552,6 +1601,17 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	pci_enable_acs(dev);
 }
 
+static void pci_set_msi_domain(struct pci_dev *dev)
+{
+	/*
+	 * If no domain has been set through the pcibios_add_device
+	 * callback, inherit the default from the bus device.
+	 */
+	if (!dev_get_msi_domain(&dev->dev))
+		dev_set_msi_domain(&dev->dev,
+				   dev_get_msi_domain(&dev->bus->dev));
+}
+
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 {
 	int ret;
@@ -1565,6 +1625,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.dma_mask = &dev->dma_mask;
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
+	of_pci_dma_configure(dev);
 
 	pci_set_dma_max_seg_size(dev, 65536);
 	pci_set_dma_seg_boundary(dev, 0xffffffff);
@@ -1592,6 +1653,9 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	ret = pcibios_add_device(dev);
 	WARN_ON(ret < 0);
 
+	/* Setup MSI irq domain */
+	pci_set_msi_domain(dev);
+
 	/* Notifier could use PCI capabilities */
 	dev->match_driver = false;
 	ret = device_add(&dev->dev);
@@ -1982,6 +2046,7 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 	b->bridge = get_device(&bridge->dev);
 	device_enable_async_suspend(b->bridge);
 	pci_set_bus_of_node(b);
+	pci_set_bus_msi_domain(b);
 
 	if (!parent)
 		set_dev_node(b->bridge, pcibus_to_node(b));
@@ -2132,7 +2197,6 @@ struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 	if (!found)
 		pci_bus_update_busn_res_end(b, max);
 
-	pci_bus_add_devices(b);
 	return b;
 }
 EXPORT_SYMBOL(pci_scan_root_bus);
@@ -2168,7 +2232,6 @@ struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
 	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
 	if (b) {
 		pci_scan_child_bus(b);
-		pci_bus_add_devices(b);
 	} else {
 		pci_free_resource_list(&resources);
 	}
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index b7c3a5ea1fca..232f9254c11a 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -120,6 +120,7 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 	if (!root) {
 		dev_info(&dev->dev, "can't claim BAR %d %pR: no compatible bridge window\n",
 			 resource, res);
+		res->flags |= IORESOURCE_UNSET;
 		return -EINVAL;
 	}
 
@@ -127,6 +128,7 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
 	if (conflict) {
 		dev_info(&dev->dev, "can't claim BAR %d %pR: address conflict with %s %pR\n",
 			 resource, res, conflict->name, conflict);
+		res->flags |= IORESOURCE_UNSET;
 		return -EBUSY;
 	}
 
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index f94467c05225..7f21f75baa59 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -330,7 +330,8 @@ static int acerhdf_bind(struct thermal_zone_device *thermal,
 		return 0;
 
 	if (thermal_zone_bind_cooling_device(thermal, 0, cdev,
-			THERMAL_NO_LIMIT, THERMAL_NO_LIMIT)) {
+			THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+			THERMAL_WEIGHT_DEFAULT)) {
 		pr_err("error binding cooling dev\n");
 		return -EINVAL;
 	}
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index ebf0d6710b5a..943c1cb9566c 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -246,13 +246,16 @@ static void quirk_system_pci_resources(struct pnp_dev *dev)
 	 */
 	for_each_pci_dev(pdev) {
 		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-			unsigned long type;
+			unsigned long flags, type;
 
-			type = pci_resource_flags(pdev, i) &
-					(IORESOURCE_IO | IORESOURCE_MEM);
+			flags = pci_resource_flags(pdev, i);
+			type = flags & (IORESOURCE_IO | IORESOURCE_MEM);
 			if (!type || pci_resource_len(pdev, i) == 0)
 				continue;
 
+			if (flags & IORESOURCE_UNSET)
+				continue;
+
 			pci_start = pci_resource_start(pdev, i);
 			pci_end = pci_resource_end(pdev, i);
 			for (j = 0;
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index c82fe65c4128..f263e8c8b641 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -738,11 +738,11 @@ static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *act)
 		return ZFCP_ERP_FAILED;
 
 	if (mempool_resize(act->adapter->pool.sr_data,
-			   act->adapter->stat_read_buf_num, GFP_KERNEL))
+			   act->adapter->stat_read_buf_num))
 		return ZFCP_ERP_FAILED;
 
 	if (mempool_resize(act->adapter->pool.status_read_req,
-			   act->adapter->stat_read_buf_num, GFP_KERNEL))
+			   act->adapter->stat_read_buf_num))
 		return ZFCP_ERP_FAILED;
 
 	atomic_set(&act->adapter->stat_miss, act->adapter->stat_read_buf_num);
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 3ed37dc28b3c..543b7d54d7aa 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -48,7 +48,6 @@ static unsigned int be_iopoll_budget = 10;
 static unsigned int be_max_phys_size = 64;
 static unsigned int enable_msix = 1;
 
-MODULE_DEVICE_TABLE(pci, beiscsi_pci_id_table);
 MODULE_DESCRIPTION(DRV_DESC " " BUILD_STR);
 MODULE_VERSION(BUILD_STR);
 MODULE_AUTHOR("Emulex Corporation");
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f554d25b4399..8b7d47f2f3aa 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -42,6 +42,17 @@ config THERMAL_OF
 	  Say 'Y' here if you need to build thermal infrastructure
 	  based on device tree.
 
+config THERMAL_WRITABLE_TRIPS
+	bool "Enable writable trip points"
+	help
+	  This option allows the system integrator to choose whether
+	  trip temperatures can be changed from userspace. The
+	  writable trips need to be specified when setting up the
+	  thermal zone but the choice here takes precedence.
+
+	  Say 'Y' here if you would like to allow userspace tools to
+	  change trip temperatures.
+
 choice
 	prompt "Default Thermal governor"
 	default THERMAL_DEFAULT_GOV_STEP_WISE
@@ -71,6 +82,14 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
 	  Select this if you want to let the user space manage the
 	  platform thermals.
 
+config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
+	bool "power_allocator"
+	select THERMAL_GOV_POWER_ALLOCATOR
+	help
+	  Select this if you want to control temperature based on
+	  system and device power allocation. This governor can only
+	  operate on cooling devices that implement the power API.
+
 endchoice
 
 config THERMAL_GOV_FAIR_SHARE
@@ -99,6 +118,13 @@ config THERMAL_GOV_USER_SPACE
 	help
 	  Enable this to let the user space manage the platform thermals.
 
+config THERMAL_GOV_POWER_ALLOCATOR
+	bool "Power allocator thermal governor"
+	select THERMAL_POWER_ACTOR
+	help
+	  Enable this to manage platform thermals by dynamically
+	  allocating and limiting power to devices.
+
 config CPU_THERMAL
 	bool "generic cpu cooling support"
 	depends on CPU_FREQ
@@ -112,6 +138,18 @@ config CPU_THERMAL
 
 	  If you want this support, you should say Y here.
 
+config CLOCK_THERMAL
+	bool "Generic clock cooling support"
+	depends on COMMON_CLK
+	depends on PM_OPP
+	help
+	  This entry implements the generic clock cooling mechanism through
+	  frequency clipping. Typically used to cool off co-processors. The
+	  device that is configured to use this cooling mechanism will be
+	  controlled to reduce clock frequency whenever temperature is high.
+
+	  If you want this support, you should say Y here.
+
 config THERMAL_EMULATION
 	bool "Thermal emulation mode support"
 	help
@@ -185,6 +223,16 @@ config ARMADA_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Armada 370 and Armada XP SoC.
 
+config TEGRA_SOCTHERM
+	tristate "Tegra SOCTHERM thermal management"
+	depends on ARCH_TEGRA
+	help
+	  Enable this option for integrated thermal management support on NVIDIA
+	  Tegra124 systems-on-chip. The driver supports four thermal zones
+	  (CPU, GPU, MEM, PLLX). Cooling devices can be bound to the thermal
+	  zones to manage temperatures. This option is also required for the
+	  emergency thermal reset (thermtrip) feature to function.
+
 config DB8500_CPUFREQ_COOLING
 	tristate "DB8500 cpufreq cooling"
 	depends on ARCH_U8500
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 39c4fe87da2f..dae37c089bb4 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -14,10 +14,14 @@ thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE)	+= fair_share.o
 thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG)	+= gov_bang_bang.o
 thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE)	+= step_wise.o
 thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE)	+= user_space.o
+thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR)	+= power_allocator.o
 
 # cpufreq cooling
 thermal_sys-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
 
+# clock cooling
+thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
+
 # platform thermal drivers
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
 obj-$(CONFIG_RCAR_THERMAL)	+= rcar_thermal.o
@@ -34,3 +38,4 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL)	+= intel_soc_dts_thermal.o
 obj-$(CONFIG_TI_SOC_THERMAL)	+= ti-soc-thermal/
 obj-$(CONFIG_INT340X_THERMAL)  += int340x_thermal/
 obj-$(CONFIG_ST_THERMAL)	+= st/
+obj-$(CONFIG_TEGRA_SOCTHERM)	+= tegra_soctherm.o
diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c
new file mode 100644
index 000000000000..1b4ff0f4c716
--- /dev/null
+++ b/drivers/thermal/clock_cooling.c
@@ -0,0 +1,485 @@
+/*
+ *  drivers/thermal/clock_cooling.c
+ *
+ *  Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com>
+ *
+ *  Copyright (C) 2013	Texas Instruments Inc.
+ *  Contact:  Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ *  Highly based on cpu_cooling.c.
+ *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
+ *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/pm_opp.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/clock_cooling.h>
+
+/**
+ * struct clock_cooling_device - data for cooling device with clock
+ * @id: unique integer value corresponding to each clock_cooling_device
+ *	registered.
+ * @dev: struct device pointer to the device being used to cool off using
+ *       clock frequencies.
+ * @cdev: thermal_cooling_device pointer to keep track of the
+ *	registered cooling device.
+ * @clk_rate_change_nb: reference to notifier block used to receive clock
+ *                      rate changes.
+ * @freq_table: frequency table used to keep track of available frequencies.
+ * @clock_state: integer value representing the current state of clock
+ *	cooling	devices.
+ * @clock_val: integer value representing the absolute value of the clipped
+ *	frequency.
+ * @clk: struct clk reference used to enforce clock limits.
+ * @lock: mutex lock to protect this struct.
+ *
+ * This structure is required for keeping information of each
+ * clock_cooling_device registered. In order to prevent corruption of this a
+ * mutex @lock is used.
+ */
+struct clock_cooling_device {
+	int id;
+	struct device *dev;
+	struct thermal_cooling_device *cdev;
+	struct notifier_block clk_rate_change_nb;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned long clock_state;
+	unsigned long clock_val;
+	struct clk *clk;
+	struct mutex lock; /* lock to protect the content of this struct */
+};
+#define to_clock_cooling_device(x) \
+		container_of(x, struct clock_cooling_device, clk_rate_change_nb)
+static DEFINE_IDR(clock_idr);
+static DEFINE_MUTEX(cooling_clock_lock);
+
+/**
+ * clock_cooling_get_idr - function to get an unique id.
+ * @id: int * value generated by this function.
+ *
+ * This function will populate @id with an unique
+ * id, using the idr API.
+ *
+ * Return: 0 on success, an error code on failure.
+ */
+static int clock_cooling_get_idr(int *id)
+{
+	int ret;
+
+	mutex_lock(&cooling_clock_lock);
+	ret = idr_alloc(&clock_idr, NULL, 0, 0, GFP_KERNEL);
+	mutex_unlock(&cooling_clock_lock);
+	if (unlikely(ret < 0))
+		return ret;
+	*id = ret;
+
+	return 0;
+}
+
+/**
+ * release_idr - function to free the unique id.
+ * @id: int value representing the unique id.
+ */
+static void release_idr(int id)
+{
+	mutex_lock(&cooling_clock_lock);
+	idr_remove(&clock_idr, id);
+	mutex_unlock(&cooling_clock_lock);
+}
+
+/* Below code defines functions to be used for clock as cooling device */
+
+enum clock_cooling_property {
+	GET_LEVEL,
+	GET_FREQ,
+	GET_MAXL,
+};
+
+/**
+ * clock_cooling_get_property - fetch a property of interest for a give cpu.
+ * @ccdev: clock cooling device reference
+ * @input: query parameter
+ * @output: query return
+ * @property: type of query (frequency, level, max level)
+ *
+ * This is the common function to
+ * 1. get maximum clock cooling states
+ * 2. translate frequency to cooling state
+ * 3. translate cooling state to frequency
+ * Note that the code may be not in good shape
+ * but it is written in this way in order to:
+ * a) reduce duplicate code as most of the code can be shared.
+ * b) make sure the logic is consistent when translating between
+ *    cooling states and frequencies.
+ *
+ * Return: 0 on success, -EINVAL when invalid parameters are passed.
+ */
+static int clock_cooling_get_property(struct clock_cooling_device *ccdev,
+				      unsigned long input,
+				      unsigned long *output,
+				      enum clock_cooling_property property)
+{
+	int i;
+	unsigned long max_level = 0, level = 0;
+	unsigned int freq = CPUFREQ_ENTRY_INVALID;
+	int descend = -1;
+	struct cpufreq_frequency_table *pos, *table = ccdev->freq_table;
+
+	if (!output)
+		return -EINVAL;
+
+	if (!table)
+		return -EINVAL;
+
+	cpufreq_for_each_valid_entry(pos, table) {
+		/* ignore duplicate entry */
+		if (freq == pos->frequency)
+			continue;
+
+		/* get the frequency order */
+		if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
+			descend = freq > pos->frequency;
+
+		freq = pos->frequency;
+		max_level++;
+	}
+
+	/* No valid cpu frequency entry */
+	if (max_level == 0)
+		return -EINVAL;
+
+	/* max_level is an index, not a counter */
+	max_level--;
+
+	/* get max level */
+	if (property == GET_MAXL) {
+		*output = max_level;
+		return 0;
+	}
+
+	if (property == GET_FREQ)
+		level = descend ? input : (max_level - input);
+
+	i = 0;
+	cpufreq_for_each_valid_entry(pos, table) {
+		/* ignore duplicate entry */
+		if (freq == pos->frequency)
+			continue;
+
+		/* now we have a valid frequency entry */
+		freq = pos->frequency;
+
+		if (property == GET_LEVEL && (unsigned int)input == freq) {
+			/* get level by frequency */
+			*output = descend ? i : (max_level - i);
+			return 0;
+		}
+		if (property == GET_FREQ && level == i) {
+			/* get frequency by level */
+			*output = freq;
+			return 0;
+		}
+		i++;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * clock_cooling_get_level - return the cooling level of given clock cooling.
+ * @cdev: reference of a thermal cooling device of used as clock cooling device
+ * @freq: the frequency of interest
+ *
+ * This function will match the cooling level corresponding to the
+ * requested @freq and return it.
+ *
+ * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
+ * otherwise.
+ */
+unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev,
+				      unsigned long freq)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+	unsigned long val;
+
+	if (clock_cooling_get_property(ccdev, (unsigned long)freq, &val,
+				       GET_LEVEL))
+		return THERMAL_CSTATE_INVALID;
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(clock_cooling_get_level);
+
+/**
+ * clock_cooling_get_frequency - get the absolute value of frequency from level.
+ * @ccdev: clock cooling device reference
+ * @level: cooling level
+ *
+ * This function matches cooling level with frequency. Based on a cooling level
+ * of frequency, equals cooling state of cpu cooling device, it will return
+ * the corresponding frequency.
+ *	e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
+ *
+ * Return: 0 on error, the corresponding frequency otherwise.
+ */
+static unsigned long
+clock_cooling_get_frequency(struct clock_cooling_device *ccdev,
+			    unsigned long level)
+{
+	int ret = 0;
+	unsigned long freq;
+
+	ret = clock_cooling_get_property(ccdev, level, &freq, GET_FREQ);
+	if (ret)
+		return 0;
+
+	return freq;
+}
+
+/**
+ * clock_cooling_apply - function to apply frequency clipping.
+ * @ccdev: clock_cooling_device pointer containing frequency clipping data.
+ * @cooling_state: value of the cooling state.
+ *
+ * Function used to make sure the clock layer is aware of current thermal
+ * limits. The limits are applied by updating the clock rate in case it is
+ * higher than the corresponding frequency based on the requested cooling_state.
+ *
+ * Return: 0 on success, an error code otherwise (-EINVAL in case wrong
+ * cooling state).
+ */
+static int clock_cooling_apply(struct clock_cooling_device *ccdev,
+			       unsigned long cooling_state)
+{
+	unsigned long clip_freq, cur_freq;
+	int ret = 0;
+
+	/* Here we write the clipping */
+	/* Check if the old cooling action is same as new cooling action */
+	if (ccdev->clock_state == cooling_state)
+		return 0;
+
+	clip_freq = clock_cooling_get_frequency(ccdev, cooling_state);
+	if (!clip_freq)
+		return -EINVAL;
+
+	cur_freq = clk_get_rate(ccdev->clk);
+
+	mutex_lock(&ccdev->lock);
+	ccdev->clock_state = cooling_state;
+	ccdev->clock_val = clip_freq;
+	/* enforce clock level */
+	if (cur_freq > clip_freq)
+		ret = clk_set_rate(ccdev->clk, clip_freq);
+	mutex_unlock(&ccdev->lock);
+
+	return ret;
+}
+
+/**
+ * clock_cooling_clock_notifier - notifier callback on clock rate changes.
+ * @nb:	struct notifier_block * with callback info.
+ * @event: value showing clock event for which this function invoked.
+ * @data: callback-specific data
+ *
+ * Callback to hijack the notification on clock transition.
+ * Every time there is a clock change, we intercept all pre change events
+ * and block the transition in case the new rate infringes thermal limits.
+ *
+ * Return: NOTIFY_DONE (success) or NOTIFY_BAD (new_rate > thermal limit).
+ */
+static int clock_cooling_clock_notifier(struct notifier_block *nb,
+					unsigned long event, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+	struct clock_cooling_device *ccdev = to_clock_cooling_device(nb);
+
+	switch (event) {
+	case PRE_RATE_CHANGE:
+		/*
+		 * checks on current state
+		 * TODO: current method is not best we can find as it
+		 * allows possibly voltage transitions, in case DVFS
+		 * layer is also hijacking clock pre notifications.
+		 */
+		if (ndata->new_rate > ccdev->clock_val)
+			return NOTIFY_BAD;
+		/* fall through */
+	case POST_RATE_CHANGE:
+	case ABORT_RATE_CHANGE:
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+/* clock cooling device thermal callback functions are defined below */
+
+/**
+ * clock_cooling_get_max_state - callback function to get the max cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: fill this variable with the max cooling state.
+ *
+ * Callback for the thermal cooling device to return the clock
+ * max cooling state.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+static int clock_cooling_get_max_state(struct thermal_cooling_device *cdev,
+				       unsigned long *state)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+	unsigned long count = 0;
+	int ret;
+
+	ret = clock_cooling_get_property(ccdev, 0, &count, GET_MAXL);
+	if (!ret)
+		*state = count;
+
+	return ret;
+}
+
+/**
+ * clock_cooling_get_cur_state - function to get the current cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: fill this variable with the current cooling state.
+ *
+ * Callback for the thermal cooling device to return the clock
+ * current cooling state.
+ *
+ * Return: 0 (success)
+ */
+static int clock_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long *state)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+
+	*state = ccdev->clock_state;
+
+	return 0;
+}
+
+/**
+ * clock_cooling_set_cur_state - function to set the current cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: set this variable to the current cooling state.
+ *
+ * Callback for the thermal cooling device to change the clock cooling
+ * current cooling state.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+static int clock_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long state)
+{
+	struct clock_cooling_device *clock_device = cdev->devdata;
+
+	return clock_cooling_apply(clock_device, state);
+}
+
+/* Bind clock callbacks to thermal cooling device ops */
+static struct thermal_cooling_device_ops const clock_cooling_ops = {
+	.get_max_state = clock_cooling_get_max_state,
+	.get_cur_state = clock_cooling_get_cur_state,
+	.set_cur_state = clock_cooling_set_cur_state,
+};
+
+/**
+ * clock_cooling_register - function to create clock cooling device.
+ * @dev: struct device pointer to the device used as clock cooling device.
+ * @clock_name: string containing the clock used as cooling mechanism.
+ *
+ * This interface function registers the clock cooling device with the name
+ * "thermal-clock-%x". The cooling device is based on clock frequencies.
+ * The struct device is assumed to be capable of DVFS transitions.
+ * The OPP layer is used to fetch and fill the available frequencies for
+ * the referred device. The ordered frequency table is used to control
+ * the clock cooling device cooling states and to limit clock transitions
+ * based on the cooling state requested by the thermal framework.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+clock_cooling_register(struct device *dev, const char *clock_name)
+{
+	struct thermal_cooling_device *cdev;
+	struct clock_cooling_device *ccdev = NULL;
+	char dev_name[THERMAL_NAME_LENGTH];
+	int ret = 0;
+
+	ccdev = devm_kzalloc(dev, sizeof(*ccdev), GFP_KERNEL);
+	if (!ccdev)
+		return ERR_PTR(-ENOMEM);
+
+	ccdev->dev = dev;
+	ccdev->clk = devm_clk_get(dev, clock_name);
+	if (IS_ERR(ccdev->clk))
+		return ERR_CAST(ccdev->clk);
+
+	ret = clock_cooling_get_idr(&ccdev->id);
+	if (ret)
+		return ERR_PTR(-EINVAL);
+
+	snprintf(dev_name, sizeof(dev_name), "thermal-clock-%d", ccdev->id);
+
+	cdev = thermal_cooling_device_register(dev_name, ccdev,
+					       &clock_cooling_ops);
+	if (IS_ERR(cdev)) {
+		release_idr(ccdev->id);
+		return ERR_PTR(-EINVAL);
+	}
+	ccdev->cdev = cdev;
+	ccdev->clk_rate_change_nb.notifier_call = clock_cooling_clock_notifier;
+
+	/* Assuming someone has already filled the opp table for this device */
+	ret = dev_pm_opp_init_cpufreq_table(dev, &ccdev->freq_table);
+	if (ret) {
+		release_idr(ccdev->id);
+		return ERR_PTR(ret);
+	}
+	ccdev->clock_state = 0;
+	ccdev->clock_val = clock_cooling_get_frequency(ccdev, 0);
+
+	clk_notifier_register(ccdev->clk, &ccdev->clk_rate_change_nb);
+
+	return cdev;
+}
+EXPORT_SYMBOL_GPL(clock_cooling_register);
+
+/**
+ * clock_cooling_unregister - function to remove clock cooling device.
+ * @cdev: thermal cooling device pointer.
+ *
+ * This interface function unregisters the "thermal-clock-%x" cooling device.
+ */
+void clock_cooling_unregister(struct thermal_cooling_device *cdev)
+{
+	struct clock_cooling_device *ccdev;
+
+	if (!cdev)
+		return;
+
+	ccdev = cdev->devdata;
+
+	clk_notifier_unregister(ccdev->clk, &ccdev->clk_rate_change_nb);
+	dev_pm_opp_free_cpufreq_table(ccdev->dev, &ccdev->freq_table);
+
+	thermal_cooling_device_unregister(ccdev->cdev);
+	release_idr(ccdev->id);
+}
+EXPORT_SYMBOL_GPL(clock_cooling_unregister);
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index ad09e51ffae4..6509c61b9648 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -4,6 +4,8 @@
  *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
  *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
  *
+ *  Copyright (C) 2014  Viresh Kumar <viresh.kumar@linaro.org>
+ *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -24,10 +26,40 @@
 #include <linux/thermal.h>
 #include <linux/cpufreq.h>
 #include <linux/err.h>
+#include <linux/pm_opp.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
 
+#include <trace/events/thermal.h>
+
+/*
+ * Cooling state <-> CPUFreq frequency
+ *
+ * Cooling states are translated to frequencies throughout this driver and this
+ * is the relation between them.
+ *
+ * Highest cooling state corresponds to lowest possible frequency.
+ *
+ * i.e.
+ *	level 0 --> 1st Max Freq
+ *	level 1 --> 2nd Max Freq
+ *	...
+ */
+
+/**
+ * struct power_table - frequency to power conversion
+ * @frequency:	frequency in KHz
+ * @power:	power in mW
+ *
+ * This structure is built when the cooling device registers and helps
+ * in translating frequency to power and viceversa.
+ */
+struct power_table {
+	u32 frequency;
+	u32 power;
+};
+
 /**
  * struct cpufreq_cooling_device - data for cooling device with cpufreq
  * @id: unique integer value corresponding to each cpufreq_cooling_device
@@ -38,25 +70,43 @@
  *	cooling	devices.
  * @cpufreq_val: integer value representing the absolute value of the clipped
  *	frequency.
+ * @max_level: maximum cooling level. One less than total number of valid
+ *	cpufreq frequencies.
  * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
+ * @node: list_head to link all cpufreq_cooling_device together.
+ * @last_load: load measured by the latest call to cpufreq_get_actual_power()
+ * @time_in_idle: previous reading of the absolute time that this cpu was idle
+ * @time_in_idle_timestamp: wall time of the last invocation of
+ *	get_cpu_idle_time_us()
+ * @dyn_power_table: array of struct power_table for frequency to power
+ *	conversion, sorted in ascending order.
+ * @dyn_power_table_entries: number of entries in the @dyn_power_table array
+ * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
+ * @plat_get_static_power: callback to calculate the static power
  *
- * This structure is required for keeping information of each
- * cpufreq_cooling_device registered. In order to prevent corruption of this a
- * mutex lock cooling_cpufreq_lock is used.
+ * This structure is required for keeping information of each registered
+ * cpufreq_cooling_device.
  */
 struct cpufreq_cooling_device {
 	int id;
 	struct thermal_cooling_device *cool_dev;
 	unsigned int cpufreq_state;
 	unsigned int cpufreq_val;
+	unsigned int max_level;
+	unsigned int *freq_table;	/* In descending order */
 	struct cpumask allowed_cpus;
 	struct list_head node;
+	u32 last_load;
+	u64 *time_in_idle;
+	u64 *time_in_idle_timestamp;
+	struct power_table *dyn_power_table;
+	int dyn_power_table_entries;
+	struct device *cpu_dev;
+	get_static_t plat_get_static_power;
 };
 static DEFINE_IDR(cpufreq_idr);
 static DEFINE_MUTEX(cooling_cpufreq_lock);
 
-static unsigned int cpufreq_dev_count;
-
 static LIST_HEAD(cpufreq_dev_list);
 
 /**
@@ -98,120 +148,30 @@ static void release_idr(struct idr *idr, int id)
 /* Below code defines functions to be used for cpufreq as cooling device */
 
 /**
- * is_cpufreq_valid - function to check frequency transitioning capability.
- * @cpu: cpu for which check is needed.
+ * get_level: Find the level for a particular frequency
+ * @cpufreq_dev: cpufreq_dev for which the property is required
+ * @freq: Frequency
  *
- * This function will check the current state of the system if
- * it is capable of changing the frequency for a given @cpu.
- *
- * Return: 0 if the system is not currently capable of changing
- * the frequency of given cpu. !0 in case the frequency is changeable.
- */
-static int is_cpufreq_valid(int cpu)
-{
-	struct cpufreq_policy policy;
-
-	return !cpufreq_get_policy(&policy, cpu);
-}
-
-enum cpufreq_cooling_property {
-	GET_LEVEL,
-	GET_FREQ,
-	GET_MAXL,
-};
-
-/**
- * get_property - fetch a property of interest for a give cpu.
- * @cpu: cpu for which the property is required
- * @input: query parameter
- * @output: query return
- * @property: type of query (frequency, level, max level)
- *
- * This is the common function to
- * 1. get maximum cpu cooling states
- * 2. translate frequency to cooling state
- * 3. translate cooling state to frequency
- * Note that the code may be not in good shape
- * but it is written in this way in order to:
- * a) reduce duplicate code as most of the code can be shared.
- * b) make sure the logic is consistent when translating between
- *    cooling states and frequencies.
- *
- * Return: 0 on success, -EINVAL when invalid parameters are passed.
+ * Return: level on success, THERMAL_CSTATE_INVALID on error.
  */
-static int get_property(unsigned int cpu, unsigned long input,
-			unsigned int *output,
-			enum cpufreq_cooling_property property)
+static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
+			       unsigned int freq)
 {
-	int i;
-	unsigned long max_level = 0, level = 0;
-	unsigned int freq = CPUFREQ_ENTRY_INVALID;
-	int descend = -1;
-	struct cpufreq_frequency_table *pos, *table =
-					cpufreq_frequency_get_table(cpu);
-
-	if (!output)
-		return -EINVAL;
-
-	if (!table)
-		return -EINVAL;
-
-	cpufreq_for_each_valid_entry(pos, table) {
-		/* ignore duplicate entry */
-		if (freq == pos->frequency)
-			continue;
-
-		/* get the frequency order */
-		if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
-			descend = freq > pos->frequency;
-
-		freq = pos->frequency;
-		max_level++;
-	}
-
-	/* No valid cpu frequency entry */
-	if (max_level == 0)
-		return -EINVAL;
+	unsigned long level;
 
-	/* max_level is an index, not a counter */
-	max_level--;
-
-	/* get max level */
-	if (property == GET_MAXL) {
-		*output = (unsigned int)max_level;
-		return 0;
-	}
-
-	if (property == GET_FREQ)
-		level = descend ? input : (max_level - input);
-
-	i = 0;
-	cpufreq_for_each_valid_entry(pos, table) {
-		/* ignore duplicate entry */
-		if (freq == pos->frequency)
-			continue;
-
-		/* now we have a valid frequency entry */
-		freq = pos->frequency;
+	for (level = 0; level <= cpufreq_dev->max_level; level++) {
+		if (freq == cpufreq_dev->freq_table[level])
+			return level;
 
-		if (property == GET_LEVEL && (unsigned int)input == freq) {
-			/* get level by frequency */
-			*output = descend ? i : (max_level - i);
-			return 0;
-		}
-		if (property == GET_FREQ && level == i) {
-			/* get frequency by level */
-			*output = freq;
-			return 0;
-		}
-		i++;
+		if (freq > cpufreq_dev->freq_table[level])
+			break;
 	}
 
-	return -EINVAL;
+	return THERMAL_CSTATE_INVALID;
 }
 
 /**
- * cpufreq_cooling_get_level - for a give cpu, return the cooling level.
+ * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
  * @cpu: cpu for which the level is required
  * @freq: the frequency of interest
  *
@@ -223,119 +183,272 @@ static int get_property(unsigned int cpu, unsigned long input,
  */
 unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
 {
-	unsigned int val;
+	struct cpufreq_cooling_device *cpufreq_dev;
 
-	if (get_property(cpu, (unsigned long)freq, &val, GET_LEVEL))
-		return THERMAL_CSTATE_INVALID;
+	mutex_lock(&cooling_cpufreq_lock);
+	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
+		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
+			mutex_unlock(&cooling_cpufreq_lock);
+			return get_level(cpufreq_dev, freq);
+		}
+	}
+	mutex_unlock(&cooling_cpufreq_lock);
 
-	return (unsigned long)val;
+	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
+	return THERMAL_CSTATE_INVALID;
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
 
 /**
- * get_cpu_frequency - get the absolute value of frequency from level.
- * @cpu: cpu for which frequency is fetched.
- * @level: cooling level
+ * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
+ * @nb:	struct notifier_block * with callback info.
+ * @event: value showing cpufreq event for which this function invoked.
+ * @data: callback-specific data
  *
- * This function matches cooling level with frequency. Based on a cooling level
- * of frequency, equals cooling state of cpu cooling device, it will return
- * the corresponding frequency.
- *	e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
+ * Callback to hijack the notification on cpufreq policy transition.
+ * Every time there is a change in policy, we will intercept and
+ * update the cpufreq policy with thermal constraints.
  *
- * Return: 0 on error, the corresponding frequency otherwise.
+ * Return: 0 (success)
  */
-static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level)
+static int cpufreq_thermal_notifier(struct notifier_block *nb,
+				    unsigned long event, void *data)
 {
-	int ret = 0;
-	unsigned int freq;
+	struct cpufreq_policy *policy = data;
+	unsigned long max_freq = 0;
+	struct cpufreq_cooling_device *cpufreq_dev;
 
-	ret = get_property(cpu, level, &freq, GET_FREQ);
-	if (ret)
-		return 0;
+	switch (event) {
+
+	case CPUFREQ_ADJUST:
+		mutex_lock(&cooling_cpufreq_lock);
+		list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
+			if (!cpumask_test_cpu(policy->cpu,
+					      &cpufreq_dev->allowed_cpus))
+				continue;
 
-	return freq;
+			max_freq = cpufreq_dev->cpufreq_val;
+
+			if (policy->max != max_freq)
+				cpufreq_verify_within_limits(policy, 0,
+							     max_freq);
+		}
+		mutex_unlock(&cooling_cpufreq_lock);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
 }
 
 /**
- * cpufreq_apply_cooling - function to apply frequency clipping.
- * @cpufreq_device: cpufreq_cooling_device pointer containing frequency
- *	clipping data.
- * @cooling_state: value of the cooling state.
+ * build_dyn_power_table() - create a dynamic power to frequency table
+ * @cpufreq_device:	the cpufreq cooling device in which to store the table
+ * @capacitance: dynamic power coefficient for these cpus
  *
- * Function used to make sure the cpufreq layer is aware of current thermal
- * limits. The limits are applied by updating the cpufreq policy.
+ * Build a dynamic power to frequency table for this cpu and store it
+ * in @cpufreq_device.  This table will be used in cpu_power_to_freq() and
+ * cpu_freq_to_power() to convert between power and frequency
+ * efficiently.  Power is stored in mW, frequency in KHz.  The
+ * resulting table is in ascending order.
  *
- * Return: 0 on success, an error code otherwise (-EINVAL in case wrong
- * cooling state).
+ * Return: 0 on success, -E* on error.
  */
-static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device,
-				 unsigned long cooling_state)
+static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
+				 u32 capacitance)
 {
-	unsigned int cpuid, clip_freq;
-	struct cpumask *mask = &cpufreq_device->allowed_cpus;
-	unsigned int cpu = cpumask_any(mask);
+	struct power_table *power_table;
+	struct dev_pm_opp *opp;
+	struct device *dev = NULL;
+	int num_opps = 0, cpu, i, ret = 0;
+	unsigned long freq;
+
+	rcu_read_lock();
+
+	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+		dev = get_cpu_device(cpu);
+		if (!dev) {
+			dev_warn(&cpufreq_device->cool_dev->device,
+				 "No cpu device for cpu %d\n", cpu);
+			continue;
+		}
 
+		num_opps = dev_pm_opp_get_opp_count(dev);
+		if (num_opps > 0) {
+			break;
+		} else if (num_opps < 0) {
+			ret = num_opps;
+			goto unlock;
+		}
+	}
 
-	/* Check if the old cooling action is same as new cooling action */
-	if (cpufreq_device->cpufreq_state == cooling_state)
-		return 0;
+	if (num_opps == 0) {
+		ret = -EINVAL;
+		goto unlock;
+	}
 
-	clip_freq = get_cpu_frequency(cpu, cooling_state);
-	if (!clip_freq)
-		return -EINVAL;
+	power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
+	if (!power_table) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
 
-	cpufreq_device->cpufreq_state = cooling_state;
-	cpufreq_device->cpufreq_val = clip_freq;
+	for (freq = 0, i = 0;
+	     opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
+	     freq++, i++) {
+		u32 freq_mhz, voltage_mv;
+		u64 power;
+
+		freq_mhz = freq / 1000000;
+		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
+
+		/*
+		 * Do the multiplication with MHz and millivolt so as
+		 * to not overflow.
+		 */
+		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
+		do_div(power, 1000000000);
 
-	for_each_cpu(cpuid, mask) {
-		if (is_cpufreq_valid(cpuid))
-			cpufreq_update_policy(cpuid);
+		/* frequency is stored in power_table in KHz */
+		power_table[i].frequency = freq / 1000;
+
+		/* power is stored in mW */
+		power_table[i].power = power;
 	}
 
-	return 0;
+	if (i == 0) {
+		ret = PTR_ERR(opp);
+		goto unlock;
+	}
+
+	cpufreq_device->cpu_dev = dev;
+	cpufreq_device->dyn_power_table = power_table;
+	cpufreq_device->dyn_power_table_entries = i;
+
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+
+static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
+			     u32 freq)
+{
+	int i;
+	struct power_table *pt = cpufreq_device->dyn_power_table;
+
+	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+		if (freq < pt[i].frequency)
+			break;
+
+	return pt[i - 1].power;
+}
+
+static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
+			     u32 power)
+{
+	int i;
+	struct power_table *pt = cpufreq_device->dyn_power_table;
+
+	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+		if (power < pt[i].power)
+			break;
+
+	return pt[i - 1].frequency;
 }
 
 /**
- * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
- * @nb:	struct notifier_block * with callback info.
- * @event: value showing cpufreq event for which this function invoked.
- * @data: callback-specific data
+ * get_load() - get load for a cpu since last updated
+ * @cpufreq_device:	&struct cpufreq_cooling_device for this cpu
+ * @cpu:	cpu number
  *
- * Callback to hijack the notification on cpufreq policy transition.
- * Every time there is a change in policy, we will intercept and
- * update the cpufreq policy with thermal constraints.
- *
- * Return: 0 (success)
+ * Return: The average load of cpu @cpu in percentage since this
+ * function was last called.
  */
-static int cpufreq_thermal_notifier(struct notifier_block *nb,
-				    unsigned long event, void *data)
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
 {
-	struct cpufreq_policy *policy = data;
-	unsigned long max_freq = 0;
-	struct cpufreq_cooling_device *cpufreq_dev;
+	u32 load;
+	u64 now, now_idle, delta_time, delta_idle;
+
+	now_idle = get_cpu_idle_time(cpu, &now, 0);
+	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
+	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
+
+	if (delta_time <= delta_idle)
+		load = 0;
+	else
+		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 
-	if (event != CPUFREQ_ADJUST)
+	cpufreq_device->time_in_idle[cpu] = now_idle;
+	cpufreq_device->time_in_idle_timestamp[cpu] = now;
+
+	return load;
+}
+
+/**
+ * get_static_power() - calculate the static power consumed by the cpus
+ * @cpufreq_device:	struct &cpufreq_cooling_device for this cpu cdev
+ * @tz:		thermal zone device in which we're operating
+ * @freq:	frequency in KHz
+ * @power:	pointer in which to store the calculated static power
+ *
+ * Calculate the static power consumed by the cpus described by
+ * @cpu_actor running at frequency @freq.  This function relies on a
+ * platform specific function that should have been provided when the
+ * actor was registered.  If it wasn't, the static power is assumed to
+ * be negligible.  The calculated static power is stored in @power.
+ *
+ * Return: 0 on success, -E* on failure.
+ */
+static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
+			    struct thermal_zone_device *tz, unsigned long freq,
+			    u32 *power)
+{
+	struct dev_pm_opp *opp;
+	unsigned long voltage;
+	struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
+	unsigned long freq_hz = freq * 1000;
+
+	if (!cpufreq_device->plat_get_static_power ||
+	    !cpufreq_device->cpu_dev) {
+		*power = 0;
 		return 0;
+	}
 
-	mutex_lock(&cooling_cpufreq_lock);
-	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
-		if (!cpumask_test_cpu(policy->cpu,
-					&cpufreq_dev->allowed_cpus))
-			continue;
+	rcu_read_lock();
 
-		if (!cpufreq_dev->cpufreq_val)
-			cpufreq_dev->cpufreq_val = get_cpu_frequency(
-					cpumask_any(&cpufreq_dev->allowed_cpus),
-					cpufreq_dev->cpufreq_state);
+	opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
+					 true);
+	voltage = dev_pm_opp_get_voltage(opp);
 
-		max_freq = cpufreq_dev->cpufreq_val;
+	rcu_read_unlock();
 
-		if (policy->max != max_freq)
-			cpufreq_verify_within_limits(policy, 0, max_freq);
+	if (voltage == 0) {
+		dev_warn_ratelimited(cpufreq_device->cpu_dev,
+				     "Failed to get voltage for frequency %lu: %ld\n",
+				     freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+		return -EINVAL;
 	}
-	mutex_unlock(&cooling_cpufreq_lock);
 
-	return 0;
+	return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
+						     voltage, power);
+}
+
+/**
+ * get_dynamic_power() - calculate the dynamic power
+ * @cpufreq_device:	&cpufreq_cooling_device for this cdev
+ * @freq:	current frequency
+ *
+ * Return: the dynamic power consumed by the cpus described by
+ * @cpufreq_device.
+ */
+static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
+			     unsigned long freq)
+{
+	u32 raw_cpu_power;
+
+	raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
+	return (raw_cpu_power * cpufreq_device->last_load) / 100;
 }
 
 /* cpufreq cooling device callback functions are defined below */
@@ -354,19 +467,9 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
 {
 	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
-	struct cpumask *mask = &cpufreq_device->allowed_cpus;
-	unsigned int cpu;
-	unsigned int count = 0;
-	int ret;
 
-	cpu = cpumask_any(mask);
-
-	ret = get_property(cpu, 0, &count, GET_MAXL);
-
-	if (count > 0)
-		*state = count;
-
-	return ret;
+	*state = cpufreq_device->max_level;
+	return 0;
 }
 
 /**
@@ -403,12 +506,225 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long state)
 {
 	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
+	unsigned int clip_freq;
+
+	/* Request state should be less than max_level */
+	if (WARN_ON(state > cpufreq_device->max_level))
+		return -EINVAL;
+
+	/* Check if the old cooling action is same as new cooling action */
+	if (cpufreq_device->cpufreq_state == state)
+		return 0;
+
+	clip_freq = cpufreq_device->freq_table[state];
+	cpufreq_device->cpufreq_state = state;
+	cpufreq_device->cpufreq_val = clip_freq;
+
+	cpufreq_update_policy(cpu);
+
+	return 0;
+}
+
+/**
+ * cpufreq_get_requested_power() - get the current power
+ * @cdev:	&thermal_cooling_device pointer
+ * @tz:		a valid thermal zone device pointer
+ * @power:	pointer in which to store the resulting power
+ *
+ * Calculate the current power consumption of the cpus in milliwatts
+ * and store it in @power.  This function should actually calculate
+ * the requested power, but it's hard to get the frequency that
+ * cpufreq would have assigned if there were no thermal limits.
+ * Instead, we calculate the current power on the assumption that the
+ * immediate future will look like the immediate past.
+ *
+ * We use the current frequency and the average load since this
+ * function was last called.  In reality, there could have been
+ * multiple opps since this function was last called and that affects
+ * the load calculation.  While it's not perfectly accurate, this
+ * simplification is good enough and works.  REVISIT this, as more
+ * complex code may be needed if experiments show that it's not
+ * accurate enough.
+ *
+ * Return: 0 on success, -E* if getting the static power failed.
+ */
+static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
+				       struct thermal_zone_device *tz,
+				       u32 *power)
+{
+	unsigned long freq;
+	int i = 0, cpu, ret;
+	u32 static_power, dynamic_power, total_load = 0;
+	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	u32 *load_cpu = NULL;
+
+	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+	/*
+	 * All the CPUs are offline, thus the requested power by
+	 * the cdev is 0
+	 */
+	if (cpu >= nr_cpu_ids) {
+		*power = 0;
+		return 0;
+	}
+
+	freq = cpufreq_quick_get(cpu);
+
+	if (trace_thermal_power_cpu_get_power_enabled()) {
+		u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
+
+		load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
+					GFP_KERNEL);
+	}
+
+	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+		u32 load;
+
+		if (cpu_online(cpu))
+			load = get_load(cpufreq_device, cpu);
+		else
+			load = 0;
+
+		total_load += load;
+		if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
+			load_cpu[i] = load;
+
+		i++;
+	}
+
+	cpufreq_device->last_load = total_load;
+
+	dynamic_power = get_dynamic_power(cpufreq_device, freq);
+	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+	if (ret) {
+		if (load_cpu)
+			devm_kfree(&cdev->device, load_cpu);
+		return ret;
+	}
+
+	if (load_cpu) {
+		trace_thermal_power_cpu_get_power(
+			&cpufreq_device->allowed_cpus,
+			freq, load_cpu, i, dynamic_power, static_power);
+
+		devm_kfree(&cdev->device, load_cpu);
+	}
+
+	*power = static_power + dynamic_power;
+	return 0;
+}
+
+/**
+ * cpufreq_state2power() - convert a cpu cdev state to power consumed
+ * @cdev:	&thermal_cooling_device pointer
+ * @tz:		a valid thermal zone device pointer
+ * @state:	cooling device state to be converted
+ * @power:	pointer in which to store the resulting power
+ *
+ * Convert cooling device state @state into power consumption in
+ * milliwatts assuming 100% load.  Store the calculated power in
+ * @power.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device state could not
+ * be converted into a frequency or other -E* if there was an error
+ * when calculating the static power.
+ */
+static int cpufreq_state2power(struct thermal_cooling_device *cdev,
+			       struct thermal_zone_device *tz,
+			       unsigned long state, u32 *power)
+{
+	unsigned int freq, num_cpus;
+	cpumask_t cpumask;
+	u32 static_power, dynamic_power;
+	int ret;
+	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+	cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
+	num_cpus = cpumask_weight(&cpumask);
+
+	/* None of our cpus are online, so no power */
+	if (num_cpus == 0) {
+		*power = 0;
+		return 0;
+	}
+
+	freq = cpufreq_device->freq_table[state];
+	if (!freq)
+		return -EINVAL;
 
-	return cpufreq_apply_cooling(cpufreq_device, state);
+	dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
+	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+	if (ret)
+		return ret;
+
+	*power = static_power + dynamic_power;
+	return 0;
+}
+
+/**
+ * cpufreq_power2state() - convert power to a cooling device state
+ * @cdev:	&thermal_cooling_device pointer
+ * @tz:		a valid thermal zone device pointer
+ * @power:	power in milliwatts to be converted
+ * @state:	pointer in which to store the resulting state
+ *
+ * Calculate a cooling device state for the cpus described by @cdev
+ * that would allow them to consume at most @power mW and store it in
+ * @state.  Note that this calculation depends on external factors
+ * such as the cpu load or the current static power.  Calling this
+ * function with the same power as input can yield different cooling
+ * device states depending on those external factors.
+ *
+ * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
+ * the calculated frequency could not be converted to a valid state.
+ * The latter should not happen unless the frequencies available to
+ * cpufreq have changed since the initialization of the cpu cooling
+ * device.
+ */
+static int cpufreq_power2state(struct thermal_cooling_device *cdev,
+			       struct thermal_zone_device *tz, u32 power,
+			       unsigned long *state)
+{
+	unsigned int cpu, cur_freq, target_freq;
+	int ret;
+	s32 dyn_power;
+	u32 last_load, normalised_power, static_power;
+	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+	/* None of our cpus are online */
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+
+	cur_freq = cpufreq_quick_get(cpu);
+	ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
+	if (ret)
+		return ret;
+
+	dyn_power = power - static_power;
+	dyn_power = dyn_power > 0 ? dyn_power : 0;
+	last_load = cpufreq_device->last_load ?: 1;
+	normalised_power = (dyn_power * 100) / last_load;
+	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
+
+	*state = cpufreq_cooling_get_level(cpu, target_freq);
+	if (*state == THERMAL_CSTATE_INVALID) {
+		dev_warn_ratelimited(&cdev->device,
+				     "Failed to convert %dKHz for cpu %d into a cdev state\n",
+				     target_freq, cpu);
+		return -EINVAL;
+	}
+
+	trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
+				      target_freq, *state, power);
+	return 0;
 }
 
 /* Bind cpufreq callbacks to thermal cooling device ops */
-static struct thermal_cooling_device_ops const cpufreq_cooling_ops = {
+static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
 	.get_max_state = cpufreq_get_max_state,
 	.get_cur_state = cpufreq_get_cur_state,
 	.set_cur_state = cpufreq_set_cur_state,
@@ -419,10 +735,28 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
 	.notifier_call = cpufreq_thermal_notifier,
 };
 
+static unsigned int find_next_max(struct cpufreq_frequency_table *table,
+				  unsigned int prev_max)
+{
+	struct cpufreq_frequency_table *pos;
+	unsigned int max = 0;
+
+	cpufreq_for_each_valid_entry(pos, table) {
+		if (pos->frequency > max && pos->frequency < prev_max)
+			max = pos->frequency;
+	}
+
+	return max;
+}
+
 /**
  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  * @np: a valid struct device_node to the cooling device device tree node
  * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * Normally this should be same as cpufreq policy->related_cpus.
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ *                    cpus (optional)
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -434,40 +768,77 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
  */
 static struct thermal_cooling_device *
 __cpufreq_cooling_register(struct device_node *np,
-			   const struct cpumask *clip_cpus)
+			const struct cpumask *clip_cpus, u32 capacitance,
+			get_static_t plat_static_func)
 {
 	struct thermal_cooling_device *cool_dev;
-	struct cpufreq_cooling_device *cpufreq_dev = NULL;
-	unsigned int min = 0, max = 0;
+	struct cpufreq_cooling_device *cpufreq_dev;
 	char dev_name[THERMAL_NAME_LENGTH];
-	int ret = 0, i;
-	struct cpufreq_policy policy;
+	struct cpufreq_frequency_table *pos, *table;
+	unsigned int freq, i, num_cpus;
+	int ret;
 
-	/* Verify that all the clip cpus have same freq_min, freq_max limit */
-	for_each_cpu(i, clip_cpus) {
-		/* continue if cpufreq policy not found and not return error */
-		if (!cpufreq_get_policy(&policy, i))
-			continue;
-		if (min == 0 && max == 0) {
-			min = policy.cpuinfo.min_freq;
-			max = policy.cpuinfo.max_freq;
-		} else {
-			if (min != policy.cpuinfo.min_freq ||
-			    max != policy.cpuinfo.max_freq)
-				return ERR_PTR(-EINVAL);
-		}
+	table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
+	if (!table) {
+		pr_debug("%s: CPUFreq table not found\n", __func__);
+		return ERR_PTR(-EPROBE_DEFER);
 	}
-	cpufreq_dev = kzalloc(sizeof(struct cpufreq_cooling_device),
-			      GFP_KERNEL);
+
+	cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
 	if (!cpufreq_dev)
 		return ERR_PTR(-ENOMEM);
 
+	num_cpus = cpumask_weight(clip_cpus);
+	cpufreq_dev->time_in_idle = kcalloc(num_cpus,
+					    sizeof(*cpufreq_dev->time_in_idle),
+					    GFP_KERNEL);
+	if (!cpufreq_dev->time_in_idle) {
+		cool_dev = ERR_PTR(-ENOMEM);
+		goto free_cdev;
+	}
+
+	cpufreq_dev->time_in_idle_timestamp =
+		kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
+			GFP_KERNEL);
+	if (!cpufreq_dev->time_in_idle_timestamp) {
+		cool_dev = ERR_PTR(-ENOMEM);
+		goto free_time_in_idle;
+	}
+
+	/* Find max levels */
+	cpufreq_for_each_valid_entry(pos, table)
+		cpufreq_dev->max_level++;
+
+	cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
+					  cpufreq_dev->max_level, GFP_KERNEL);
+	if (!cpufreq_dev->freq_table) {
+		cool_dev = ERR_PTR(-ENOMEM);
+		goto free_time_in_idle_timestamp;
+	}
+
+	/* max_level is an index, not a counter */
+	cpufreq_dev->max_level--;
+
 	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
 
+	if (capacitance) {
+		cpufreq_cooling_ops.get_requested_power =
+			cpufreq_get_requested_power;
+		cpufreq_cooling_ops.state2power = cpufreq_state2power;
+		cpufreq_cooling_ops.power2state = cpufreq_power2state;
+		cpufreq_dev->plat_get_static_power = plat_static_func;
+
+		ret = build_dyn_power_table(cpufreq_dev, capacitance);
+		if (ret) {
+			cool_dev = ERR_PTR(ret);
+			goto free_table;
+		}
+	}
+
 	ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
 	if (ret) {
-		kfree(cpufreq_dev);
-		return ERR_PTR(-EINVAL);
+		cool_dev = ERR_PTR(ret);
+		goto free_table;
 	}
 
 	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
@@ -475,25 +846,48 @@ __cpufreq_cooling_register(struct device_node *np,
 
 	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
 						      &cpufreq_cooling_ops);
-	if (IS_ERR(cool_dev)) {
-		release_idr(&cpufreq_idr, cpufreq_dev->id);
-		kfree(cpufreq_dev);
-		return cool_dev;
+	if (IS_ERR(cool_dev))
+		goto remove_idr;
+
+	/* Fill freq-table in descending order of frequencies */
+	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
+		freq = find_next_max(table, freq);
+		cpufreq_dev->freq_table[i] = freq;
+
+		/* Warn for duplicate entries */
+		if (!freq)
+			pr_warn("%s: table has duplicate entries\n", __func__);
+		else
+			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
+
+	cpufreq_dev->cpufreq_val = cpufreq_dev->freq_table[0];
 	cpufreq_dev->cool_dev = cool_dev;
-	cpufreq_dev->cpufreq_state = 0;
+
 	mutex_lock(&cooling_cpufreq_lock);
 
 	/* Register the notifier for first cpufreq cooling device */
-	if (cpufreq_dev_count == 0)
+	if (list_empty(&cpufreq_dev_list))
 		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
 					  CPUFREQ_POLICY_NOTIFIER);
-	cpufreq_dev_count++;
 	list_add(&cpufreq_dev->node, &cpufreq_dev_list);
 
 	mutex_unlock(&cooling_cpufreq_lock);
 
 	return cool_dev;
+
+remove_idr:
+	release_idr(&cpufreq_idr, cpufreq_dev->id);
+free_table:
+	kfree(cpufreq_dev->freq_table);
+free_time_in_idle_timestamp:
+	kfree(cpufreq_dev->time_in_idle_timestamp);
+free_time_in_idle:
+	kfree(cpufreq_dev->time_in_idle);
+free_cdev:
+	kfree(cpufreq_dev);
+
+	return cool_dev;
 }
 
 /**
@@ -510,7 +904,7 @@ __cpufreq_cooling_register(struct device_node *np,
 struct thermal_cooling_device *
 cpufreq_cooling_register(const struct cpumask *clip_cpus)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus);
+	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
 
@@ -534,11 +928,78 @@ of_cpufreq_cooling_register(struct device_node *np,
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus);
+	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
 
 /**
+ * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @capacitance:	dynamic power coefficient for these cpus
+ * @plat_static_func:	function to calculate the static power consumed by these
+ *			cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%x".  This api can support multiple
+ * instances of cpufreq cooling devices.  Using this function, the
+ * cooling device will implement the power extensions by using a
+ * simple cpu power model.  The cpus must have registered their OPPs
+ * using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus.  If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+			       get_static_t plat_static_func)
+{
+	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+				plat_static_func);
+}
+EXPORT_SYMBOL(cpufreq_power_cooling_register);
+
+/**
+ * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @np:	a valid struct device_node to the cooling device device tree node
+ * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @capacitance:	dynamic power coefficient for these cpus
+ * @plat_static_func:	function to calculate the static power consumed by these
+ *			cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%x".  This api can support multiple
+ * instances of cpufreq cooling devices.  Using this API, the cpufreq
+ * cooling device will be linked to the device tree node provided.
+ * Using this function, the cooling device will implement the power
+ * extensions by using a simple cpu power model.  The cpus must have
+ * registered their OPPs using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus.  If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+				  const struct cpumask *clip_cpus,
+				  u32 capacitance,
+				  get_static_t plat_static_func)
+{
+	if (!np)
+		return ERR_PTR(-EINVAL);
+
+	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+				plat_static_func);
+}
+EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
+
+/**
  * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
  * @cdev: thermal cooling device pointer.
  *
@@ -554,16 +1015,18 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 	cpufreq_dev = cdev->devdata;
 	mutex_lock(&cooling_cpufreq_lock);
 	list_del(&cpufreq_dev->node);
-	cpufreq_dev_count--;
 
 	/* Unregister the notifier for the last cpufreq cooling device */
-	if (cpufreq_dev_count == 0)
+	if (list_empty(&cpufreq_dev_list))
 		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
 					    CPUFREQ_POLICY_NOTIFIER);
 	mutex_unlock(&cooling_cpufreq_lock);
 
 	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
 	release_idr(&cpufreq_idr, cpufreq_dev->id);
+	kfree(cpufreq_dev->time_in_idle_timestamp);
+	kfree(cpufreq_dev->time_in_idle);
+	kfree(cpufreq_dev->freq_table);
 	kfree(cpufreq_dev);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
index 786d19263ab0..1ac7ec651c3f 100644
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ b/drivers/thermal/db8500_cpufreq_cooling.c
@@ -18,7 +18,6 @@
  */
 
 #include <linux/cpu_cooling.h>
-#include <linux/cpufreq.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -30,10 +29,6 @@ static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
 	struct thermal_cooling_device *cdev;
 	struct cpumask mask_val;
 
-	/* make sure cpufreq driver has been initialized */
-	if (!cpufreq_frequency_get_table(0))
-		return -EPROBE_DEFER;
-
 	cpumask_set_cpu(0, &mask_val);
 	cdev = cpufreq_cooling_register(&mask_val);
 
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index 1e3b3bf9f993..e3ccc2218eb3 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -76,7 +76,7 @@ static int db8500_cdev_bind(struct thermal_zone_device *thermal,
 		upper = lower = i > max_state ? max_state : i;
 
 		ret = thermal_zone_bind_cooling_device(thermal, i, cdev,
-			upper, lower);
+			upper, lower, THERMAL_WEIGHT_DEFAULT);
 
 		dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type,
 			i, ret, ret ? "fail" : "succeed");
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 6e0a3fbfae86..8c50b8d6afb7 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -59,13 +59,13 @@ static int get_trip_level(struct thermal_zone_device *tz)
 }
 
 static long get_target_state(struct thermal_zone_device *tz,
-		struct thermal_cooling_device *cdev, int weight, int level)
+		struct thermal_cooling_device *cdev, int percentage, int level)
 {
 	unsigned long max_state;
 
 	cdev->ops->get_max_state(cdev, &max_state);
 
-	return (long)(weight * level * max_state) / (100 * tz->trips);
+	return (long)(percentage * level * max_state) / (100 * tz->trips);
 }
 
 /**
@@ -77,7 +77,7 @@ static long get_target_state(struct thermal_zone_device *tz,
  *
  * Parameters used for Throttling:
  * P1. max_state: Maximum throttle state exposed by the cooling device.
- * P2. weight[i]/100:
+ * P2. percentage[i]/100:
  *	How 'effective' the 'i'th device is, in cooling the given zone.
  * P3. cur_trip_level/max_no_of_trips:
  *	This describes the extent to which the devices should be throttled.
@@ -88,28 +88,33 @@ static long get_target_state(struct thermal_zone_device *tz,
  */
 static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
 {
-	const struct thermal_zone_params *tzp;
-	struct thermal_cooling_device *cdev;
 	struct thermal_instance *instance;
-	int i;
+	int total_weight = 0;
+	int total_instance = 0;
 	int cur_trip_level = get_trip_level(tz);
 
-	if (!tz->tzp || !tz->tzp->tbp)
-		return -EINVAL;
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if (instance->trip != trip)
+			continue;
+
+		total_weight += instance->weight;
+		total_instance++;
+	}
 
-	tzp = tz->tzp;
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		int percentage;
+		struct thermal_cooling_device *cdev = instance->cdev;
 
-	for (i = 0; i < tzp->num_tbps; i++) {
-		if (!tzp->tbp[i].cdev)
+		if (instance->trip != trip)
 			continue;
 
-		cdev = tzp->tbp[i].cdev;
-		instance = get_thermal_instance(tz, cdev, trip);
-		if (!instance)
-			continue;
+		if (!total_weight)
+			percentage = 100 / total_instance;
+		else
+			percentage = (instance->weight * 100) / total_weight;
 
-		instance->target = get_target_state(tz, cdev,
-					tzp->tbp[i].weight, cur_trip_level);
+		instance->target = get_target_state(tz, cdev, percentage,
+						    cur_trip_level);
 
 		instance->cdev->updated = false;
 		thermal_cdev_update(cdev);
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 5a1f1070b702..31ada06c7dc6 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -9,7 +9,6 @@
 
 #include <linux/clk.h>
 #include <linux/cpu_cooling.h>
-#include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -307,7 +306,8 @@ static int imx_bind(struct thermal_zone_device *tz,
 
 	ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev,
 					       THERMAL_NO_LIMIT,
-					       THERMAL_NO_LIMIT);
+					       THERMAL_NO_LIMIT,
+					       THERMAL_WEIGHT_DEFAULT);
 	if (ret) {
 		dev_err(&tz->device,
 			"binding zone %s with cdev %s failed:%d\n",
@@ -459,10 +459,6 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	int measure_freq;
 	int ret;
 
-	if (!cpufreq_get_current_driver()) {
-		dev_dbg(&pdev->dev, "no cpufreq driver!");
-		return -EPROBE_DEFER;
-	}
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index 6e9fb62eb817..76910d35eecb 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -471,7 +471,6 @@ static struct platform_driver int3403_driver = {
 	.remove = int3403_remove,
 	.driver = {
 		.name = "int3403 thermal",
-		.owner  = THIS_MODULE,
 		.acpi_match_table = int3403_device_ids,
 	},
 };
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 62143ba31001..b295b2b6c191 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -30,27 +30,13 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/string.h>
+#include <linux/thermal.h>
 
 #include "thermal_core.h"
 
 /***   Private data structures to represent thermal device tree data ***/
 
 /**
- * struct __thermal_trip - representation of a point in temperature domain
- * @np: pointer to struct device_node that this trip point was created from
- * @temperature: temperature value in miliCelsius
- * @hysteresis: relative hysteresis in miliCelsius
- * @type: trip point type
- */
-
-struct __thermal_trip {
-	struct device_node *np;
-	unsigned long int temperature;
-	unsigned long int hysteresis;
-	enum thermal_trip_type type;
-};
-
-/**
  * struct __thermal_bind_param - a match between trip and cooling device
  * @cooling_device: a pointer to identify the referred cooling device
  * @trip_id: the trip point index
@@ -72,23 +58,26 @@ struct __thermal_bind_params {
  * @mode: current thermal zone device mode (enabled/disabled)
  * @passive_delay: polling interval while passive cooling is activated
  * @polling_delay: zone polling interval
+ * @slope: slope of the temperature adjustment curve
+ * @offset: offset of the temperature adjustment curve
  * @ntrips: number of trip points
  * @trips: an array of trip points (0..ntrips - 1)
  * @num_tbps: number of thermal bind params
  * @tbps: an array of thermal bind params (0..num_tbps - 1)
  * @sensor_data: sensor private data used while reading temperature and trend
- * @get_temp: sensor callback to read temperature
- * @get_trend: sensor callback to read temperature trend
+ * @ops: set of callbacks to handle the thermal zone based on DT
  */
 
 struct __thermal_zone {
 	enum thermal_device_mode mode;
 	int passive_delay;
 	int polling_delay;
+	int slope;
+	int offset;
 
 	/* trip data */
 	int ntrips;
-	struct __thermal_trip *trips;
+	struct thermal_trip *trips;
 
 	/* cooling binding data */
 	int num_tbps;
@@ -96,8 +85,7 @@ struct __thermal_zone {
 
 	/* sensor interface */
 	void *sensor_data;
-	int (*get_temp)(void *, long *);
-	int (*get_trend)(void *, long *);
+	const struct thermal_zone_of_device_ops *ops;
 };
 
 /***   DT thermal zone device callbacks   ***/
@@ -107,10 +95,96 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz,
 {
 	struct __thermal_zone *data = tz->devdata;
 
-	if (!data->get_temp)
+	if (!data->ops->get_temp)
 		return -EINVAL;
 
-	return data->get_temp(data->sensor_data, temp);
+	return data->ops->get_temp(data->sensor_data, temp);
+}
+
+/**
+ * of_thermal_get_ntrips - function to export number of available trip
+ *			   points.
+ * @tz: pointer to a thermal zone
+ *
+ * This function is a globally visible wrapper to get number of trip points
+ * stored in the local struct __thermal_zone
+ *
+ * Return: number of available trip points, -ENODEV when data not available
+ */
+int of_thermal_get_ntrips(struct thermal_zone_device *tz)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data || IS_ERR(data))
+		return -ENODEV;
+
+	return data->ntrips;
+}
+EXPORT_SYMBOL_GPL(of_thermal_get_ntrips);
+
+/**
+ * of_thermal_is_trip_valid - function to check if trip point is valid
+ *
+ * @tz:	pointer to a thermal zone
+ * @trip:	trip point to evaluate
+ *
+ * This function is responsible for checking if passed trip point is valid
+ *
+ * Return: true if trip point is valid, false otherwise
+ */
+bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, int trip)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data || trip >= data->ntrips || trip < 0)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(of_thermal_is_trip_valid);
+
+/**
+ * of_thermal_get_trip_points - function to get access to a globally exported
+ *				trip points
+ *
+ * @tz:	pointer to a thermal zone
+ *
+ * This function provides a pointer to trip points table
+ *
+ * Return: pointer to trip points table, NULL otherwise
+ */
+const struct thermal_trip *
+of_thermal_get_trip_points(struct thermal_zone_device *tz)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data)
+		return NULL;
+
+	return data->trips;
+}
+EXPORT_SYMBOL_GPL(of_thermal_get_trip_points);
+
+/**
+ * of_thermal_set_emul_temp - function to set emulated temperature
+ *
+ * @tz:	pointer to a thermal zone
+ * @temp:	temperature to set
+ *
+ * This function gives the ability to set emulated value of temperature,
+ * which is handy for debugging
+ *
+ * Return: zero on success, error code otherwise
+ */
+static int of_thermal_set_emul_temp(struct thermal_zone_device *tz,
+				    unsigned long temp)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data->ops || !data->ops->set_emul_temp)
+		return -EINVAL;
+
+	return data->ops->set_emul_temp(data->sensor_data, temp);
 }
 
 static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
@@ -120,10 +194,10 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
 	long dev_trend;
 	int r;
 
-	if (!data->get_trend)
+	if (!data->ops->get_trend)
 		return -EINVAL;
 
-	r = data->get_trend(data->sensor_data, &dev_trend);
+	r = data->ops->get_trend(data->sensor_data, &dev_trend);
 	if (r)
 		return r;
 
@@ -157,7 +231,8 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
 			ret = thermal_zone_bind_cooling_device(thermal,
 						tbp->trip_id, cdev,
 						tbp->max,
-						tbp->min);
+						tbp->min,
+						tbp->usage);
 			if (ret)
 				return ret;
 		}
@@ -324,8 +399,7 @@ static struct thermal_zone_device_ops of_thermal_ops = {
 static struct thermal_zone_device *
 thermal_zone_of_add_sensor(struct device_node *zone,
 			   struct device_node *sensor, void *data,
-			   int (*get_temp)(void *, long *),
-			   int (*get_trend)(void *, long *))
+			   const struct thermal_zone_of_device_ops *ops)
 {
 	struct thermal_zone_device *tzd;
 	struct __thermal_zone *tz;
@@ -336,13 +410,16 @@ thermal_zone_of_add_sensor(struct device_node *zone,
 
 	tz = tzd->devdata;
 
+	if (!ops)
+		return ERR_PTR(-EINVAL);
+
 	mutex_lock(&tzd->lock);
-	tz->get_temp = get_temp;
-	tz->get_trend = get_trend;
+	tz->ops = ops;
 	tz->sensor_data = data;
 
 	tzd->ops->get_temp = of_thermal_get_temp;
 	tzd->ops->get_trend = of_thermal_get_trend;
+	tzd->ops->set_emul_temp = of_thermal_set_emul_temp;
 	mutex_unlock(&tzd->lock);
 
 	return tzd;
@@ -356,8 +433,7 @@ thermal_zone_of_add_sensor(struct device_node *zone,
  *             than one sensors
  * @data: a private pointer (owned by the caller) that will be passed
  *        back, when a temperature reading is needed.
- * @get_temp: a pointer to a function that reads the sensor temperature.
- * @get_trend: a pointer to a function that reads the sensor temperature trend.
+ * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp.
  *
  * This function will search the list of thermal zones described in device
  * tree and look for the zone that refer to the sensor device pointed by
@@ -382,9 +458,8 @@ thermal_zone_of_add_sensor(struct device_node *zone,
  * check the return value with help of IS_ERR() helper.
  */
 struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
-				void *data, int (*get_temp)(void *, long *),
-				int (*get_trend)(void *, long *))
+thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data,
+				const struct thermal_zone_of_device_ops *ops)
 {
 	struct device_node *np, *child, *sensor_np;
 	struct thermal_zone_device *tzd = ERR_PTR(-ENODEV);
@@ -426,9 +501,10 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
 
 		if (sensor_specs.np == sensor_np && id == sensor_id) {
 			tzd = thermal_zone_of_add_sensor(child, sensor_np,
-							 data,
-							 get_temp,
-							 get_trend);
+							 data, ops);
+			if (!IS_ERR(tzd))
+				tzd->ops->set_mode(tzd, THERMAL_DEVICE_ENABLED);
+
 			of_node_put(sensor_specs.np);
 			of_node_put(child);
 			goto exit;
@@ -475,9 +551,9 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
 	mutex_lock(&tzd->lock);
 	tzd->ops->get_temp = NULL;
 	tzd->ops->get_trend = NULL;
+	tzd->ops->set_emul_temp = NULL;
 
-	tz->get_temp = NULL;
-	tz->get_trend = NULL;
+	tz->ops = NULL;
 	tz->sensor_data = NULL;
 	mutex_unlock(&tzd->lock);
 }
@@ -501,7 +577,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
  */
 static int thermal_of_populate_bind_params(struct device_node *np,
 					   struct __thermal_bind_params *__tbp,
-					   struct __thermal_trip *trips,
+					   struct thermal_trip *trips,
 					   int ntrips)
 {
 	struct of_phandle_args cooling_spec;
@@ -510,7 +586,7 @@ static int thermal_of_populate_bind_params(struct device_node *np,
 	u32 prop;
 
 	/* Default weight. Usage is optional */
-	__tbp->usage = 0;
+	__tbp->usage = THERMAL_WEIGHT_DEFAULT;
 	ret = of_property_read_u32(np, "contribution", &prop);
 	if (ret == 0)
 		__tbp->usage = prop;
@@ -604,7 +680,7 @@ static int thermal_of_get_trip_type(struct device_node *np,
  * Return: 0 on success, proper error code otherwise
  */
 static int thermal_of_populate_trip(struct device_node *np,
-				    struct __thermal_trip *trip)
+				    struct thermal_trip *trip)
 {
 	int prop;
 	int ret;
@@ -644,7 +720,7 @@ static int thermal_of_populate_trip(struct device_node *np,
  * @np parameter and fills the read data into a __thermal_zone data structure
  * and return this pointer.
  *
- * TODO: Missing properties to parse: thermal-sensor-names and coefficients
+ * TODO: Missing properties to parse: thermal-sensor-names
  *
  * Return: On success returns a valid struct __thermal_zone,
  * otherwise, it returns a corresponding ERR_PTR(). Caller must
@@ -656,7 +732,7 @@ thermal_of_build_thermal_zone(struct device_node *np)
 	struct device_node *child = NULL, *gchild;
 	struct __thermal_zone *tz;
 	int ret, i;
-	u32 prop;
+	u32 prop, coef[2];
 
 	if (!np) {
 		pr_err("no thermal zone np\n");
@@ -681,6 +757,20 @@ thermal_of_build_thermal_zone(struct device_node *np)
 	}
 	tz->polling_delay = prop;
 
+	/*
+	 * REVIST: for now, the thermal framework supports only
+	 * one sensor per thermal zone. Thus, we are considering
+	 * only the first two values as slope and offset.
+	 */
+	ret = of_property_read_u32_array(np, "coefficients", coef, 2);
+	if (ret == 0) {
+		tz->slope = coef[0];
+		tz->offset = coef[1];
+	} else {
+		tz->slope = 1;
+		tz->offset = 0;
+	}
+
 	/* trips */
 	child = of_get_child_by_name(np, "trips");
 
@@ -794,6 +884,8 @@ int __init of_parse_thermal_zones(void)
 	for_each_child_of_node(np, child) {
 		struct thermal_zone_device *zone;
 		struct thermal_zone_params *tzp;
+		int i, mask = 0;
+		u32 prop;
 
 		/* Check whether child is enabled or not */
 		if (!of_device_is_available(child))
@@ -820,8 +912,18 @@ int __init of_parse_thermal_zones(void)
 		/* No hwmon because there might be hwmon drivers registering */
 		tzp->no_hwmon = true;
 
+		if (!of_property_read_u32(child, "sustainable-power", &prop))
+			tzp->sustainable_power = prop;
+
+		for (i = 0; i < tz->ntrips; i++)
+			mask |= 1 << i;
+
+		/* these two are left for temperature drivers to use */
+		tzp->slope = tz->slope;
+		tzp->offset = tz->offset;
+
 		zone = thermal_zone_device_register(child->name, tz->ntrips,
-						    0, tz,
+						    mask, tz,
 						    ops, tzp,
 						    tz->passive_delay,
 						    tz->polling_delay);
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
new file mode 100644
index 000000000000..251676902869
--- /dev/null
+++ b/drivers/thermal/power_allocator.c
@@ -0,0 +1,544 @@
+/*
+ * A power allocator to manage temperature
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "Power allocator: " fmt
+
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal_power_allocator.h>
+
+#include "thermal_core.h"
+
+#define FRAC_BITS 10
+#define int_to_frac(x) ((x) << FRAC_BITS)
+#define frac_to_int(x) ((x) >> FRAC_BITS)
+
+/**
+ * mul_frac() - multiply two fixed-point numbers
+ * @x:	first multiplicand
+ * @y:	second multiplicand
+ *
+ * Return: the result of multiplying two fixed-point numbers.  The
+ * result is also a fixed-point number.
+ */
+static inline s64 mul_frac(s64 x, s64 y)
+{
+	return (x * y) >> FRAC_BITS;
+}
+
+/**
+ * div_frac() - divide two fixed-point numbers
+ * @x:	the dividend
+ * @y:	the divisor
+ *
+ * Return: the result of dividing two fixed-point numbers.  The
+ * result is also a fixed-point number.
+ */
+static inline s64 div_frac(s64 x, s64 y)
+{
+	return div_s64(x << FRAC_BITS, y);
+}
+
+/**
+ * struct power_allocator_params - parameters for the power allocator governor
+ * @err_integral:	accumulated error in the PID controller.
+ * @prev_err:	error in the previous iteration of the PID controller.
+ *		Used to calculate the derivative term.
+ * @trip_switch_on:	first passive trip point of the thermal zone.  The
+ *			governor switches on when this trip point is crossed.
+ * @trip_max_desired_temperature:	last passive trip point of the thermal
+ *					zone.  The temperature we are
+ *					controlling for.
+ */
+struct power_allocator_params {
+	s64 err_integral;
+	s32 prev_err;
+	int trip_switch_on;
+	int trip_max_desired_temperature;
+};
+
+/**
+ * pid_controller() - PID controller
+ * @tz:	thermal zone we are operating in
+ * @current_temp:	the current temperature in millicelsius
+ * @control_temp:	the target temperature in millicelsius
+ * @max_allocatable_power:	maximum allocatable power for this thermal zone
+ *
+ * This PID controller increases the available power budget so that the
+ * temperature of the thermal zone gets as close as possible to
+ * @control_temp and limits the power if it exceeds it.  k_po is the
+ * proportional term when we are overshooting, k_pu is the
+ * proportional term when we are undershooting.  integral_cutoff is a
+ * threshold below which we stop accumulating the error.  The
+ * accumulated error is only valid if the requested power will make
+ * the system warmer.  If the system is mostly idle, there's no point
+ * in accumulating positive error.
+ *
+ * Return: The power budget for the next period.
+ */
+static u32 pid_controller(struct thermal_zone_device *tz,
+			  unsigned long current_temp,
+			  unsigned long control_temp,
+			  u32 max_allocatable_power)
+{
+	s64 p, i, d, power_range;
+	s32 err, max_power_frac;
+	struct power_allocator_params *params = tz->governor_data;
+
+	max_power_frac = int_to_frac(max_allocatable_power);
+
+	err = ((s32)control_temp - (s32)current_temp);
+	err = int_to_frac(err);
+
+	/* Calculate the proportional term */
+	p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err);
+
+	/*
+	 * Calculate the integral term
+	 *
+	 * if the error is less than cut off allow integration (but
+	 * the integral is limited to max power)
+	 */
+	i = mul_frac(tz->tzp->k_i, params->err_integral);
+
+	if (err < int_to_frac(tz->tzp->integral_cutoff)) {
+		s64 i_next = i + mul_frac(tz->tzp->k_i, err);
+
+		if (abs64(i_next) < max_power_frac) {
+			i = i_next;
+			params->err_integral += err;
+		}
+	}
+
+	/*
+	 * Calculate the derivative term
+	 *
+	 * We do err - prev_err, so with a positive k_d, a decreasing
+	 * error (i.e. driving closer to the line) results in less
+	 * power being applied, slowing down the controller)
+	 */
+	d = mul_frac(tz->tzp->k_d, err - params->prev_err);
+	d = div_frac(d, tz->passive_delay);
+	params->prev_err = err;
+
+	power_range = p + i + d;
+
+	/* feed-forward the known sustainable dissipatable power */
+	power_range = tz->tzp->sustainable_power + frac_to_int(power_range);
+
+	power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power);
+
+	trace_thermal_power_allocator_pid(tz, frac_to_int(err),
+					  frac_to_int(params->err_integral),
+					  frac_to_int(p), frac_to_int(i),
+					  frac_to_int(d), power_range);
+
+	return power_range;
+}
+
+/**
+ * divvy_up_power() - divvy the allocated power between the actors
+ * @req_power:	each actor's requested power
+ * @max_power:	each actor's maximum available power
+ * @num_actors:	size of the @req_power, @max_power and @granted_power's array
+ * @total_req_power: sum of @req_power
+ * @power_range:	total allocated power
+ * @granted_power:	output array: each actor's granted power
+ * @extra_actor_power:	an appropriately sized array to be used in the
+ *			function as temporary storage of the extra power given
+ *			to the actors
+ *
+ * This function divides the total allocated power (@power_range)
+ * fairly between the actors.  It first tries to give each actor a
+ * share of the @power_range according to how much power it requested
+ * compared to the rest of the actors.  For example, if only one actor
+ * requests power, then it receives all the @power_range.  If
+ * three actors each requests 1mW, each receives a third of the
+ * @power_range.
+ *
+ * If any actor received more than their maximum power, then that
+ * surplus is re-divvied among the actors based on how far they are
+ * from their respective maximums.
+ *
+ * Granted power for each actor is written to @granted_power, which
+ * should've been allocated by the calling function.
+ */
+static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
+			   u32 total_req_power, u32 power_range,
+			   u32 *granted_power, u32 *extra_actor_power)
+{
+	u32 extra_power, capped_extra_power;
+	int i;
+
+	/*
+	 * Prevent division by 0 if none of the actors request power.
+	 */
+	if (!total_req_power)
+		total_req_power = 1;
+
+	capped_extra_power = 0;
+	extra_power = 0;
+	for (i = 0; i < num_actors; i++) {
+		u64 req_range = req_power[i] * power_range;
+
+		granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range,
+							 total_req_power);
+
+		if (granted_power[i] > max_power[i]) {
+			extra_power += granted_power[i] - max_power[i];
+			granted_power[i] = max_power[i];
+		}
+
+		extra_actor_power[i] = max_power[i] - granted_power[i];
+		capped_extra_power += extra_actor_power[i];
+	}
+
+	if (!extra_power)
+		return;
+
+	/*
+	 * Re-divvy the reclaimed extra among actors based on
+	 * how far they are from the max
+	 */
+	extra_power = min(extra_power, capped_extra_power);
+	if (capped_extra_power > 0)
+		for (i = 0; i < num_actors; i++)
+			granted_power[i] += (extra_actor_power[i] *
+					extra_power) / capped_extra_power;
+}
+
+static int allocate_power(struct thermal_zone_device *tz,
+			  unsigned long current_temp,
+			  unsigned long control_temp)
+{
+	struct thermal_instance *instance;
+	struct power_allocator_params *params = tz->governor_data;
+	u32 *req_power, *max_power, *granted_power, *extra_actor_power;
+	u32 *weighted_req_power;
+	u32 total_req_power, max_allocatable_power, total_weighted_req_power;
+	u32 total_granted_power, power_range;
+	int i, num_actors, total_weight, ret = 0;
+	int trip_max_desired_temperature = params->trip_max_desired_temperature;
+
+	mutex_lock(&tz->lock);
+
+	num_actors = 0;
+	total_weight = 0;
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if ((instance->trip == trip_max_desired_temperature) &&
+		    cdev_is_power_actor(instance->cdev)) {
+			num_actors++;
+			total_weight += instance->weight;
+		}
+	}
+
+	/*
+	 * We need to allocate five arrays of the same size:
+	 * req_power, max_power, granted_power, extra_actor_power and
+	 * weighted_req_power.  They are going to be needed until this
+	 * function returns.  Allocate them all in one go to simplify
+	 * the allocation and deallocation logic.
+	 */
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power));
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power));
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power));
+	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*weighted_req_power));
+	req_power = kcalloc(num_actors * 5, sizeof(*req_power), GFP_KERNEL);
+	if (!req_power) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	max_power = &req_power[num_actors];
+	granted_power = &req_power[2 * num_actors];
+	extra_actor_power = &req_power[3 * num_actors];
+	weighted_req_power = &req_power[4 * num_actors];
+
+	i = 0;
+	total_weighted_req_power = 0;
+	total_req_power = 0;
+	max_allocatable_power = 0;
+
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		int weight;
+		struct thermal_cooling_device *cdev = instance->cdev;
+
+		if (instance->trip != trip_max_desired_temperature)
+			continue;
+
+		if (!cdev_is_power_actor(cdev))
+			continue;
+
+		if (cdev->ops->get_requested_power(cdev, tz, &req_power[i]))
+			continue;
+
+		if (!total_weight)
+			weight = 1 << FRAC_BITS;
+		else
+			weight = instance->weight;
+
+		weighted_req_power[i] = frac_to_int(weight * req_power[i]);
+
+		if (power_actor_get_max_power(cdev, tz, &max_power[i]))
+			continue;
+
+		total_req_power += req_power[i];
+		max_allocatable_power += max_power[i];
+		total_weighted_req_power += weighted_req_power[i];
+
+		i++;
+	}
+
+	power_range = pid_controller(tz, current_temp, control_temp,
+				     max_allocatable_power);
+
+	divvy_up_power(weighted_req_power, max_power, num_actors,
+		       total_weighted_req_power, power_range, granted_power,
+		       extra_actor_power);
+
+	total_granted_power = 0;
+	i = 0;
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if (instance->trip != trip_max_desired_temperature)
+			continue;
+
+		if (!cdev_is_power_actor(instance->cdev))
+			continue;
+
+		power_actor_set_power(instance->cdev, instance,
+				      granted_power[i]);
+		total_granted_power += granted_power[i];
+
+		i++;
+	}
+
+	trace_thermal_power_allocator(tz, req_power, total_req_power,
+				      granted_power, total_granted_power,
+				      num_actors, power_range,
+				      max_allocatable_power, current_temp,
+				      (s32)control_temp - (s32)current_temp);
+
+	kfree(req_power);
+unlock:
+	mutex_unlock(&tz->lock);
+
+	return ret;
+}
+
+static int get_governor_trips(struct thermal_zone_device *tz,
+			      struct power_allocator_params *params)
+{
+	int i, ret, last_passive;
+	bool found_first_passive;
+
+	found_first_passive = false;
+	last_passive = -1;
+	ret = -EINVAL;
+
+	for (i = 0; i < tz->trips; i++) {
+		enum thermal_trip_type type;
+
+		ret = tz->ops->get_trip_type(tz, i, &type);
+		if (ret)
+			return ret;
+
+		if (!found_first_passive) {
+			if (type == THERMAL_TRIP_PASSIVE) {
+				params->trip_switch_on = i;
+				found_first_passive = true;
+			}
+		} else if (type == THERMAL_TRIP_PASSIVE) {
+			last_passive = i;
+		} else {
+			break;
+		}
+	}
+
+	if (last_passive != -1) {
+		params->trip_max_desired_temperature = last_passive;
+		ret = 0;
+	} else {
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void reset_pid_controller(struct power_allocator_params *params)
+{
+	params->err_integral = 0;
+	params->prev_err = 0;
+}
+
+static void allow_maximum_power(struct thermal_zone_device *tz)
+{
+	struct thermal_instance *instance;
+	struct power_allocator_params *params = tz->governor_data;
+
+	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+		if ((instance->trip != params->trip_max_desired_temperature) ||
+		    (!cdev_is_power_actor(instance->cdev)))
+			continue;
+
+		instance->target = 0;
+		instance->cdev->updated = false;
+		thermal_cdev_update(instance->cdev);
+	}
+}
+
+/**
+ * power_allocator_bind() - bind the power_allocator governor to a thermal zone
+ * @tz:	thermal zone to bind it to
+ *
+ * Check that the thermal zone is valid for this governor, that is, it
+ * has two thermal trips.  If so, initialize the PID controller
+ * parameters and bind it to the thermal zone.
+ *
+ * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM
+ * if we ran out of memory.
+ */
+static int power_allocator_bind(struct thermal_zone_device *tz)
+{
+	int ret;
+	struct power_allocator_params *params;
+	unsigned long switch_on_temp, control_temp;
+	u32 temperature_threshold;
+
+	if (!tz->tzp || !tz->tzp->sustainable_power) {
+		dev_err(&tz->device,
+			"power_allocator: missing sustainable_power\n");
+		return -EINVAL;
+	}
+
+	params = kzalloc(sizeof(*params), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	ret = get_governor_trips(tz, params);
+	if (ret) {
+		dev_err(&tz->device,
+			"thermal zone %s has wrong trip setup for power allocator\n",
+			tz->type);
+		goto free;
+	}
+
+	ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+				     &switch_on_temp);
+	if (ret)
+		goto free;
+
+	ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+				     &control_temp);
+	if (ret)
+		goto free;
+
+	temperature_threshold = control_temp - switch_on_temp;
+
+	tz->tzp->k_po = tz->tzp->k_po ?:
+		int_to_frac(tz->tzp->sustainable_power) / temperature_threshold;
+	tz->tzp->k_pu = tz->tzp->k_pu ?:
+		int_to_frac(2 * tz->tzp->sustainable_power) /
+		temperature_threshold;
+	tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000;
+	/*
+	 * The default for k_d and integral_cutoff is 0, so we can
+	 * leave them as they are.
+	 */
+
+	reset_pid_controller(params);
+
+	tz->governor_data = params;
+
+	return 0;
+
+free:
+	kfree(params);
+	return ret;
+}
+
+static void power_allocator_unbind(struct thermal_zone_device *tz)
+{
+	dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
+	kfree(tz->governor_data);
+	tz->governor_data = NULL;
+}
+
+static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
+{
+	int ret;
+	unsigned long switch_on_temp, control_temp, current_temp;
+	struct power_allocator_params *params = tz->governor_data;
+
+	/*
+	 * We get called for every trip point but we only need to do
+	 * our calculations once
+	 */
+	if (trip != params->trip_max_desired_temperature)
+		return 0;
+
+	ret = thermal_zone_get_temp(tz, &current_temp);
+	if (ret) {
+		dev_warn(&tz->device, "Failed to get temperature: %d\n", ret);
+		return ret;
+	}
+
+	ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+				     &switch_on_temp);
+	if (ret) {
+		dev_warn(&tz->device,
+			 "Failed to get switch on temperature: %d\n", ret);
+		return ret;
+	}
+
+	if (current_temp < switch_on_temp) {
+		tz->passive = 0;
+		reset_pid_controller(params);
+		allow_maximum_power(tz);
+		return 0;
+	}
+
+	tz->passive = 1;
+
+	ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+				&control_temp);
+	if (ret) {
+		dev_warn(&tz->device,
+			 "Failed to get the maximum desired temperature: %d\n",
+			 ret);
+		return ret;
+	}
+
+	return allocate_power(tz, current_temp, control_temp);
+}
+
+static struct thermal_governor thermal_gov_power_allocator = {
+	.name		= "power_allocator",
+	.bind_to_tz	= power_allocator_bind,
+	.unbind_from_tz	= power_allocator_unbind,
+	.throttle	= power_allocator_throttle,
+};
+
+int thermal_gov_power_allocator_register(void)
+{
+	return thermal_register_governor(&thermal_gov_power_allocator);
+}
+
+void thermal_gov_power_allocator_unregister(void)
+{
+	thermal_unregister_governor(&thermal_gov_power_allocator);
+}
diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig
index f760389a204c..c43306ecc0ab 100644
--- a/drivers/thermal/samsung/Kconfig
+++ b/drivers/thermal/samsung/Kconfig
@@ -1,6 +1,6 @@
 config EXYNOS_THERMAL
 	tristate "Exynos thermal management unit driver"
-	depends on ARCH_HAS_BANDGAP && OF
+	depends on OF
 	help
 	  If you say yes here you get support for the TMU (Thermal Management
 	  Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises
diff --git a/drivers/thermal/samsung/exynos_thermal_common.c b/drivers/thermal/samsung/exynos_thermal_common.c
index b6be572704a4..7c97db1cf829 100644
--- a/drivers/thermal/samsung/exynos_thermal_common.c
+++ b/drivers/thermal/samsung/exynos_thermal_common.c
@@ -163,7 +163,7 @@ static int exynos_bind(struct thermal_zone_device *thermal,
 		case MONITOR_ZONE:
 		case WARN_ZONE:
 			if (thermal_zone_bind_cooling_device(thermal, i, cdev,
-								level, 0)) {
+							     level, 0, THERMAL_WEIGHT_DEFAULT)) {
 				dev_err(data->dev,
 					"error unbinding cdev inst=%d\n", i);
 				ret = -EINVAL;
@@ -371,9 +371,11 @@ int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
 		th_zone->cool_dev[th_zone->cool_dev_size] =
 					cpufreq_cooling_register(&mask_val);
 		if (IS_ERR(th_zone->cool_dev[th_zone->cool_dev_size])) {
-			dev_err(sensor_conf->dev,
-				"Failed to register cpufreq cooling device\n");
-			ret = -EINVAL;
+			ret = PTR_ERR(th_zone->cool_dev[th_zone->cool_dev_size]);
+			if (ret != -EPROBE_DEFER)
+				dev_err(sensor_conf->dev,
+					"Failed to register cpufreq cooling device: %d\n",
+					ret);
 			goto err_unregister;
 		}
 		th_zone->cool_dev_size++;
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 49c09243fd38..2afca9bf40d5 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -683,7 +683,10 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 	/* Register the sensor with thermal management interface */
 	ret = exynos_register_thermal(sensor_conf);
 	if (ret) {
-		dev_err(&pdev->dev, "Failed to register thermal interface\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev,
+				"Failed to register thermal interface: %d\n",
+				ret);
 		goto err_clk;
 	}
 	data->reg_conf = sensor_conf;
diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c
new file mode 100644
index 000000000000..9197fc05c5cc
--- /dev/null
+++ b/drivers/thermal/tegra_soctherm.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Author:
+ *	Mikko Perttunen <mperttunen@nvidia.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/thermal.h>
+
+#include <soc/tegra/fuse.h>
+
+#define SENSOR_CONFIG0				0
+#define SENSOR_CONFIG0_STOP			BIT(0)
+#define SENSOR_CONFIG0_TALL_SHIFT		8
+#define SENSOR_CONFIG0_TCALC_OVER		BIT(4)
+#define SENSOR_CONFIG0_OVER			BIT(3)
+#define SENSOR_CONFIG0_CPTR_OVER		BIT(2)
+
+#define SENSOR_CONFIG1				4
+#define SENSOR_CONFIG1_TSAMPLE_SHIFT		0
+#define SENSOR_CONFIG1_TIDDQ_EN_SHIFT		15
+#define SENSOR_CONFIG1_TEN_COUNT_SHIFT		24
+#define SENSOR_CONFIG1_TEMP_ENABLE		BIT(31)
+
+#define SENSOR_CONFIG2				8
+#define SENSOR_CONFIG2_THERMA_SHIFT		16
+#define SENSOR_CONFIG2_THERMB_SHIFT		0
+
+#define SENSOR_PDIV				0x1c0
+#define SENSOR_PDIV_T124			0x8888
+#define SENSOR_HOTSPOT_OFF			0x1c4
+#define SENSOR_HOTSPOT_OFF_T124			0x00060600
+#define SENSOR_TEMP1				0x1c8
+#define SENSOR_TEMP2				0x1cc
+
+#define SENSOR_TEMP_MASK			0xffff
+#define READBACK_VALUE_MASK			0xff00
+#define READBACK_VALUE_SHIFT			8
+#define READBACK_ADD_HALF			BIT(7)
+#define READBACK_NEGATE				BIT(1)
+
+#define FUSE_TSENSOR8_CALIB			0x180
+#define FUSE_SPARE_REALIGNMENT_REG_0		0x1fc
+
+#define FUSE_TSENSOR_CALIB_CP_TS_BASE_MASK	0x1fff
+#define FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK	(0x1fff << 13)
+#define FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT	13
+
+#define FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK	0x3ff
+#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK	(0x7ff << 10)
+#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT	10
+
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_CP_MASK 0x3f
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK (0x1f << 21)
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT 21
+
+#define NOMINAL_CALIB_FT_T124			105
+#define NOMINAL_CALIB_CP_T124			25
+
+struct tegra_tsensor_configuration {
+	u32 tall, tsample, tiddq_en, ten_count, pdiv, tsample_ate, pdiv_ate;
+};
+
+struct tegra_tsensor {
+	const struct tegra_tsensor_configuration *config;
+	u32 base, calib_fuse_offset;
+	/* Correction values used to modify values read from calibration fuses */
+	s32 fuse_corr_alpha, fuse_corr_beta;
+};
+
+struct tegra_thermctl_zone {
+	void __iomem *reg;
+	unsigned int shift;
+};
+
+static const struct tegra_tsensor_configuration t124_tsensor_config = {
+	.tall = 16300,
+	.tsample = 120,
+	.tiddq_en = 1,
+	.ten_count = 1,
+	.pdiv = 8,
+	.tsample_ate = 480,
+	.pdiv_ate = 8
+};
+
+static const struct tegra_tsensor t124_tsensors[] = {
+	{
+		.config = &t124_tsensor_config,
+		.base = 0xc0,
+		.calib_fuse_offset = 0x098,
+		.fuse_corr_alpha = 1135400,
+		.fuse_corr_beta = -6266900,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0xe0,
+		.calib_fuse_offset = 0x084,
+		.fuse_corr_alpha = 1122220,
+		.fuse_corr_beta = -5700700,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x100,
+		.calib_fuse_offset = 0x088,
+		.fuse_corr_alpha = 1127000,
+		.fuse_corr_beta = -6768200,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x120,
+		.calib_fuse_offset = 0x12c,
+		.fuse_corr_alpha = 1110900,
+		.fuse_corr_beta = -6232000,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x140,
+		.calib_fuse_offset = 0x158,
+		.fuse_corr_alpha = 1122300,
+		.fuse_corr_beta = -5936400,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x160,
+		.calib_fuse_offset = 0x15c,
+		.fuse_corr_alpha = 1145700,
+		.fuse_corr_beta = -7124600,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x180,
+		.calib_fuse_offset = 0x154,
+		.fuse_corr_alpha = 1120100,
+		.fuse_corr_beta = -6000500,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x1a0,
+		.calib_fuse_offset = 0x160,
+		.fuse_corr_alpha = 1106500,
+		.fuse_corr_beta = -6729300,
+	},
+};
+
+struct tegra_soctherm {
+	struct reset_control *reset;
+	struct clk *clock_tsensor;
+	struct clk *clock_soctherm;
+	void __iomem *regs;
+
+	struct thermal_zone_device *thermctl_tzs[4];
+};
+
+struct tsensor_shared_calibration {
+	u32 base_cp, base_ft;
+	u32 actual_temp_cp, actual_temp_ft;
+};
+
+static int calculate_shared_calibration(struct tsensor_shared_calibration *r)
+{
+	u32 val, shifted_cp, shifted_ft;
+	int err;
+
+	err = tegra_fuse_readl(FUSE_TSENSOR8_CALIB, &val);
+	if (err)
+		return err;
+	r->base_cp = val & FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK;
+	r->base_ft = (val & FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK)
+		>> FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT;
+	val = ((val & FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK)
+		>> FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT);
+	shifted_ft = sign_extend32(val, 4);
+
+	err = tegra_fuse_readl(FUSE_SPARE_REALIGNMENT_REG_0, &val);
+	if (err)
+		return err;
+	shifted_cp = sign_extend32(val, 5);
+
+	r->actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + shifted_cp;
+	r->actual_temp_ft = 2 * NOMINAL_CALIB_FT_T124 + shifted_ft;
+
+	return 0;
+}
+
+static s64 div64_s64_precise(s64 a, s64 b)
+{
+	s64 r, al;
+
+	/* Scale up for increased precision division */
+	al = a << 16;
+
+	r = div64_s64(al * 2 + 1, 2 * b);
+	return r >> 16;
+}
+
+static int
+calculate_tsensor_calibration(const struct tegra_tsensor *sensor,
+			      const struct tsensor_shared_calibration *shared,
+			      u32 *calib)
+{
+	u32 val;
+	s32 actual_tsensor_ft, actual_tsensor_cp, delta_sens, delta_temp,
+	    mult, div;
+	s16 therma, thermb;
+	s64 tmp;
+	int err;
+
+	err = tegra_fuse_readl(sensor->calib_fuse_offset, &val);
+	if (err)
+		return err;
+
+	actual_tsensor_cp = (shared->base_cp * 64) + sign_extend32(val, 12);
+	val = (val & FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK)
+		>> FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT;
+	actual_tsensor_ft = (shared->base_ft * 32) + sign_extend32(val, 12);
+
+	delta_sens = actual_tsensor_ft - actual_tsensor_cp;
+	delta_temp = shared->actual_temp_ft - shared->actual_temp_cp;
+
+	mult = sensor->config->pdiv * sensor->config->tsample_ate;
+	div = sensor->config->tsample * sensor->config->pdiv_ate;
+
+	therma = div64_s64_precise((s64) delta_temp * (1LL << 13) * mult,
+				   (s64) delta_sens * div);
+
+	tmp = (s64)actual_tsensor_ft * shared->actual_temp_cp -
+	      (s64)actual_tsensor_cp * shared->actual_temp_ft;
+	thermb = div64_s64_precise(tmp, (s64)delta_sens);
+
+	therma = div64_s64_precise((s64)therma * sensor->fuse_corr_alpha,
+				   (s64)1000000LL);
+	thermb = div64_s64_precise((s64)thermb * sensor->fuse_corr_alpha +
+				   sensor->fuse_corr_beta, (s64)1000000LL);
+
+	*calib = ((u16)therma << SENSOR_CONFIG2_THERMA_SHIFT) |
+		 ((u16)thermb << SENSOR_CONFIG2_THERMB_SHIFT);
+
+	return 0;
+}
+
+static int enable_tsensor(struct tegra_soctherm *tegra,
+			  const struct tegra_tsensor *sensor,
+			  const struct tsensor_shared_calibration *shared)
+{
+	void __iomem *base = tegra->regs + sensor->base;
+	unsigned int val;
+	u32 calib;
+	int err;
+
+	err = calculate_tsensor_calibration(sensor, shared, &calib);
+	if (err)
+		return err;
+
+	val = sensor->config->tall << SENSOR_CONFIG0_TALL_SHIFT;
+	writel(val, base + SENSOR_CONFIG0);
+
+	val  = (sensor->config->tsample - 1) << SENSOR_CONFIG1_TSAMPLE_SHIFT;
+	val |= sensor->config->tiddq_en << SENSOR_CONFIG1_TIDDQ_EN_SHIFT;
+	val |= sensor->config->ten_count << SENSOR_CONFIG1_TEN_COUNT_SHIFT;
+	val |= SENSOR_CONFIG1_TEMP_ENABLE;
+	writel(val, base + SENSOR_CONFIG1);
+
+	writel(calib, base + SENSOR_CONFIG2);
+
+	return 0;
+}
+
+/*
+ * Translate from soctherm readback format to millicelsius.
+ * The soctherm readback format in bits is as follows:
+ *   TTTTTTTT H______N
+ * where T's contain the temperature in Celsius,
+ * H denotes an addition of 0.5 Celsius and N denotes negation
+ * of the final value.
+ */
+static long translate_temp(u16 val)
+{
+	long t;
+
+	t = ((val & READBACK_VALUE_MASK) >> READBACK_VALUE_SHIFT) * 1000;
+	if (val & READBACK_ADD_HALF)
+		t += 500;
+	if (val & READBACK_NEGATE)
+		t *= -1;
+
+	return t;
+}
+
+static int tegra_thermctl_get_temp(void *data, long *out_temp)
+{
+	struct tegra_thermctl_zone *zone = data;
+	u32 val;
+
+	val = (readl(zone->reg) >> zone->shift) & SENSOR_TEMP_MASK;
+	*out_temp = translate_temp(val);
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = {
+	.get_temp = tegra_thermctl_get_temp,
+};
+
+static const struct of_device_id tegra_soctherm_of_match[] = {
+	{ .compatible = "nvidia,tegra124-soctherm" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_soctherm_of_match);
+
+struct thermctl_zone_desc {
+	unsigned int offset;
+	unsigned int shift;
+};
+
+static const struct thermctl_zone_desc t124_thermctl_temp_zones[] = {
+	{ SENSOR_TEMP1, 16 },
+	{ SENSOR_TEMP2, 16 },
+	{ SENSOR_TEMP1, 0 },
+	{ SENSOR_TEMP2, 0 }
+};
+
+static int tegra_soctherm_probe(struct platform_device *pdev)
+{
+	struct tegra_soctherm *tegra;
+	struct thermal_zone_device *tz;
+	struct tsensor_shared_calibration shared_calib;
+	struct resource *res;
+	unsigned int i;
+	int err;
+
+	const struct tegra_tsensor *tsensors = t124_tsensors;
+
+	tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
+	if (!tegra)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tegra->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(tegra->regs))
+		return PTR_ERR(tegra->regs);
+
+	tegra->reset = devm_reset_control_get(&pdev->dev, "soctherm");
+	if (IS_ERR(tegra->reset)) {
+		dev_err(&pdev->dev, "can't get soctherm reset\n");
+		return PTR_ERR(tegra->reset);
+	}
+
+	tegra->clock_tsensor = devm_clk_get(&pdev->dev, "tsensor");
+	if (IS_ERR(tegra->clock_tsensor)) {
+		dev_err(&pdev->dev, "can't get tsensor clock\n");
+		return PTR_ERR(tegra->clock_tsensor);
+	}
+
+	tegra->clock_soctherm = devm_clk_get(&pdev->dev, "soctherm");
+	if (IS_ERR(tegra->clock_soctherm)) {
+		dev_err(&pdev->dev, "can't get soctherm clock\n");
+		return PTR_ERR(tegra->clock_soctherm);
+	}
+
+	reset_control_assert(tegra->reset);
+
+	err = clk_prepare_enable(tegra->clock_soctherm);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(tegra->clock_tsensor);
+	if (err) {
+		clk_disable_unprepare(tegra->clock_soctherm);
+		return err;
+	}
+
+	reset_control_deassert(tegra->reset);
+
+	/* Initialize raw sensors */
+
+	err = calculate_shared_calibration(&shared_calib);
+	if (err)
+		goto disable_clocks;
+
+	for (i = 0; i < ARRAY_SIZE(t124_tsensors); ++i) {
+		err = enable_tsensor(tegra, tsensors + i, &shared_calib);
+		if (err)
+			goto disable_clocks;
+	}
+
+	writel(SENSOR_PDIV_T124, tegra->regs + SENSOR_PDIV);
+	writel(SENSOR_HOTSPOT_OFF_T124, tegra->regs + SENSOR_HOTSPOT_OFF);
+
+	/* Initialize thermctl sensors */
+
+	for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) {
+		struct tegra_thermctl_zone *zone =
+			devm_kzalloc(&pdev->dev, sizeof(*zone), GFP_KERNEL);
+		if (!zone) {
+			err = -ENOMEM;
+			goto unregister_tzs;
+		}
+
+		zone->reg = tegra->regs + t124_thermctl_temp_zones[i].offset;
+		zone->shift = t124_thermctl_temp_zones[i].shift;
+
+		tz = thermal_zone_of_sensor_register(&pdev->dev, i, zone,
+						     &tegra_of_thermal_ops);
+		if (IS_ERR(tz)) {
+			err = PTR_ERR(tz);
+			dev_err(&pdev->dev, "failed to register sensor: %d\n",
+				err);
+			goto unregister_tzs;
+		}
+
+		tegra->thermctl_tzs[i] = tz;
+	}
+
+	return 0;
+
+unregister_tzs:
+	while (i--)
+		thermal_zone_of_sensor_unregister(&pdev->dev,
+						  tegra->thermctl_tzs[i]);
+
+disable_clocks:
+	clk_disable_unprepare(tegra->clock_tsensor);
+	clk_disable_unprepare(tegra->clock_soctherm);
+
+	return err;
+}
+
+static int tegra_soctherm_remove(struct platform_device *pdev)
+{
+	struct tegra_soctherm *tegra = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) {
+		thermal_zone_of_sensor_unregister(&pdev->dev,
+						  tegra->thermctl_tzs[i]);
+	}
+
+	clk_disable_unprepare(tegra->clock_tsensor);
+	clk_disable_unprepare(tegra->clock_soctherm);
+
+	return 0;
+}
+
+static struct platform_driver tegra_soctherm_driver = {
+	.probe = tegra_soctherm_probe,
+	.remove = tegra_soctherm_remove,
+	.driver = {
+		.name = "tegra-soctherm",
+		.of_match_table = tegra_soctherm_of_match,
+	},
+};
+module_platform_driver(tegra_soctherm_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra SOCTHERM thermal management driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 885adc345f57..97633ca4b0a7 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -78,6 +78,58 @@ static struct thermal_governor *__find_governor(const char *name)
 	return NULL;
 }
 
+/**
+ * bind_previous_governor() - bind the previous governor of the thermal zone
+ * @tz:		a valid pointer to a struct thermal_zone_device
+ * @failed_gov_name:	the name of the governor that failed to register
+ *
+ * Register the previous governor of the thermal zone after a new
+ * governor has failed to be bound.
+ */
+static void bind_previous_governor(struct thermal_zone_device *tz,
+				   const char *failed_gov_name)
+{
+	if (tz->governor && tz->governor->bind_to_tz) {
+		if (tz->governor->bind_to_tz(tz)) {
+			dev_err(&tz->device,
+				"governor %s failed to bind and the previous one (%s) failed to bind again, thermal zone %s has no governor\n",
+				failed_gov_name, tz->governor->name, tz->type);
+			tz->governor = NULL;
+		}
+	}
+}
+
+/**
+ * thermal_set_governor() - Switch to another governor
+ * @tz:		a valid pointer to a struct thermal_zone_device
+ * @new_gov:	pointer to the new governor
+ *
+ * Change the governor of thermal zone @tz.
+ *
+ * Return: 0 on success, an error if the new governor's bind_to_tz() failed.
+ */
+static int thermal_set_governor(struct thermal_zone_device *tz,
+				struct thermal_governor *new_gov)
+{
+	int ret = 0;
+
+	if (tz->governor && tz->governor->unbind_from_tz)
+		tz->governor->unbind_from_tz(tz);
+
+	if (new_gov && new_gov->bind_to_tz) {
+		ret = new_gov->bind_to_tz(tz);
+		if (ret) {
+			bind_previous_governor(tz, new_gov->name);
+
+			return ret;
+		}
+	}
+
+	tz->governor = new_gov;
+
+	return ret;
+}
+
 int thermal_register_governor(struct thermal_governor *governor)
 {
 	int err;
@@ -110,8 +162,15 @@ int thermal_register_governor(struct thermal_governor *governor)
 
 		name = pos->tzp->governor_name;
 
-		if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH))
-			pos->governor = governor;
+		if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) {
+			int ret;
+
+			ret = thermal_set_governor(pos, governor);
+			if (ret)
+				dev_err(&pos->device,
+					"Failed to set governor %s for thermal zone %s: %d\n",
+					governor->name, pos->type, ret);
+		}
 	}
 
 	mutex_unlock(&thermal_list_lock);
@@ -137,7 +196,7 @@ void thermal_unregister_governor(struct thermal_governor *governor)
 	list_for_each_entry(pos, &thermal_tz_list, node) {
 		if (!strncasecmp(pos->governor->name, governor->name,
 						THERMAL_NAME_LENGTH))
-			pos->governor = NULL;
+			thermal_set_governor(pos, NULL);
 	}
 
 	mutex_unlock(&thermal_list_lock);
@@ -221,7 +280,8 @@ static void print_bind_err_msg(struct thermal_zone_device *tz,
 
 static void __bind(struct thermal_zone_device *tz, int mask,
 			struct thermal_cooling_device *cdev,
-			unsigned long *limits)
+			unsigned long *limits,
+			unsigned int weight)
 {
 	int i, ret;
 
@@ -236,7 +296,8 @@ static void __bind(struct thermal_zone_device *tz, int mask,
 				upper = limits[i * 2 + 1];
 			}
 			ret = thermal_zone_bind_cooling_device(tz, i, cdev,
-							       upper, lower);
+							       upper, lower,
+							       weight);
 			if (ret)
 				print_bind_err_msg(tz, cdev, ret);
 		}
@@ -283,7 +344,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
 				continue;
 			tzp->tbp[i].cdev = cdev;
 			__bind(pos, tzp->tbp[i].trip_mask, cdev,
-			       tzp->tbp[i].binding_limits);
+			       tzp->tbp[i].binding_limits,
+			       tzp->tbp[i].weight);
 		}
 	}
 
@@ -322,7 +384,8 @@ static void bind_tz(struct thermal_zone_device *tz)
 				continue;
 			tzp->tbp[i].cdev = pos;
 			__bind(tz, tzp->tbp[i].trip_mask, pos,
-			       tzp->tbp[i].binding_limits);
+			       tzp->tbp[i].binding_limits,
+			       tzp->tbp[i].weight);
 		}
 	}
 exit:
@@ -371,7 +434,7 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 	tz->ops->get_trip_temp(tz, trip, &trip_temp);
 
 	/* If we have not crossed the trip_temp, we do not care. */
-	if (tz->temperature < trip_temp)
+	if (trip_temp <= 0 || tz->temperature < trip_temp)
 		return;
 
 	trace_thermal_zone_trip(tz, trip, trip_type);
@@ -735,7 +798,8 @@ passive_store(struct device *dev, struct device_attribute *attr,
 				thermal_zone_bind_cooling_device(tz,
 						THERMAL_TRIPS_NONE, cdev,
 						THERMAL_NO_LIMIT,
-						THERMAL_NO_LIMIT);
+						THERMAL_NO_LIMIT,
+						THERMAL_WEIGHT_DEFAULT);
 		}
 		mutex_unlock(&thermal_list_lock);
 		if (!tz->passive_delay)
@@ -781,15 +845,18 @@ policy_store(struct device *dev, struct device_attribute *attr,
 	snprintf(name, sizeof(name), "%s", buf);
 
 	mutex_lock(&thermal_governor_lock);
+	mutex_lock(&tz->lock);
 
 	gov = __find_governor(strim(name));
 	if (!gov)
 		goto exit;
 
-	tz->governor = gov;
-	ret = count;
+	ret = thermal_set_governor(tz, gov);
+	if (!ret)
+		ret = count;
 
 exit:
+	mutex_unlock(&tz->lock);
 	mutex_unlock(&thermal_governor_lock);
 	return ret;
 }
@@ -830,6 +897,158 @@ emul_temp_store(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
 #endif/*CONFIG_THERMAL_EMULATION*/
 
+static ssize_t
+sustainable_power_show(struct device *dev, struct device_attribute *devattr,
+		       char *buf)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+	if (tz->tzp)
+		return sprintf(buf, "%u\n", tz->tzp->sustainable_power);
+	else
+		return -EIO;
+}
+
+static ssize_t
+sustainable_power_store(struct device *dev, struct device_attribute *devattr,
+			const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	u32 sustainable_power;
+
+	if (!tz->tzp)
+		return -EIO;
+
+	if (kstrtou32(buf, 10, &sustainable_power))
+		return -EINVAL;
+
+	tz->tzp->sustainable_power = sustainable_power;
+
+	return count;
+}
+static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show,
+		sustainable_power_store);
+
+#define create_s32_tzp_attr(name)					\
+	static ssize_t							\
+	name##_show(struct device *dev, struct device_attribute *devattr, \
+		char *buf)						\
+	{								\
+	struct thermal_zone_device *tz = to_thermal_zone(dev);		\
+									\
+	if (tz->tzp)							\
+		return sprintf(buf, "%u\n", tz->tzp->name);		\
+	else								\
+		return -EIO;						\
+	}								\
+									\
+	static ssize_t							\
+	name##_store(struct device *dev, struct device_attribute *devattr, \
+		const char *buf, size_t count)				\
+	{								\
+		struct thermal_zone_device *tz = to_thermal_zone(dev);	\
+		s32 value;						\
+									\
+		if (!tz->tzp)						\
+			return -EIO;					\
+									\
+		if (kstrtos32(buf, 10, &value))				\
+			return -EINVAL;					\
+									\
+		tz->tzp->name = value;					\
+									\
+		return count;						\
+	}								\
+	static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store)
+
+create_s32_tzp_attr(k_po);
+create_s32_tzp_attr(k_pu);
+create_s32_tzp_attr(k_i);
+create_s32_tzp_attr(k_d);
+create_s32_tzp_attr(integral_cutoff);
+create_s32_tzp_attr(slope);
+create_s32_tzp_attr(offset);
+#undef create_s32_tzp_attr
+
+static struct device_attribute *dev_tzp_attrs[] = {
+	&dev_attr_sustainable_power,
+	&dev_attr_k_po,
+	&dev_attr_k_pu,
+	&dev_attr_k_i,
+	&dev_attr_k_d,
+	&dev_attr_integral_cutoff,
+	&dev_attr_slope,
+	&dev_attr_offset,
+};
+
+static int create_tzp_attrs(struct device *dev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dev_tzp_attrs); i++) {
+		int ret;
+		struct device_attribute *dev_attr = dev_tzp_attrs[i];
+
+		ret = device_create_file(dev, dev_attr);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * power_actor_get_max_power() - get the maximum power that a cdev can consume
+ * @cdev:	pointer to &thermal_cooling_device
+ * @tz:		a valid thermal zone device pointer
+ * @max_power:	pointer in which to store the maximum power
+ *
+ * Calculate the maximum power consumption in milliwats that the
+ * cooling device can currently consume and store it in @max_power.
+ *
+ * Return: 0 on success, -EINVAL if @cdev doesn't support the
+ * power_actor API or -E* on other error.
+ */
+int power_actor_get_max_power(struct thermal_cooling_device *cdev,
+			      struct thermal_zone_device *tz, u32 *max_power)
+{
+	if (!cdev_is_power_actor(cdev))
+		return -EINVAL;
+
+	return cdev->ops->state2power(cdev, tz, 0, max_power);
+}
+
+/**
+ * power_actor_set_power() - limit the maximum power that a cooling device can consume
+ * @cdev:	pointer to &thermal_cooling_device
+ * @instance:	thermal instance to update
+ * @power:	the power in milliwatts
+ *
+ * Set the cooling device to consume at most @power milliwatts.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device does not
+ * implement the power actor API or -E* for other failures.
+ */
+int power_actor_set_power(struct thermal_cooling_device *cdev,
+			  struct thermal_instance *instance, u32 power)
+{
+	unsigned long state;
+	int ret;
+
+	if (!cdev_is_power_actor(cdev))
+		return -EINVAL;
+
+	ret = cdev->ops->power2state(cdev, instance->tz, power, &state);
+	if (ret)
+		return ret;
+
+	instance->target = state;
+	cdev->updated = false;
+	thermal_cdev_update(cdev);
+
+	return 0;
+}
+
 static DEVICE_ATTR(type, 0444, type_show, NULL);
 static DEVICE_ATTR(temp, 0444, temp_show, NULL);
 static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -921,6 +1140,50 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 		return sprintf(buf, "%d\n", instance->trip);
 }
 
+static struct attribute *cooling_device_attrs[] = {
+	&dev_attr_cdev_type.attr,
+	&dev_attr_max_state.attr,
+	&dev_attr_cur_state.attr,
+	NULL,
+};
+
+static const struct attribute_group cooling_device_attr_group = {
+	.attrs = cooling_device_attrs,
+};
+
+static const struct attribute_group *cooling_device_attr_groups[] = {
+	&cooling_device_attr_group,
+	NULL,
+};
+
+static ssize_t
+thermal_cooling_device_weight_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct thermal_instance *instance;
+
+	instance = container_of(attr, struct thermal_instance, weight_attr);
+
+	return sprintf(buf, "%d\n", instance->weight);
+}
+
+static ssize_t
+thermal_cooling_device_weight_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct thermal_instance *instance;
+	int ret, weight;
+
+	ret = kstrtoint(buf, 0, &weight);
+	if (ret)
+		return ret;
+
+	instance = container_of(attr, struct thermal_instance, weight_attr);
+	instance->weight = weight;
+
+	return count;
+}
 /* Device management */
 
 /**
@@ -935,6 +1198,9 @@ thermal_cooling_device_trip_point_show(struct device *dev,
  * @lower:	the Minimum cooling state can be used for this trip point.
  *		THERMAL_NO_LIMIT means no lower limit,
  *		and the cooling device can be in cooling state 0.
+ * @weight:	The weight of the cooling device to be bound to the
+ *		thermal zone. Use THERMAL_WEIGHT_DEFAULT for the
+ *		default value
  *
  * This interface function bind a thermal cooling device to the certain trip
  * point of a thermal zone device.
@@ -945,7 +1211,8 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 				     int trip,
 				     struct thermal_cooling_device *cdev,
-				     unsigned long upper, unsigned long lower)
+				     unsigned long upper, unsigned long lower,
+				     unsigned int weight)
 {
 	struct thermal_instance *dev;
 	struct thermal_instance *pos;
@@ -988,6 +1255,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	dev->upper = upper;
 	dev->lower = lower;
 	dev->target = THERMAL_NO_TARGET;
+	dev->weight = weight;
 
 	result = get_idr(&tz->idr, &tz->lock, &dev->id);
 	if (result)
@@ -1008,6 +1276,16 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	if (result)
 		goto remove_symbol_link;
 
+	sprintf(dev->weight_attr_name, "cdev%d_weight", dev->id);
+	sysfs_attr_init(&dev->weight_attr.attr);
+	dev->weight_attr.attr.name = dev->weight_attr_name;
+	dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO;
+	dev->weight_attr.show = thermal_cooling_device_weight_show;
+	dev->weight_attr.store = thermal_cooling_device_weight_store;
+	result = device_create_file(&tz->device, &dev->weight_attr);
+	if (result)
+		goto remove_trip_file;
+
 	mutex_lock(&tz->lock);
 	mutex_lock(&cdev->lock);
 	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
@@ -1026,6 +1304,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
 	if (!result)
 		return 0;
 
+	device_remove_file(&tz->device, &dev->weight_attr);
+remove_trip_file:
 	device_remove_file(&tz->device, &dev->attr);
 remove_symbol_link:
 	sysfs_remove_link(&tz->device.kobj, dev->name);
@@ -1152,6 +1432,7 @@ __thermal_cooling_device_register(struct device_node *np,
 	cdev->ops = ops;
 	cdev->updated = false;
 	cdev->device.class = &thermal_class;
+	cdev->device.groups = cooling_device_attr_groups;
 	cdev->devdata = devdata;
 	dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
 	result = device_register(&cdev->device);
@@ -1161,21 +1442,6 @@ __thermal_cooling_device_register(struct device_node *np,
 		return ERR_PTR(result);
 	}
 
-	/* sys I/F */
-	if (type) {
-		result = device_create_file(&cdev->device, &dev_attr_cdev_type);
-		if (result)
-			goto unregister;
-	}
-
-	result = device_create_file(&cdev->device, &dev_attr_max_state);
-	if (result)
-		goto unregister;
-
-	result = device_create_file(&cdev->device, &dev_attr_cur_state);
-	if (result)
-		goto unregister;
-
 	/* Add 'this' new cdev to the global cdev list */
 	mutex_lock(&thermal_list_lock);
 	list_add(&cdev->node, &thermal_cdev_list);
@@ -1191,11 +1457,6 @@ __thermal_cooling_device_register(struct device_node *np,
 	mutex_unlock(&thermal_list_lock);
 
 	return cdev;
-
-unregister:
-	release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
-	device_unregister(&cdev->device);
-	return ERR_PTR(result);
 }
 
 /**
@@ -1406,7 +1667,8 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
 						tz->trip_temp_attrs[indx].name;
 		tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO;
 		tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show;
-		if (mask & (1 << indx)) {
+		if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) &&
+		    mask & (1 << indx)) {
 			tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR;
 			tz->trip_temp_attrs[indx].attr.store =
 							trip_point_temp_store;
@@ -1483,7 +1745,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
 struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	int trips, int mask, void *devdata,
 	struct thermal_zone_device_ops *ops,
-	const struct thermal_zone_params *tzp,
+	struct thermal_zone_params *tzp,
 	int passive_delay, int polling_delay)
 {
 	struct thermal_zone_device *tz;
@@ -1492,6 +1754,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	int result;
 	int count;
 	int passive = 0;
+	struct thermal_governor *governor;
 
 	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
 		return ERR_PTR(-EINVAL);
@@ -1586,13 +1849,24 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 	if (result)
 		goto unregister;
 
+	/* Add thermal zone params */
+	result = create_tzp_attrs(&tz->device);
+	if (result)
+		goto unregister;
+
 	/* Update 'this' zone's governor information */
 	mutex_lock(&thermal_governor_lock);
 
 	if (tz->tzp)
-		tz->governor = __find_governor(tz->tzp->governor_name);
+		governor = __find_governor(tz->tzp->governor_name);
 	else
-		tz->governor = def_governor;
+		governor = def_governor;
+
+	result = thermal_set_governor(tz, governor);
+	if (result) {
+		mutex_unlock(&thermal_governor_lock);
+		goto unregister;
+	}
 
 	mutex_unlock(&thermal_governor_lock);
 
@@ -1684,7 +1958,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 		device_remove_file(&tz->device, &dev_attr_mode);
 	device_remove_file(&tz->device, &dev_attr_policy);
 	remove_trip_attrs(tz);
-	tz->governor = NULL;
+	thermal_set_governor(tz, NULL);
 
 	thermal_remove_hwmon_sysfs(tz);
 	release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
@@ -1844,7 +2118,11 @@ static int __init thermal_register_governors(void)
 	if (result)
 		return result;
 
-	return thermal_gov_user_space_register();
+	result = thermal_gov_user_space_register();
+	if (result)
+		return result;
+
+	return thermal_gov_power_allocator_register();
 }
 
 static void thermal_unregister_governors(void)
@@ -1853,6 +2131,7 @@ static void thermal_unregister_governors(void)
 	thermal_gov_fair_share_unregister();
 	thermal_gov_bang_bang_unregister();
 	thermal_gov_user_space_unregister();
+	thermal_gov_power_allocator_unregister();
 }
 
 static int thermal_pm_notify(struct notifier_block *nb,
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 64ddc2f0ec5c..9e20e4dbedd4 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -47,8 +47,11 @@ struct thermal_instance {
 	unsigned long target;	/* expected cooling state */
 	char attr_name[THERMAL_NAME_LENGTH];
 	struct device_attribute attr;
+	char weight_attr_name[THERMAL_NAME_LENGTH];
+	struct device_attribute weight_attr;
 	struct list_head tz_node; /* node in tz->thermal_instances */
 	struct list_head cdev_node; /* node in cdev->thermal_instances */
+	unsigned int weight; /* The weight of the cooling device */
 };
 
 int thermal_register_governor(struct thermal_governor *);
@@ -86,13 +89,39 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
 static inline void thermal_gov_user_space_unregister(void) {}
 #endif /* CONFIG_THERMAL_GOV_USER_SPACE */
 
+#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
+int thermal_gov_power_allocator_register(void);
+void thermal_gov_power_allocator_unregister(void);
+#else
+static inline int thermal_gov_power_allocator_register(void) { return 0; }
+static inline void thermal_gov_power_allocator_unregister(void) {}
+#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
+
 /* device tree support */
 #ifdef CONFIG_THERMAL_OF
 int of_parse_thermal_zones(void);
 void of_thermal_destroy_zones(void);
+int of_thermal_get_ntrips(struct thermal_zone_device *);
+bool of_thermal_is_trip_valid(struct thermal_zone_device *, int);
+const struct thermal_trip *
+of_thermal_get_trip_points(struct thermal_zone_device *);
 #else
 static inline int of_parse_thermal_zones(void) { return 0; }
 static inline void of_thermal_destroy_zones(void) { }
+static inline int of_thermal_get_ntrips(struct thermal_zone_device *tz)
+{
+	return 0;
+}
+static inline bool of_thermal_is_trip_valid(struct thermal_zone_device *tz,
+					    int trip)
+{
+	return 0;
+}
+static inline const struct thermal_trip *
+of_thermal_get_trip_points(struct thermal_zone_device *tz)
+{
+	return NULL;
+}
 #endif
 
 #endif /* __THERMAL_CORE_H__ */
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 9eec26dc0448..68f53fcb8fb1 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -28,7 +28,6 @@
 #include <linux/kernel.h>
 #include <linux/workqueue.h>
 #include <linux/thermal.h>
-#include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
 #include <linux/of.h>
@@ -147,7 +146,8 @@ static int ti_thermal_bind(struct thermal_zone_device *thermal,
 	return thermal_zone_bind_cooling_device(thermal, 0, cdev,
 	/* bind with min and max states defined by cpu_cooling */
 						THERMAL_NO_LIMIT,
-						THERMAL_NO_LIMIT);
+						THERMAL_NO_LIMIT,
+						THERMAL_WEIGHT_DEFAULT);
 }
 
 /* Unbind callback functions for thermal zone */
@@ -286,6 +286,11 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
 	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
 }
 
+static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
+	.get_temp = __ti_thermal_get_temp,
+	.get_trend = __ti_thermal_get_trend,
+};
+
 static struct thermal_zone_device_ops ti_thermal_ops = {
 	.get_temp = ti_thermal_get_temp,
 	.get_trend = ti_thermal_get_trend,
@@ -333,8 +338,7 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
 
 	/* in case this is specified by DT */
 	data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id,
-					data, __ti_thermal_get_temp,
-					__ti_thermal_get_trend);
+					data, &ti_of_thermal_ops);
 	if (IS_ERR(data->ti_thermal)) {
 		/* Create thermal zone */
 		data->ti_thermal = thermal_zone_device_register(domain,
@@ -403,11 +407,6 @@ int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
 	if (!data)
 		return -EINVAL;
 
-	if (!cpufreq_get_current_driver()) {
-		dev_dbg(bgp->dev, "no cpufreq driver yet\n");
-		return -EPROBE_DEFER;
-	}
-
 	/* Register cooling device */
 	data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
 	if (IS_ERR(data->cool_dev)) {
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 553212f037c3..e8d695b3f54e 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -560,7 +560,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 		struct msi_msg msg;
 
 		get_cached_msi_msg(irq, &msg);
-		write_msi_msg(irq, &msg);
+		pci_write_msi_msg(irq, &msg);
 	}
 
 	ret = request_irq(irq, vfio_msihandler, 0,