-rw-r--r--Documentation/ABI/testing/sysfs-kernel-wakeup_reasons16
-rw-r--r--Documentation/DocBook/drm.tmpl2
-rw-r--r--Documentation/android.txt121
-rw-r--r--Documentation/cgroups/cgroups.txt9
-rw-r--r--Documentation/cpu-freq/governors.txt85
-rw-r--r--Documentation/device-mapper/verity.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/arch_timer.txt3
-rwxr-xr-xDocumentation/devicetree/bindings/arm/mali-midgard.txt43
-rw-r--r--Documentation/devicetree/bindings/drm/i2c/tda998x.txt43
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,hdlcd.txt64
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-designware.txt23
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,mhu.txt29
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm,scpi.txt121
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal.txt9
-rwxr-xr-xDocumentation/dma-buf-test-exporter.txt40
-rw-r--r--Documentation/filesystems/porting3
-rw-r--r--Documentation/filesystems/proc.txt10
-rw-r--r--Documentation/filesystems/squashfs.txt8
-rwxr-xr-xDocumentation/kds.txt268
-rw-r--r--Documentation/networking/ip-sysctl.txt41
-rw-r--r--Documentation/sync.txt75
-rw-r--r--Documentation/sysctl/vm.txt16
-rw-r--r--Documentation/thermal/cpu-cooling-api.txt167
-rw-r--r--Documentation/thermal/power_allocator.txt247
-rw-r--r--Documentation/thermal/sysfs-api.txt83
-rw-r--r--Documentation/trace/ftrace.txt29
-rw-r--r--Documentation/trace/tracepoints.txt24
-rw-r--r--android/configs/README15
-rw-r--r--android/configs/android-base.cfg152
-rw-r--r--android/configs/android-recommended.cfg121
-rw-r--r--arch/Kconfig1
-rw-r--r--arch/alpha/kernel/osf_sys.c15
-rw-r--r--arch/arc/boot/dts/abilis_tb100_dvk.dts10
-rw-r--r--arch/arc/boot/dts/abilis_tb101_dvk.dts10
-rw-r--r--arch/arm/Kconfig31
-rw-r--r--arch/arm/Kconfig.debug21
-rw-r--r--arch/arm/Makefile2
-rw-r--r--arch/arm/boot/Makefile1
-rw-r--r--arch/arm/boot/compressed/head.S2
-rw-r--r--arch/arm/boot/dts/vexpress-v2m-rs1.dtsi2
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts1
-rw-r--r--arch/arm/common/Kconfig4
-rw-r--r--arch/arm/common/Makefile1
-rw-r--r--arch/arm/common/fiq_glue.S118
-rw-r--r--arch/arm/common/fiq_glue_setup.c147
-rw-r--r--arch/arm/crypto/.gitignore1
-rw-r--r--arch/arm/crypto/Makefile24
-rw-r--r--arch/arm/crypto/aes_glue.c22
-rw-r--r--arch/arm/crypto/aes_glue.h19
-rw-r--r--arch/arm/crypto/aesbs-core.S_shipped2544
-rw-r--r--arch/arm/crypto/aesbs-glue.c434
-rw-r--r--arch/arm/crypto/bsaes-armv7.pl2467
-rw-r--r--arch/arm/crypto/sha1-armv7-neon.S634
-rw-r--r--arch/arm/crypto/sha1_glue.c58
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c197
-rw-r--r--arch/arm/crypto/sha256-armv4.pl716
-rw-r--r--arch/arm/crypto/sha256-core.S_shipped2808
-rw-r--r--arch/arm/crypto/sha256_glue.c246
-rw-r--r--arch/arm/crypto/sha256_glue.h23
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c172
-rw-r--r--arch/arm/crypto/sha512-armv7-neon.S455
-rw-r--r--arch/arm/crypto/sha512_neon_glue.c305
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/barrier.h15
-rw-r--r--arch/arm/include/asm/cacheflush.h1
-rw-r--r--arch/arm/include/asm/crypto/sha1.h10
-rw-r--r--arch/arm/include/asm/fiq_glue.h33
-rw-r--r--arch/arm/include/asm/hardirq.h2
-rw-r--r--arch/arm/include/asm/hardware/cache-l2x0.h3
-rw-r--r--arch/arm/include/asm/irq.h3
-rw-r--r--arch/arm/include/asm/mach/mmc.h28
-rw-r--r--arch/arm/include/asm/neon.h36
-rw-r--r--arch/arm/include/asm/rodata.h32
-rw-r--r--arch/arm/include/asm/smp.h1
-rw-r--r--arch/arm/include/asm/syscall.h3
-rw-r--r--arch/arm/include/asm/unistd.h2
-rw-r--r--arch/arm/include/uapi/asm/unistd.h6
-rw-r--r--arch/arm/kernel/calls.S5
-rw-r--r--arch/arm/kernel/ftrace.c15
-rw-r--r--arch/arm/kernel/kgdb.c4
-rw-r--r--arch/arm/kernel/process.c127
-rw-r--r--arch/arm/kernel/ptrace.c7
-rw-r--r--arch/arm/kernel/smp.c58
-rw-r--r--arch/arm/mm/Makefile1
-rw-r--r--arch/arm/mm/cache-l2x0.c76
-rw-r--r--arch/arm/mm/cache-v6.S17
-rw-r--r--arch/arm/mm/fault.c4
-rw-r--r--arch/arm/mm/mmu.c106
-rw-r--r--arch/arm/mm/rodata.c159
-rw-r--r--arch/arm/vfp/vfpmodule.c49
-rw-r--r--arch/arm64/Kconfig95
-rw-r--r--arch/arm64/Makefile9
-rw-r--r--arch/arm64/boot/.gitignore1
-rw-r--r--arch/arm64/boot/Makefile13
-rw-r--r--arch/arm64/boot/dts/Makefile17
-rw-r--r--arch/arm64/boot/dts/clcd-panels.dtsi52
-rw-r--r--arch/arm64/boot/dts/foundation-v8.dts232
-rw-r--r--arch/arm64/boot/dts/fvp-base-gicv2-psci.dts181
-rw-r--r--arch/arm64/boot/dts/fvp-foundation-gicv2-psci.dts51
-rw-r--r--arch/arm64/boot/dts/juno-clocks.dtsi31
-rw-r--r--arch/arm64/boot/dts/juno.dts201
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-aemv8a.dts194
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi17
-rw-r--r--arch/arm64/crypto/Makefile2
-rw-r--r--arch/arm64/crypto/aes-glue.c12
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S84
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c5
-rw-r--r--arch/arm64/include/asm/cmpxchg.h8
-rw-r--r--arch/arm64/include/asm/compat.h7
-rw-r--r--arch/arm64/include/asm/fpsimd.h23
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h35
-rw-r--r--arch/arm64/include/asm/mmu_context.h9
-rw-r--r--arch/arm64/include/asm/neon.h18
-rw-r--r--arch/arm64/include/asm/opcodes.h231
-rw-r--r--arch/arm64/include/asm/ptrace.h17
-rw-r--r--arch/arm64/include/asm/seccomp.h25
-rw-r--r--arch/arm64/include/asm/suspend.h1
-rw-r--r--arch/arm64/include/asm/syscall.h14
-rw-r--r--arch/arm64/include/asm/thread_info.h8
-rw-r--r--arch/arm64/include/asm/traps.h13
-rw-r--r--arch/arm64/include/asm/unistd.h20
-rw-r--r--arch/arm64/include/asm/unistd32.h1164
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h1
-rw-r--r--arch/arm64/kernel/Makefile7
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S24
-rw-r--r--arch/arm64/kernel/entry.S9
-rw-r--r--arch/arm64/kernel/fpsimd.c187
-rw-r--r--arch/arm64/kernel/kuser32.S2
-rw-r--r--arch/arm64/kernel/opcodes.c72
-rw-r--r--arch/arm64/kernel/process.c68
-rw-r--r--arch/arm64/kernel/psci.c107
-rw-r--r--arch/arm64/kernel/ptrace.c24
-rw-r--r--arch/arm64/kernel/setup.c10
-rw-r--r--arch/arm64/kernel/signal.c13
-rw-r--r--arch/arm64/kernel/signal32.c19
-rw-r--r--arch/arm64/kernel/sleep.S47
-rw-r--r--arch/arm64/kernel/suspend.c48
-rw-r--r--arch/arm64/kernel/swp_emulate.c223
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kernel/traps.c44
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/proc.S15
-rw-r--r--arch/ia64/include/asm/barrier.h23
-rw-r--r--arch/metag/include/asm/barrier.h15
-rw-r--r--arch/mips/include/asm/barrier.h15
-rw-r--r--arch/parisc/hpux/fs.c16
-rw-r--r--arch/powerpc/include/asm/barrier.h21
-rw-r--r--arch/s390/include/asm/barrier.h15
-rw-r--r--arch/s390/include/asm/syscall.h5
-rw-r--r--arch/sparc/include/asm/barrier_64.h15
-rw-r--r--arch/um/include/shared/frame_kern.h8
-rw-r--r--arch/um/kernel/signal.c4
-rw-r--r--arch/um/os-Linux/signal.c8
-rw-r--r--arch/um/os-Linux/skas/process.c10
-rw-r--r--arch/x86/include/asm/barrier.h43
-rw-r--r--arch/x86/include/asm/idle.h7
-rw-r--r--arch/x86/include/asm/syscall.h8
-rw-r--r--arch/x86/kernel/process.c17
-rw-r--r--arch/x86/kvm/mmutrace.h2
-rw-r--r--arch/x86/syscalls/syscall_32.tbl4
-rw-r--r--arch/x86/syscalls/syscall_64.tbl4
-rw-r--r--arch/x86/um/signal.c1
-rw-r--r--block/genhd.c17
-rw-r--r--block/partition-generic.c11
-rw-r--r--crypto/Kconfig49
-rw-r--r--crypto/ablk_helper.c4
-rw-r--r--drivers/Kconfig4
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/acpi/thermal.c9
-rw-r--r--drivers/android/Kconfig37
-rw-r--r--drivers/android/Makefile3
-rw-r--r--drivers/android/binder.c (renamed from drivers/staging/android/binder.c)596
-rw-r--r--drivers/android/binder_trace.h (renamed from drivers/staging/android/binder_trace.h)14
-rw-r--r--drivers/base/Makefile2
-rw-r--r--drivers/base/cpu.c1
-rw-r--r--drivers/base/dma-mapping.c12
-rwxr-xr-xdrivers/base/dma_buf_lock/src/Kbuild18
-rwxr-xr-xdrivers/base/dma_buf_lock/src/Makefile32
-rwxr-xr-xdrivers/base/dma_buf_lock/src/dma_buf_lock.c481
-rwxr-xr-xdrivers/base/dma_buf_lock/src/dma_buf_lock.h42
-rwxr-xr-xdrivers/base/dma_buf_test_exporter/Kbuild18
-rwxr-xr-xdrivers/base/dma_buf_test_exporter/Kconfig20
-rwxr-xr-xdrivers/base/dma_buf_test_exporter/Makefile30
-rwxr-xr-xdrivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c617
-rw-r--r--drivers/base/firmware_class.c46
-rwxr-xr-xdrivers/base/kds/Kbuild18
-rwxr-xr-xdrivers/base/kds/Kconfig20
-rwxr-xr-xdrivers/base/kds/Makefile37
-rwxr-xr-xdrivers/base/kds/kds.c545
-rw-r--r--drivers/base/power/main.c79
-rw-r--r--drivers/base/power/wakeup.c32
-rw-r--r--drivers/base/syscore.c3
-rwxr-xr-xdrivers/base/ump/Kbuild18
-rwxr-xr-xdrivers/base/ump/Kconfig26
-rwxr-xr-xdrivers/base/ump/docs/Doxyfile125
-rwxr-xr-xdrivers/base/ump/example_kernel_api.c73
-rwxr-xr-xdrivers/base/ump/example_user_api.c153
-rwxr-xr-xdrivers/base/ump/src/Kbuild50
-rwxr-xr-xdrivers/base/ump/src/Makefile81
-rwxr-xr-xdrivers/base/ump/src/Makefile.common19
-rwxr-xr-xdrivers/base/ump/src/arch-arm/config.h27
-rwxr-xr-xdrivers/base/ump/src/arch-arm64/config.h27
-rwxr-xr-xdrivers/base/ump/src/common/ump_kernel_core.c756
-rwxr-xr-xdrivers/base/ump/src/common/ump_kernel_core.h228
-rwxr-xr-xdrivers/base/ump/src/common/ump_kernel_descriptor_mapping.c162
-rwxr-xr-xdrivers/base/ump/src/common/ump_kernel_descriptor_mapping.h94
-rwxr-xr-xdrivers/base/ump/src/common/ump_kernel_priv.h80
-rwxr-xr-xdrivers/base/ump/src/imports/ion/Makefile53
-rwxr-xr-xdrivers/base/ump/src/imports/ion/ump_kernel_import_ion.c204
-rwxr-xr-xdrivers/base/ump/src/linux/ump_kernel_linux.c831
-rwxr-xr-xdrivers/base/ump/src/linux/ump_kernel_linux_mem.c250
-rwxr-xr-xdrivers/base/ump/src/linux/ump_kernel_linux_mem.h26
-rwxr-xr-xdrivers/base/ump/src/ump_arch.h42
-rwxr-xr-xdrivers/base/ump/ump_ref_drv.h33
-rw-r--r--drivers/char/Kconfig17
-rw-r--r--drivers/char/Makefile1
-rw-r--r--drivers/char/dcc_tty.c326
-rw-r--r--drivers/char/mem.c17
-rw-r--r--drivers/clk/Kconfig10
-rw-r--r--drivers/clk/Makefile1
-rw-r--r--drivers/clk/clk-scpi.c357
-rw-r--r--drivers/clk/clk.c2
-rw-r--r--drivers/clk/clkdev.c7
-rw-r--r--drivers/clk/versatile/clk-vexpress-osc.c3
-rw-r--r--drivers/clocksource/arm_arch_timer.c16
-rw-r--r--drivers/clocksource/clksrc-of.c3
-rw-r--r--drivers/cpufreq/Kconfig28
-rw-r--r--drivers/cpufreq/Kconfig.arm10
-rw-r--r--drivers/cpufreq/Makefile2
-rw-r--r--drivers/cpufreq/arm_big_little.c11
-rw-r--r--drivers/cpufreq/cpufreq.c48
-rw-r--r--drivers/cpufreq/cpufreq_governor.c45
-rw-r--r--drivers/cpufreq/cpufreq_governor.h1
-rw-r--r--drivers/cpufreq/cpufreq_interactive.c1375
-rw-r--r--drivers/cpufreq/cpufreq_stats.c249
-rw-r--r--drivers/cpufreq/scpi-cpufreq.c103
-rw-r--r--drivers/cpuidle/governors/menu.c9
-rw-r--r--drivers/devfreq/devfreq.c75
-rw-r--r--drivers/gator/Makefile2
-rw-r--r--drivers/gator/gator.h12
-rw-r--r--drivers/gator/gator_cookies.c110
-rw-r--r--drivers/gator/gator_events_armv6.c1
-rw-r--r--drivers/gator/gator_events_armv7.c1
-rw-r--r--drivers/gator/gator_events_block.c1
-rw-r--r--drivers/gator/gator_events_irq.c1
-rw-r--r--drivers/gator/gator_events_l2c-310.c1
-rw-r--r--drivers/gator/gator_events_mali_4xx.c40
-rw-r--r--drivers/gator/gator_events_mali_common.c10
-rw-r--r--drivers/gator/gator_events_mali_midgard.c23
-rw-r--r--drivers/gator/gator_events_mali_midgard_hw.c121
-rw-r--r--drivers/gator/gator_events_mali_midgard_hw_test.c43
-rw-r--r--drivers/gator/gator_events_meminfo.c7
-rw-r--r--drivers/gator/gator_events_mmapped.c1
-rw-r--r--drivers/gator/gator_events_net.c7
-rw-r--r--drivers/gator/gator_events_perf_pmu.c32
-rw-r--r--drivers/gator/gator_events_sched.c1
-rw-r--r--drivers/gator/gator_events_scorpion.c1
-rw-r--r--drivers/gator/gator_fs.c2
-rw-r--r--drivers/gator/gator_hrtimer_gator.c8
-rw-r--r--drivers/gator/gator_main.c26
-rw-r--r--drivers/gator/gator_marshaling.c4
-rw-r--r--drivers/gator/gator_trace_gpu.c2
-rw-r--r--drivers/gator/gator_trace_sched.c8
-rw-r--r--drivers/gator/mali_midgard.mk3
-rw-r--r--drivers/gpio/gpiolib.c10
-rw-r--r--drivers/gpu/Makefile1
-rwxr-xr-xdrivers/gpu/arm/Kbuild17
-rwxr-xr-xdrivers/gpu/arm/Kconfig21
-rwxr-xr-xdrivers/gpu/arm/midgard/Kbuild261
-rwxr-xr-xdrivers/gpu/arm/midgard/Kconfig185
-rw-r--r--drivers/gpu/arm/midgard/MALI_SOFTWARE_VERSION1
-rwxr-xr-xdrivers/gpu/arm/midgard/Makefile36
-rwxr-xr-xdrivers/gpu/arm/midgard/Makefile.kbase17
-rwxr-xr-xdrivers/gpu/arm/midgard/docs/Doxyfile126
-rwxr-xr-xdrivers/gpu/arm/midgard/docs/policy_operation_diagram.dot112
-rwxr-xr-xdrivers/gpu/arm/midgard/docs/policy_overview.dot63
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_hwconfig.h31
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_hwconfig_features.h117
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_hwconfig_issues.h698
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_kernel.h1569
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_kernel_sync.h47
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_mem_priv.h52
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_base_vendor_specific_func.h26
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase.h566
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_10969_workaround.c204
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_10969_workaround.h23
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_cache_policy.c41
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_cache_policy.h47
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_config.c292
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_config.h722
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_config_defaults.h288
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_context.c280
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_core_linux.c3368
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_cpuprops.c124
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_cpuprops.h55
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_debug.c39
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_debug.h164
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_defs.h967
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_devfreq.c254
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_devfreq.h24
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_device.c817
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_disjoint_events.c77
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_event.c186
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gator.h44
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gator_api.c315
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gator_api.h219
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h1095
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c112
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h43
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gpuprops.c335
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gpuprops.h54
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h96
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_hw.c191
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_hw.h52
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_instr.c619
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_jd.c1727
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c180
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h66
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_jm.c1532
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_jm.h199
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js.c2156
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js.h931
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_affinity.c382
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_affinity.h157
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c329
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h158
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_defs.h481
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_policy.h767
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c1482
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h167
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_linux.h47
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem.c1310
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem.h695
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_alloc.c304
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_alloc.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c410
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_linux.c1823
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_linux.h71
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h48
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c137
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h58
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mmu.c1661
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mmu_hw.h121
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.c294
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.h59
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_platform_fake.c142
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm.c454
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm.h888
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_always_on.c64
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_always_on.h68
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_ca.c173
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_ca.h170
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c63
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h37
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c70
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h60
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_demand.c73
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_demand.h57
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_driver.c1013
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_metrics.c410
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c41
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_policy.c822
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_pm_policy.h277
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h40
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_replay.c1284
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_security.c78
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_security.h52
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_softjobs.c442
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_sync.c189
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_sync.h83
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_sync_user.c158
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_trace_defs.h264
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_trace_timeline.c231
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_trace_timeline.h380
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h134
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_uku.h477
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_utility.c33
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_kbase_utility.h37
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_linux_kbase_trace.h201
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_linux_trace.h129
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_midg_regmap.h527
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_timeline.h396
-rwxr-xr-xdrivers/gpu/arm/midgard/mali_uk.h143
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h481
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h617
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h27
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h27
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h92
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h192
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/mali_malisw.h242
-rwxr-xr-xdrivers/gpu/arm/midgard/malisw/mali_stdtypes.h265
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/Kbuild21
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/Kconfig24
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/juno_soc/Kbuild19
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c75
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c156
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h26
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress/Kbuild18
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c292
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c180
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h28
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild16
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c279
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild18
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c292
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c71
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h28
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/Kbuild17
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_platform.h33
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c293
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c178
-rwxr-xr-xdrivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h26
-rwxr-xr-xdrivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h54
-rw-r--r--drivers/gpu/drm/Kconfig2
-rw-r--r--drivers/gpu/drm/Makefile1
-rw-r--r--drivers/gpu/drm/arm/Kconfig26
-rw-r--r--drivers/gpu/drm/arm/Makefile4
-rw-r--r--drivers/gpu/drm/arm/hdlcd_crtc.c291
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.c433
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.h82
-rw-r--r--drivers/gpu/drm/arm/hdlcd_fb.c682
-rw-r--r--drivers/gpu/drm/arm/hdlcd_hdmi_encoder.c236
-rw-r--r--drivers/gpu/drm/arm/hdlcd_regs.h87
-rw-r--r--drivers/gpu/drm/arm/hdlcd_vexpress_encoder.c242
-rw-r--r--drivers/gpu/drm/arm/hdlcd_virt_encoder.c199
-rw-r--r--drivers/gpu/drm/drm_edid.c15
-rw-r--r--drivers/gpu/drm/drm_encoder_slave.c6
-rw-r--r--drivers/gpu/drm/drm_gem.c83
-rw-r--r--drivers/gpu/drm/drm_gem_cma_helper.c16
-rw-r--r--drivers/gpu/drm/i2c/Kconfig7
-rw-r--r--drivers/gpu/drm/i2c/Makefile5
-rw-r--r--drivers/gpu/drm/i2c/sii9022_drv.c348
-rw-r--r--drivers/gpu/drm/i2c/tda998x_codec.c248
-rw-r--r--drivers/gpu/drm/i2c/tda998x_drv.c1009
-rw-r--r--drivers/gpu/drm/i2c/tda998x_drv.h32
-rwxr-xr-xdrivers/gpu/drm/pl111/Kbuild28
-rwxr-xr-xdrivers/gpu/drm/pl111/Kconfig23
-rwxr-xr-xdrivers/gpu/drm/pl111/Makefile32
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_clcd_ext.h95
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm.h270
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_connector.c170
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_crtc.c421
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_cursor.c331
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_device.c326
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_dma_buf.c615
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_encoder.c107
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_fb.c202
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_funcs.h130
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_gem.c401
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_pl111.c417
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_platform.c151
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_suspend.c43
-rwxr-xr-xdrivers/gpu/drm/pl111/pl111_drm_vma.c307
-rw-r--r--drivers/hid/hid-debug.c15
-rw-r--r--drivers/hid/hid-input.c27
-rw-r--r--drivers/hid/hid-multitouch.c24
-rw-r--r--drivers/hwmon/Kconfig8
-rw-r--r--drivers/hwmon/Makefile1
-rw-r--r--drivers/hwmon/v2m-juno.c388
-rw-r--r--drivers/i2c/busses/i2c-designware-core.c56
-rw-r--r--drivers/i2c/busses/i2c-designware-core.h15
-rw-r--r--drivers/i2c/busses/i2c-designware-platdrv.c37
-rw-r--r--drivers/iio/industrialio-event.c12
-rw-r--r--drivers/input/Kconfig19
-rw-r--r--drivers/input/Makefile3
-rw-r--r--drivers/input/evdev.c53
-rw-r--r--drivers/input/keycombo.c261
-rw-r--r--drivers/input/keyreset.c145
-rw-r--r--drivers/input/misc/Kconfig16
-rw-r--r--drivers/input/misc/Makefile2
-rw-r--r--drivers/input/misc/gpio_axis.c192
-rw-r--r--drivers/input/misc/gpio_event.c228
-rw-r--r--drivers/input/misc/gpio_input.c390
-rw-r--r--drivers/input/misc/gpio_matrix.c441
-rw-r--r--drivers/input/misc/gpio_output.c97
-rw-r--r--drivers/input/misc/keychord.c391
-rw-r--r--drivers/mailbox/Kconfig28
-rw-r--r--drivers/mailbox/Makefile2
-rw-r--r--drivers/mailbox/arm_mhu.c272
-rw-r--r--drivers/mailbox/mailbox.c2
-rw-r--r--drivers/mailbox/scpi_protocol.c795
-rw-r--r--drivers/md/dm-verity.c98
-rw-r--r--drivers/misc/Kconfig10
-rw-r--r--drivers/misc/Makefile2
-rw-r--r--drivers/misc/uid_cputime.c237
-rw-r--r--drivers/misc/uid_stat.c152
-rw-r--r--drivers/mmc/card/Kconfig9
-rw-r--r--drivers/mmc/card/block.c36
-rw-r--r--drivers/mmc/core/Kconfig15
-rw-r--r--drivers/mmc/core/core.c110
-rw-r--r--drivers/mmc/core/host.c10
-rw-r--r--drivers/mmc/core/sd.c83
-rw-r--r--drivers/mmc/core/sdio.c111
-rw-r--r--drivers/mmc/core/sdio_bus.c13
-rwxr-xr-x[-rw-r--r--]drivers/mmc/core/sdio_io.c33
-rw-r--r--drivers/mtd/nand/Kconfig10
-rw-r--r--drivers/net/ppp/Kconfig17
-rw-r--r--drivers/net/ppp/Makefile2
-rw-r--r--drivers/net/ppp/pppolac.c449
-rw-r--r--drivers/net/ppp/pppopns.c428
-rw-r--r--drivers/net/tun.c6
-rw-r--r--drivers/net/wireless/Kconfig5
-rw-r--r--drivers/of/fdt.c64
-rw-r--r--drivers/of/platform.c2
-rw-r--r--drivers/platform/x86/acerhdf.c3
-rw-r--r--drivers/power/power_supply_core.c31
-rw-r--r--drivers/power/power_supply_sysfs.c11
-rw-r--r--drivers/scsi/scsi_trace.c16
-rw-r--r--drivers/staging/android/Kconfig46
-rw-r--r--drivers/staging/android/Makefile6
-rw-r--r--drivers/staging/android/TODO10
-rw-r--r--drivers/staging/android/alarm-dev.c443
-rw-r--r--drivers/staging/android/android_alarm.h81
-rw-r--r--drivers/staging/android/ashmem.c69
-rw-r--r--drivers/staging/android/ashmem.h30
-rw-r--r--drivers/staging/android/fiq_debugger/Kconfig49
-rw-r--r--drivers/staging/android/fiq_debugger/Makefile4
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_debugger.c1213
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_debugger.h64
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_debugger_arm.c240
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c202
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_debugger_priv.h37
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h94
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_watchdog.c56
-rw-r--r--drivers/staging/android/fiq_debugger/fiq_watchdog.h20
-rw-r--r--drivers/staging/android/ion/Kconfig35
-rw-r--r--drivers/staging/android/ion/Makefile10
-rw-r--r--drivers/staging/android/ion/compat_ion.c195
-rw-r--r--drivers/staging/android/ion/compat_ion.h30
-rw-r--r--drivers/staging/android/ion/ion.c1646
-rw-r--r--drivers/staging/android/ion/ion.h204
-rw-r--r--drivers/staging/android/ion/ion_carveout_heap.c194
-rw-r--r--drivers/staging/android/ion/ion_chunk_heap.c195
-rw-r--r--drivers/staging/android/ion/ion_cma_heap.c199
-rw-r--r--drivers/staging/android/ion/ion_dummy_driver.c158
-rw-r--r--drivers/staging/android/ion/ion_heap.c371
-rw-r--r--drivers/staging/android/ion/ion_page_pool.c190
-rw-r--r--drivers/staging/android/ion/ion_priv.h406
-rw-r--r--drivers/staging/android/ion/ion_system_heap.c451
-rw-r--r--drivers/staging/android/ion/ion_test.c282
-rw-r--r--drivers/staging/android/ion/tegra/Makefile1
-rw-r--r--drivers/staging/android/ion/tegra/tegra_ion.c84
-rw-r--r--drivers/staging/android/logger.c851
-rw-r--r--drivers/staging/android/logger.h89
-rw-r--r--drivers/staging/android/lowmemorykiller.c110
-rw-r--r--drivers/staging/android/sw_sync.c2
-rw-r--r--drivers/staging/android/sw_sync.h37
-rw-r--r--drivers/staging/android/sync.c22
-rw-r--r--drivers/staging/android/sync.h86
-rw-r--r--drivers/staging/android/timed_gpio.c1
-rw-r--r--drivers/staging/android/uapi/ashmem.h48
-rw-r--r--drivers/staging/android/uapi/ion.h196
-rw-r--r--drivers/staging/android/uapi/ion_test.h70
-rw-r--r--drivers/staging/android/uapi/sw_sync.h32
-rw-r--r--drivers/staging/android/uapi/sync.h97
-rw-r--r--drivers/switch/Kconfig15
-rw-r--r--drivers/switch/Makefile4
-rw-r--r--drivers/switch/switch_class.c174
-rw-r--r--drivers/switch/switch_gpio.c172
-rw-r--r--drivers/thermal/Kconfig47
-rw-r--r--drivers/thermal/Makefile5
-rw-r--r--drivers/thermal/cpu_cooling.c598
-rw-r--r--drivers/thermal/db8500_thermal.c2
-rw-r--r--drivers/thermal/devfreq_cooling.c372
-rw-r--r--drivers/thermal/fair_share.c51
-rw-r--r--drivers/thermal/imx_thermal.c649
-rw-r--r--drivers/thermal/of-thermal.c29
-rw-r--r--drivers/thermal/power_allocator.c538
-rw-r--r--drivers/thermal/scpi-thermal.c308
-rw-r--r--drivers/thermal/step_wise.c5
-rw-r--r--drivers/thermal/thermal_core.c317
-rw-r--r--drivers/thermal/thermal_core.h11
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-thermal-common.c437
-rw-r--r--drivers/tty/serial/serial_core.c3
-rw-r--r--drivers/usb/gadget/Kconfig21
-rw-r--r--drivers/usb/gadget/Makefile2
-rw-r--r--drivers/usb/gadget/android.c1587
-rw-r--r--drivers/usb/gadget/composite.c10
-rw-r--r--drivers/usb/gadget/f_accessory.c1215
-rw-r--r--drivers/usb/gadget/f_audio_source.c834
-rw-r--r--drivers/usb/gadget/f_fs.c7
-rw-r--r--drivers/usb/gadget/f_midi.c56
-rw-r--r--drivers/usb/gadget/f_mtp.c1287
-rw-r--r--drivers/usb/gadget/f_rndis.c40
-rw-r--r--drivers/usb/gadget/rndis.c122
-rw-r--r--drivers/usb/gadget/rndis.h2
-rw-r--r--drivers/usb/gadget/u_ether.c324
-rw-r--r--drivers/usb/gadget/u_ether.h3
-rw-r--r--drivers/usb/gadget/u_serial.c1
-rw-r--r--drivers/usb/gadget/udc-core.c10
-rw-r--r--drivers/usb/phy/Kconfig8
-rw-r--r--drivers/usb/phy/Makefile1
-rw-r--r--drivers/usb/phy/otg-wakelock.c173
-rw-r--r--drivers/video/Kconfig3
-rw-r--r--drivers/video/Makefile1
-rw-r--r--drivers/video/adf/Kconfig14
-rw-r--r--drivers/video/adf/Makefile15
-rw-r--r--drivers/video/adf/adf.c1188
-rw-r--r--drivers/video/adf/adf.h71
-rw-r--r--drivers/video/adf/adf_client.c811
-rw-r--r--drivers/video/adf/adf_fbdev.c665
-rw-r--r--drivers/video/adf/adf_fops.c957
-rw-r--r--drivers/video/adf/adf_fops.h37
-rw-r--r--drivers/video/adf/adf_fops32.c217
-rw-r--r--drivers/video/adf/adf_fops32.h78
-rw-r--r--drivers/video/adf/adf_format.c280
-rw-r--r--drivers/video/adf/adf_memblock.c160
-rw-r--r--drivers/video/adf/adf_sysfs.c296
-rw-r--r--drivers/video/adf/adf_sysfs.h33
-rw-r--r--drivers/video/adf/adf_trace.h93
-rw-r--r--drivers/w1/masters/ds2482.c47
-rw-r--r--fs/adfs/super.c1
-rw-r--r--fs/affs/super.c1
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/cifs/cifsfs.c1
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/coda/dir.c19
-rw-r--r--fs/coda/inode.c1
-rw-r--r--fs/compat.c43
-rw-r--r--fs/cramfs/inode.c1
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/ecryptfs/file.c18
-rw-r--r--fs/efs/super.c1
-rw-r--r--fs/eventpoll.c7
-rw-r--r--fs/exec.c6
-rw-r--r--fs/exportfs/expfs.c14
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ioctl.c6
-rw-r--r--fs/ext4/mballoc.c28
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/fat/dir.c11
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/inode.c11
-rw-r--r--fs/freevxfs/vxfs_super.c1
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/gfs2/export.c6
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/super.c1
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/jffs2/super.c1
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/ncpfs/inode.c1
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/nfs4recover.c20
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/nilfs2/super.c1
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/proc/base.c8
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c79
-rw-r--r--fs/pstore/Kconfig10
-rw-r--r--fs/pstore/Makefile2
-rw-r--r--fs/pstore/inode.c18
-rw-r--r--fs/pstore/internal.h6
-rw-r--r--fs/pstore/platform.c1
-rw-r--r--fs/pstore/pmsg.c114
-rw-r--r--fs/pstore/ram.c59
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/qnx6/inode.c1
-rw-r--r--fs/readdir.c61
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/select.c4
-rw-r--r--fs/seq_file.c30
-rw-r--r--fs/squashfs/Kconfig87
-rw-r--r--fs/squashfs/Makefile6
-rw-r--r--fs/squashfs/block.c41
-rw-r--r--fs/squashfs/cache.c28
-rw-r--r--fs/squashfs/decompressor.c66
-rw-r--r--fs/squashfs/decompressor.h28
-rw-r--r--fs/squashfs/decompressor_multi.c198
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c97
-rw-r--r--fs/squashfs/decompressor_single.c85
-rw-r--r--fs/squashfs/dir.c55
-rw-r--r--fs/squashfs/file.c142
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c176
-rw-r--r--fs/squashfs/lz4_wrapper.c142
-rw-r--r--fs/squashfs/lzo_wrapper.c47
-rw-r--r--fs/squashfs/namei.c8
-rw-r--r--fs/squashfs/page_actor.c100
-rw-r--r--fs/squashfs/page_actor.h81
-rw-r--r--fs/squashfs/squashfs.h22
-rw-r--r--fs/squashfs/squashfs_fs.h6
-rw-r--r--fs/squashfs/squashfs_fs_sb.h4
-rw-r--r--fs/squashfs/super.c16
-rw-r--r--fs/squashfs/xz_wrapper.c105
-rw-r--r--fs/squashfs/zlib_wrapper.c64
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/timerfd.c132
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/udf/super.c1
-rw-r--r--fs/ufs/super.c1
-rw-r--r--fs/xfs/xfs_super.c1
-rw-r--r--include/asm-generic/barrier.h15
-rw-r--r--include/asm-generic/seccomp.h29
-rw-r--r--include/asm-generic/syscall.h4
-rw-r--r--include/drm/drmP.h2
-rw-r--r--include/drm/i2c/tda998x.h31
-rw-r--r--include/linux/Kbuild2
-rw-r--r--include/linux/alarmtimer.h4
-rw-r--r--include/linux/amba/mmci.h12
-rw-r--r--include/linux/android_aid.h28
-rw-r--r--include/linux/cgroup.h12
-rw-r--r--include/linux/compiler.h9
-rw-r--r--include/linux/cpu.h7
-rw-r--r--include/linux/cpu_cooling.h39
-rw-r--r--include/linux/cpufreq.h5
-rw-r--r--include/linux/debug_locks.h4
-rw-r--r--include/linux/devfreq.h3
-rw-r--r--include/linux/devfreq_cooling.h93
-rwxr-xr-xinclude/linux/dma-buf-test-exporter.h78
-rw-r--r--include/linux/freezer.h171
-rw-r--r--include/linux/fs.h13
-rw-r--r--include/linux/ftrace_event.h7
-rw-r--r--include/linux/gpio_event.h170
-rw-r--r--include/linux/hid.h4
-rw-r--r--include/linux/if_pppolac.h23
-rw-r--r--include/linux/if_pppopns.h23
-rw-r--r--include/linux/if_pppox.h21
-rw-r--r--include/linux/ipv6.h2
-rwxr-xr-xinclude/linux/kds.h173
-rw-r--r--include/linux/kernel.h3
-rw-r--r--include/linux/keychord.h23
-rw-r--r--include/linux/keycombo.h36
-rw-r--r--include/linux/keyreset.h29
-rw-r--r--include/linux/lsm_audit.h7
-rw-r--r--include/linux/mailbox_client.h3
-rw-r--r--include/linux/mm.h10
-rw-r--r--include/linux/mm_types.h13
-rw-r--r--include/linux/mmc/host.h35
-rw-r--r--include/linux/mmc/pm.h1
-rwxr-xr-x[-rw-r--r--]include/linux/mmc/sdio_func.h10
-rw-r--r--include/linux/netfilter/xt_qtaguid.h13
-rw-r--r--include/linux/netfilter/xt_quota2.h25
-rw-r--r--include/linux/nmi.h5
-rw-r--r--include/linux/of_fdt.h21
-rw-r--r--include/linux/platform_data/ds2482.h21
-rw-r--r--include/linux/power_supply.h10
-rw-r--r--include/linux/pstore.h1
-rw-r--r--include/linux/pstore_ram.h3
-rw-r--r--include/linux/sched.h21
-rw-r--r--include/linux/scpi_protocol.h59
-rw-r--r--include/linux/seccomp.h8
-rw-r--r--include/linux/security.h29
-rw-r--r--include/linux/serial_core.h1
-rw-r--r--include/linux/suspend.h2
-rw-r--r--include/linux/switch.h53
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/thermal.h119
-rw-r--r--include/linux/trace_seq.h25
-rw-r--r--include/linux/tracepoint.h10
-rw-r--r--include/linux/uid_stat.h29
-rwxr-xr-xinclude/linux/ump-common.h252
-rwxr-xr-xinclude/linux/ump-import.h99
-rwxr-xr-xinclude/linux/ump-ioctl.h152
-rwxr-xr-xinclude/linux/ump.h481
-rw-r--r--include/linux/usb/f_accessory.h23
-rw-r--r--include/linux/usb/f_mtp.h23
-rw-r--r--include/linux/wakelock.h67
-rw-r--r--include/linux/wakeup_reason.h27
-rw-r--r--include/linux/wifi_tiwlan.h27
-rw-r--r--include/linux/wlan_plat.h30
-rw-r--r--include/net/activity_stats.h25
-rw-r--r--include/net/addrconf.h3
-rw-r--r--include/net/bluetooth/hci.h9
-rw-r--r--include/net/bluetooth/hci_core.h8
-rw-r--r--include/net/bluetooth/sco.h4
-rw-r--r--include/net/cfg80211.h220
-rw-r--r--include/net/fib_rules.h6
-rw-r--r--include/net/flow.h19
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip.h4
-rw-r--r--include/net/ip6_route.h2
-rw-r--r--include/net/ipv6.h12
-rw-r--r--include/net/net_namespace.h9
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/net/ping.h49
-rw-r--r--include/net/route.h5
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/net/transp_v6.h3
-rw-r--r--include/trace/events/cpufreq_interactive.h112
-rw-r--r--include/trace/events/gpu.h143
-rw-r--r--include/trace/events/mmc.h91
-rw-r--r--include/trace/events/power.h19
-rw-r--r--include/trace/events/thermal.h192
-rw-r--r--include/trace/events/thermal_power_allocator.h87
-rw-r--r--include/trace/ftrace.h66
-rw-r--r--include/uapi/asm-generic/unistd.h12
-rw-r--r--include/uapi/linux/Kbuild1
-rw-r--r--include/uapi/linux/android/Kbuild2
-rw-r--r--include/uapi/linux/android/binder.h (renamed from drivers/staging/android/binder.h)115
-rw-r--r--include/uapi/linux/audit.h2
-rw-r--r--include/uapi/linux/eventpoll.h13
-rw-r--r--include/uapi/linux/fib_rules.h2
-rw-r--r--include/uapi/linux/fs.h2
-rw-r--r--include/uapi/linux/if_pppolac.h33
-rw-r--r--include/uapi/linux/if_pppopns.h32
-rw-r--r--include/uapi/linux/if_pppox.h6
-rw-r--r--include/uapi/linux/input.h43
-rw-r--r--include/uapi/linux/ipv6.h2
-rw-r--r--include/uapi/linux/keychord.h52
-rw-r--r--include/uapi/linux/msdos_fs.h12
-rw-r--r--include/uapi/linux/netfilter/xt_IDLETIMER.h8
-rw-r--r--include/uapi/linux/netfilter/xt_socket.h6
-rw-r--r--include/uapi/linux/nl80211.h130
-rw-r--r--include/uapi/linux/prctl.h9
-rw-r--r--include/uapi/linux/rtnetlink.h1
-rw-r--r--include/uapi/linux/seccomp.h7
-rw-r--r--include/uapi/linux/sockios.h1
-rw-r--r--include/uapi/linux/usb/f_accessory.h146
-rw-r--r--include/uapi/linux/usb/f_mtp.h61
-rw-r--r--include/uapi/sound/compress_params.h10
-rw-r--r--include/uapi/video/adf.h321
-rw-r--r--include/video/adf.h502
-rw-r--r--include/video/adf_client.h61
-rw-r--r--include/video/adf_fbdev.h124
-rw-r--r--include/video/adf_format.h26
-rw-r--r--include/video/adf_memblock.h20
-rw-r--r--init/Kconfig6
-rw-r--r--kernel/cgroup.c52
-rw-r--r--kernel/cpu.c20
-rw-r--r--kernel/debug/debug_core.c12
-rw-r--r--kernel/debug/kdb/kdb_io.c12
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c68
-rw-r--r--kernel/freezer.c12
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/hrtimer.c3
-rw-r--r--kernel/irq/pm.c18
-rw-r--r--kernel/lockdep.c17
-rw-r--r--kernel/panic.c13
-rw-r--r--kernel/power/Kconfig15
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/process.c61
-rw-r--r--kernel/power/suspend.c34
-rw-r--r--kernel/power/suspend_time.c111
-rw-r--r--kernel/power/wakelock.c7
-rw-r--r--kernel/power/wakeup_reason.c216
-rw-r--r--kernel/sched/core.c14
-rw-r--r--kernel/seccomp.c409
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sys.c178
-rw-r--r--kernel/sys_ni.c3
-rw-r--r--kernel/sysctl.c17
-rw-r--r--kernel/time/alarmtimer.c39
-rw-r--r--kernel/trace/Kconfig3
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/gpu-traces.c23
-rw-r--r--kernel/trace/trace.c104
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_functions_graph.c43
-rw-r--r--kernel/trace/trace_output.c279
-rw-r--r--kernel/watchdog.c123
-rw-r--r--lib/Kconfig2
-rw-r--r--lib/Kconfig.debug17
-rw-r--r--linaro/configs/android.conf151
-rw-r--r--linaro/configs/distribution.conf1
-rw-r--r--linaro/configs/selinux.conf12
-rw-r--r--linaro/configs/vexpress64.conf33
-rw-r--r--mm/madvise.c3
-rw-r--r--mm/memcontrol.c12
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/mlock.c7
-rw-r--r--mm/mmap.c44
-rw-r--r--mm/mprotect.c3
-rw-r--r--mm/page_alloc.c35
-rw-r--r--mm/page_io.c50
-rw-r--r--mm/shmem.c13
-rw-r--r--mm/util.c10
-rw-r--r--mm/vmscan.c43
-rw-r--r--net/Kconfig16
-rw-r--r--net/Makefile1
-rw-r--r--net/activity_stats.c119
-rw-r--r--net/bluetooth/af_bluetooth.c34
-rw-r--r--net/bluetooth/amp.c2
-rw-r--r--net/bluetooth/hci_conn.c57
-rwxr-xr-x[-rw-r--r--]net/bluetooth/hci_event.c20
-rw-r--r--net/bluetooth/l2cap_core.c4
-rw-r--r--net/bluetooth/mgmt.c4
-rw-r--r--net/bluetooth/rfcomm/core.c1
-rw-r--r--net/bluetooth/sco.c53
-rw-r--r--net/bridge/br_device.c11
-rw-r--r--net/core/fib_rules.c53
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c20
-rw-r--r--net/ipv4/devinet.c8
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c1
-rw-r--r--net/ipv4/icmp.c16
-rw-r--r--net/ipv4/inet_connection_sock.c12
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/Kconfig12
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c8
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c5
-rw-r--r--net/ipv4/ping.c584
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/route.c32
-rw-r--r--net/ipv4/syncookies.c6
-rw-r--r--net/ipv4/sysctl_net_ipv4.c38
-rw-r--r--net/ipv4/sysfs_net_ipv4.c88
-rw-r--r--net/ipv4/tcp.c118
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c7
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/Makefile2
-rw-r--r--net/ipv6/addrconf.c98
-rw-r--r--net/ipv6/af_inet6.c50
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/anycast.c21
-rw-r--r--net/ipv6/datagram.c4
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/exthdrs_core.c13
-rw-r--r--net/ipv6/icmp.c27
-rw-r--r--net/ipv6/inet6_connection_sock.c4
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/netfilter/Kconfig12
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c9
-rw-r--r--net/ipv6/ping.c224
-rw-r--r--net/ipv6/raw.c7
-rw-r--r--net/ipv6/route.c89
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/sysctl_net_ipv6.c7
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/udp.c7
-rw-r--r--net/netfilter/Kconfig42
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/xt_IDLETIMER.c247
-rw-r--r--net/netfilter/xt_qtaguid.c3017
-rw-r--r--net/netfilter/xt_qtaguid_internal.h352
-rw-r--r--net/netfilter/xt_qtaguid_print.c566
-rw-r--r--net/netfilter/xt_qtaguid_print.h120
-rw-r--r--net/netfilter/xt_quota2.c385
-rw-r--r--net/netfilter/xt_socket.c70
-rw-r--r--net/rfkill/Kconfig5
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/wireless/Kconfig11
-rw-r--r--net/wireless/core.h4
-rw-r--r--net/wireless/nl80211.c337
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/wireless/sme.c6
-rw-r--r--scripts/Makefile.lib12
-rw-r--r--scripts/Makefile.modinst2
-rw-r--r--scripts/kconfig/confdata.c14
-rw-r--r--scripts/kconfig/expr.h3
-rw-r--r--scripts/kconfig/lkc.h1
-rw-r--r--scripts/kconfig/symbol.c11
-rw-r--r--security/Kconfig2
-rw-r--r--security/apparmor/domain.c4
-rw-r--r--security/apparmor/include/apparmor.h1
-rw-r--r--security/apparmor/lib.c32
-rw-r--r--security/capability.c24
-rw-r--r--security/commoncap.c11
-rw-r--r--security/lsm_audit.c15
-rw-r--r--security/security.c20
-rw-r--r--security/selinux/avc.c437
-rw-r--r--security/selinux/hooks.c183
-rw-r--r--security/selinux/include/avc.h9
-rw-r--r--security/selinux/include/classmap.h1
-rw-r--r--security/selinux/include/netif.h6
-rw-r--r--security/selinux/include/netnode.h2
-rw-r--r--security/selinux/include/netport.h2
-rw-r--r--security/selinux/include/objsec.h2
-rw-r--r--security/selinux/include/security.h35
-rw-r--r--security/selinux/netif.c58
-rw-r--r--security/selinux/netnode.c15
-rw-r--r--security/selinux/netport.c15
-rw-r--r--security/selinux/nlmsgtab.c9
-rw-r--r--security/selinux/ss/avtab.c94
-rw-r--r--security/selinux/ss/avtab.h25
-rw-r--r--security/selinux/ss/conditional.c32
-rw-r--r--security/selinux/ss/conditional.h6
-rw-r--r--security/selinux/ss/constraint.h1
-rw-r--r--security/selinux/ss/policydb.c101
-rw-r--r--security/selinux/ss/policydb.h11
-rw-r--r--security/selinux/ss/services.c202
-rw-r--r--security/selinux/ss/services.h6
-rw-r--r--sound/core/compress_offload.c3
-rw-r--r--tools/gator/daemon/Android.mk3
-rw-r--r--tools/gator/daemon/AtraceDriver.cpp136
-rw-r--r--tools/gator/daemon/AtraceDriver.h39
-rw-r--r--tools/gator/daemon/Buffer.cpp16
-rw-r--r--tools/gator/daemon/Buffer.h3
-rw-r--r--tools/gator/daemon/CCNDriver.cpp41
-rw-r--r--tools/gator/daemon/CCNDriver.h2
-rw-r--r--tools/gator/daemon/CapturedXML.cpp2
-rw-r--r--tools/gator/daemon/Child.cpp50
-rw-r--r--tools/gator/daemon/Command.cpp69
-rw-r--r--tools/gator/daemon/Config.h7
-rw-r--r--tools/gator/daemon/ConfigurationXML.cpp15
-rw-r--r--tools/gator/daemon/DiskIODriver.cpp2
-rw-r--r--tools/gator/daemon/DriverSource.cpp56
-rw-r--r--tools/gator/daemon/DriverSource.h1
-rw-r--r--tools/gator/daemon/EventsXML.cpp4
-rw-r--r--tools/gator/daemon/ExternalDriver.cpp269
-rw-r--r--tools/gator/daemon/ExternalDriver.h41
-rw-r--r--tools/gator/daemon/ExternalSource.cpp84
-rw-r--r--tools/gator/daemon/ExternalSource.h3
-rw-r--r--tools/gator/daemon/FtraceDriver.cpp115
-rw-r--r--tools/gator/daemon/FtraceDriver.h5
-rw-r--r--tools/gator/daemon/FtraceSource.cpp188
-rw-r--r--tools/gator/daemon/FtraceSource.h43
-rw-r--r--tools/gator/daemon/MaliVideoDriver.cpp28
-rw-r--r--tools/gator/daemon/MaliVideoDriver.h1
-rw-r--r--tools/gator/daemon/MemInfoDriver.cpp2
-rw-r--r--tools/gator/daemon/NetDriver.cpp2
-rw-r--r--tools/gator/daemon/PerfDriver.cpp184
-rw-r--r--tools/gator/daemon/PerfDriver.h8
-rw-r--r--tools/gator/daemon/PerfGroup.cpp15
-rw-r--r--tools/gator/daemon/PerfSource.cpp29
-rw-r--r--tools/gator/daemon/Proc.cpp9
-rw-r--r--tools/gator/daemon/Sender.cpp22
-rw-r--r--tools/gator/daemon/SessionData.cpp108
-rw-r--r--tools/gator/daemon/SessionData.h44
-rw-r--r--tools/gator/daemon/Setup.cpp36
-rw-r--r--tools/gator/daemon/StreamlineSetup.cpp10
-rw-r--r--tools/gator/daemon/UserSpaceSource.cpp10
-rw-r--r--tools/gator/daemon/defaults.xml30
-rw-r--r--tools/gator/daemon/events-CCI-400.xml6
-rw-r--r--tools/gator/daemon/events-Cortex-A53.xml6
-rw-r--r--tools/gator/daemon/events-Cortex-A57.xml6
-rw-r--r--tools/gator/daemon/events-Cortex-A72.xml6
-rw-r--r--tools/gator/daemon/events-Mali-4xx.xml11
-rw-r--r--tools/gator/daemon/events-Mali-Midgard_hw.xml4
-rw-r--r--tools/gator/daemon/events-Mali-T82x_hw.xml216
-rw-r--r--tools/gator/daemon/events-Mali-V500.xml4
-rw-r--r--tools/gator/daemon/events-atrace.xml19
-rw-r--r--tools/gator/daemon/events-ftrace.xml20
-rw-r--r--tools/gator/daemon/k/perf_event.3.12.h792
-rw-r--r--[l---------]tools/gator/daemon/k/perf_event.h793
-rw-r--r--tools/gator/daemon/main.cpp31
-rw-r--r--tools/gator/daemon/notify/COPYING339
-rw-r--r--tools/gator/daemon/notify/Makefile24
-rw-r--r--tools/gator/daemon/notify/Notify.java31
1060 files changed, 128100 insertions, 7096 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
new file mode 100644
index 000000000000..acb19b91c192
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-wakeup_reasons
@@ -0,0 +1,16 @@
+What: /sys/kernel/wakeup_reasons/last_resume_reason
+Date: February 2014
+Contact: Ruchi Kandoi <kandoiruchi@google.com>
+Description:
+ The /sys/kernel/wakeup_reasons/last_resume_reason is
+ used to report wakeup reasons after system exited suspend.
+
+What: /sys/kernel/wakeup_reasons/last_suspend_time
+Date: March 2015
+Contact: jinqian <jinqian@google.com>
+Description:
+ The /sys/kernel/wakeup_reasons/last_suspend_time is
+ used to report time spent in last suspend cycle. It contains
+ two numbers (in seconds) separated by space. First number is
+ the time spent in suspend and resume processes. Second number
+ is the time spent in sleep state.
\ No newline at end of file
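
A minimal user-space sketch (in C) of consuming these two attributes. The
sysfs paths are the ones documented above; everything else, including the
error handling, is illustrative only:

#include <stdio.h>

/* Paths documented in the ABI entries above. */
#define RESUME_REASON "/sys/kernel/wakeup_reasons/last_resume_reason"
#define SUSPEND_TIME  "/sys/kernel/wakeup_reasons/last_suspend_time"

int main(void)
{
	char reason[256];
	double overhead = 0.0, asleep = 0.0;
	FILE *f;

	f = fopen(RESUME_REASON, "r");
	if (f) {
		if (fgets(reason, sizeof(reason), f))
			printf("last resume reason: %s", reason);
		fclose(f);
	}

	f = fopen(SUSPEND_TIME, "r");
	if (f) {
		/*
		 * Two space-separated values in seconds: time spent in the
		 * suspend/resume code paths, then time spent asleep.
		 */
		if (fscanf(f, "%lf %lf", &overhead, &asleep) == 2)
			printf("suspend overhead %.3fs, slept %.3fs\n",
			       overhead, asleep);
		fclose(f);
	}
	return 0;
}
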
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index f9df3b872c16..69f7a4e0b52b 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -1162,7 +1162,7 @@ int max_width, max_height;</synopsis>
</para>
<para>
If a page flip can be successfully scheduled the driver must set the
- <code>drm_crtc-&lt;fb</code> field to the new framebuffer pointed to
+ <code>drm_crtc-&gt;fb</code> field to the new framebuffer pointed to
by <code>fb</code>. This is important so that the reference counting
on framebuffers stays balanced.
</para>
diff --git a/Documentation/android.txt b/Documentation/android.txt
new file mode 100644
index 000000000000..0f40a78b045f
--- /dev/null
+++ b/Documentation/android.txt
@@ -0,0 +1,121 @@
+ =============
+ A N D R O I D
+ =============
+
+Copyright (C) 2009 Google, Inc.
+Written by Mike Chan <mike@android.com>
+
+CONTENTS:
+---------
+
+1. Android
+ 1.1 Required enabled config options
+ 1.2 Required disabled config options
+ 1.3 Recommended enabled config options
+2. Contact
+
+
+1. Android
+==========
+
+Android (www.android.com) is an open source operating system for mobile devices.
+This document describes configurations needed to run the Android framework on
+top of the Linux kernel.
+
+To see a working defconfig look at msm_defconfig or goldfish_defconfig
+which can be found at http://android.git.kernel.org in kernel/common.git
+and kernel/msm.git
+
+
+1.1 Required enabled config options
+-----------------------------------
+After building a standard defconfig, ensure that these options are enabled in
+your .config or defconfig if they are not already. Based off the msm_defconfig.
+You should keep the rest of the default options enabled in the defconfig
+unless you know what you are doing.
+
+ANDROID_PARANOID_NETWORK
+ASHMEM
+CONFIG_FB_MODE_HELPERS
+CONFIG_FONT_8x16
+CONFIG_FONT_8x8
+CONFIG_YAFFS_SHORT_NAMES_IN_RAM
+DAB
+EARLYSUSPEND
+FB
+FB_CFB_COPYAREA
+FB_CFB_FILLRECT
+FB_CFB_IMAGEBLIT
+FB_DEFERRED_IO
+FB_TILEBLITTING
+HIGH_RES_TIMERS
+INOTIFY
+INOTIFY_USER
+INPUT_EVDEV
+INPUT_GPIO
+INPUT_MISC
+LEDS_CLASS
+LEDS_GPIO
+LOCK_KERNEL
+LOGGER
+LOW_MEMORY_KILLER
+MISC_DEVICES
+NEW_LEDS
+NO_HZ
+POWER_SUPPLY
+PREEMPT
+RAMFS
+RTC_CLASS
+RTC_LIB
+SWITCH
+SWITCH_GPIO
+TMPFS
+UID_STAT
+UID16
+USB_FUNCTION
+USB_FUNCTION_ADB
+USER_WAKELOCK
+VIDEO_OUTPUT_CONTROL
+WAKELOCK
+YAFFS_AUTO_YAFFS2
+YAFFS_FS
+YAFFS_YAFFS1
+YAFFS_YAFFS2
+
+
+1.2 Required disabled config options
+------------------------------------
+CONFIG_YAFFS_DISABLE_LAZY_LOAD
+DNOTIFY
+
+
+1.3 Recommended enabled config options
+--------------------------------------
+ANDROID_PMEM
+PSTORE_CONSOLE
+PSTORE_RAM
+SCHEDSTATS
+DEBUG_PREEMPT
+DEBUG_MUTEXES
+DEBUG_SPINLOCK_SLEEP
+DEBUG_INFO
+FRAME_POINTER
+CPU_FREQ
+CPU_FREQ_TABLE
+CPU_FREQ_DEFAULT_GOV_ONDEMAND
+CPU_FREQ_GOV_ONDEMAND
+CRC_CCITT
+EMBEDDED
+INPUT_TOUCHSCREEN
+I2C
+I2C_BOARDINFO
+LOG_BUF_SHIFT=17
+SERIAL_CORE
+SERIAL_CORE_CONSOLE
+
+
+2. Contact
+==========
+website: http://android.git.kernel.org
+
+mailing-lists: android-kernel@googlegroups.com
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 638bf17ff869..61dc0ec5c9a9 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -598,6 +598,15 @@ is completely unused; @cgrp->parent is still valid. (Note - can also
be called for a newly-created cgroup if an error occurs after this
subsystem's create() method has been called for the new cgroup).
+int allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+(cgroup_mutex held by caller)
+
+Called prior to moving a task into a cgroup; if the subsystem
+returns an error, this will abort the attach operation. Used
+to extend the permission checks - if all subsystems in a cgroup
+return 0, the attach will be allowed to proceed, even if the
+default permission check (root or same user) fails.
+
int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
(cgroup_mutex held by caller)
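
For illustration, a minimal sketch of a subsystem-side implementation of the
new callback. The signature is the one documented above; the CAP_SYS_NICE
policy and the "example" subsystem are hypothetical:

#include <linux/capability.h>
#include <linux/cgroup.h>
#include <linux/errno.h>

/*
 * Purely illustrative policy: let a caller with CAP_SYS_NICE move tasks
 * into this cgroup even when the default root/same-user check would
 * fail.  Returning an error aborts the attach.
 */
static int example_allow_attach(struct cgroup *cgrp,
				struct cgroup_taskset *tset)
{
	if (capable(CAP_SYS_NICE))
		return 0;
	return -EACCES;
}

/* Hypothetical subsystem hooking up the new callback. */
struct cgroup_subsys example_subsys = {
	.name		= "example",
	.allow_attach	= example_allow_attach,
};
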
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 219970ba54b7..875eecbed8c8 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -28,6 +28,7 @@ Contents:
2.3 Userspace
2.4 Ondemand
2.5 Conservative
+2.6 Interactive
3. The Governor Interface in the CPUfreq Core
@@ -218,6 +219,90 @@ a decision on when to decrease the frequency while running in any
speed. Load for frequency increase is still evaluated every
sampling rate.
+2.6 Interactive
+---------------
+
+The CPUfreq governor "interactive" is designed for latency-sensitive,
+interactive workloads. This governor sets the CPU speed depending on
+usage, similar to "ondemand" and "conservative" governors, but with a
+different set of configurable behaviors.
+
+The tuneable values for this governor are:
+
+target_loads: CPU load values used to adjust speed to influence the
+current CPU load toward that value. In general, the lower the target
+load, the more often the governor will raise CPU speeds to bring load
+below the target. The format is a single target load, optionally
+followed by pairs of CPU speeds and CPU loads to target at or above
+those speeds. Colons can be used between the speeds and associated
+target loads for readability. For example:
+
+ 85 1000000:90 1700000:99
+
+targets CPU load 85% below speed 1GHz, 90% at or above 1GHz, until
+1.7GHz and above, at which load 99% is targeted. If speeds are
+specified these must appear in ascending order. Higher target load
+values are typically specified for higher speeds, that is, target load
+values also usually appear in an ascending order. The default is
+target load 90% for all speeds.
+
+min_sample_time: The minimum amount of time to spend at the current
+frequency before ramping down. Default is 80000 uS.
+
+hispeed_freq: An intermediate "hi speed" at which to initially ramp
+when CPU load hits the value specified in go_hispeed_load. If load
+stays high for the amount of time specified in above_hispeed_delay,
+then speed may be bumped higher. Default is the maximum speed
+allowed by the policy at governor initialization time.
+
+go_hispeed_load: The CPU load at which to ramp to hispeed_freq.
+Default is 99%.
+
+above_hispeed_delay: When speed is at or above hispeed_freq, wait for
+this long before raising speed in response to continued high load.
+The format is a single delay value, optionally followed by pairs of
+CPU speeds and the delay to use at or above those speeds. Colons can
+be used between the speeds and associated delays for readability. For
+example:
+
+ 80000 1300000:200000 1500000:40000
+
+uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay
+200000 uS is used until speed 1.5 GHz, at which speed (and above)
+delay 40000 uS is used. If speeds are specified these must appear in
+ascending order. Default is 20000 uS.
+
+timer_rate: Sample rate for reevaluating CPU load when the CPU is not
+idle. A deferrable timer is used, such that the CPU will not be woken
+from idle to service this timer until something else needs to run.
+(The maximum time to allow deferring this timer when not running at
+minimum speed is configurable via timer_slack.) Default is 20000 uS.
+
+timer_slack: Maximum additional time to defer handling the governor
+sampling timer beyond timer_rate when running at speeds above the
+minimum. For platforms that consume additional power at idle when
+CPUs are running at speeds greater than minimum, this places an upper
+bound on how long the timer will be deferred prior to re-evaluating
+load and dropping speed. For example, if timer_rate is 20000uS and
+timer_slack is 10000uS then timers will be deferred for up to 30msec
+when not at lowest speed. A value of -1 means defer timers
+indefinitely at all speeds. Default is 80000 uS.
+
+boost: If non-zero, immediately boost speed of all CPUs to at least
+hispeed_freq until zero is written to this attribute. If zero, allow
+CPU speeds to drop below hispeed_freq according to load as usual.
+Default is zero.
+
+boostpulse: On each write, immediately boost speed of all CPUs to
+hispeed_freq for at least the period of time specified by
+boostpulse_duration, after which speeds are allowed to drop below
+hispeed_freq according to load as usual.
+
+boostpulse_duration: Length of time to hold CPU speed at hispeed_freq
+on a write to boostpulse, before allowing speed to drop according to
+load as usual. Default is 80000 uS.
+
+
3. The Governor Interface in the CPUfreq Core
=============================================
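
A sketch of how these tunables might be driven from user space, reusing the
target_loads example from the text. The sysfs directory used below is an
assumption; where the interactive governor's attributes appear depends on
the platform and kernel configuration:

#include <stdio.h>

/*
 * Assumed location of the interactive governor's tunables; the actual
 * path depends on the platform and kernel configuration and is not
 * specified in the documentation above.
 */
#define TUNABLE_DIR "/sys/devices/system/cpu/cpufreq/interactive/"

static int write_tunable(const char *name, const char *value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), TUNABLE_DIR "%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(value, f);
	fclose(f);
	return 0;
}

int main(void)
{
	/* Same target_loads example as in the text above. */
	write_tunable("target_loads", "85 1000000:90 1700000:99");

	/* Hold boosts for 80 ms, then fire one boost pulse. */
	write_tunable("boostpulse_duration", "80000");
	write_tunable("boostpulse", "1");
	return 0;
}
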
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index 9884681535ee..2929f6b1ccf1 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -10,7 +10,7 @@ Construction Parameters
<version> <dev> <hash_dev>
<data_block_size> <hash_block_size>
<num_data_blocks> <hash_start_block>
- <algorithm> <digest> <salt>
+ <algorithm> <digest> <salt> <mode>
<version>
This is the type of the on-disk hash format.
@@ -62,6 +62,16 @@ Construction Parameters
<salt>
The hexadecimal encoding of the salt value.
+<mode>
+ Optional. The mode of operation.
+
+ 0 is the normal mode of operation where a corrupted block will result in an
+ I/O error.
+
+ 1 is logging mode where corrupted blocks are logged and a uevent is sent to
+ notify user space.
+
+
Theory of operation
===================
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index 06fc7602593a..6afdb1b19d6e 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -39,6 +39,9 @@ Example:
- reg : The control frame base address.
+- arm,cnttidr : The value of the CNTTIDR register if not accessible from
+ the level the kernel is running at. To be used only in extreme cases.
+
Note that #address-cells, #size-cells, and ranges shall be present to ensure
the CPU can address a frame's registers.
diff --git a/Documentation/devicetree/bindings/arm/mali-midgard.txt b/Documentation/devicetree/bindings/arm/mali-midgard.txt
new file mode 100755
index 000000000000..fb0a992c875a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mali-midgard.txt
@@ -0,0 +1,43 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+* ARM Mali Midgard devices
+
+
+Required properties:
+
+- compatible : Should be a list of strings of the form "arm,mali<chip>",
+progressively replacing digits with 'x' from the end (down to malit<Major>xx),
+and ending with "arm,mali-midgard"; the last entry is mandatory.
+- reg : Physical base address of the device and length of the register area.
+- interrupts : Contains the three IRQ lines required by T-6xx devices
+- interrupt-names : Contains the names of IRQ resources in the order they were
+provided in the interrupts property. Must contain: "JOB", "MMU", "GPU".
+
+Optional:
+
+- clocks : Phandle to clock for the Mali T-6xx device.
+- clock-names : Shall be "clk_mali".
+
+Example for a Mali-T602:
+
+gpu@fc010000 {
+ compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard";
+ reg = <0xfc010000 0x4000>;
+ interrupts = <0 36 4>, <0 37 4>, <0 38 4>;
+ interrupt-names = "JOB", "MMU", "GPU";
+
+ clocks = <&pclk_mali>;
+ clock-names = "clk_mali";
+};
diff --git a/Documentation/devicetree/bindings/drm/i2c/tda998x.txt b/Documentation/devicetree/bindings/drm/i2c/tda998x.txt
new file mode 100644
index 000000000000..adac4dd2de5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/i2c/tda998x.txt
@@ -0,0 +1,43 @@
+Device-Tree bindings for the NXP TDA998x HDMI transmitter
+
+Required properties:
+ - compatible: must be "nxp,tda998x"
+
+ - reg: I2C address
+
+Optional properties:
+ - interrupts: interrupt number and trigger type
+ default: polling
+
+ - pinctrl-0: pin control group to be used for
+ screen plug/unplug interrupt.
+
+ - pinctrl-names: must contain a "default" entry.
+
+ - video-ports: 24-bit value which defines how the video controller
+ output is wired to the TDA998x input - default: <0x230145>
+
+ - audio-ports: one or two values corresponding to entries in
+ the audio-port-names property.
+
+ - audio-port-names: must contain "i2s", "spdif" entries
+ matching entries in the audio-ports property.
+
+ - #sound-dai-cells: must be set to <1> for use with the simple-card.
+ The DAI 0 is the I2S input and the DAI 1 is the S/PDIF input.
+
+Example:
+
+ tda998x: hdmi-encoder {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <27 2>; /* falling edge */
+ pinctrl-0 = <&pmx_camera>;
+ pinctrl-names = "default";
+
+ audio-ports = <0x03>, <0x04>;
+ audio-port-names = "i2s", "spdif";
+ #sound-dai-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/gpu/arm,hdlcd.txt b/Documentation/devicetree/bindings/gpu/arm,hdlcd.txt
new file mode 100644
index 000000000000..e4af93b25ee0
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/arm,hdlcd.txt
@@ -0,0 +1,64 @@
+ARM HDLCD
+
+This is a driver for a display controller found on several development
+platforms produced by ARM Ltd and in more recent versions of its Fast Models.
+The HDLCD is an RGB streamer that reads the data from a framebuffer
+and sends it to a single HDMI transmitter.
+
+Required properties:
+- compatible: "arm,hdlcd"
+- reg: Physical base address and length of the controller's registers.
+- interrupts: One interrupt used by the controller to multiplex all
+ the sources programmed in the interrupt mask register.
+- clocks: A list of phandle + clock-specifier pairs, one for each entry
+ in clock-names.
+- clock-names: A list of clock names. For HDLCD it should contain:
+ * "pxlclk" for the clock feeding the output PLL of the controller.
+
+Optional properties:
+- i2c-slave: If the HDLCD is connected to an HDMI transmitter that is
+  reachable via i2c, then use the phandle of the DDC slave here.
+
+Optional children:
+- display-timings: If the HDLCD is emulated by the Fast Model, place the
+ videomode(s) of the desired resolution(s) here. If multiple videomodes
+ are being used, a 'native-mode' property should be used to indicate the
+ preferred/default resolution. Refer to
+ Documentation/devicetree/bindings/video/display-timing.txt for details.
+
+
+Example:
+
+/ {
+ ...
+
+ hdlcd@2b000000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x2b000000 0 0x1000>;
+ interrupts = <0 85 4>;
+ i2c-slave = <&dvi_i2c_slave>;
+ clocks = <&oscclk5>;
+ clock-names = "pxlclk";
+ };
+
+ /* DVI I2C bus */
+ v2m_i2c_dvi: i2c@160000 {
+ compatible = "arm,versatile-i2c";
+ reg = <0x160000 0x1000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dvi_i2c_slave: dvi-transmitter@39 {
+ compatible = "sil,sii9022-tpi", "sil,sii9022";
+ reg = <0x39>;
+ };
+
+ dvi-transmitter@60 {
+ compatible = "sil,sii9022-cpi", "sil,sii9022";
+ reg = <0x60>;
+ };
+ };
+
+ ...
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-designware.txt b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
index e42a2ee233e6..5199b0c8cf7a 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-designware.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
@@ -10,6 +10,16 @@ Recommended properties :
- clock-frequency : desired I2C bus clock frequency in Hz.
+Optional properties :
+ - i2c-sda-hold-time-ns : should contain the SDA hold time in nanoseconds.
+ This option is only supported in hardware blocks version 1.11a or newer.
+
+ - i2c-scl-falling-time-ns : should contain the SCL falling time in nanoseconds.
+   This value, which defaults to 300ns, is used to compute the tLOW period.
+
+ - i2c-sda-falling-time-ns : should contain the SDA falling time in nanoseconds.
+   This value, which defaults to 300ns, is used to compute the tHIGH period.
+
Example :
i2c@f0000 {
@@ -20,3 +30,16 @@ Example :
interrupts = <11>;
clock-frequency = <400000>;
};
+
+ i2c@1120000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,designware-i2c";
+ reg = <0x1120000 0x1000>;
+ interrupt-parent = <&ictl>;
+ interrupts = <12 1>;
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <300>;
+ i2c-scl-falling-time-ns = <300>;
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/arm,mhu.txt b/Documentation/devicetree/bindings/mailbox/arm,mhu.txt
new file mode 100644
index 000000000000..4ac5e0143b85
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/arm,mhu.txt
@@ -0,0 +1,29 @@
+ARM Message Handling Unit (MHU)
+-------------------------------
+
+Required properties:
+
+- compatible : should be "arm,mhu"
+- reg : exactly one register range with length 0x1000 (4kB)
+- interrupts : list of interrupt specifiers associated with the device
+- interrupt-names : contains the names of the interrupts in the order in which
+ they were specified in the interrupts property
+- #mbox-cells : number of cells in a mailbox specifier, should be 1.
+ The 'mboxes' specifier should be a linear index to the
+ mailbox this device supports.
+
+See Documentation/devicetree/bindings/mailbox/mailbox.txt
+for more details about the generic mailbox controller and
+client driver bindings.
+
+Example:
+
+mailbox: mhu@2b1f0000 {
+ compatible = "arm,mhu";
+ reg = <0x0 0x2b1f0000 0x0 0x1000>;
+ interrupts = <0 35 4>,
+ <0 36 4>;
+ interrupt-names = "mhu_hpri_rx",
+ "mhu_lpri_rx";
+ #mbox-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/mailbox/arm,scpi.txt b/Documentation/devicetree/bindings/mailbox/arm,scpi.txt
new file mode 100644
index 000000000000..5db235f69e54
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/arm,scpi.txt
@@ -0,0 +1,121 @@
+System Control and Power Interface (SCPI) Message Protocol
+----------------------------------------------------------
+
+Required properties:
+
+- compatible : should be "arm,scpi"
+- mboxes: List of phandle and mailbox channel specifiers
+- shmem : List of phandles pointing to the shared memory (SHM) areas between the
+ processors using these mailboxes for IPC, one for each mailbox
+
+See Documentation/devicetree/bindings/mailbox/mailbox.txt
+for more details about the generic mailbox controller and
+client driver bindings.
+
+Clock bindings for the clocks based on SCPI Message Protocol
+------------------------------------------------------------
+
+This binding uses the common clock binding[1].
+
+Required properties:
+- compatible : shall be one of the following:
+ "arm,scpi-clocks" - for the container node with all the clocks
+ based on the SCPI protocol
+ "arm,scpi-dvfs" - all the clocks that are variable and index based.
+ These clocks don't provide the full range between the limits
+ but only discrete points within the range. The firmware
+ provides the mapping for each such operating frequency and the
+ index associated with it. The firmware also manages the
+ voltage scaling appropriately with the clock scaling.
+ "arm,scpi-clk" - all the clocks that are variable and provide full
+ range within the specified range. The firmware provides the
+ supported range for each clock.
+
+Required properties for all clocks (all from the common clock binding):
+- #clock-cells : should be set to 1 as each of the SCPI clocks has multiple
+  outputs. The clock specifier will be the index to an entry in the list
+  of output clocks.
+- clock-output-names : shall be the corresponding names of the outputs.
+- clock-indices: The identifying number for the clocks (clock_id) in the node
+  as expected by the firmware. It can be non-linear and hence provides the
+  mapping of identifiers into the clock-output-names array.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Example:
+
+sram: sram@50000000 {
+ compatible = "arm,juno-sram-ns", "mmio-sram";
+ reg = <0x0 0x50000000 0x0 0x10000>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x0 0x50000000 0x10000>;
+
+ cpu_scp_lpri: scp-shmem@0 {
+ compatible = "arm,juno-scp-shmem";
+ reg = <0x0 0x200>;
+ };
+
+ cpu_scp_hpri: scp-shmem@200 {
+ compatible = "arm,juno-scp-shmem";
+ reg = <0x200 0x200>;
+ };
+};
+
+mailbox: mailbox0@40000000 {
+ ....
+ #mbox-cells = <1>;
+};
+
+scpi_protocol: scpi@2e000000 {
+ compatible = "arm,scpi";
+ mboxes = <&mailbox 0 &mailbox 1>;
+ shmem = <&cpu_scp_lpri &cpu_scp_hpri>;
+
+ clocks {
+ compatible = "arm,scpi-clocks";
+
+ scpi_dvfs: scpi_clocks@0 {
+ compatible = "arm,scpi-dvfs";
+ #clock-cells = <1>;
+ clock-indices = <0>, <1>, <2>;
+ clock-output-names = "vbig", "vlittle", "vgpu";
+ };
+ scpi_clk: scpi_clocks@3 {
+ compatible = "arm,scpi-clk";
+ #clock-cells = <1>;
+ clock-indices = <3>, <4>;
+ clock-output-names = "pxlclk0", "pxlclk1";
+ };
+ };
+};
+
+cpu@0 {
+ ...
+ reg = <0 0>;
+ clocks = <&scpi_dvfs 0>;
+ clock-names = "big";
+};
+
+hdlcd@7ff60000 {
+ ...
+ reg = <0 0x7ff60000 0 0x1000>;
+ clocks = <&scpi_clk 1>;
+ clock-names = "pxlclk";
+};
+
+In the above example, the #clock-cells is set to 1 as required.
+scpi_dvfs has 3 output clocks namely: vbig, vlittle and vgpu with 0, 1
+and 2 as clock-indices. scpi_clk has 2 output clocks namely: pxlclk0 and
+pxlclk1 with 3 and 4 as clock-indices.
+
+The first consumer in the example is cpu@0 and it has vbig as input clock.
+The index '0' in the clock specifier here points to the first entry in the
+output clocks of scpi_dvfs for which clock_id as required by the firmware
+is 0.
+
+Similarly the second example is hdlcd@7ff60000 and it has pxlclk0 as input
+clock. The index '1' in the clock specifier here points to the second entry
+in the output clocks of scpi_clk for which clock_id as required by the
+firmware is 4.
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
index f5db6b72a36f..99d6608c9d5f 100644
--- a/Documentation/devicetree/bindings/thermal/thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -167,6 +167,13 @@ Optional property:
by means of sensor ID. Additional coefficients are
interpreted as constant offset.
+- sustainable-power: An estimate of the sustainable power (in mW) that the
+ Type: unsigned thermal zone can dissipate at the desired
+ Size: one cell control temperature. For reference, the
+ sustainable power of a 4'' phone is typically
+ 2000mW, while on a 10'' tablet is around
+ 4500mW.
+
Note: The delay properties are bound to the maximum dT/dt (temperature
derivative over time) in two situations for a thermal zone:
(i) - when passive cooling is activated (polling-delay-passive); and
@@ -546,6 +553,8 @@ thermal-zones {
*/
coefficients = <1200 -345 890>;
+ sustainable-power = <2500>;
+
trips {
/* Trips are based on resulting linear equation */
cpu-trip: cpu-trip {
diff --git a/Documentation/dma-buf-test-exporter.txt b/Documentation/dma-buf-test-exporter.txt
new file mode 100755
index 000000000000..4208010cc788
--- /dev/null
+++ b/Documentation/dma-buf-test-exporter.txt
@@ -0,0 +1,40 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+=====================
+dma-buf-test-exporter
+=====================
+
+Overview
+--------
+
+The dma-buf-test-exporter is a simple exporter of dma_buf objects.
+It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds.
+The private API allows:
+* simple allocation of physically non-contiguous buffers
+* simple allocation of physically contiguous buffers
+* query kernel side API usage stats (number of attachments, number of mappings, mmaps)
+* failure mode configuration (fail attach, mapping, mmap)
+* kernel side memset of buffers
+
+The buffers support all of the dma_buf API, including mmap.
+
+It supports being compiled as a module both in-tree and out-of-tree.
+
+See include/linux/dma-buf-test-exporter.h for the ioctl interface.
+See Documentation/dma-buf-sharing.txt for details on dma_buf.
+
+
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 4db22f6491e0..85a4a033bae7 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -445,3 +445,6 @@ object doesn't exist. It's remote/distributed ones that might care...
[mandatory]
FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate()
in your dentry operations instead.
+--
+[mandatory]
+ vfs_readdir() is gone; switch to iterate_dir() instead
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 954eab8c7fec..be35913d282b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -369,6 +369,8 @@ is not associated with a file:
[stack:1001] = the stack of the thread with tid 1001
[vdso] = the "virtual dynamic shared object",
the kernel system call handler
+ [anon:<name>] = an anonymous mapping that has been
+ named by userspace
or if empty, the mapping is anonymous.
@@ -419,6 +421,7 @@ KernelPageSize: 4 kB
MMUPageSize: 4 kB
Locked: 374 kB
VmFlags: rd ex mr mw me de
+Name: name from userspace
the first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. The remaining lines show the size of the mapping
@@ -469,6 +472,9 @@ Note that there is no guarantee that every flag and associated mnemonic will
be present in all further kernel releases. Things get changed, the flags may
be vanished or the reverse -- new added.
+The "Name" field will only be present on a mapping that has been named by
+userspace, and will show the name passed in by userspace.
+
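+Naming is typically done from userspace with prctl(). The sketch below uses
+the PR_SET_VMA/PR_SET_VMA_ANON_NAME request pair provided by this patch set;
+the constant values shown are taken from the corresponding userspace headers
+and should be treated as an assumption rather than a stable ABI reference:
+
+    #include <stdio.h>
+    #include <sys/mman.h>
+    #include <sys/prctl.h>
+
+    /* Assumed values; normally provided by the patched uapi headers. */
+    #ifndef PR_SET_VMA
+    #define PR_SET_VMA            0x53564d41
+    #define PR_SET_VMA_ANON_NAME  0
+    #endif
+
+    int main(void)
+    {
+        void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+        /* The mapping then shows up as [anon:myheap] in maps/smaps. */
+        prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
+              (unsigned long)buf, 4096, (unsigned long)"myheap");
+        getchar();      /* keep the mapping around for inspection */
+        return 0;
+    }
+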
This file is only present if the CONFIG_MMU kernel configuration option is
enabled.
@@ -484,6 +490,10 @@ To clear the bits for the file mapped pages associated with the process
> echo 3 > /proc/PID/clear_refs
Any other value written to /proc/PID/clear_refs will have no effect.
+To reset the peak resident set size ("high water mark") to the process's
+current value:
+ > echo 5 > /proc/PID/clear_refs
+
The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
using /proc/kpageflags and number of times a page is mapped using
/proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt.
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt
index 403c090aca39..e5274f84dc56 100644
--- a/Documentation/filesystems/squashfs.txt
+++ b/Documentation/filesystems/squashfs.txt
@@ -2,10 +2,10 @@ SQUASHFS 4.0 FILESYSTEM
=======================
Squashfs is a compressed read-only filesystem for Linux.
-It uses zlib/lzo/xz compression to compress files, inodes and directories.
-Inodes in the system are very small and all blocks are packed to minimise
-data overhead. Block sizes greater than 4K are supported up to a maximum
-of 1Mbytes (default block size 128K).
+It uses zlib, lz4, lzo, or xz compression to compress files, inodes and
+directories. Inodes in the system are very small and all blocks are packed to
+minimise data overhead. Block sizes greater than 4K are supported up to a
+maximum of 1Mbytes (default block size 128K).
Squashfs is intended for general read-only filesystem use, for archival
use (i.e. in cases where a .tar.gz file may be used), and in constrained
diff --git a/Documentation/kds.txt b/Documentation/kds.txt
new file mode 100755
index 000000000000..639288c106b7
--- /dev/null
+++ b/Documentation/kds.txt
@@ -0,0 +1,268 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+==============================
+kds - Kernel Dependency System
+==============================
+
+Introduction
+------------
+kds provides a mechanism for clients to atomically lock down multiple abstract resources.
+This can be done either synchronously or asynchronously.
+Abstract resources are used so that a set of clients can use kds to control access to any
+kind of resource; an example is structured memory buffers.
+
+kds supports both exclusive locking of a buffer and shared access to buffers.
+
+kds can be built either as an integrated feature of the kernel or as a module.
+It supports being compiled as a module both in-tree and out-of-tree.
+
+
+Concepts
+--------
+A core concept in kds is abstract resources.
+A kds resource is just an abstraction for some client object, kds doesn't care what it is.
+Typically EGL will consider UMP buffers as being a resource, thus each UMP buffer has
+a kds resource for synchronization to the buffer.
+
+kds allows a client to create and destroy the abstract resource objects.
+A new resource object is made available immediately (it is just a simple malloc with some initializations),
+while destroying one requires some external synchronization.
+
+The other core concept in kds is the consumer of resources.
+A client requests that kds let it consume a set of resources, and the client is notified when it can consume them.
+
+Exclusive access allows only one client to consume a resource.
+Shared access permits multiple consumers to access a resource concurrently.
+
+
+APIs
+----
+kds provides simple resource allocate and destroy functions.
+Clients use this to instantiate and control the lifetime of the resources kds manages.
+
+kds provides two ways to wait for resources:
+- Asynchronous wait: the client specifies a function pointer to be called when the wait is over
+- Synchronous wait: the function blocks until access is gained.
+
+The synchronous API has a timeout for the wait.
+The call can return early if a signal is delivered.
+
+After a client is done consuming the resources, kds must be notified so that it can release them and let some other client take ownership.
+This is done via the resource set release call.
+
+A Windows comparison:
+kds implements WaitForMultipleObjectsEx(..., bWaitAll = TRUE, ...) but also provides an asynchronous version.
+kds resources can be seen as being the same as NT object manager resources.
+
+Internals
+---------
+kds guarantees atomicity when a set of resources is operated on.
+This is implemented via a global resource lock which is taken by kds when it updates resource objects.
+
+Internally a resource in kds is a linked list head with some flags.
+
+When a consumer requests access to a set of resources it is queued on each of the resources.
+The link from the consumer to the resources can be triggered. Once all links are triggered
+the registered callback is called or the blocking function returns.
+A link is considered triggered if it is the first on the list of consumers of a resource,
+or if all the links ahead of it are marked as shared and it is itself of the shared type.
+
+When the client is done consuming the consumer object is removed from the linked lists of
+the resources and a potential new consumer becomes the head of the resources.
+As we add and remove consumers atomically across all resources we can guarantee that
+we never introduce A->B + B->A type loops/deadlocks.
+
+
+kbase/base implementation
+-------------------------
+A HW job needs access to a set of shared resources.
+EGL tracks this and encodes the set along with the atom in the ringbuffer.
+EGL allocates a (k)base dep object to represent the dependency to the set of resources and encodes that along with the list of resources.
+This dep object is used to create a dependency from a job chain (atom) to the resources it needs to run.
+When kbase decodes the atom in the ringbuffer it finds the set of resources and calls kds to request all the needed resources.
+As EGL needs to know when the kds request is delivered, a new base event object is needed: atom enqueued. This event is only delivered for atoms which use kds.
+The callback kbase registers triggers the dependency object described above, which in turn causes the existing JD system to release the job chain.
+When the atom is done, kds resource set release is called to release the resources.
+
+EGL will typically use exclusive access to the render target, while all buffers used as input can be marked as shared.
+
+
+Buffer publish/vsync
+--------------------
+EGL will use a separate ioctl or DRM flip to request the flip.
+If the LCD driver is integrated with kds EGL can do these operations early.
+The LCD driver must then implement the ioctl or DRM flip to be asynchronous with kds async call.
+The LCD driver binds a kds resource to each virtual buffer (2 buffers in case of double-buffering).
+EGL will make a dependency to the target kds resource in the kbase atom.
+After EGL receives an atom enqueued event it can ask the LCD driver to pan to the target kds resource.
+When the atom is completed it'll release the resource and the LCD driver will get its callback.
+In the callback it'll load the target buffer into the DMA unit of the LCD hardware.
+The LCD driver will be the consumer of both buffers for a short period.
+The LCD driver will call kds resource set release on the previous on-screen buffer when the next vsync/dma read end is handled.
+
+===============================================
+Kernel driver kds client design considerations
+===============================================
+
+Number of resources
+--------------------
+
+The kds api allows a client to wait for ownership of a number of resources, whereby the client does not take on ownership of any of the resources in the resource set
+until all of the resources in the set are released. Consideration must be made with respect to performance, as waiting on a large number of resources will incur
+a greater overhead and may increase system latency. It may be worth considering how independent each of the resources is; for example, if the same set of resources
+is waited upon by each of the clients, then it may be possible to aggregate these into one resource that each client waits upon.
+
+Clients with shared access
+---------------------------
+
+The kds api allows a number of clients to gain shared access to a resource simultaneously. Consideration must be made with respect to performance: large numbers of clients
+wanting shared access can incur a performance penalty and may increase system latency, specifically when the clients are granted access. Having an excessively high
+number of clients with shared access should be avoided, and consideration should be given to the callback configuration being used. See Callbacks and Scenario 1 below.
+
+Callbacks
+----------
+
+Careful consideration must be made as to which callback type is most appropriate for kds clients. Direct callbacks are called immediately from the context in which the
+ownership of the resource is passed to the next waiter in the list. With deferred callbacks, by contrast, the callback is deferred and called from outside the context
+that is relinquishing ownership; while this reduces the latency in the releasing client's context, it does incur a cost as there is more latency between a resource
+becoming free and the new owning client's callback being executed.
+
+Obviously direct callbacks have a performance advantage, as the callback is immediate and does not have to wait for the kernel to context switch and schedule
+the execution of the callback.
+
+However, as the callback is immediate and within the context that is granting ownership, it is important that the callback performs the MINIMUM amount of work necessary;
+long callbacks could cause poor system latency. Special care and attention must be taken if the direct callbacks can be called from IRQ handlers, such as when
+kds_resource_set_release is called from an IRQ handler; in this case you have to avoid any calls that may sleep.
+
+Deferred callbacks have the advantage that they are deferred until they are scheduled by the kernel, and are therefore allowed to call functions that may sleep;
+if the release happens in IRQ context they do not incur as much system latency as would be seen with direct callbacks from within the IRQ.
+
+Once the client's callback has been called, the client is considered to own the resource. Within the callback the client may only need to perform a small amount of work
+before it needs to give up ownership. The kds_resource_set_release function may be called from within the callback, but consideration must be made when using direct
+callbacks, with respect to both execution time and stack usage. Consider the example in Scenario 2 with direct callbacks:
+
+Scenario 1 - Shared client access - direct callbacks:
+
+Resources: X
+Clients: A(S), B(S), C(S), D(S), E(E)
+where: (S) = shared user, (E) = exclusive
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+}
+
+Where <client> is either A,B,C,D
+ Queue |Owner
+1. E Requests X exclusive |
+2. E Owns X exclusive |E
+3. A Requests X shared A|E
+4. B Requests X shared BA|E
+5. C Requests X shared CBA|E
+6. D Requests X shared DCBA|E
+7. E Releases X |DCBA
+8. A Owns X shared |DCBA
+9. B Owns X shared |DCBA
+10. C Owns X shared |DCBA
+11. D Owns X shared |DCBA
+
+At point 7 it is important to note that when E releases X, A, B, C and D become the new shared owners of X and the callback for each of the clients (A,B,C,D) is triggered, so consideration
+must be made as to whether a direct or deferred callback is suitable; using direct callbacks would result in the call graph below.
+
+Call graph when E releases X:
+ kds_resource_set_release( .. )
+ +->client_A_cb( .. )
+ +->client_B_cb( .. )
+ +->client_C_cb( .. )
+ +->client_D_cb( .. )
+
+
+Scenario 2 - Immediate resource release - direct callbacks:
+
+Resource: X
+Clients: A, B, C, D
+
+Clients kds callback handler:
+
+client_<client>_cb( p1, p2 )
+{
+ kds_resource_set_release( .. );
+}
+
+Where <client> is either A,B,C,D
+
+1. A Owns X exclusive
+2. B Requests X exclusive (direct callback)
+3. C Requests X exclusive (direct callback)
+4. D Requests X exclusive (direct callback)
+5. A Releases X
+
+Call graph when A releases X:
+ kds_resource_set_release( .. )
+ +->client_B_cb( .. )
+ +->kds_resource_set_release( .. )
+ +->client_C_cb( .. )
+ +->kds_resource_set_release( .. )
+ +->client_D_cb( .. )
+
+As can be seen, when a client releases the resource with direct callbacks it is possible to create nested calls.
+
+IRQ Considerations
+-------------------
+
+Usage of kds_resource_release in IRQ handlers should be carefully considered.
+
+Things to keep in mind:
+
+1.) Are you using direct or deferred callbacks?
+2.) How many resources are you releasing?
+3.) How many shared waiters are pending on the resource?
+
+Releasing ownership and wait cancellation
+------------------------------------------
+
+Client Wait Cancellation
+-------------------------
+
+It may be necessary in certain circumstances for the client to cancel the wait for kds resources, for error handling, process termination etc. Cancellation is
+performed using kds_resource_set_release or kds_resource_set_release_sync with the rset that was received from kds_async_waitall; kds_resource_set_release_sync
+is used for waits which use deferred callbacks.
+
+It is possible that while the request to cancel the wait is being issued by the client, the client is granted access to the resources. Normally after the client
+has taken ownership and finishes with that resource, it will release ownership to signal other waiters which are pending; this causes a race with the cancellation.
+To prevent KDS trying to remove a wait twice from the internal list and accessing memory that has potentially been freed, it is very important that all releasers use the
+same rset pointer. Here is a simplified example of bad usage that must be avoided in any client implementation:
+
+Scenario 3 - Bad release from multiple contexts:
+
+ This scenario highlights bad usage of the kds API.
+
+ kds_resource_set * rset;
+ kds_resource_set * rset_copy;
+
+ kds_async_waitall( &rset, ... ... ... );
+
+ /* Don't do this */
+ rset_copy = rset;
+
+Context A:
+ kds_resource_set_release( &rset )
+
+Context B:
+ kds_resource_set_release( &rset_copy )
+
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a59ee432a98f..fbfc9e0d75e0 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -22,6 +22,15 @@ ip_no_pmtu_disc - BOOLEAN
min_pmtu - INTEGER
default 552 - minimum discovered Path MTU
+fwmark_reflect - BOOLEAN
+ Controls the fwmark of kernel-generated IPv4 reply packets that are not
+ associated with a socket (for example, TCP RSTs or ICMP echo replies).
+ If unset, these packets have a fwmark of zero. If set, they have the
+ fwmark of the packet they are replying to. Similarly affects the fwmark
+ used by internal routing lookups triggered by incoming packets, such as
+ the ones used for Path MTU Discovery.
+ Default: 0
+
route/max_size - INTEGER
Maximum number of routes allowed in the kernel. Increase
this when using large numbers of interfaces and/or routes.
@@ -468,6 +477,16 @@ tcp_fastopen - INTEGER
See include/net/tcp.h and the code for more details.
+tcp_fwmark_accept - BOOLEAN
+ If set, incoming connections to listening sockets that do not have a
+ socket mark will set the mark of the accepting socket to the fwmark of
+ the incoming SYN packet. This will cause all packets on that connection
+ (starting from the first SYNACK) to be sent with that fwmark. The
+ listening socket's mark is unchanged. Listening sockets that already
+ have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+ unaffected.
+ Default: 0
+
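+ A listening socket can opt out of this behaviour by setting its own mark
+ before accepting connections; a minimal C sketch (note that SO_MARK
+ requires CAP_NET_ADMIN):
+
+     #include <sys/socket.h>
+
+     /* Mark the listener explicitly so tcp_fwmark_accept leaves it alone. */
+     static int set_listener_mark(int listen_fd, unsigned int mark)
+     {
+         return setsockopt(listen_fd, SOL_SOCKET, SO_MARK,
+                           &mark, sizeof(mark));
+     }
+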
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value
@@ -1093,6 +1112,15 @@ conf/all/forwarding - BOOLEAN
proxy_ndp - BOOLEAN
Do proxy ndp.
+fwmark_reflect - BOOLEAN
+ Controls the fwmark of kernel-generated IPv6 reply packets that are not
+ associated with a socket (for example, TCP RSTs or ICMPv6 echo replies).
+ If unset, these packets have a fwmark of zero. If set, they have the
+ fwmark of the packet they are replying to. Similarly affects the fwmark
+ used by internal routing lookups triggered by incoming packets, such as
+ the ones used for Path MTU Discovery.
+ Default: 0
+
conf/interface/*:
Change special settings per interface.
@@ -1311,6 +1339,19 @@ ndisc_notify - BOOLEAN
1 - Generate unsolicited neighbour advertisements when device is brought
up or hardware address changes.
+optimistic_dad - BOOLEAN
+ Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
+ 0: disabled (default)
+ 1: enabled
+
+use_optimistic - BOOLEAN
+ If enabled, do not classify optimistic addresses as deprecated during
+ source address selection. Preferred addresses will still be chosen
+ before optimistic addresses, subject to other ranking in the source
+ address selection algorithm.
+ 0: disabled (default)
+ 1: enabled
+
icmp/*:
ratelimit - INTEGER
Limit the maximal rates for sending ICMPv6 packets.
diff --git a/Documentation/sync.txt b/Documentation/sync.txt
new file mode 100644
index 000000000000..a2d05e7fa193
--- /dev/null
+++ b/Documentation/sync.txt
@@ -0,0 +1,75 @@
+Motivation:
+
+In complicated DMA pipelines such as graphics (multimedia, camera, gpu, display)
+a consumer of a buffer needs to know when the producer has finished producing
+it. Likewise the producer needs to know when the consumer is finished with the
+buffer so it can reuse it. A particular buffer may be consumed by multiple
+consumers which will retain the buffer for different amounts of time. In
+addition, a consumer may consume multiple buffers atomically.
+The sync framework adds an API which allows synchronization between the
+producers and consumers in a generic way while also allowing platforms which
+have shared hardware synchronization primitives to exploit them.
+
+Goals:
+ * provide a generic API for expressing synchronization dependencies
+ * allow drivers to exploit hardware synchronization between hardware
+ blocks
+ * provide a userspace API that allows a compositor to manage
+ dependencies.
+ * provide rich telemetry data to allow debugging slowdowns and stalls of
+ the graphics pipeline.
+
+Objects:
+ * sync_timeline
+ * sync_pt
+ * sync_fence
+
+sync_timeline:
+
+A sync_timeline is an abstract monotonically increasing counter. In general,
+each driver/hardware block context will have one of these. They can be backed
+by the appropriate hardware or rely on the generic sw_sync implementation.
+Timelines are only ever created through their specific implementations
+(i.e. sw_sync.)
+
+sync_pt:
+
+A sync_pt is an abstract value which marks a point on a sync_timeline. Sync_pts
+have a single timeline parent. They have 3 states: active, signaled, and error.
+They start in active state and transition, once, to either signaled (when the
+timeline counter advances beyond the sync_pt’s value) or error state.
+
+sync_fence:
+
+Sync_fences are the primary primitives used by drivers to coordinate
+synchronization of their buffers. They are a collection of sync_pts which may
+or may not have the same timeline parent. A sync_pt can only exist in one fence
+and the fence's list of sync_pts is immutable once created. Fences can be
+waited on synchronously or asynchronously. Two fences can also be merged to
+create a third fence containing a copy of the two fences’ sync_pts. Fences are
+backed by file descriptors to allow userspace to coordinate the display pipeline
+dependencies.
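+
+Because a fence is just a file descriptor, userspace can wait for it with
+ordinary poll(); the sketch below assumes a fence fd obtained from a driver
+(the legacy sync driver signals POLLIN once all sync_pts in the fence have
+signaled):
+
+    #include <poll.h>
+
+    /* Returns 0 when the fence signals, -1 on error or timeout. */
+    static int wait_fence_fd(int fence_fd, int timeout_ms)
+    {
+        struct pollfd pfd = { .fd = fence_fd, .events = POLLIN };
+        int ret = poll(&pfd, 1, timeout_ms);
+
+        if (ret == 1 && (pfd.revents & POLLIN))
+            return 0;
+        return -1;
+    }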
+
+Use:
+
+A driver implementing sync support should have a work submission function which:
+ * takes a fence argument specifying when to begin work
+ * asynchronously queues that work to kick off when the fence is signaled
+ * returns a fence to indicate when its work will be done.
+ * signals the returned fence once the work is completed.
+
+Consider an imaginary display driver that has the following API:
+/*
+ * assumes buf is ready to be displayed.
+ * blocks until the buffer is on screen.
+ */
+ void display_buffer(struct dma_buf *buf);
+
+The new API will become:
+/*
+ * will display buf when fence is signaled.
+ * returns immediately with a fence that will signal when buf
+ * is no longer displayed.
+ */
+struct sync_fence* display_buffer(struct dma_buf *buf,
+ struct sync_fence *fence);
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index dcc75a9ed919..b81fca90f7fe 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm:
- dirty_writeback_centisecs
- drop_caches
- extfrag_threshold
+- extra_free_kbytes
- hugepages_treat_as_movable
- hugetlb_shm_group
- laptop_mode
@@ -198,6 +199,21 @@ fragmentation index is <= extfrag_threshold. The default value is 500.
==============================================================
+extra_free_kbytes
+
+This parameter tells the VM to keep extra free memory between the threshold
+where background reclaim (kswapd) kicks in, and the threshold where direct
+reclaim (by allocating processes) kicks in.
+
+This is useful for workloads that require low latency memory allocations
+and have a bounded burstiness in memory allocations. For example, a
+realtime application that receives and transmits network traffic
+(causing in-kernel memory allocations) with a maximum total message burst
+size of 200MB may need 200MB of extra free memory to avoid direct reclaim
+related latencies.
+
+==============================================================
+
hugepages_treat_as_movable
This parameter is only useful when kernelcore= is specified at boot time to
diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt
index fca24c931ec8..ad04aa0e7499 100644
--- a/Documentation/thermal/cpu-cooling-api.txt
+++ b/Documentation/thermal/cpu-cooling-api.txt
@@ -25,8 +25,173 @@ the user. The registration APIs returns the cooling device pointer.
clip_cpus: cpumask of cpus where the frequency constraints will happen.
-1.1.2 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register(
+ struct device_node *np, const struct cpumask *clip_cpus)
+
+ This interface function registers the cpufreq cooling device with
+ the name "thermal-cpufreq-%x" linking it with a device tree node, in
+ order to bind it via the thermal DT code. This API can support multiple
+ instances of cpufreq cooling devices.
+
+ np: pointer to the cooling device device tree node
+ clip_cpus: cpumask of cpus where the frequency constraints will happen.
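+
+ A minimal registration sketch is shown below; the CPU device-tree node
+ lookup via of_get_cpu_node() and the use of cpu_present_mask are
+ illustrative choices, not requirements of the API:
+
+    struct device_node *np = of_get_cpu_node(0, NULL);
+    struct thermal_cooling_device *cdev;
+
+    cdev = of_cpufreq_cooling_register(np, cpu_present_mask);
+    if (IS_ERR(cdev))
+        pr_err("failed to register cpufreq cooling device\n");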
+
+1.1.3 struct thermal_cooling_device *cpufreq_power_cooling_register(
+ const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
+
+Similar to cpufreq_cooling_register, this function registers a cpufreq
+cooling device. Using this function, the cooling device will
+implement the power extensions by using a simple cpu power model. The
+cpus must have registered their OPPs using the OPP library.
+
+The additional parameters are needed for the power model (See 2. Power
+models). "capacitance" is the dynamic power coefficient (See 2.1
+Dynamic power). "plat_static_func" is a function to calculate the
+static power consumed by these cpus (See 2.2 Static power).
+
+1.1.4 struct thermal_cooling_device *of_cpufreq_power_cooling_register(
+ struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
+
+Similar to cpufreq_power_cooling_register, this function registers a
+cpufreq cooling device with power extensions using the device tree
+information supplied by the np parameter.
+
+1.1.5 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
This interface function unregisters the "thermal-cpufreq-%x" cooling device.
cdev: Cooling device pointer which has to be unregistered.
+
+2. Power models
+
+The power API registration functions provide a simple power model for
+CPUs. The current power is calculated as dynamic + (optionally)
+static power. This power model requires that the operating-points of
+the CPUs are registered using the kernel's opp library and the
+`cpufreq_frequency_table` is assigned to the `struct device` of the
+cpu. If you are using CONFIG_CPUFREQ_DT then the
+`cpufreq_frequency_table` should already be assigned to the cpu
+device.
+
+The `plat_static_func` parameter of `cpufreq_power_cooling_register()`
+and `of_cpufreq_power_cooling_register()` is optional. If you don't
+provide it, only dynamic power will be considered.
+
+2.1 Dynamic power
+
+The dynamic power consumption of a processor depends on many factors.
+For a given processor implementation the primary factors are:
+
+- The time the processor spends running, consuming dynamic power, as
+ compared to the time in idle states where dynamic consumption is
+ negligible. Herein we refer to this as 'utilisation'.
+- The voltage and frequency levels as a result of DVFS. The DVFS
+ level is a dominant factor governing power consumption.
+- In running time the 'execution' behaviour (instruction types, memory
+ access patterns and so forth) causes, in most cases, a second order
+ variation. In pathological cases this variation can be significant,
+ but typically it is of a much lesser impact than the factors above.
+
+A high level dynamic power consumption model may then be represented as:
+
+Pdyn = f(run) * Voltage^2 * Frequency * Utilisation
+
+f(run) here represents the described execution behaviour and its
+result has units of Watts/Hz/Volt^2 (this is often expressed in
+mW/MHz/uVolt^2).
+
+The detailed behaviour for f(run) could be modelled on-line. However,
+in practice, such an on-line model has dependencies on a number of
+implementation specific processor support and characterisation
+factors. Therefore, in the initial implementation that contribution is
+represented as a constant coefficient. This is a simplification
+consistent with the relative contribution to overall power variation.
+
+In this simplified representation our model becomes:
+
+Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation
+
+Where `capacitance` is a constant that represents an indicative
+running time dynamic power coefficient in fundamental units of
+mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in the range
+from 100 to 500. For reference, the approximate values for the SoC in
+ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
+140 for the Cortex-A53 cluster.
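+
+As a worked instance of the simplified equation above, the sketch below
+computes a dynamic power estimate from the coefficient, frequency and
+voltage at full utilisation. The unit scaling (voltage in mV, frequency in
+MHz, result in mW, hence the division by 10^9) mirrors common practice in
+the cpufreq cooling implementation and should be read as an assumption, not
+a specification:
+
+    /* capacitance = 530, 1100 MHz at 900 mV, 100% utilisation */
+    static u32 dyn_power_mw(u32 capacitance, u32 freq_mhz, u32 voltage_mv)
+    {
+        u64 power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
+
+        do_div(power, 1000000000);    /* scale to milliwatts */
+        return power;
+    }
+
+    /* dyn_power_mw(530, 1100, 900) gives roughly 472 mW. */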
+
+
+2.2 Static power
+
+Static leakage power consumption depends on a number of factors. For a
+given circuit implementation the primary factors are:
+
+- Time the circuit spends in each 'power state'
+- Temperature
+- Operating voltage
+- Process grade
+
+The time the circuit spends in each 'power state' for a given
+evaluation period at first order means OFF or ON. However,
+'retention' states can also be supported that reduce power during
+inactive periods without loss of context.
+
+Note: The visibility of state entries to the OS can vary, according to
+platform specifics, and this can then impact the accuracy of a model
+based on OS state information alone. It might be possible in some
+cases to extract more accurate information from system resources.
+
+The temperature, operating voltage and process 'grade' (slow to fast)
+of the circuit are all significant factors in static leakage power
+consumption. All of these have complex relationships to static power.
+
+Circuit implementation specific factors include the chosen silicon
+process as well as the type, number and size of transistors in both
+the logic gates and any RAM elements included.
+
+The static power consumption modelling must take into account the
+power managed regions that are implemented. Taking the example of an
+ARM processor cluster, the modelling would take into account whether
+each CPU can be powered OFF separately or if only a single power
+region is implemented for the complete cluster.
+
+In one view (there are others), a static power consumption model can
+then start from a set of reference values for each power managed
+region (e.g. CPU, Cluster/L2) in each state (e.g. ON, OFF) at an
+arbitrary process grade, voltage and temperature point. These values
+are then scaled for all of the following: the time in each state, the
+process grade, the current temperature and the operating voltage.
+However, since both implementation specific and complex relationships
+dominate the estimate, the appropriate interface to the model from the
+cpu cooling device is to provide a function callback that calculates
+the static power in this platform. When registering the cpu cooling
+device pass a function pointer that follows the `get_static_t`
+prototype:
+
+ int plat_get_static(cpumask_t *cpumask, int interval,
+                     unsigned long voltage, u32 *power);
+
+`cpumask` is the cpumask of the cpus involved in the calculation.
+`voltage` is the voltage at which they are operating. The function
+should calculate the average static power for the last `interval`
+milliseconds. It returns 0 on success, -E* on error. If it
+succeeds, it should store the static power in `power`. Reading the
+temperature of the cpus described by `cpumask` is left for
+plat_get_static() to do as the platform knows best which thermal
+sensor is closest to the cpu.
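+
+A minimal sketch of such a callback is shown below. The lookup table, the
+temperature source and the helpers platform_leakage_lookup() and
+platform_read_temp() are hypothetical, platform-specific assumptions used
+purely for illustration:
+
+    static int plat_get_static(cpumask_t *cpumask, int interval,
+                               unsigned long voltage, u32 *power)
+    {
+        /*
+         * Hypothetical per-CPU leakage (in mW) looked up from a
+         * characterisation table for this voltage and temperature.
+         * 'interval' (ms) is ignored here; a real platform might
+         * average its sensor readings over it.
+         */
+        u32 leakage_mw = platform_leakage_lookup(voltage,
+                                                 platform_read_temp());
+
+        *power = leakage_mw * cpumask_weight(cpumask);
+        return 0;
+    }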
+
+If `plat_static_func` is NULL, static power is considered to be
+negligible for this platform and only dynamic power is considered.
+
+The platform specific callback can then use any combination of tables
+and/or equations to permute the estimated value. Process grade
+information is not passed to the model since access to such data, from
+on-chip measurement capability or manufacture time data, is platform
+specific.
+
+Note: the significance of static power for CPUs in comparison to
+dynamic power is highly dependent on implementation. Given the
+potential complexity in implementation, the importance and accuracy of
+its inclusion when using cpu cooling devices should be assessed on a
+case by case basis.
+
diff --git a/Documentation/thermal/power_allocator.txt b/Documentation/thermal/power_allocator.txt
new file mode 100644
index 000000000000..c3797b529991
--- /dev/null
+++ b/Documentation/thermal/power_allocator.txt
@@ -0,0 +1,247 @@
+Power allocator governor tunables
+=================================
+
+Trip points
+-----------
+
+The governor requires the following two passive trip points:
+
+1. "switch on" trip point: temperature above which the governor
+ control loop starts operating. This is the first passive trip
+ point of the thermal zone.
+
+2. "desired temperature" trip point: it should be higher than the
+   "switch on" trip point. This is the target temperature the governor
+ is controlling for. This is the last passive trip point of the
+ thermal zone.
+
+PID Controller
+--------------
+
+The power allocator governor implements a
+Proportional-Integral-Derivative controller (PID controller) with
+temperature as the control input and power as the controlled output:
+
+ P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power
+
+where
+ e = desired_temperature - current_temperature
+ err_integral is the sum of previous errors
+ diff_err = e - previous_error
+
+It is similar to the one depicted below:
+
+ k_d
+ |
+current_temp |
+ | v
+ | +----------+ +---+
+ | +----->| diff_err |-->| X |------+
+ | | +----------+ +---+ |
+ | | | tdp actor
+ | | k_i | | get_requested_power()
+ | | | | | | |
+ | | | | | | | ...
+ v | v v v v v
+ +---+ | +-------+ +---+ +---+ +---+ +----------+
+ | S |-------+----->| sum e |----->| X |--->| S |-->| S |-->|power |
+ +---+ | +-------+ +---+ +---+ +---+ |allocation|
+ ^ | ^ +----------+
+ | | | | |
+ | | +---+ | | |
+ | +------->| X |-------------------+ v v
+ | +---+ granted performance
+desired_temperature ^
+ |
+ |
+ k_po/k_pu
+
+Sustainable power
+-----------------
+
+An estimate of the sustainable dissipatable power (in mW) should be
+provided while registering the thermal zone. This estimates the
+sustained power that can be dissipated at the desired control
+temperature. This is the maximum sustained power for allocation at
+the desired maximum temperature. The actual sustained power can vary
+for a number of reasons. The closed loop controller will take care of
+variations such as environmental conditions, and some factors related
+to the speed-grade of the silicon. `sustainable_power` is therefore
+simply an estimate, and may be tuned to affect the aggressiveness of
+the thermal ramp. For reference, the sustainable power of a 4" phone
+is typically 2000mW, while on a 10" tablet is around 4500mW (may vary
+depending on screen size).
+
+If you are using device tree, do add it as a property of the
+thermal-zone. For example:
+
+ thermal-zones {
+ soc_thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <100>;
+ sustainable-power = <2500>;
+ ...
+
+Instead, if the thermal zone is registered from the platform code, pass a
+`thermal_zone_params` that has a `sustainable_power`. If no
+`thermal_zone_params` were being passed, then something like below
+will suffice:
+
+ static const struct thermal_zone_params tz_params = {
+ .sustainable_power = 3500,
+ };
+
+and then pass `tz_params` as the 5th parameter to
+`thermal_zone_device_register()`
+
+k_po and k_pu
+-------------
+
+The implementation of the PID controller in the power allocator
+thermal governor allows the configuration of two proportional term
+constants: `k_po` and `k_pu`. `k_po` is the proportional term
+constant during temperature overshoot periods (current temperature is
+above "desired temperature" trip point). Conversely, `k_pu` is the
+proportional term constant during temperature undershoot periods
+(current temperature below "desired temperature" trip point).
+
+These controls are intended as the primary mechanism for configuring
+the permitted thermal "ramp" of the system. For instance, a lower
+`k_pu` value will provide a slower ramp, at the cost of capping
+available capacity at a low temperature. On the other hand, a high
+value of `k_pu` will result in the governor granting very high power
+whilst temperature is low, and may lead to temperature overshooting.
+
+The default value for `k_pu` is:
+
+ 2 * sustainable_power / (desired_temperature - switch_on_temp)
+
+This means that at `switch_on_temp` the output of the controller's
+proportional term will be 2 * `sustainable_power`. The default value
+for `k_po` is:
+
+ sustainable_power / (desired_temperature - switch_on_temp)
+
+Focusing on the proportional and feed forward values of the PID
+controller equation we have:
+
+ P_max = k_p * e + sustainable_power
+
+The proportional term is proportional to the difference between the
+desired temperature and the current one. When the current temperature
+is the desired one, then the proportional component is zero and
+`P_max` = `sustainable_power`. That is, the system should operate in
+thermal equilibrium under constant load. `sustainable_power` is only
+an estimate, which is the reason for closed-loop control such as this.
+
+Expanding `k_pu` we get:
+ P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) +
+ sustainable_power
+
+where
+ T_set is the desired temperature
+ T is the current temperature
+ T_on is the switch on temperature
+
+When the current temperature is the switch_on temperature, the above
+formula becomes:
+
+ P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) +
+ sustainable_power = 2 * sustainable_power + sustainable_power =
+ 3 * sustainable_power
+
+Therefore, the proportional term alone linearly decreases power from
+3 * `sustainable_power` to `sustainable_power` as the temperature
+rises from the switch on temperature to the desired temperature.
+
+k_i and integral_cutoff
+-----------------------
+
+`k_i` configures the PID loop's integral term constant. This term
+allows the PID controller to compensate for long term drift and for
+the quantized nature of the output control: cooling devices can't set
+the exact power that the governor requests. When the temperature
+error is below `integral_cutoff`, errors are accumulated in the
+integral term. This term is then multiplied by `k_i` and the result
+added to the output of the controller. Typically `k_i` is set low (1
+or 2) and `integral_cutoff` is 0.
+
+k_d
+---
+
+`k_d` configures the PID loop's derivative term constant. It's
+recommended to leave it as the default: 0.
+
+Cooling device power API
+========================
+
+Cooling devices controlled by this governor must supply the additional
+"power" API in their `cooling_device_ops`. It consists on three ops:
+
+1. int get_requested_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 *power);
+@cdev: The `struct thermal_cooling_device` pointer
+@tz: thermal zone in which we are currently operating
+@power: pointer in which to store the calculated power
+
+`get_requested_power()` calculates the power requested by the device
+in milliwatts and stores it in @power . It should return 0 on
+success, -E* on failure. This is currently used by the power
+allocator governor to calculate how much power to give to each cooling
+device.
+
+2. int state2power(struct thermal_cooling_device *cdev, struct
+ thermal_zone_device *tz, unsigned long state, u32 *power);
+@cdev: The `struct thermal_cooling_device` pointer
+@tz: thermal zone in which we are currently operating
+@state: A cooling device state
+@power: pointer in which to store the equivalent power
+
+Convert cooling device state @state into power consumption in
+milliwatts and store it in @power. It should return 0 on success, -E*
+on failure. This is currently used by thermal core to calculate the
+maximum power that an actor can consume.
+
+3. int power2state(struct thermal_cooling_device *cdev, u32 power,
+ unsigned long *state);
+@cdev: The `struct thermal_cooling_device` pointer
+@power: power in milliwatts
+@state: pointer in which to store the resulting state
+
+Calculate a cooling device state that would make the device consume at
+most @power mW and store it in @state. It should return 0 on success,
+-E* on failure. This is currently used by the thermal core to convert
+a given power set by the power allocator governor to a state that the
+cooling device can set. It is a function because this conversion may
+depend on external factors that may change, so this function should provide
+the best conversion given the "current circumstances".
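+
+A hedged sketch of a cooling device wiring up these three ops is shown
+below. The linear ten-state power model (100 mW per state step) and the
+my_current_state() helper are purely illustrative assumptions:
+
+    #define MY_MAX_STATE     10
+    #define MY_MW_PER_STATE  100
+
+    static int my_get_requested_power(struct thermal_cooling_device *cdev,
+                                      struct thermal_zone_device *tz,
+                                      u32 *power)
+    {
+        /* my_current_state() is a hypothetical driver helper. */
+        *power = (MY_MAX_STATE - my_current_state(cdev)) * MY_MW_PER_STATE;
+        return 0;
+    }
+
+    static int my_state2power(struct thermal_cooling_device *cdev,
+                              struct thermal_zone_device *tz,
+                              unsigned long state, u32 *power)
+    {
+        /* State 0 is no throttling, so it consumes the most power. */
+        *power = (MY_MAX_STATE - state) * MY_MW_PER_STATE;
+        return 0;
+    }
+
+    static int my_power2state(struct thermal_cooling_device *cdev,
+                              u32 power, unsigned long *state)
+    {
+        /* Pick the shallowest state that consumes at most @power mW. */
+        *state = MY_MAX_STATE - min(power / MY_MW_PER_STATE,
+                                    (u32)MY_MAX_STATE);
+        return 0;
+    }
+
+    static struct thermal_cooling_device_ops my_cooling_ops = {
+        /* ...the usual get_max_state/get_cur_state/set_cur_state... */
+        .get_requested_power = my_get_requested_power,
+        .state2power         = my_state2power,
+        .power2state         = my_power2state,
+    };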
+
+Cooling device weights
+----------------------
+
+Weights are a mechanism to bias the allocation among cooling
+devices. They express the relative power efficiency of different
+cooling devices. Higher weight can be used to express higher power
+efficiency. Weighting is relative such that if each cooling device
+has a weight of one they are considered equal. This is particularly
+useful in heterogeneous systems where two cooling devices may perform
+the same kind of compute, but with different efficiency. For example,
+a system with two different types of processors.
+
+If the thermal zone is registered using
+`thermal_zone_device_register()` (i.e., platform code), then weights
+are passed as part of the thermal zone's `thermal_bind_parameters`.
+If the platform is registered using device tree, then they are passed
+as the `contribution` property of each map in the `cooling-maps` node.
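+
+For the platform-code case, a hypothetical sketch (the weights are
+illustrative and the .cdev/.match fields are filled in as for any
+other binding) giving one cooling device twice the weight of another
+could look like:
+
+    static struct thermal_bind_params my_tbp[] = {
+        { .weight = 2, .trip_mask = BIT(0) }, /* e.g. the more efficient cluster */
+        { .weight = 1, .trip_mask = BIT(0) },
+    };
+
+    static struct thermal_zone_params my_tzp = {
+        .tbp      = my_tbp,
+        .num_tbps = ARRAY_SIZE(my_tbp),
+    };
+
+    /* my_tzp is then passed to thermal_zone_device_register() */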
+
+Limitations of the power allocator governor
+===========================================
+
+The power allocator governor's PID controller works best if there is a
+periodic tick. If you have a driver that calls
+`thermal_zone_device_update()` (or anything that ends up calling the
+governor's `throttle()` function) more often than the normal thermal
+framework tick (for example, from an interrupt handler), the governor's
+response won't be very good. Note that this is not particular to this
+governor: the step-wise governor will also overreact and misbehave if
+its throttle() is called faster than the framework's periodic tick.
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index 37c54863f611..c3370c56b5df 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -95,7 +95,7 @@ temperature) and throttle appropriate devices.
1.3 interface for binding a thermal zone device with a thermal cooling device
1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
int trip, struct thermal_cooling_device *cdev,
- unsigned long upper, unsigned long lower);
+ unsigned long upper, unsigned long lower, unsigned int weight);
This interface function bind a thermal cooling device to the certain trip
point of a thermal zone device.
@@ -110,6 +110,8 @@ temperature) and throttle appropriate devices.
lower:the Minimum cooling state can be used for this trip point.
THERMAL_NO_LIMIT means no lower limit,
and the cooling device can be in cooling state 0.
+ weight: the influence of this cooling device in this thermal
+ zone. See 1.4.1 below for more information.
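+
+ For example, a hypothetical call (all names illustrative) binding
+ cdev to trip point 0 of tz with no state limits and a weight of 1:
+
+     ret = thermal_zone_bind_cooling_device(tz, 0, cdev,
+                                            THERMAL_NO_LIMIT,
+                                            THERMAL_NO_LIMIT, 1);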
1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
int trip, struct thermal_cooling_device *cdev);
@@ -127,9 +129,15 @@ temperature) and throttle appropriate devices.
This structure defines the following parameters that are used to bind
a zone with a cooling device for a particular trip point.
.cdev: The cooling device pointer
- .weight: The 'influence' of a particular cooling device on this zone.
- This is on a percentage scale. The sum of all these weights
- (for a particular zone) cannot exceed 100.
+ .weight: The 'influence' of a particular cooling device on this
+ zone. This is relative to the rest of the cooling
+ devices. For example, if all cooling devices have a
+ weight of 1, then they all contribute the same. You can
+ use percentages if you want, but it's not mandatory. A
+ weight of 0 means that this cooling device doesn't
+ contribute to the cooling of this zone unless all cooling
+ devices have a weight of 0. If all weights are 0, then
+ they all contribute the same.
.trip_mask:This is a bit mask that gives the binding relation between
this thermal zone and cdev, for a particular trip point.
If nth bit is set, then the cdev and thermal zone are bound
@@ -169,6 +177,12 @@ Thermal zone device sys I/F, created once it's registered:
|---trip_point_[0-*]_type: Trip point type
|---trip_point_[0-*]_hyst: Hysteresis value for this trip point
|---emul_temp: Emulated temperature set node
+ |---sustainable_power: Sustainable dissipatable power
+ |---k_po: Proportional term during temperature overshoot
+ |---k_pu: Proportional term during temperature undershoot
+ |---k_i: PID's integral term in the power allocator gov
+ |---k_d: PID's derivative term in the power allocator
+ |---integral_cutoff: Offset above which errors are accumulated
Thermal cooling device sys I/F, created once it's registered:
/sys/class/thermal/cooling_device[0-*]:
@@ -185,6 +199,8 @@ thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device.
/sys/class/thermal/thermal_zone[0-*]:
|---cdev[0-*]: [0-*]th cooling device in current thermal zone
|---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with
+ |---cdev[0-*]_weight: Influence of the cooling device in
+ this thermal zone
Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
the generic thermal driver also creates a hwmon sysfs I/F for each _type_
@@ -258,6 +274,14 @@ cdev[0-*]_trip_point
point.
RO, Optional
+cdev[0-*]_weight
+ The influence of cdev[0-*] in this thermal zone. This value
+ is relative to the rest of cooling devices in the thermal
+ zone. For example, if a cooling device has twice the
+ weight of another, it's twice as effective in cooling the
+ thermal zone.
+ RW, Optional
+
passive
Attribute is only present for zones in which the passive cooling
policy is not supported by native thermal driver. Default is zero
@@ -282,6 +306,52 @@ emul_temp
because userland can easily disable the thermal policy by simply
flooding this sysfs node with low temperature values.
+sustainable_power
+ An estimate of the sustained power that can be dissipated by
+ the thermal zone. Used by the power allocator governor. For
+ more information see Documentation/thermal/power_allocator.txt
+ Unit: milliwatts
+ RW, Optional
+
+k_po
+ The proportional term of the power allocator governor's PID
+ controller during temperature overshoot. Temperature overshoot
+ is when the current temperature is above the "desired
+ temperature" trip point. For more information see
+ Documentation/thermal/power_allocator.txt
+ RW, Optional
+
+k_pu
+ The proportional term of the power allocator governor's PID
+ controller during temperature undershoot. Temperature undershoot
+ is when the current temperature is below the "desired
+ temperature" trip point. For more information see
+ Documentation/thermal/power_allocator.txt
+ RW, Optional
+
+k_i
+ The integral term of the power allocator governor's PID
+ controller. This term allows the PID controller to compensate
+ for long term drift. For more information see
+ Documentation/thermal/power_allocator.txt
+ RW, Optional
+
+k_d
+ The derivative term of the power allocator governor's PID
+ controller. For more information see
+ Documentation/thermal/power_allocator.txt
+ RW, Optional
+
+integral_cutoff
+ Temperature offset from the desired temperature trip point
+ above which the integral term of the power allocator
+ governor's PID controller starts accumulating errors. For
+ example, if integral_cutoff is 0, then the integral term only
+ accumulates error when temperature is above the desired
+ temperature trip point. For more information see
+ Documentation/thermal/power_allocator.txt
+ RW, Optional
+
*****************************
* Cooling device attributes *
*****************************
@@ -311,7 +381,8 @@ passive, active. If an ACPI thermal zone supports critical, passive,
active[0] and active[1] at the same time, it may register itself as a
thermal_zone_device (thermal_zone1) with 4 trip points in all.
It has one processor and one fan, which are both registered as
-thermal_cooling_device.
+thermal_cooling_device. Both are considered to have the same
+effectiveness in cooling the thermal zone.
If the processor is listed in _PSL method, and the fan is listed in _AL0
method, the sys I/F structure will be built like this:
@@ -333,8 +404,10 @@ method, the sys I/F structure will be built like this:
|---trip_point_3_type: active1
|---cdev0: --->/sys/class/thermal/cooling_device0
|---cdev0_trip_point: 1 /* cdev0 can be used for passive */
+ |---cdev0_weight: 1024
|---cdev1: --->/sys/class/thermal/cooling_device3
|---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/
+ |---cdev1_weight: 1024
|cooling_device0:
|---type: Processor
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bfe8c29b1f1d..29064c3bfcd1 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2013,6 +2013,35 @@ will produce:
1) 1.449 us | }
+You can disable the hierarchical function call formatting and instead print a
+flat list of function entry and return events. This uses the format described
+in the Output Formatting section and respects all the trace options that
+control that formatting. Hierarchical formatting is the default.
+
+ hierarchical: echo nofuncgraph-flat > trace_options
+ flat: echo funcgraph-flat > trace_options
+
+ For example:
+
+ # tracer: function_graph
+ #
+ # entries-in-buffer/entries-written: 68355/68355 #P:2
+ #
+ # _-----=> irqs-off
+ # / _----=> need-resched
+ # | / _---=> hardirq/softirq
+ # || / _--=> preempt-depth
+ # ||| / delay
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
+ # | | | |||| | |
+ sh-1806 [001] d... 198.843443: graph_ent: func=_raw_spin_lock
+ sh-1806 [001] d... 198.843445: graph_ent: func=__raw_spin_lock
+ sh-1806 [001] d..1 198.843447: graph_ret: func=__raw_spin_lock
+ sh-1806 [001] d..1 198.843449: graph_ret: func=_raw_spin_lock
+ sh-1806 [001] d..1 198.843451: graph_ent: func=_raw_spin_unlock_irqrestore
+ sh-1806 [001] d... 198.843453: graph_ret: func=_raw_spin_unlock_irqrestore
+
+
You might find other useful features for this tracer in the
following "dynamic ftrace" section such as tracing only specific
functions or tasks.
diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt
index da49437d5aeb..0ac4101a8e52 100644
--- a/Documentation/trace/tracepoints.txt
+++ b/Documentation/trace/tracepoints.txt
@@ -99,3 +99,27 @@ core kernel image or in modules.
If the tracepoint has to be used in kernel modules, an
EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
used to export the defined tracepoints.
+
+If you need to do a bit of work for a tracepoint parameter, and
+that work is only used for the tracepoint, that work can be encapsulated
+within an if statement with the following:
+
+ if (trace_foo_bar_enabled()) {
+ int i;
+ int tot = 0;
+
+ for (i = 0; i < count; i++)
+ tot += calculate_nuggets();
+
+ trace_foo_bar(tot);
+ }
+
+All trace_<tracepoint>() calls have a matching trace_<tracepoint>_enabled()
+function defined that returns true if the tracepoint is enabled and
+false otherwise. The trace_<tracepoint>() call should always be inside
+the if (trace_<tracepoint>_enabled()) block to prevent races between
+the tracepoint being enabled and the check being seen.
+
+The advantage of using trace_<tracepoint>_enabled() is that it uses
+the static_key of the tracepoint to allow the if statement to be implemented
+with jump labels and avoid conditional branches.
diff --git a/android/configs/README b/android/configs/README
new file mode 100644
index 000000000000..8798731f8904
--- /dev/null
+++ b/android/configs/README
@@ -0,0 +1,15 @@
+The files in this directory are meant to be used as a base for an Android
+kernel config. All devices should have the options in android-base.cfg enabled.
+While not mandatory, the options in android-recommended.cfg enable advanced
+Android features.
+
+Assuming you already have a minimalist defconfig for your device, a possible
+way to enable these options would be:
+
+ ARCH=<arch> scripts/kconfig/merge_config.sh <path_to>/<device>_defconfig android/configs/android-base.cfg android/configs/android-recommended.cfg
+
+This will generate a .config that can then be used to save a new defconfig or
+compile a new kernel with Android features enabled.
+
+Because there is no tool to consistently generate these config fragments,
+let's keep them alphabetically sorted rather than in random order.
diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg
new file mode 100644
index 000000000000..1bee5d614d1b
--- /dev/null
+++ b/android/configs/android-base.cfg
@@ -0,0 +1,152 @@
+# KEEP ALPHABETICALLY SORTED
+# CONFIG_DEVKMEM is not set
+# CONFIG_DEVMEM is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_MODULES is not set
+# CONFIG_OABI_COMPAT is not set
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ANDROID_LOW_MEMORY_KILLER=y
+CONFIG_ARMV7_COMPAT=y
+CONFIG_ASHMEM=y
+CONFIG_AUDIT=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_VERITY=y
+CONFIG_EMBEDDED=y
+CONFIG_FB=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_INET=y
+CONFIG_INET_ESP=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_TARGET_REJECT_SKERR=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_REJECT_SKERR=y
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_TPROXY=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_KEY=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_NAT=y
+CONFIG_NO_HZ=y
+CONFIG_PACKET=y
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_PPP=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PREEMPT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_RTC_CLASS=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_STAGING=y
+CONFIG_SWITCH=y
+CONFIG_SYNC=y
+CONFIG_SYSVIPC=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_G_ANDROID=y
+CONFIG_USB_OTG_WAKELOCK=y
+CONFIG_XFRM_USER=y
diff --git a/android/configs/android-recommended.cfg b/android/configs/android-recommended.cfg
new file mode 100644
index 000000000000..960b9de2860d
--- /dev/null
+++ b/android/configs/android-recommended.cfg
@@ -0,0 +1,121 @@
+# KEEP ALPHABETICALLY SORTED
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_NF_CONNTRACK_SIP is not set
+# CONFIG_PM_WAKELOCKS_GC is not set
+# CONFIG_VT is not set
+CONFIG_ANDROID_TIMED_GPIO=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_COMPACTION=y
+CONFIG_DM_UEVENT=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_ENABLE_DEFAULT_TRACERS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FUSE_FS=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HIDRAW=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_GPIO=y
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_INPUT_KEYCHORD=y
+CONFIG_INPUT_KEYRESET=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_TABLET=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_ION=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KSM=y
+CONFIG_LOGIG940_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGITECH_FF=y
+CONFIG_MD=y
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_MSDOS_FS=y
+CONFIG_PANIC_TIMEOUT=5
+CONFIG_PANTHERLORD_FF=y
+CONFIG_PERF_EVENTS=y
+CONFIG_PM_DEBUG=y
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_WAKELOCKS_LIMIT=0
+CONFIG_POWER_SUPPLY=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_SUSPEND_TIME=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_GTCO=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_TABLET_USB_WACOM=y
+CONFIG_TIMER_STATS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_UHID=y
+CONFIG_UID_STAT=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_USBNET=y
+CONFIG_VFAT_FS=y
diff --git a/arch/Kconfig b/arch/Kconfig
index 00e3702ec79b..4c0a1d03ae0d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER
- secure_computing is called from a ptrace_event()-safe context
- secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately.
+ - seccomp syscall wired up
config SECCOMP_FILTER
def_bool y
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index b9e37ad6fa19..1402fcc11c2c 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -96,6 +96,7 @@ struct osf_dirent {
};
struct osf_dirent_callback {
+ struct dir_context ctx;
struct osf_dirent __user *dirent;
long __user *basep;
unsigned int count;
@@ -146,17 +147,17 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd,
{
int error;
struct fd arg = fdget(fd);
- struct osf_dirent_callback buf;
+ struct osf_dirent_callback buf = {
+ .ctx.actor = osf_filldir,
+ .dirent = dirent,
+ .basep = basep,
+ .count = count
+ };
if (!arg.file)
return -EBADF;
- buf.dirent = dirent;
- buf.basep = basep;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(arg.file, osf_filldir, &buf);
+ error = iterate_dir(arg.file, &buf.ctx);
if (error >= 0)
error = buf.error;
if (count != buf.count)
diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts
index 0fa0d4abe795..ebc313a9f5b2 100644
--- a/arch/arc/boot/dts/abilis_tb100_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts
@@ -45,19 +45,19 @@
};
i2c0: i2c@FF120000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c1: i2c@FF121000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c2: i2c@FF122000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c3: i2c@FF123000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c4: i2c@FF124000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
leds {
diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts
index a4d80ce283ae..b204657993aa 100644
--- a/arch/arc/boot/dts/abilis_tb101_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts
@@ -45,19 +45,19 @@
};
i2c0: i2c@FF120000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c1: i2c@FF121000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c2: i2c@FF122000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c3: i2c@FF123000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
i2c4: i2c@FF124000 {
- sda-hold-time = <432>;
+ i2c-sda-hold-time-ns = <432>;
};
leds {
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9422f298f32f..1a86004f3f27 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1958,6 +1958,15 @@ config XEN
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.
+config ARM_FLUSH_CONSOLE_ON_RESTART
+ bool "Force flush the console on restart"
+ help
+ If the console is locked while the system is rebooted, the messages
+ in the temporary logbuffer would not have propagated to all the
+ console drivers. This option forces the console lock to be
+ released if it failed to be acquired, which will cause all the
+ pending messages to be flushed.
+
endmenu
menu "Boot options"
@@ -1987,6 +1996,21 @@ config DEPRECATED_PARAM_STRUCT
This was deprecated in 2001 and announced to live on for 5 years.
Some old boot loaders still use this way.
+config BUILD_ARM_APPENDED_DTB_IMAGE
+ bool "Build a concatenated zImage/dtb by default"
+ depends on OF
+ help
+ Enabling this option will cause a concatenated zImage and DTB to
+ be built by default (instead of a standalone zImage). The image
+ will be built in arch/arm/boot/zImage-dtb.<dtb name>
+
+config BUILD_ARM_APPENDED_DTB_IMAGE_NAME
+ string "Default dtb name"
+ depends on BUILD_ARM_APPENDED_DTB_IMAGE
+ help
+ Name of the dtb to append when building a concatenated
+ zImage/dtb.
+
# Compressed boot loader in ROM. Yes, we really want to ask about
# TEXT and BSS so we preserve their values in the config files.
config ZBOOT_ROM_TEXT
@@ -2336,6 +2360,13 @@ config NEON
Say Y to include support code for NEON, the ARMv7 Advanced SIMD
Extension.
+config KERNEL_MODE_NEON
+ bool "Support for NEON in kernel mode"
+ default n
+ depends on NEON
+ help
+ Say Y to include support for NEON in kernel mode.
+
endmenu
menu "Userspace binary formats"
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 5fdb6dbc5f89..4d1793474c9c 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -63,6 +63,27 @@ config DEBUG_USER
8 - SIGSEGV faults
16 - SIGBUS faults
+config DEBUG_RODATA
+ bool "Write protect kernel text section"
+ default n
+ depends on DEBUG_KERNEL && MMU
+ ---help---
+ Mark the kernel text section as write-protected in the pagetables,
+ in order to catch accidental (and incorrect) writes to such const
+ data. This will cause the kernel text, plus up to 4MB, to
+ be mapped as pages instead of sections, which will increase TLB
+ pressure.
+ If in doubt, say "N".
+
+config DEBUG_RODATA_TEST
+ bool "Testcase for the DEBUG_RODATA feature"
+ depends on DEBUG_RODATA
+ default n
+ ---help---
+ This option enables a testcase for the DEBUG_RODATA
+ feature.
+ If in doubt, say "N"
+
# These options are only for real kernel hackers who want to get their hands dirty.
config DEBUG_LL
bool "Kernel low-level debugging functions (read help!)"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 70bc19e2274f..9d36200374f0 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -265,6 +265,8 @@ libs-y := arch/arm/lib/ $(libs-y)
# Default target when executing plain make
ifeq ($(CONFIG_XIP_KERNEL),y)
KBUILD_IMAGE := xipImage
+else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y)
+KBUILD_IMAGE := zImage-dtb.$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAME)
else
KBUILD_IMAGE := zImage
endif
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 84aa2caf07ed..085bb96493a3 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -14,6 +14,7 @@
ifneq ($(MACHINE),)
include $(srctree)/$(MACHINE)/Makefile.boot
endif
+include $(srctree)/arch/arm/boot/dts/Makefile
# Note: the following conditions must always be true:
# ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index f6e34be012ff..a8264aa9b03a 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -714,6 +714,8 @@ __armv7_mmu_cache_on:
bic r6, r6, #1 << 31 @ 32-bit translation system
bic r6, r6, #3 << 0 @ use only ttbr0
mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer
+ mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs
+ mcr p15, 0, r0, c7, c5, 4 @ ISB
mcrne p15, 0, r1, c3, c0, 0 @ load domain access control
mcrne p15, 0, r6, c2, c0, 2 @ load ttb control
#endif
diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
index 9584232ee6b6..314e0d9b0002 100644
--- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
@@ -201,7 +201,7 @@
#address-cells = <1>;
#size-cells = <0>;
- dvi-transmitter@39 {
+ dvi_i2c_slave: dvi-transmitter@39 {
compatible = "sil,sii9022-tpi", "sil,sii9022";
reg = <0x39>;
};
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index f1dc620c5c45..a6650860af23 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -147,6 +147,7 @@
interrupts = <0 85 4>;
mode = "1024x768-16@60";
framebuffer = <0 0xff000000 0 0x01000000>;
+ i2c-slave = <&dvi_i2c_slave>;
clocks = <&oscclk5>;
clock-names = "pxlclk";
};
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 9353184d730d..ce01364a96e3 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -17,3 +17,7 @@ config SHARP_PARAM
config SHARP_SCOOP
bool
+
+config FIQ_GLUE
+ bool
+ select FIQ
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 462cd580fc2d..505c479202b6 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -4,6 +4,7 @@
obj-y += firmware.o
+obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o
obj-$(CONFIG_ICST) += icst.o
obj-$(CONFIG_SA1111) += sa1111.o
obj-$(CONFIG_PCI_HOST_VIA82C505) += via82c505.o
diff --git a/arch/arm/common/fiq_glue.S b/arch/arm/common/fiq_glue.S
new file mode 100644
index 000000000000..24b42cec4813
--- /dev/null
+++ b/arch/arm/common/fiq_glue.S
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+ .global fiq_glue_end
+
+ /* fiq stack: r0-r15,cpsr,spsr of interrupted mode */
+
+ENTRY(fiq_glue)
+ /* store pc, cpsr from previous mode, reserve space for spsr */
+ mrs r12, spsr
+ sub lr, lr, #4
+ subs r10, #1
+ bne nested_fiq
+
+ str r12, [sp, #-8]!
+ str lr, [sp, #-4]!
+
+ /* store r8-r14 from previous mode */
+ sub sp, sp, #(7 * 4)
+ stmia sp, {r8-r14}^
+ nop
+
+ /* store r0-r7 from previous mode */
+ stmfd sp!, {r0-r7}
+
+ /* setup func(data,regs) arguments */
+ mov r0, r9
+ mov r1, sp
+ mov r3, r8
+
+ mov r7, sp
+
+ /* Get sp and lr from non-user modes */
+ and r4, r12, #MODE_MASK
+ cmp r4, #USR_MODE
+ beq fiq_from_usr_mode
+
+ mov r7, sp
+ orr r4, r4, #(PSR_I_BIT | PSR_F_BIT)
+ msr cpsr_c, r4
+ str sp, [r7, #(4 * 13)]
+ str lr, [r7, #(4 * 14)]
+ mrs r5, spsr
+ str r5, [r7, #(4 * 17)]
+
+ cmp r4, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT)
+ /* use fiq stack if we reenter this mode */
+ subne sp, r7, #(4 * 3)
+
+fiq_from_usr_mode:
+ msr cpsr_c, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT)
+ mov r2, sp
+ sub sp, r7, #12
+ stmfd sp!, {r2, ip, lr}
+ /* call func(data,regs) */
+ blx r3
+ ldmfd sp, {r2, ip, lr}
+ mov sp, r2
+
+ /* restore/discard saved state */
+ cmp r4, #USR_MODE
+ beq fiq_from_usr_mode_exit
+
+ msr cpsr_c, r4
+ ldr sp, [r7, #(4 * 13)]
+ ldr lr, [r7, #(4 * 14)]
+ msr spsr_cxsf, r5
+
+fiq_from_usr_mode_exit:
+ msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)
+
+ ldmfd sp!, {r0-r7}
+ ldr lr, [sp, #(4 * 7)]
+ ldr r12, [sp, #(4 * 8)]
+ add sp, sp, #(10 * 4)
+exit_fiq:
+ msr spsr_cxsf, r12
+ add r10, #1
+ cmp r11, #0
+ moveqs pc, lr
+ bx r11 /* jump to custom fiq return function */
+
+nested_fiq:
+ orr r12, r12, #(PSR_F_BIT)
+ b exit_fiq
+
+fiq_glue_end:
+
+ENTRY(fiq_glue_setup) /* func, data, sp, smc call number */
+ stmfd sp!, {r4}
+ mrs r4, cpsr
+ msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)
+ movs r8, r0
+ mov r9, r1
+ mov sp, r2
+ mov r11, r3
+ moveq r10, #0
+ movne r10, #1
+ msr cpsr_c, r4
+ ldmfd sp!, {r4}
+ bx lr
+
diff --git a/arch/arm/common/fiq_glue_setup.c b/arch/arm/common/fiq_glue_setup.c
new file mode 100644
index 000000000000..8cb1b611c6d5
--- /dev/null
+++ b/arch/arm/common/fiq_glue_setup.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/fiq.h>
+#include <asm/fiq_glue.h>
+
+extern unsigned char fiq_glue, fiq_glue_end;
+extern void fiq_glue_setup(void *func, void *data, void *sp,
+ fiq_return_handler_t fiq_return_handler);
+
+static struct fiq_handler fiq_debbuger_fiq_handler = {
+ .name = "fiq_glue",
+};
+DEFINE_PER_CPU(void *, fiq_stack);
+static struct fiq_glue_handler *current_handler;
+static fiq_return_handler_t fiq_return_handler;
+static DEFINE_MUTEX(fiq_glue_lock);
+
+static void fiq_glue_setup_helper(void *info)
+{
+ struct fiq_glue_handler *handler = info;
+ fiq_glue_setup(handler->fiq, handler,
+ __get_cpu_var(fiq_stack) + THREAD_START_SP,
+ fiq_return_handler);
+}
+
+int fiq_glue_register_handler(struct fiq_glue_handler *handler)
+{
+ int ret;
+ int cpu;
+
+ if (!handler || !handler->fiq)
+ return -EINVAL;
+
+ mutex_lock(&fiq_glue_lock);
+ if (fiq_stack) {
+ ret = -EBUSY;
+ goto err_busy;
+ }
+
+ for_each_possible_cpu(cpu) {
+ void *stack;
+ stack = (void *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+ if (WARN_ON(!stack)) {
+ ret = -ENOMEM;
+ goto err_alloc_fiq_stack;
+ }
+ per_cpu(fiq_stack, cpu) = stack;
+ }
+
+ ret = claim_fiq(&fiq_debbuger_fiq_handler);
+ if (WARN_ON(ret))
+ goto err_claim_fiq;
+
+ current_handler = handler;
+ on_each_cpu(fiq_glue_setup_helper, handler, true);
+ set_fiq_handler(&fiq_glue, &fiq_glue_end - &fiq_glue);
+
+ mutex_unlock(&fiq_glue_lock);
+ return 0;
+
+err_claim_fiq:
+err_alloc_fiq_stack:
+ for_each_possible_cpu(cpu) {
+ __free_pages(per_cpu(fiq_stack, cpu), THREAD_SIZE_ORDER);
+ per_cpu(fiq_stack, cpu) = NULL;
+ }
+err_busy:
+ mutex_unlock(&fiq_glue_lock);
+ return ret;
+}
+
+static void fiq_glue_update_return_handler(void (*fiq_return)(void))
+{
+ fiq_return_handler = fiq_return;
+ if (current_handler)
+ on_each_cpu(fiq_glue_setup_helper, current_handler, true);
+}
+
+int fiq_glue_set_return_handler(void (*fiq_return)(void))
+{
+ int ret;
+
+ mutex_lock(&fiq_glue_lock);
+ if (fiq_return_handler) {
+ ret = -EBUSY;
+ goto err_busy;
+ }
+ fiq_glue_update_return_handler(fiq_return);
+ ret = 0;
+err_busy:
+ mutex_unlock(&fiq_glue_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(fiq_glue_set_return_handler);
+
+int fiq_glue_clear_return_handler(void (*fiq_return)(void))
+{
+ int ret;
+
+ mutex_lock(&fiq_glue_lock);
+ if (WARN_ON(fiq_return_handler != fiq_return)) {
+ ret = -EINVAL;
+ goto err_inval;
+ }
+ fiq_glue_update_return_handler(NULL);
+ ret = 0;
+err_inval:
+ mutex_unlock(&fiq_glue_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(fiq_glue_clear_return_handler);
+
+/**
+ * fiq_glue_resume - Restore fiqs after suspend or low power idle states
+ *
+ * This must be called before calling local_fiq_enable after returning from a
+ * power state where the fiq mode registers were lost. If a driver provided
+ * a resume hook when it registered the handler it will be called.
+ */
+
+void fiq_glue_resume(void)
+{
+ if (!current_handler)
+ return;
+ fiq_glue_setup(current_handler->fiq, current_handler,
+ __get_cpu_var(fiq_stack) + THREAD_START_SP,
+ fiq_return_handler);
+ if (current_handler->resume)
+ current_handler->resume(current_handler);
+}
+
diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore
new file mode 100644
index 000000000000..6231d36b3635
--- /dev/null
+++ b/arch/arm/crypto/.gitignore
@@ -0,0 +1 @@
+aesbs-core.S
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index a2c83851bc90..2cee53b27238 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -3,7 +3,27 @@
#
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
+obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
-aes-arm-y := aes-armv4.o aes_glue.o
-sha1-arm-y := sha1-armv4-large.o sha1_glue.o
+aes-arm-y := aes-armv4.o aes_glue.o
+aes-arm-bs-y := aesbs-core.o aesbs-glue.o
+sha1-arm-y := sha1-armv4-large.o sha1_glue.o
+sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
+sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $(<) > $(@)
+
+$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
+ $(call cmd,perl)
+
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c
index e73ec2ab1316..0409b8f89782 100644
--- a/arch/arm/crypto/aes_glue.c
+++ b/arch/arm/crypto/aes_glue.c
@@ -6,22 +6,12 @@
#include <linux/crypto.h>
#include <crypto/aes.h>
-#define AES_MAXNR 14
+#include "aes_glue.h"
-typedef struct {
- unsigned int rd_key[4 *(AES_MAXNR + 1)];
- int rounds;
-} AES_KEY;
-
-struct AES_CTX {
- AES_KEY enc_key;
- AES_KEY dec_key;
-};
-
-asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx);
-asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx);
-asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
-asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+EXPORT_SYMBOL(AES_encrypt);
+EXPORT_SYMBOL(AES_decrypt);
+EXPORT_SYMBOL(private_AES_set_encrypt_key);
+EXPORT_SYMBOL(private_AES_set_decrypt_key);
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
@@ -81,7 +71,7 @@ static struct crypto_alg aes_alg = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
+ .cia_setkey = aes_set_key,
.cia_encrypt = aes_encrypt,
.cia_decrypt = aes_decrypt
}
diff --git a/arch/arm/crypto/aes_glue.h b/arch/arm/crypto/aes_glue.h
new file mode 100644
index 000000000000..cca3e51eb606
--- /dev/null
+++ b/arch/arm/crypto/aes_glue.h
@@ -0,0 +1,19 @@
+
+#define AES_MAXNR 14
+
+struct AES_KEY {
+ unsigned int rd_key[4 * (AES_MAXNR + 1)];
+ int rounds;
+};
+
+struct AES_CTX {
+ struct AES_KEY enc_key;
+ struct AES_KEY dec_key;
+};
+
+asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
+asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
+asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
+ const int bits, struct AES_KEY *key);
+asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
+ const int bits, struct AES_KEY *key);
diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped
new file mode 100644
index 000000000000..71e5fc7cfb18
--- /dev/null
+++ b/arch/arm/crypto/aesbs-core.S_shipped
@@ -0,0 +1,2544 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+@ <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
+@ granted.
+@ ====================================================================
+
+@ Bit-sliced AES for ARM NEON
+@
+@ February 2012.
+@
+@ This implementation is direct adaptation of bsaes-x86_64 module for
+@ ARM NEON. Except that this module is endian-neutral [in sense that
+@ it can be compiled for either endianness] by courtesy of vld1.8's
+@ neutrality. Initial version doesn't implement interface to OpenSSL,
+@ only low-level primitives and unsupported entry points, just enough
+@ to collect performance results, which for Cortex-A8 core are:
+@
+@ encrypt 19.5 cycles per byte processed with 128-bit key
+@ decrypt 22.1 cycles per byte processed with 128-bit key
+@ key conv. 440 cycles per 128-bit key/0.18 of 8x block
+@
+@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+@ which is [much] worse than anticipated (for further details see
+@ http://www.openssl.org/~appro/Snapdragon-S4.html).
+@
+@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+@ manages in 20.0 cycles].
+@
+@ When comparing to x86_64 results keep in mind that NEON unit is
+@ [mostly] single-issue and thus can't [fully] benefit from
+@ instruction-level parallelism. And when comparing to aes-armv4
+@ results keep in mind key schedule conversion overhead (see
+@ bsaes-x86_64.pl for further details)...
+@
+@ <appro@openssl.org>
+
+@ April-August 2013
+@
+@ Add CBC, CTR and XTS subroutines, adapt for kernel use.
+@
+@ <ard.biesheuvel@linaro.org>
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ 7
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_ARCH__>=7
+.text
+.syntax unified @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code 32
+#endif
+
+.fpu neon
+
+.type _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+ adr r6,_bsaes_decrypt8
+ vldmia r4!, {q9} @ round 0 key
+ add r6,r6,#.LM0ISR-_bsaes_decrypt8
+
+ vldmia r6!, {q8} @ .LM0ISR
+ veor q10, q0, q9 @ xor with round0 key
+ veor q11, q1, q9
+ vtbl.8 d0, {q10}, d16
+ vtbl.8 d1, {q10}, d17
+ veor q12, q2, q9
+ vtbl.8 d2, {q11}, d16
+ vtbl.8 d3, {q11}, d17
+ veor q13, q3, q9
+ vtbl.8 d4, {q12}, d16
+ vtbl.8 d5, {q12}, d17
+ veor q14, q4, q9
+ vtbl.8 d6, {q13}, d16
+ vtbl.8 d7, {q13}, d17
+ veor q15, q5, q9
+ vtbl.8 d8, {q14}, d16
+ vtbl.8 d9, {q14}, d17
+ veor q10, q6, q9
+ vtbl.8 d10, {q15}, d16
+ vtbl.8 d11, {q15}, d17
+ veor q11, q7, q9
+ vtbl.8 d12, {q10}, d16
+ vtbl.8 d13, {q10}, d17
+ vtbl.8 d14, {q11}, d16
+ vtbl.8 d15, {q11}, d17
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q4, #1
+ veor q10, q10, q7
+ veor q11, q11, q5
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #1
+ veor q5, q5, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q3
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q5, #2
+ vshr.u64 q11, q4, #2
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q5, q5, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q3
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q3, #4
+ vshr.u64 q11, q2, #4
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #4
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q5
+ veor q11, q11, q4
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ sub r5,r5,#1
+ b .Ldec_sbox
+.align 4
+.Ldec_loop:
+ vldmia r4!, {q8-q11}
+ veor q8, q8, q0
+ veor q9, q9, q1
+ vtbl.8 d0, {q8}, d24
+ vtbl.8 d1, {q8}, d25
+ vldmia r4!, {q8}
+ veor q10, q10, q2
+ vtbl.8 d2, {q9}, d24
+ vtbl.8 d3, {q9}, d25
+ vldmia r4!, {q9}
+ veor q11, q11, q3
+ vtbl.8 d4, {q10}, d24
+ vtbl.8 d5, {q10}, d25
+ vldmia r4!, {q10}
+ vtbl.8 d6, {q11}, d24
+ vtbl.8 d7, {q11}, d25
+ vldmia r4!, {q11}
+ veor q8, q8, q4
+ veor q9, q9, q5
+ vtbl.8 d8, {q8}, d24
+ vtbl.8 d9, {q8}, d25
+ veor q10, q10, q6
+ vtbl.8 d10, {q9}, d24
+ vtbl.8 d11, {q9}, d25
+ veor q11, q11, q7
+ vtbl.8 d12, {q10}, d24
+ vtbl.8 d13, {q10}, d25
+ vtbl.8 d14, {q11}, d24
+ vtbl.8 d15, {q11}, d25
+.Ldec_sbox:
+ veor q1, q1, q4
+ veor q3, q3, q4
+
+ veor q4, q4, q7
+ veor q1, q1, q6
+ veor q2, q2, q7
+ veor q6, q6, q4
+
+ veor q0, q0, q1
+ veor q2, q2, q5
+ veor q7, q7, q6
+ veor q3, q3, q0
+ veor q5, q5, q0
+ veor q1, q1, q3
+ veor q11, q3, q0
+ veor q10, q7, q4
+ veor q9, q1, q6
+ veor q13, q4, q0
+ vmov q8, q10
+ veor q12, q5, q2
+
+ vorr q10, q10, q9
+ veor q15, q11, q8
+ vand q14, q11, q12
+ vorr q11, q11, q12
+ veor q12, q12, q9
+ vand q8, q8, q9
+ veor q9, q6, q2
+ vand q15, q15, q12
+ vand q13, q13, q9
+ veor q9, q3, q7
+ veor q12, q1, q5
+ veor q11, q11, q13
+ veor q10, q10, q13
+ vand q13, q9, q12
+ vorr q9, q9, q12
+ veor q11, q11, q15
+ veor q8, q8, q13
+ veor q10, q10, q14
+ veor q9, q9, q15
+ veor q8, q8, q14
+ vand q12, q4, q6
+ veor q9, q9, q14
+ vand q13, q0, q2
+ vand q14, q7, q1
+ vorr q15, q3, q5
+ veor q11, q11, q12
+ veor q9, q9, q14
+ veor q8, q8, q15
+ veor q10, q10, q13
+
+ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3
+
+ @ new smaller inversion
+
+ vand q14, q11, q9
+ vmov q12, q8
+
+ veor q13, q10, q14
+ veor q15, q8, q14
+ veor q14, q8, q14 @ q14=q15
+
+ vbsl q13, q9, q8
+ vbsl q15, q11, q10
+ veor q11, q11, q10
+
+ vbsl q12, q13, q14
+ vbsl q8, q14, q13
+
+ vand q14, q12, q15
+ veor q9, q9, q8
+
+ veor q14, q14, q11
+ veor q12, q5, q2
+ veor q8, q1, q6
+ veor q10, q15, q14
+ vand q10, q10, q5
+ veor q5, q5, q1
+ vand q11, q1, q15
+ vand q5, q5, q14
+ veor q1, q11, q10
+ veor q5, q5, q11
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q2
+ veor q12, q12, q8
+ veor q2, q2, q6
+ vand q8, q8, q15
+ vand q6, q6, q13
+ vand q12, q12, q14
+ vand q2, q2, q9
+ veor q8, q8, q12
+ veor q2, q2, q6
+ veor q12, q12, q11
+ veor q6, q6, q10
+ veor q5, q5, q12
+ veor q2, q2, q12
+ veor q1, q1, q8
+ veor q6, q6, q8
+
+ veor q12, q3, q0
+ veor q8, q7, q4
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q0
+ veor q12, q12, q8
+ veor q0, q0, q4
+ vand q8, q8, q15
+ vand q4, q4, q13
+ vand q12, q12, q14
+ vand q0, q0, q9
+ veor q8, q8, q12
+ veor q0, q0, q4
+ veor q12, q12, q11
+ veor q4, q4, q10
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q10, q15, q14
+ vand q10, q10, q3
+ veor q3, q3, q7
+ vand q11, q7, q15
+ vand q3, q3, q14
+ veor q7, q11, q10
+ veor q3, q3, q11
+ veor q3, q3, q12
+ veor q0, q0, q12
+ veor q7, q7, q8
+ veor q4, q4, q8
+ veor q1, q1, q7
+ veor q6, q6, q5
+
+ veor q4, q4, q1
+ veor q2, q2, q7
+ veor q5, q5, q7
+ veor q4, q4, q2
+ veor q7, q7, q0
+ veor q4, q4, q5
+ veor q3, q3, q6
+ veor q6, q6, q1
+ veor q3, q3, q4
+
+ veor q4, q4, q0
+ veor q7, q7, q3
+ subs r5,r5,#1
+ bcc .Ldec_done
+ @ multiplication by 0x05-0x00-0x04-0x00
+ vext.8 q8, q0, q0, #8
+ vext.8 q14, q3, q3, #8
+ vext.8 q15, q5, q5, #8
+ veor q8, q8, q0
+ vext.8 q9, q1, q1, #8
+ veor q14, q14, q3
+ vext.8 q10, q6, q6, #8
+ veor q15, q15, q5
+ vext.8 q11, q4, q4, #8
+ veor q9, q9, q1
+ vext.8 q12, q2, q2, #8
+ veor q10, q10, q6
+ vext.8 q13, q7, q7, #8
+ veor q11, q11, q4
+ veor q12, q12, q2
+ veor q13, q13, q7
+
+ veor q0, q0, q14
+ veor q1, q1, q14
+ veor q6, q6, q8
+ veor q2, q2, q10
+ veor q4, q4, q9
+ veor q1, q1, q15
+ veor q6, q6, q15
+ veor q2, q2, q14
+ veor q7, q7, q11
+ veor q4, q4, q14
+ veor q3, q3, q12
+ veor q2, q2, q15
+ veor q7, q7, q15
+ veor q5, q5, q13
+ vext.8 q8, q0, q0, #12 @ x0 <<< 32
+ vext.8 q9, q1, q1, #12
+ veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
+ vext.8 q10, q6, q6, #12
+ veor q1, q1, q9
+ vext.8 q11, q4, q4, #12
+ veor q6, q6, q10
+ vext.8 q12, q2, q2, #12
+ veor q4, q4, q11
+ vext.8 q13, q7, q7, #12
+ veor q2, q2, q12
+ vext.8 q14, q3, q3, #12
+ veor q7, q7, q13
+ vext.8 q15, q5, q5, #12
+ veor q3, q3, q14
+
+ veor q9, q9, q0
+ veor q5, q5, q15
+ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor q10, q10, q1
+ veor q8, q8, q5
+ veor q9, q9, q5
+ vext.8 q1, q1, q1, #8
+ veor q13, q13, q2
+ veor q0, q0, q8
+ veor q14, q14, q7
+ veor q1, q1, q9
+ vext.8 q8, q2, q2, #8
+ veor q12, q12, q4
+ vext.8 q9, q7, q7, #8
+ veor q15, q15, q3
+ vext.8 q2, q4, q4, #8
+ veor q11, q11, q6
+ vext.8 q7, q5, q5, #8
+ veor q12, q12, q5
+ vext.8 q4, q3, q3, #8
+ veor q11, q11, q5
+ vext.8 q3, q6, q6, #8
+ veor q5, q9, q13
+ veor q11, q11, q2
+ veor q7, q7, q15
+ veor q6, q4, q14
+ veor q4, q8, q12
+ veor q2, q3, q10
+ vmov q3, q11
+ @ vmov q5, q9
+ vldmia r6, {q12} @ .LISR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq r6,r6,#0x10
+ bne .Ldec_loop
+ vldmia r6, {q12} @ .LISRM0
+ b .Ldec_loop
+.align 4
+.Ldec_done:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q3, #1
+ vshr.u64 q11, q2, #1
+ veor q10, q10, q5
+ veor q11, q11, q7
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #1
+ veor q7, q7, q11
+ vshl.u64 q11, q11, #1
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q4
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q4, q4, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q7, #2
+ vshr.u64 q11, q2, #2
+ veor q10, q10, q5
+ veor q11, q11, q3
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #2
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #2
+ veor q7, q7, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q4
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q4, q4, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q4, #4
+ vshr.u64 q11, q6, #4
+ veor q10, q10, q5
+ veor q11, q11, q3
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #4
+ veor q4, q4, q10
+ veor q6, q6, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q7
+ veor q11, q11, q2
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vldmia r4, {q8} @ last round key
+ veor q6, q6, q8
+ veor q4, q4, q8
+ veor q2, q2, q8
+ veor q7, q7, q8
+ veor q3, q3, q8
+ veor q5, q5, q8
+ veor q0, q0, q8
+ veor q1, q1, q8
+ bx lr
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+
+.type _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR: @ InvShiftRows constants
+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR: @ ShiftRows constants
+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:
+ .quad 0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align 6
+.size _bsaes_const,.-_bsaes_const
+
+.type _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+ adr r6,_bsaes_encrypt8
+ vldmia r4!, {q9} @ round 0 key
+ sub r6,r6,#_bsaes_encrypt8-.LM0SR
+
+ vldmia r6!, {q8} @ .LM0SR
+_bsaes_encrypt8_alt:
+ veor q10, q0, q9 @ xor with round0 key
+ veor q11, q1, q9
+ vtbl.8 d0, {q10}, d16
+ vtbl.8 d1, {q10}, d17
+ veor q12, q2, q9
+ vtbl.8 d2, {q11}, d16
+ vtbl.8 d3, {q11}, d17
+ veor q13, q3, q9
+ vtbl.8 d4, {q12}, d16
+ vtbl.8 d5, {q12}, d17
+ veor q14, q4, q9
+ vtbl.8 d6, {q13}, d16
+ vtbl.8 d7, {q13}, d17
+ veor q15, q5, q9
+ vtbl.8 d8, {q14}, d16
+ vtbl.8 d9, {q14}, d17
+ veor q10, q6, q9
+ vtbl.8 d10, {q15}, d16
+ vtbl.8 d11, {q15}, d17
+ veor q11, q7, q9
+ vtbl.8 d12, {q10}, d16
+ vtbl.8 d13, {q10}, d17
+ vtbl.8 d14, {q11}, d16
+ vtbl.8 d15, {q11}, d17
+_bsaes_encrypt8_bitslice:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q6, #1
+ vshr.u64 q11, q4, #1
+ veor q10, q10, q7
+ veor q11, q11, q5
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #1
+ veor q5, q5, q11
+ vshl.u64 q11, q11, #1
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q3
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q5, #2
+ vshr.u64 q11, q4, #2
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #2
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #2
+ veor q5, q5, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q3
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q3, q3, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q3, #4
+ vshr.u64 q11, q2, #4
+ veor q10, q10, q7
+ veor q11, q11, q6
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q6, q6, q11
+ vshl.u64 q11, q11, #4
+ veor q3, q3, q10
+ veor q2, q2, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q5
+ veor q11, q11, q4
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ sub r5,r5,#1
+ b .Lenc_sbox
+.align 4
+.Lenc_loop:
+ vldmia r4!, {q8-q11}
+ veor q8, q8, q0
+ veor q9, q9, q1
+ vtbl.8 d0, {q8}, d24
+ vtbl.8 d1, {q8}, d25
+ vldmia r4!, {q8}
+ veor q10, q10, q2
+ vtbl.8 d2, {q9}, d24
+ vtbl.8 d3, {q9}, d25
+ vldmia r4!, {q9}
+ veor q11, q11, q3
+ vtbl.8 d4, {q10}, d24
+ vtbl.8 d5, {q10}, d25
+ vldmia r4!, {q10}
+ vtbl.8 d6, {q11}, d24
+ vtbl.8 d7, {q11}, d25
+ vldmia r4!, {q11}
+ veor q8, q8, q4
+ veor q9, q9, q5
+ vtbl.8 d8, {q8}, d24
+ vtbl.8 d9, {q8}, d25
+ veor q10, q10, q6
+ vtbl.8 d10, {q9}, d24
+ vtbl.8 d11, {q9}, d25
+ veor q11, q11, q7
+ vtbl.8 d12, {q10}, d24
+ vtbl.8 d13, {q10}, d25
+ vtbl.8 d14, {q11}, d24
+ vtbl.8 d15, {q11}, d25
+.Lenc_sbox:
+ veor q2, q2, q1
+ veor q5, q5, q6
+ veor q3, q3, q0
+ veor q6, q6, q2
+ veor q5, q5, q0
+
+ veor q6, q6, q3
+ veor q3, q3, q7
+ veor q7, q7, q5
+ veor q3, q3, q4
+ veor q4, q4, q5
+
+ veor q2, q2, q7
+ veor q3, q3, q1
+ veor q1, q1, q5
+ veor q11, q7, q4
+ veor q10, q1, q2
+ veor q9, q5, q3
+ veor q13, q2, q4
+ vmov q8, q10
+ veor q12, q6, q0
+
+ vorr q10, q10, q9
+ veor q15, q11, q8
+ vand q14, q11, q12
+ vorr q11, q11, q12
+ veor q12, q12, q9
+ vand q8, q8, q9
+ veor q9, q3, q0
+ vand q15, q15, q12
+ vand q13, q13, q9
+ veor q9, q7, q1
+ veor q12, q5, q6
+ veor q11, q11, q13
+ veor q10, q10, q13
+ vand q13, q9, q12
+ vorr q9, q9, q12
+ veor q11, q11, q15
+ veor q8, q8, q13
+ veor q10, q10, q14
+ veor q9, q9, q15
+ veor q8, q8, q14
+ vand q12, q2, q3
+ veor q9, q9, q14
+ vand q13, q4, q0
+ vand q14, q1, q5
+ vorr q15, q7, q6
+ veor q11, q11, q12
+ veor q9, q9, q14
+ veor q8, q8, q15
+ veor q10, q10, q13
+
+ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3
+
+ @ new smaller inversion
+
+ vand q14, q11, q9
+ vmov q12, q8
+
+ veor q13, q10, q14
+ veor q15, q8, q14
+ veor q14, q8, q14 @ q14=q15
+
+ vbsl q13, q9, q8
+ vbsl q15, q11, q10
+ veor q11, q11, q10
+
+ vbsl q12, q13, q14
+ vbsl q8, q14, q13
+
+ vand q14, q12, q15
+ veor q9, q9, q8
+
+ veor q14, q14, q11
+ veor q12, q6, q0
+ veor q8, q5, q3
+ veor q10, q15, q14
+ vand q10, q10, q6
+ veor q6, q6, q5
+ vand q11, q5, q15
+ vand q6, q6, q14
+ veor q5, q11, q10
+ veor q6, q6, q11
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q0
+ veor q12, q12, q8
+ veor q0, q0, q3
+ vand q8, q8, q15
+ vand q3, q3, q13
+ vand q12, q12, q14
+ vand q0, q0, q9
+ veor q8, q8, q12
+ veor q0, q0, q3
+ veor q12, q12, q11
+ veor q3, q3, q10
+ veor q6, q6, q12
+ veor q0, q0, q12
+ veor q5, q5, q8
+ veor q3, q3, q8
+
+ veor q12, q7, q4
+ veor q8, q1, q2
+ veor q11, q15, q14
+ veor q10, q13, q9
+ vand q11, q11, q12
+ vand q10, q10, q4
+ veor q12, q12, q8
+ veor q4, q4, q2
+ vand q8, q8, q15
+ vand q2, q2, q13
+ vand q12, q12, q14
+ vand q4, q4, q9
+ veor q8, q8, q12
+ veor q4, q4, q2
+ veor q12, q12, q11
+ veor q2, q2, q10
+ veor q15, q15, q13
+ veor q14, q14, q9
+ veor q10, q15, q14
+ vand q10, q10, q7
+ veor q7, q7, q1
+ vand q11, q1, q15
+ vand q7, q7, q14
+ veor q1, q11, q10
+ veor q7, q7, q11
+ veor q7, q7, q12
+ veor q4, q4, q12
+ veor q1, q1, q8
+ veor q2, q2, q8
+ veor q7, q7, q0
+ veor q1, q1, q6
+ veor q6, q6, q0
+ veor q4, q4, q7
+ veor q0, q0, q1
+
+ veor q1, q1, q5
+ veor q5, q5, q2
+ veor q2, q2, q3
+ veor q3, q3, q5
+ veor q4, q4, q5
+
+ veor q6, q6, q3
+ subs r5,r5,#1
+ bcc .Lenc_done
+ vext.8 q8, q0, q0, #12 @ x0 <<< 32
+ vext.8 q9, q1, q1, #12
+ veor q0, q0, q8 @ x0 ^ (x0 <<< 32)
+ vext.8 q10, q4, q4, #12
+ veor q1, q1, q9
+ vext.8 q11, q6, q6, #12
+ veor q4, q4, q10
+ vext.8 q12, q3, q3, #12
+ veor q6, q6, q11
+ vext.8 q13, q7, q7, #12
+ veor q3, q3, q12
+ vext.8 q14, q2, q2, #12
+ veor q7, q7, q13
+ vext.8 q15, q5, q5, #12
+ veor q2, q2, q14
+
+ veor q9, q9, q0
+ veor q5, q5, q15
+ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor q10, q10, q1
+ veor q8, q8, q5
+ veor q9, q9, q5
+ vext.8 q1, q1, q1, #8
+ veor q13, q13, q3
+ veor q0, q0, q8
+ veor q14, q14, q7
+ veor q1, q1, q9
+ vext.8 q8, q3, q3, #8
+ veor q12, q12, q6
+ vext.8 q9, q7, q7, #8
+ veor q15, q15, q2
+ vext.8 q3, q6, q6, #8
+ veor q11, q11, q4
+ vext.8 q7, q5, q5, #8
+ veor q12, q12, q5
+ vext.8 q6, q2, q2, #8
+ veor q11, q11, q5
+ vext.8 q2, q4, q4, #8
+ veor q5, q9, q13
+ veor q4, q8, q12
+ veor q3, q3, q11
+ veor q7, q7, q15
+ veor q6, q6, q14
+ @ vmov q4, q8
+ veor q2, q2, q10
+ @ vmov q5, q9
+ vldmia r6, {q12} @ .LSR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq r6,r6,#0x10
+ bne .Lenc_loop
+ vldmia r6, {q12} @ .LSRM0
+ b .Lenc_loop
+.align 4
+.Lenc_done:
+ vmov.i8 q8,#0x55 @ compose .LBS0
+ vmov.i8 q9,#0x33 @ compose .LBS1
+ vshr.u64 q10, q2, #1
+ vshr.u64 q11, q3, #1
+ veor q10, q10, q5
+ veor q11, q11, q7
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #1
+ veor q7, q7, q11
+ vshl.u64 q11, q11, #1
+ veor q2, q2, q10
+ veor q3, q3, q11
+ vshr.u64 q10, q4, #1
+ vshr.u64 q11, q0, #1
+ veor q10, q10, q6
+ veor q11, q11, q1
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q6, q6, q10
+ vshl.u64 q10, q10, #1
+ veor q1, q1, q11
+ vshl.u64 q11, q11, #1
+ veor q4, q4, q10
+ veor q0, q0, q11
+ vmov.i8 q8,#0x0f @ compose .LBS2
+ vshr.u64 q10, q7, #2
+ vshr.u64 q11, q3, #2
+ veor q10, q10, q5
+ veor q11, q11, q2
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #2
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #2
+ veor q7, q7, q10
+ veor q3, q3, q11
+ vshr.u64 q10, q1, #2
+ vshr.u64 q11, q0, #2
+ veor q10, q10, q6
+ veor q11, q11, q4
+ vand q10, q10, q9
+ vand q11, q11, q9
+ veor q6, q6, q10
+ vshl.u64 q10, q10, #2
+ veor q4, q4, q11
+ vshl.u64 q11, q11, #2
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vshr.u64 q10, q6, #4
+ vshr.u64 q11, q4, #4
+ veor q10, q10, q5
+ veor q11, q11, q2
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q5, q5, q10
+ vshl.u64 q10, q10, #4
+ veor q2, q2, q11
+ vshl.u64 q11, q11, #4
+ veor q6, q6, q10
+ veor q4, q4, q11
+ vshr.u64 q10, q1, #4
+ vshr.u64 q11, q0, #4
+ veor q10, q10, q7
+ veor q11, q11, q3
+ vand q10, q10, q8
+ vand q11, q11, q8
+ veor q7, q7, q10
+ vshl.u64 q10, q10, #4
+ veor q3, q3, q11
+ vshl.u64 q11, q11, #4
+ veor q1, q1, q10
+ veor q0, q0, q11
+ vldmia r4, {q8} @ last round key
+ veor q4, q4, q8
+ veor q6, q6, q8
+ veor q3, q3, q8
+ veor q7, q7, q8
+ veor q2, q2, q8
+ veor q5, q5, q8
+ veor q0, q0, q8
+ veor q1, q1, q8
+ bx lr
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+.type _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+ adr r6,_bsaes_key_convert
+ vld1.8 {q7}, [r4]! @ load round 0 key
+ sub r6,r6,#_bsaes_key_convert-.LM0
+ vld1.8 {q15}, [r4]! @ load round 1 key
+
+ vmov.i8 q8, #0x01 @ bit masks
+ vmov.i8 q9, #0x02
+ vmov.i8 q10, #0x04
+ vmov.i8 q11, #0x08
+ vmov.i8 q12, #0x10
+ vmov.i8 q13, #0x20
+ vldmia r6, {q14} @ .LM0
+
+#ifdef __ARMEL__
+ vrev32.8 q7, q7
+ vrev32.8 q15, q15
+#endif
+ sub r5,r5,#1
+ vstmia r12!, {q7} @ save round 0 key
+ b .Lkey_loop
+
+.align 4
+.Lkey_loop:
+ vtbl.8 d14,{q15},d28
+ vtbl.8 d15,{q15},d29
+ vmov.i8 q6, #0x40
+ vmov.i8 q15, #0x80
+
+ vtst.8 q0, q7, q8
+ vtst.8 q1, q7, q9
+ vtst.8 q2, q7, q10
+ vtst.8 q3, q7, q11
+ vtst.8 q4, q7, q12
+ vtst.8 q5, q7, q13
+ vtst.8 q6, q7, q6
+ vtst.8 q7, q7, q15
+ vld1.8 {q15}, [r4]! @ load next round key
+ vmvn q0, q0 @ "pnot"
+ vmvn q1, q1
+ vmvn q5, q5
+ vmvn q6, q6
+#ifdef __ARMEL__
+ vrev32.8 q15, q15
+#endif
+ subs r5,r5,#1
+ vstmia r12!,{q0-q7} @ write bit-sliced round key
+ bne .Lkey_loop
+
+ vmov.i8 q7,#0x63 @ compose .L63
+ @ don't save last round key
+ bx lr
+.size _bsaes_key_convert,.-_bsaes_key_convert
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+.global bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef __KERNEL__
+ cmp r2, #128
+#ifndef __thumb__
+ blo AES_cbc_encrypt
+#else
+ bhs 1f
+ b AES_cbc_encrypt
+1:
+#endif
+#endif
+
+ @ it is up to the caller to make sure we are called with enc == 0
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr r8, [ip] @ IV is 1st arg on the stack
+ mov r2, r2, lsr#4 @ len in 16 byte blocks
+ sub sp, #0x10 @ scratch space to carry over the IV
+ mov r9, sp @ save sp
+
+ ldr r10, [r3, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key
+	add	r12, #96			@ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ mov sp, r12 @ sp is sp
+ bl _bsaes_key_convert
+ vldmia sp, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia sp, {q7}
+#else
+ ldr r12, [r3, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [r3, #244]
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ add r12, r3, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, r3, #248
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+
+.align 2
+0:
+#endif
+
+ vld1.8 {q15}, [r8] @ load IV
+ b .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+ subs r2, r2, #0x8
+ bmi .Lcbc_dec_loop_finish
+
+ vld1.8 {q0-q1}, [r0]! @ load input
+ vld1.8 {q2-q3}, [r0]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, sp @ pass the key
+#else
+ add r4, r3, #248
+#endif
+ vld1.8 {q4-q5}, [r0]!
+ mov r5, r10
+ vld1.8 {q6-q7}, [r0]
+ sub r0, r0, #0x60
+ vstmia r9, {q15} @ put aside IV
+
+ bl _bsaes_decrypt8
+
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q14-q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q3, q3, q13
+ vst1.8 {q6}, [r1]!
+ veor q5, q5, q14
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ vst1.8 {q3}, [r1]!
+ vst1.8 {q5}, [r1]!
+
+ b .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+ adds r2, r2, #8
+ beq .Lcbc_dec_done
+
+ vld1.8 {q0}, [r0]! @ load input
+ cmp r2, #2
+ blo .Lcbc_dec_one
+ vld1.8 {q1}, [r0]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, sp @ pass the key
+#else
+ add r4, r3, #248
+#endif
+ mov r5, r10
+ vstmia r9, {q15} @ put aside IV
+ beq .Lcbc_dec_two
+ vld1.8 {q2}, [r0]!
+ cmp r2, #4
+ blo .Lcbc_dec_three
+ vld1.8 {q3}, [r0]!
+ beq .Lcbc_dec_four
+ vld1.8 {q4}, [r0]!
+ cmp r2, #6
+ blo .Lcbc_dec_five
+ vld1.8 {q5}, [r0]!
+ beq .Lcbc_dec_six
+ vld1.8 {q6}, [r0]!
+ sub r0, r0, #0x70
+
+ bl _bsaes_decrypt8
+
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q3, q3, q13
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ vst1.8 {q3}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+ sub r0, r0, #0x60
+ bl _bsaes_decrypt8
+ vldmia r9,{q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q12}, [r0]!
+ veor q4, q4, q10
+ veor q2, q2, q11
+ vld1.8 {q15}, [r0]!
+ veor q7, q7, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ vst1.8 {q7}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+ sub r0, r0, #0x50
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10-q11}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q15}, [r0]!
+ veor q4, q4, q10
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q2, q2, q11
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ vst1.8 {q2}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+ sub r0, r0, #0x40
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q10}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vld1.8 {q15}, [r0]!
+ veor q4, q4, q10
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ vst1.8 {q4}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+ sub r0, r0, #0x30
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8-q9}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q15}, [r0]!
+ veor q1, q1, q8
+ veor q6, q6, q9
+ vst1.8 {q0-q1}, [r1]! @ write output
+ vst1.8 {q6}, [r1]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+ sub r0, r0, #0x20
+ bl _bsaes_decrypt8
+ vldmia r9, {q14} @ reload IV
+ vld1.8 {q8}, [r0]! @ reload input
+ veor q0, q0, q14 @ ^= IV
+ vld1.8 {q15}, [r0]! @ reload input
+ veor q1, q1, q8
+ vst1.8 {q0-q1}, [r1]! @ write output
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+ sub r0, r0, #0x10
+ mov r10, r1 @ save original out pointer
+ mov r1, r9 @ use the iv scratch space as out buffer
+ mov r2, r3
+ vmov q4,q15 @ just in case ensure that IV
+ vmov q5,q0 @ and input are preserved
+ bl AES_decrypt
+ vld1.8 {q0}, [r9,:64] @ load result
+ veor q0, q0, q4 @ ^= IV
+ vmov q15, q5 @ q5 holds input
+ vst1.8 {q0}, [r10] @ write output
+
+.Lcbc_dec_done:
+#ifndef BSAES_ASM_EXTENDED_KEY
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+.Lcbc_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r9
+ bne .Lcbc_dec_bzero
+#endif
+
+ mov sp, r9
+ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb
+ vst1.8 {q15}, [r8] @ return IV
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc}
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+.extern AES_encrypt
+.global bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+ cmp r2, #8 @ use plain AES for
+ blo .Lctr_enc_short @ small sizes
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr r8, [ip] @ ctr is 1st arg on the stack
+ sub sp, sp, #0x10 @ scratch space to carry over the ctr
+ mov r9, sp @ save sp
+
+ ldr r10, [r3, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key
+ add r12, #96 @ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ mov sp, r12 @ sp is sp
+ bl _bsaes_key_convert
+ veor q7,q7,q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+
+ vld1.8 {q0}, [r8] @ load counter
+ add r8, r6, #.LREVM0SR-.LM0 @ borrow r8
+ vldmia sp, {q4} @ load round0 key
+#else
+ ldr r12, [r3, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [r3, #244]
+ mov r4, r3 @ pass key
+ mov r5, r10 @ pass # of rounds
+ add r12, r3, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7,q7,q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+
+.align 2
+0: add r12, r3, #248
+ vld1.8 {q0}, [r8] @ load counter
+ adrl r8, .LREVM0SR @ borrow r8
+ vldmia r12, {q4} @ load round0 key
+ sub sp, #0x10 @ place for adjusted round0 key
+#endif
+
+ vmov.i32 q8,#1 @ compose 1<<96
+ veor q9,q9,q9
+ vrev32.8 q0,q0
+ vext.8 q8,q9,q8,#4
+ vrev32.8 q4,q4
+ vadd.u32 q9,q8,q8 @ compose 2<<96
+ vstmia sp, {q4} @ save adjusted round0 key
+ b .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+ vadd.u32 q10, q8, q9 @ compose 3<<96
+ vadd.u32 q1, q0, q8 @ +1
+ vadd.u32 q2, q0, q9 @ +2
+ vadd.u32 q3, q0, q10 @ +3
+ vadd.u32 q4, q1, q10
+ vadd.u32 q5, q2, q10
+ vadd.u32 q6, q3, q10
+ vadd.u32 q7, q4, q10
+ vadd.u32 q10, q5, q10 @ next counter
+
+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+ @ to flip byte order in 32-bit counter
+
+ vldmia sp, {q9} @ load round0 key
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x10 @ pass next round key
+#else
+ add r4, r3, #264
+#endif
+ vldmia r8, {q8} @ .LREVM0SR
+ mov r5, r10 @ pass rounds
+ vstmia r9, {q10} @ save next counter
+ sub r6, r8, #.LREVM0SR-.LSR @ pass constants
+
+ bl _bsaes_encrypt8_alt
+
+ subs r2, r2, #8
+ blo .Lctr_enc_loop_done
+
+ vld1.8 {q8-q9}, [r0]! @ load input
+ vld1.8 {q10-q11}, [r0]!
+ veor q0, q8
+ veor q1, q9
+ vld1.8 {q12-q13}, [r0]!
+ veor q4, q10
+ veor q6, q11
+ vld1.8 {q14-q15}, [r0]!
+ veor q3, q12
+ vst1.8 {q0-q1}, [r1]! @ write output
+ veor q7, q13
+ veor q2, q14
+ vst1.8 {q4}, [r1]!
+ veor q5, q15
+ vst1.8 {q6}, [r1]!
+ vmov.i32 q8, #1 @ compose 1<<96
+ vst1.8 {q3}, [r1]!
+ veor q9, q9, q9
+ vst1.8 {q7}, [r1]!
+ vext.8 q8, q9, q8, #4
+ vst1.8 {q2}, [r1]!
+ vadd.u32 q9,q8,q8 @ compose 2<<96
+ vst1.8 {q5}, [r1]!
+ vldmia r9, {q0} @ load counter
+
+ bne .Lctr_enc_loop
+ b .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+ add r2, r2, #8
+ vld1.8 {q8}, [r0]! @ load input
+ veor q0, q8
+ vst1.8 {q0}, [r1]! @ write output
+ cmp r2, #2
+ blo .Lctr_enc_done
+ vld1.8 {q9}, [r0]!
+ veor q1, q9
+ vst1.8 {q1}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q10}, [r0]!
+ veor q4, q10
+ vst1.8 {q4}, [r1]!
+ cmp r2, #4
+ blo .Lctr_enc_done
+ vld1.8 {q11}, [r0]!
+ veor q6, q11
+ vst1.8 {q6}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q12}, [r0]!
+ veor q3, q12
+ vst1.8 {q3}, [r1]!
+ cmp r2, #6
+ blo .Lctr_enc_done
+ vld1.8 {q13}, [r0]!
+ veor q7, q13
+ vst1.8 {q7}, [r1]!
+ beq .Lctr_enc_done
+ vld1.8 {q14}, [r0]
+ veor q2, q14
+ vst1.8 {q2}, [r1]!
+
+.Lctr_enc_done:
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifndef BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r9
+ bne .Lctr_enc_bzero
+#else
+ vstmia sp, {q0-q1}
+#endif
+
+ mov sp, r9
+ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.align 4
+.Lctr_enc_short:
+ ldr ip, [sp] @ ctr pointer is passed on stack
+ stmdb sp!, {r4-r8, lr}
+
+ mov r4, r0 @ copy arguments
+ mov r5, r1
+ mov r6, r2
+ mov r7, r3
+ ldr r8, [ip, #12] @ load counter LSW
+ vld1.8 {q1}, [ip] @ load whole counter value
+#ifdef __ARMEL__
+ rev r8, r8
+#endif
+ sub sp, sp, #0x10
+ vst1.8 {q1}, [sp,:64] @ copy counter value
+ sub sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+ add r0, sp, #0x10 @ input counter value
+ mov r1, sp @ output on the stack
+ mov r2, r7 @ key
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [r4]! @ load input
+ vld1.8 {q1}, [sp,:64] @ load encrypted counter
+ add r8, r8, #1
+#ifdef __ARMEL__
+ rev r0, r8
+ str r0, [sp, #0x1c] @ next counter value
+#else
+ str r8, [sp, #0x1c] @ next counter value
+#endif
+ veor q0,q0,q1
+ vst1.8 {q0}, [r5]! @ store output
+ subs r6, r6, #1
+ bne .Lctr_enc_short_loop
+
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+ vstmia sp!, {q0-q1}
+
+ ldmia sp!, {r4-r8, pc}
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future r3
+
+ mov r7, r0
+ mov r8, r1
+ mov r9, r2
+ mov r10, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0,sp @ pointer to initial tweak
+#endif
+
+ ldr r1, [r10, #240] @ get # of rounds
+ mov r3, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #96 @ size of bit-sliced key schedule
+ sub r12, #48 @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7, q7, q15 @ fix up last round key
+ vstmia r12, {q7} @ save last round key
+#else
+ ldr r12, [r10, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [r10, #244]
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ add r12, r10, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor q7, q7, q15 @ fix up last round key
+ vstmia r12, {q7}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+
+ vld1.8 {q8}, [r0] @ initial tweak
+ adr r2, .Lxts_magic
+
+ subs r9, #0x80
+ blo .Lxts_enc_short
+ b .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q6, q8, #63
+ mov r0, sp
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q9, #63
+ veor q9, q9, q6
+ vand q7, q7, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q10, #63
+ veor q10, q10, q7
+ vand q6, q6, q5
+ vld1.8 {q0}, [r7]!
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q11, #63
+ veor q11, q11, q6
+ vand q7, q7, q5
+ vld1.8 {q1}, [r7]!
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q12, #63
+ veor q12, q12, q7
+ vand q6, q6, q5
+ vld1.8 {q2}, [r7]!
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q13, #63
+ veor q13, q13, q6
+ vand q7, q7, q5
+ vld1.8 {q3}, [r7]!
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q14, #63
+ veor q14, q14, q7
+ vand q6, q6, q5
+ vld1.8 {q4}, [r7]!
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q15, #63
+ veor q15, q15, q6
+ vand q7, q7, q5
+ vld1.8 {q5}, [r7]!
+ veor q4, q4, q12
+ vadd.u64 q8, q15, q15
+ vst1.64 {q15}, [r0,:128]!
+ vswp d15,d14
+ veor q8, q8, q7
+ vst1.64 {q8}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6-q7}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ veor q7, q7, q15
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vld1.64 {q14-q15}, [r0,:128]!
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q2, q14
+ vst1.8 {q10-q11}, [r8]!
+ veor q13, q5, q15
+ vst1.8 {q12-q13}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+
+ subs r9, #0x80
+ bpl .Lxts_enc_loop
+
+.Lxts_enc_short:
+ adds r9, #0x70
+ bmi .Lxts_enc_done
+
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q7, q8, #63
+ mov r0, sp
+ vand q7, q7, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q9, #63
+ veor q9, q9, q7
+ vand q6, q6, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q10, #63
+ veor q10, q10, q6
+ vand q7, q7, q5
+ vld1.8 {q0}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_1
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q11, #63
+ veor q11, q11, q7
+ vand q6, q6, q5
+ vld1.8 {q1}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_2
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q12, #63
+ veor q12, q12, q6
+ vand q7, q7, q5
+ vld1.8 {q2}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_3
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q13, #63
+ veor q13, q13, q7
+ vand q6, q6, q5
+ vld1.8 {q3}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_4
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q14, #63
+ veor q14, q14, q6
+ vand q7, q7, q5
+ vld1.8 {q4}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_5
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q15, #63
+ veor q15, q15, q7
+ vand q6, q6, q5
+ vld1.8 {q5}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_enc_6
+ veor q4, q4, q12
+ sub r9, #0x10
+ vst1.64 {q15}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vld1.64 {q14}, [r0,:128]!
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q2, q14
+ vst1.8 {q10-q11}, [r8]!
+ vst1.8 {q12}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+ vst1.64 {q14}, [r0,:128] @ next round tweak
+
+ veor q4, q4, q12
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q5, q5, q13
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ vst1.8 {q10-q11}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+ .quad 1, 0x87
+
+.align 5
+.Lxts_enc_5:
+ vst1.64 {q13}, [r0,:128] @ next round tweak
+
+ veor q3, q3, q11
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q4, q4, q12
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ veor q10, q3, q12
+ vst1.8 {q8-q9}, [r8]!
+ vst1.8 {q10}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+ vst1.64 {q12}, [r0,:128] @ next round tweak
+
+ veor q2, q2, q10
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q3, q3, q11
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q6, q11
+ vst1.8 {q8-q9}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+ vst1.64 {q11}, [r0,:128] @ next round tweak
+
+ veor q1, q1, q9
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q2, q2, q10
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q4, q10
+ vst1.8 {q0-q1}, [r8]!
+ vst1.8 {q8}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+ vst1.64 {q10}, [r0,:128] @ next round tweak
+
+ veor q0, q0, q8
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q1, q1, q9
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ vst1.8 {q0-q1}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r8]!
+ mov r3, r4
+
+ vmov q8, q9 @ next round tweak
+
+.Lxts_enc_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds r9, #0x10
+ beq .Lxts_enc_ret
+ sub r6, r8, #0x10
+
+.Lxts_enc_steal:
+ ldrb r0, [r7], #1
+ ldrb r1, [r8, #-0x10]
+ strb r0, [r8, #-0x10]
+ strb r1, [r8], #1
+
+ subs r9, #1
+ bhi .Lxts_enc_steal
+
+ vld1.8 {q0}, [r6]
+ mov r0, sp
+ veor q0, q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r6]
+ mov r3, r4
+#endif
+
+.Lxts_enc_ret:
+ bic r0, r3, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_enc_bzero
+
+ mov sp, r3
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {q8}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future r3
+
+ mov r7, r0
+ mov r8, r1
+ mov r9, r2
+ mov r10, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0, sp @ pointer to initial tweak
+#endif
+
+ ldr r1, [r10, #240] @ get # of rounds
+ mov r3, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #96 @ size of bit-sliced key schedule
+ sub r12, #48 @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, sp, #0x90
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+#else
+ ldr r12, [r10, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [r10, #244]
+ mov r4, r10 @ pass key
+ mov r5, r1 @ pass # of rounds
+ add r12, r10, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, r10, #248
+ vldmia r4, {q6}
+ vstmia r12, {q15} @ save last round key
+ veor q7, q7, q6 @ fix up round 0 key
+ vstmia r4, {q7}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+ vld1.8 {q8}, [r0] @ initial tweak
+ adr r2, .Lxts_magic
+
+ tst r9, #0xf @ if not multiple of 16
+ it ne @ Thumb2 thing, sanity check in ARM
+ subne r9, #0x10 @ subtract another 16 bytes
+ subs r9, #0x80
+
+ blo .Lxts_dec_short
+ b .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q6, q8, #63
+ mov r0, sp
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q9, #63
+ veor q9, q9, q6
+ vand q7, q7, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q10, #63
+ veor q10, q10, q7
+ vand q6, q6, q5
+ vld1.8 {q0}, [r7]!
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q11, #63
+ veor q11, q11, q6
+ vand q7, q7, q5
+ vld1.8 {q1}, [r7]!
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q12, #63
+ veor q12, q12, q7
+ vand q6, q6, q5
+ vld1.8 {q2}, [r7]!
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q13, #63
+ veor q13, q13, q6
+ vand q7, q7, q5
+ vld1.8 {q3}, [r7]!
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q14, #63
+ veor q14, q14, q7
+ vand q6, q6, q5
+ vld1.8 {q4}, [r7]!
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q15, #63
+ veor q15, q15, q6
+ vand q7, q7, q5
+ vld1.8 {q5}, [r7]!
+ veor q4, q4, q12
+ vadd.u64 q8, q15, q15
+ vst1.64 {q15}, [r0,:128]!
+ vswp d15,d14
+ veor q8, q8, q7
+ vst1.64 {q8}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6-q7}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ veor q7, q7, q15
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vld1.64 {q14-q15}, [r0,:128]!
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q3, q14
+ vst1.8 {q10-q11}, [r8]!
+ veor q13, q5, q15
+ vst1.8 {q12-q13}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+
+ subs r9, #0x80
+ bpl .Lxts_dec_loop
+
+.Lxts_dec_short:
+ adds r9, #0x70
+ bmi .Lxts_dec_done
+
+ vldmia r2, {q5} @ load XTS magic
+ vshr.s64 q7, q8, #63
+ mov r0, sp
+ vand q7, q7, q5
+ vadd.u64 q9, q8, q8
+ vst1.64 {q8}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q9, #63
+ veor q9, q9, q7
+ vand q6, q6, q5
+ vadd.u64 q10, q9, q9
+ vst1.64 {q9}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q10, #63
+ veor q10, q10, q6
+ vand q7, q7, q5
+ vld1.8 {q0}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_1
+ vadd.u64 q11, q10, q10
+ vst1.64 {q10}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q11, #63
+ veor q11, q11, q7
+ vand q6, q6, q5
+ vld1.8 {q1}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_2
+ veor q0, q0, q8
+ vadd.u64 q12, q11, q11
+ vst1.64 {q11}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q12, #63
+ veor q12, q12, q6
+ vand q7, q7, q5
+ vld1.8 {q2}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_3
+ veor q1, q1, q9
+ vadd.u64 q13, q12, q12
+ vst1.64 {q12}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q13, #63
+ veor q13, q13, q7
+ vand q6, q6, q5
+ vld1.8 {q3}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_4
+ veor q2, q2, q10
+ vadd.u64 q14, q13, q13
+ vst1.64 {q13}, [r0,:128]!
+ vswp d13,d12
+ vshr.s64 q7, q14, #63
+ veor q14, q14, q6
+ vand q7, q7, q5
+ vld1.8 {q4}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_5
+ veor q3, q3, q11
+ vadd.u64 q15, q14, q14
+ vst1.64 {q14}, [r0,:128]!
+ vswp d15,d14
+ vshr.s64 q6, q15, #63
+ veor q15, q15, q7
+ vand q6, q6, q5
+ vld1.8 {q5}, [r7]!
+ subs r9, #0x10
+ bmi .Lxts_dec_6
+ veor q4, q4, q12
+ sub r9, #0x10
+ vst1.64 {q15}, [r0,:128] @ next round tweak
+
+ vld1.8 {q6}, [r7]!
+ veor q5, q5, q13
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q6, q6, q14
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vld1.64 {q14}, [r0,:128]!
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ veor q12, q3, q14
+ vst1.8 {q10-q11}, [r8]!
+ vst1.8 {q12}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+ vst1.64 {q14}, [r0,:128] @ next round tweak
+
+ veor q4, q4, q12
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q5, q5, q13
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12-q13}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ veor q11, q7, q13
+ vst1.8 {q10-q11}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+ vst1.64 {q13}, [r0,:128] @ next round tweak
+
+ veor q3, q3, q11
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q4, q4, q12
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ vld1.64 {q12}, [r0,:128]!
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ veor q10, q2, q12
+ vst1.8 {q8-q9}, [r8]!
+ vst1.8 {q10}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+ vst1.64 {q12}, [r0,:128] @ next round tweak
+
+ veor q2, q2, q10
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q3, q3, q11
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10-q11}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ veor q9, q4, q11
+ vst1.8 {q8-q9}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+ vst1.64 {q11}, [r0,:128] @ next round tweak
+
+ veor q1, q1, q9
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q2, q2, q10
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ vld1.64 {q10}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ veor q8, q6, q10
+ vst1.8 {q0-q1}, [r8]!
+ vst1.8 {q8}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+ vst1.64 {q10}, [r0,:128] @ next round tweak
+
+ veor q0, q0, q8
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, r10, #248 @ pass key schedule
+#endif
+ veor q1, q1, q9
+ mov r5, r1 @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {q8-q9}, [r0,:128]!
+ veor q0, q0, q8
+ veor q1, q1, q9
+ vst1.8 {q0-q1}, [r8]!
+
+ vld1.64 {q8}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+ mov r5, r2 @ preserve magic
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r8]!
+ mov r3, r4
+ mov r2, r5
+
+ vmov q8, q9 @ next round tweak
+
+.Lxts_dec_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds r9, #0x10
+ beq .Lxts_dec_ret
+
+ @ calculate one round of extra tweak for the stolen ciphertext
+ vldmia r2, {q5}
+ vshr.s64 q6, q8, #63
+ vand q6, q6, q5
+ vadd.u64 q9, q8, q8
+ vswp d13,d12
+ veor q9, q9, q6
+
+ @ perform the final decryption with the last tweak value
+ vld1.8 {q0}, [r7]!
+ mov r0, sp
+ veor q0, q0, q9
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+ mov r4, r3 @ preserve fp
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q9
+ vst1.8 {q0}, [r8]
+
+ mov r6, r8
+.Lxts_dec_steal:
+ ldrb r1, [r8]
+ ldrb r0, [r7], #1
+ strb r1, [r8, #0x10]
+ strb r0, [r8], #1
+
+ subs r9, #1
+ bhi .Lxts_dec_steal
+
+ vld1.8 {q0}, [r6]
+ mov r0, sp
+ veor q0, q8
+ mov r1, sp
+ vst1.8 {q0}, [sp,:128]
+ mov r2, r10
+
+ bl AES_decrypt
+
+ vld1.8 {q0}, [sp,:128]
+ veor q0, q0, q8
+ vst1.8 {q0}, [r6]
+ mov r3, r4
+#endif
+
+.Lxts_dec_ret:
+ bic r0, r3, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_dec_bzero
+
+ mov sp, r3
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {q8}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+#endif
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
new file mode 100644
index 000000000000..15468fbbdea3
--- /dev/null
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -0,0 +1,434 @@
+/*
+ * linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+#include "aes_glue.h"
+
+#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE)
+
+struct BS_KEY {
+ struct AES_KEY rk;
+ int converted;
+ u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE];
+} __aligned(8);
+
+asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in);
+asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in);
+
+asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes,
+ struct BS_KEY *key, u8 iv[]);
+
+asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks,
+ struct BS_KEY *key, u8 const iv[]);
+
+asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes,
+ struct BS_KEY *key, u8 tweak[]);
+
+asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes,
+ struct BS_KEY *key, u8 tweak[]);
+
+struct aesbs_cbc_ctx {
+ struct AES_KEY enc;
+ struct BS_KEY dec;
+};
+
+struct aesbs_ctr_ctx {
+ struct BS_KEY enc;
+};
+
+struct aesbs_xts_ctx {
+ struct BS_KEY enc;
+ struct BS_KEY dec;
+ struct AES_KEY twkey;
+};
+
+static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+ int bits = key_len * 8;
+
+ if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ ctx->dec.rk = ctx->enc;
+ private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
+ ctx->dec.converted = 0;
+ return 0;
+}
+
+static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+ int bits = key_len * 8;
+
+ if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ ctx->enc.converted = 0;
+ return 0;
+}
+
+static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int bits = key_len * 4;
+
+ if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ ctx->dec.rk = ctx->enc.rk;
+ private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
+ private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey);
+ ctx->enc.converted = ctx->dec.converted = 0;
+ return 0;
+}
+
+static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while (walk.nbytes) {
+ u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *src = walk.src.virt.addr;
+
+ if (walk.dst.virt.addr == walk.src.virt.addr) {
+ u8 *iv = walk.iv;
+
+ do {
+ crypto_xor(src, iv, AES_BLOCK_SIZE);
+ AES_encrypt(src, src, &ctx->enc);
+ iv = src;
+ src += AES_BLOCK_SIZE;
+ } while (--blocks);
+ memcpy(walk.iv, iv, AES_BLOCK_SIZE);
+ } else {
+ u8 *dst = walk.dst.virt.addr;
+
+ do {
+ crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
+ AES_encrypt(walk.iv, dst, &ctx->enc);
+ memcpy(walk.iv, dst, AES_BLOCK_SIZE);
+ src += AES_BLOCK_SIZE;
+ dst += AES_BLOCK_SIZE;
+ } while (--blocks);
+ }
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+ }
+ return err;
+}
+
+static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
+ kernel_neon_begin();
+ bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, &ctx->dec, walk.iv);
+ kernel_neon_end();
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+ }
+ while (walk.nbytes) {
+ u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+ u8 *dst = walk.dst.virt.addr;
+ u8 *src = walk.src.virt.addr;
+ u8 bk[2][AES_BLOCK_SIZE];
+ u8 *iv = walk.iv;
+
+ do {
+ if (walk.dst.virt.addr == walk.src.virt.addr)
+ memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
+
+ AES_decrypt(src, dst, &ctx->dec.rk);
+ crypto_xor(dst, iv, AES_BLOCK_SIZE);
+
+ if (walk.dst.virt.addr == walk.src.virt.addr)
+ iv = bk[blocks & 1];
+ else
+ iv = src;
+
+ dst += AES_BLOCK_SIZE;
+ src += AES_BLOCK_SIZE;
+ } while (--blocks);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+ }
+ return err;
+}
+
+static void inc_be128_ctr(__be32 ctr[], u32 addend)
+{
+ int i;
+
+ for (i = 3; i >= 0; i--, addend = 1) {
+ u32 n = be32_to_cpu(ctr[i]) + addend;
+
+ ctr[i] = cpu_to_be32(n);
+ if (n >= addend)
+ break;
+ }
+}
+
+static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ u32 blocks;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+ __be32 *ctr = (__be32 *)walk.iv;
+ u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]);
+
+ /* avoid 32 bit counter overflow in the NEON code */
+ if (unlikely(headroom < blocks)) {
+ blocks = headroom + 1;
+ tail = walk.nbytes - blocks * AES_BLOCK_SIZE;
+ }
+ kernel_neon_begin();
+ bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
+ walk.dst.virt.addr, blocks,
+ &ctx->enc, walk.iv);
+ kernel_neon_end();
+ inc_be128_ctr(ctr, blocks);
+
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
+ break;
+
+ err = blkcipher_walk_done(desc, &walk, tail);
+ }
+ if (walk.nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 ks[AES_BLOCK_SIZE];
+
+ AES_encrypt(walk.iv, ks, &ctx->enc.rk);
+ if (tdst != tsrc)
+ memcpy(tdst, tsrc, nbytes);
+ crypto_xor(tdst, ks, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ return err;
+}
+
+static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ /* generate the initial tweak */
+ AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
+
+ while (walk.nbytes) {
+ kernel_neon_begin();
+ bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, &ctx->enc, walk.iv);
+ kernel_neon_end();
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+ }
+ return err;
+}
+
+static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+
+ /* generate the initial tweak */
+ AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
+
+ while (walk.nbytes) {
+ kernel_neon_begin();
+ bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
+ walk.nbytes, &ctx->dec, walk.iv);
+ kernel_neon_end();
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+ }
+ return err;
+}
+
+static struct crypto_alg aesbs_algs[] = { {
+ .cra_name = "__cbc-aes-neonbs",
+ .cra_driver_name = "__driver-cbc-aes-neonbs",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_cbc_set_key,
+ .encrypt = aesbs_cbc_encrypt,
+ .decrypt = aesbs_cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-neonbs",
+ .cra_driver_name = "__driver-ctr-aes-neonbs",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_ctr_set_key,
+ .encrypt = aesbs_ctr_encrypt,
+ .decrypt = aesbs_ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-neonbs",
+ .cra_driver_name = "__driver-xts-aes-neonbs",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct aesbs_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aesbs_xts_set_key,
+ .encrypt = aesbs_xts_encrypt,
+ .decrypt = aesbs_xts_decrypt,
+ },
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-neonbs",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = __ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-neonbs",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-neonbs",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aesbs_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
+static void __exit aesbs_mod_exit(void)
+{
+ crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
+module_init(aesbs_mod_init);
+module_exit(aesbs_mod_exit);
+
+MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl
new file mode 100644
index 000000000000..be068db960ee
--- /dev/null
+++ b/arch/arm/crypto/bsaes-armv7.pl
@@ -0,0 +1,2467 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
+# granted.
+# ====================================================================
+
+# Bit-sliced AES for ARM NEON
+#
+# February 2012.
+#
+# This implementation is direct adaptation of bsaes-x86_64 module for
+# ARM NEON. Except that this module is endian-neutral [in sense that
+# it can be compiled for either endianness] by courtesy of vld1.8's
+# neutrality. Initial version doesn't implement interface to OpenSSL,
+# only low-level primitives and unsupported entry points, just enough
+# to collect performance results, which for Cortex-A8 core are:
+#
+# encrypt 19.5 cycles per byte processed with 128-bit key
+# decrypt 22.1 cycles per byte processed with 128-bit key
+# key conv. 440 cycles per 128-bit key/0.18 of 8x block
+#
+# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+# which is [much] worse than anticipated (for further details see
+# http://www.openssl.org/~appro/Snapdragon-S4.html).
+#
+# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+# manages in 20.0 cycles].
+#
+# When comparing to x86_64 results keep in mind that NEON unit is
+# [mostly] single-issue and thus can't [fully] benefit from
+# instruction-level parallelism. And when comparing to aes-armv4
+# results keep in mind key schedule conversion overhead (see
+# bsaes-x86_64.pl for further details)...
+#
+# <appro@openssl.org>
+
+# April-August 2013
+#
+# Add CBC, CTR and XTS subroutines, adapt for kernel use.
+#
+# <ard.biesheuvel@linaro.org>
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+my ($inp,$out,$len,$key)=("r0","r1","r2","r3");
+my @XMM=map("q$_",(0..15));
+
+{
+my ($key,$rounds,$const)=("r4","r5","r6");
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub Sbox {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
+my @b=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+ &InBasisChange (@b);
+ &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
+ &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
+}
+
+sub InBasisChange {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
+my @b=@_[0..7];
+$code.=<<___;
+ veor @b[2], @b[2], @b[1]
+ veor @b[5], @b[5], @b[6]
+ veor @b[3], @b[3], @b[0]
+ veor @b[6], @b[6], @b[2]
+ veor @b[5], @b[5], @b[0]
+
+ veor @b[6], @b[6], @b[3]
+ veor @b[3], @b[3], @b[7]
+ veor @b[7], @b[7], @b[5]
+ veor @b[3], @b[3], @b[4]
+ veor @b[4], @b[4], @b[5]
+
+ veor @b[2], @b[2], @b[7]
+ veor @b[3], @b[3], @b[1]
+ veor @b[1], @b[1], @b[5]
+___
+}
+
+sub OutBasisChange {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
+my @b=@_[0..7];
+$code.=<<___;
+ veor @b[0], @b[0], @b[6]
+ veor @b[1], @b[1], @b[4]
+ veor @b[4], @b[4], @b[6]
+ veor @b[2], @b[2], @b[0]
+ veor @b[6], @b[6], @b[1]
+
+ veor @b[1], @b[1], @b[5]
+ veor @b[5], @b[5], @b[3]
+ veor @b[3], @b[3], @b[7]
+ veor @b[7], @b[7], @b[5]
+ veor @b[2], @b[2], @b[5]
+
+ veor @b[4], @b[4], @b[7]
+___
+}
+
+sub InvSbox {
+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
+my @b=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+ &InvInBasisChange (@b);
+ &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
+ &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
+}
+
+sub InvInBasisChange { # OutBasisChange in reverse (with twist)
+my @b=@_[5,1,2,6,3,7,0,4];
+$code.=<<___
+ veor @b[1], @b[1], @b[7]
+ veor @b[4], @b[4], @b[7]
+
+ veor @b[7], @b[7], @b[5]
+ veor @b[1], @b[1], @b[3]
+ veor @b[2], @b[2], @b[5]
+ veor @b[3], @b[3], @b[7]
+
+ veor @b[6], @b[6], @b[1]
+ veor @b[2], @b[2], @b[0]
+ veor @b[5], @b[5], @b[3]
+ veor @b[4], @b[4], @b[6]
+ veor @b[0], @b[0], @b[6]
+ veor @b[1], @b[1], @b[4]
+___
+}
+
+sub InvOutBasisChange { # InBasisChange in reverse
+my @b=@_[2,5,7,3,6,1,0,4];
+$code.=<<___;
+ veor @b[1], @b[1], @b[5]
+ veor @b[2], @b[2], @b[7]
+
+ veor @b[3], @b[3], @b[1]
+ veor @b[4], @b[4], @b[5]
+ veor @b[7], @b[7], @b[5]
+ veor @b[3], @b[3], @b[4]
+ veor @b[5], @b[5], @b[0]
+ veor @b[3], @b[3], @b[7]
+ veor @b[6], @b[6], @b[2]
+ veor @b[2], @b[2], @b[1]
+ veor @b[6], @b[6], @b[3]
+
+ veor @b[3], @b[3], @b[0]
+ veor @b[5], @b[5], @b[6]
+___
+}
+
+sub Mul_GF4 {
+#;*************************************************************
+#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
+#;*************************************************************
+my ($x0,$x1,$y0,$y1,$t0,$t1)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ vand $t0, $t0, $x0
+ veor $x0, $x0, $x1
+ vand $t1, $x1, $y0
+ vand $x0, $x0, $y1
+ veor $x1, $t1, $t0
+ veor $x0, $x0, $t1
+___
+}
+
+sub Mul_GF4_N { # not used, see next subroutine
+# multiply and scale by N
+my ($x0,$x1,$y0,$y1,$t0)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ vand $t0, $t0, $x0
+ veor $x0, $x0, $x1
+ vand $x1, $x1, $y0
+ vand $x0, $x0, $y1
+ veor $x1, $x1, $x0
+ veor $x0, $x0, $t0
+___
+}
+
+sub Mul_GF4_N_GF4 {
+# interleaved Mul_GF4_N and Mul_GF4
+my ($x0,$x1,$y0,$y1,$t0,
+ $x2,$x3,$y2,$y3,$t1)=@_;
+$code.=<<___;
+ veor $t0, $y0, $y1
+ veor $t1, $y2, $y3
+ vand $t0, $t0, $x0
+ vand $t1, $t1, $x2
+ veor $x0, $x0, $x1
+ veor $x2, $x2, $x3
+ vand $x1, $x1, $y0
+ vand $x3, $x3, $y2
+ vand $x0, $x0, $y1
+ vand $x2, $x2, $y3
+ veor $x1, $x1, $x0
+ veor $x2, $x2, $x3
+ veor $x0, $x0, $t0
+ veor $x3, $x3, $t1
+___
+}
+sub Mul_GF16_2 {
+my @x=@_[0..7];
+my @y=@_[8..11];
+my @t=@_[12..15];
+$code.=<<___;
+ veor @t[0], @x[0], @x[2]
+ veor @t[1], @x[1], @x[3]
+___
+ &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+ veor @y[0], @y[0], @y[2]
+ veor @y[1], @y[1], @y[3]
+___
+ Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
+ @x[2], @x[3], @y[2], @y[3], @t[2]);
+$code.=<<___;
+ veor @x[0], @x[0], @t[0]
+ veor @x[2], @x[2], @t[0]
+ veor @x[1], @x[1], @t[1]
+ veor @x[3], @x[3], @t[1]
+
+ veor @t[0], @x[4], @x[6]
+ veor @t[1], @x[5], @x[7]
+___
+ &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
+ @x[6], @x[7], @y[2], @y[3], @t[2]);
+$code.=<<___;
+ veor @y[0], @y[0], @y[2]
+ veor @y[1], @y[1], @y[3]
+___
+ &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+ veor @x[4], @x[4], @t[0]
+ veor @x[6], @x[6], @t[0]
+ veor @x[5], @x[5], @t[1]
+ veor @x[7], @x[7], @t[1]
+___
+}
+sub Inv_GF256 {
+#;********************************************************************
+#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
+#;********************************************************************
+my @x=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+# direct optimizations from hardware
+$code.=<<___;
+ veor @t[3], @x[4], @x[6]
+ veor @t[2], @x[5], @x[7]
+ veor @t[1], @x[1], @x[3]
+ veor @s[1], @x[7], @x[6]
+ vmov @t[0], @t[2]
+ veor @s[0], @x[0], @x[2]
+
+ vorr @t[2], @t[2], @t[1]
+ veor @s[3], @t[3], @t[0]
+ vand @s[2], @t[3], @s[0]
+ vorr @t[3], @t[3], @s[0]
+ veor @s[0], @s[0], @t[1]
+ vand @t[0], @t[0], @t[1]
+ veor @t[1], @x[3], @x[2]
+ vand @s[3], @s[3], @s[0]
+ vand @s[1], @s[1], @t[1]
+ veor @t[1], @x[4], @x[5]
+ veor @s[0], @x[1], @x[0]
+ veor @t[3], @t[3], @s[1]
+ veor @t[2], @t[2], @s[1]
+ vand @s[1], @t[1], @s[0]
+ vorr @t[1], @t[1], @s[0]
+ veor @t[3], @t[3], @s[3]
+ veor @t[0], @t[0], @s[1]
+ veor @t[2], @t[2], @s[2]
+ veor @t[1], @t[1], @s[3]
+ veor @t[0], @t[0], @s[2]
+ vand @s[0], @x[7], @x[3]
+ veor @t[1], @t[1], @s[2]
+ vand @s[1], @x[6], @x[2]
+ vand @s[2], @x[5], @x[1]
+ vorr @s[3], @x[4], @x[0]
+ veor @t[3], @t[3], @s[0]
+ veor @t[1], @t[1], @s[2]
+ veor @t[0], @t[0], @s[3]
+ veor @t[2], @t[2], @s[1]
+
+ @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
+
+ @ new smaller inversion
+
+ vand @s[2], @t[3], @t[1]
+ vmov @s[0], @t[0]
+
+ veor @s[1], @t[2], @s[2]
+ veor @s[3], @t[0], @s[2]
+ veor @s[2], @t[0], @s[2] @ @s[2]=@s[3]
+
+ vbsl @s[1], @t[1], @t[0]
+ vbsl @s[3], @t[3], @t[2]
+ veor @t[3], @t[3], @t[2]
+
+ vbsl @s[0], @s[1], @s[2]
+ vbsl @t[0], @s[2], @s[1]
+
+ vand @s[2], @s[0], @s[3]
+ veor @t[1], @t[1], @t[0]
+
+ veor @s[2], @s[2], @t[3]
+___
+# output in s3, s2, s1, t1
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+ &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
+
+### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
+}
+
+# AES linear components
+
+sub ShiftRows {
+my @x=@_[0..7];
+my @t=@_[8..11];
+my $mask=pop;
+$code.=<<___;
+ vldmia $key!, {@t[0]-@t[3]}
+ veor @t[0], @t[0], @x[0]
+ veor @t[1], @t[1], @x[1]
+ vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)`
+ vldmia $key!, {@t[0]}
+ veor @t[2], @t[2], @x[2]
+ vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)`
+ vldmia $key!, {@t[1]}
+ veor @t[3], @t[3], @x[3]
+ vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)`
+ vldmia $key!, {@t[2]}
+ vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)`
+ vldmia $key!, {@t[3]}
+ veor @t[0], @t[0], @x[4]
+ veor @t[1], @t[1], @x[5]
+ vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)`
+ veor @t[2], @t[2], @x[6]
+ vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)`
+ veor @t[3], @t[3], @x[7]
+ vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)`
+ vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)`
+ vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)`
+___
+}
+
+sub MixColumns {
+# modified to emit output in order suitable for feeding back to aesenc[last]
+my @x=@_[0..7];
+my @t=@_[8..15];
+my $inv=@_[16]; # optional
+$code.=<<___;
+ vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32
+ vext.8 @t[1], @x[1], @x[1], #12
+ veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32)
+ vext.8 @t[2], @x[2], @x[2], #12
+ veor @x[1], @x[1], @t[1]
+ vext.8 @t[3], @x[3], @x[3], #12
+ veor @x[2], @x[2], @t[2]
+ vext.8 @t[4], @x[4], @x[4], #12
+ veor @x[3], @x[3], @t[3]
+ vext.8 @t[5], @x[5], @x[5], #12
+ veor @x[4], @x[4], @t[4]
+ vext.8 @t[6], @x[6], @x[6], #12
+ veor @x[5], @x[5], @t[5]
+ vext.8 @t[7], @x[7], @x[7], #12
+ veor @x[6], @x[6], @t[6]
+
+ veor @t[1], @t[1], @x[0]
+ veor @x[7], @x[7], @t[7]
+ vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64)
+ veor @t[2], @t[2], @x[1]
+ veor @t[0], @t[0], @x[7]
+ veor @t[1], @t[1], @x[7]
+ vext.8 @x[1], @x[1], @x[1], #8
+ veor @t[5], @t[5], @x[4]
+ veor @x[0], @x[0], @t[0]
+ veor @t[6], @t[6], @x[5]
+ veor @x[1], @x[1], @t[1]
+ vext.8 @t[0], @x[4], @x[4], #8
+ veor @t[4], @t[4], @x[3]
+ vext.8 @t[1], @x[5], @x[5], #8
+ veor @t[7], @t[7], @x[6]
+ vext.8 @x[4], @x[3], @x[3], #8
+ veor @t[3], @t[3], @x[2]
+ vext.8 @x[5], @x[7], @x[7], #8
+ veor @t[4], @t[4], @x[7]
+ vext.8 @x[3], @x[6], @x[6], #8
+ veor @t[3], @t[3], @x[7]
+ vext.8 @x[6], @x[2], @x[2], #8
+ veor @x[7], @t[1], @t[5]
+___
+$code.=<<___ if (!$inv);
+ veor @x[2], @t[0], @t[4]
+ veor @x[4], @x[4], @t[3]
+ veor @x[5], @x[5], @t[7]
+ veor @x[3], @x[3], @t[6]
+ @ vmov @x[2], @t[0]
+ veor @x[6], @x[6], @t[2]
+ @ vmov @x[7], @t[1]
+___
+$code.=<<___ if ($inv);
+ veor @t[3], @t[3], @x[4]
+ veor @x[5], @x[5], @t[7]
+ veor @x[2], @x[3], @t[6]
+ veor @x[3], @t[0], @t[4]
+ veor @x[4], @x[6], @t[2]
+ vmov @x[6], @t[3]
+ @ vmov @x[7], @t[1]
+___
+}
+
+sub InvMixColumns_orig {
+my @x=@_[0..7];
+my @t=@_[8..15];
+
+$code.=<<___;
+ @ multiplication by 0x0e
+ vext.8 @t[7], @x[7], @x[7], #12
+ vmov @t[2], @x[2]
+ veor @x[2], @x[2], @x[5] @ 2 5
+ veor @x[7], @x[7], @x[5] @ 7 5
+ vext.8 @t[0], @x[0], @x[0], #12
+ vmov @t[5], @x[5]
+ veor @x[5], @x[5], @x[0] @ 5 0 [1]
+ veor @x[0], @x[0], @x[1] @ 0 1
+ vext.8 @t[1], @x[1], @x[1], #12
+ veor @x[1], @x[1], @x[2] @ 1 25
+ veor @x[0], @x[0], @x[6] @ 01 6 [2]
+ vext.8 @t[3], @x[3], @x[3], #12
+ veor @x[1], @x[1], @x[3] @ 125 3 [4]
+ veor @x[2], @x[2], @x[0] @ 25 016 [3]
+ veor @x[3], @x[3], @x[7] @ 3 75
+ veor @x[7], @x[7], @x[6] @ 75 6 [0]
+ vext.8 @t[6], @x[6], @x[6], #12
+ vmov @t[4], @x[4]
+ veor @x[6], @x[6], @x[4] @ 6 4
+ veor @x[4], @x[4], @x[3] @ 4 375 [6]
+ veor @x[3], @x[3], @x[7] @ 375 756=36
+ veor @x[6], @x[6], @t[5] @ 64 5 [7]
+ veor @x[3], @x[3], @t[2] @ 36 2
+ vext.8 @t[5], @t[5], @t[5], #12
+ veor @x[3], @x[3], @t[4] @ 362 4 [5]
+___
+ my @y = @x[7,5,0,2,1,3,4,6];
+$code.=<<___;
+ @ multiplication by 0x0b
+ veor @y[1], @y[1], @y[0]
+ veor @y[0], @y[0], @t[0]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[1], @y[1], @t[1]
+ veor @y[0], @y[0], @t[5]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[1], @y[1], @t[6]
+ veor @y[0], @y[0], @t[7]
+ veor @t[7], @t[7], @t[6] @ clobber t[7]
+
+ veor @y[3], @y[3], @t[0]
+ veor @y[1], @y[1], @y[0]
+ vext.8 @t[0], @t[0], @t[0], #12
+ veor @y[2], @y[2], @t[1]
+ veor @y[4], @y[4], @t[1]
+ vext.8 @t[1], @t[1], @t[1], #12
+ veor @y[2], @y[2], @t[2]
+ veor @y[3], @y[3], @t[2]
+ veor @y[5], @y[5], @t[2]
+ veor @y[2], @y[2], @t[7]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[3], @y[3], @t[3]
+ veor @y[6], @y[6], @t[3]
+ veor @y[4], @y[4], @t[3]
+ veor @y[7], @y[7], @t[4]
+ vext.8 @t[3], @t[3], @t[3], #12
+ veor @y[5], @y[5], @t[4]
+ veor @y[7], @y[7], @t[7]
+ veor @t[7], @t[7], @t[5] @ clobber t[7] even more
+ veor @y[3], @y[3], @t[5]
+ veor @y[4], @y[4], @t[4]
+
+ veor @y[5], @y[5], @t[7]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[6], @y[6], @t[7]
+ veor @y[4], @y[4], @t[7]
+
+ veor @t[7], @t[7], @t[5]
+ vext.8 @t[5], @t[5], @t[5], #12
+
+ @ multiplication by 0x0d
+ veor @y[4], @y[4], @y[7]
+ veor @t[7], @t[7], @t[6] @ restore t[7]
+ veor @y[7], @y[7], @t[4]
+ vext.8 @t[6], @t[6], @t[6], #12
+ veor @y[2], @y[2], @t[0]
+ veor @y[7], @y[7], @t[5]
+ vext.8 @t[7], @t[7], @t[7], #12
+ veor @y[2], @y[2], @t[2]
+
+ veor @y[3], @y[3], @y[1]
+ veor @y[1], @y[1], @t[1]
+ veor @y[0], @y[0], @t[0]
+ veor @y[3], @y[3], @t[0]
+ veor @y[1], @y[1], @t[5]
+ veor @y[0], @y[0], @t[5]
+ vext.8 @t[0], @t[0], @t[0], #12
+ veor @y[1], @y[1], @t[7]
+ veor @y[0], @y[0], @t[6]
+ veor @y[3], @y[3], @y[1]
+ veor @y[4], @y[4], @t[1]
+ vext.8 @t[1], @t[1], @t[1], #12
+
+ veor @y[7], @y[7], @t[7]
+ veor @y[4], @y[4], @t[2]
+ veor @y[5], @y[5], @t[2]
+ veor @y[2], @y[2], @t[6]
+ veor @t[6], @t[6], @t[3] @ clobber t[6]
+ vext.8 @t[2], @t[2], @t[2], #12
+ veor @y[4], @y[4], @y[7]
+ veor @y[3], @y[3], @t[6]
+
+ veor @y[6], @y[6], @t[6]
+ veor @y[5], @y[5], @t[5]
+ vext.8 @t[5], @t[5], @t[5], #12
+ veor @y[6], @y[6], @t[4]
+ vext.8 @t[4], @t[4], @t[4], #12
+ veor @y[5], @y[5], @t[6]
+ veor @y[6], @y[6], @t[7]
+ vext.8 @t[7], @t[7], @t[7], #12
+ veor @t[6], @t[6], @t[3] @ restore t[6]
+ vext.8 @t[3], @t[3], @t[3], #12
+
+ @ multiplication by 0x09
+ veor @y[4], @y[4], @y[1]
+ veor @t[1], @t[1], @y[1] @ t[1]=y[1]
+ veor @t[0], @t[0], @t[5] @ clobber t[0]
+ vext.8 @t[6], @t[6], @t[6], #12
+ veor @t[1], @t[1], @t[5]
+ veor @y[3], @y[3], @t[0]
+ veor @t[0], @t[0], @y[0] @ t[0]=y[0]
+ veor @t[1], @t[1], @t[6]
+ veor @t[6], @t[6], @t[7] @ clobber t[6]
+ veor @y[4], @y[4], @t[1]
+ veor @y[7], @y[7], @t[4]
+ veor @y[6], @y[6], @t[3]
+ veor @y[5], @y[5], @t[2]
+ veor @t[4], @t[4], @y[4] @ t[4]=y[4]
+ veor @t[3], @t[3], @y[3] @ t[3]=y[3]
+ veor @t[5], @t[5], @y[5] @ t[5]=y[5]
+ veor @t[2], @t[2], @y[2] @ t[2]=y[2]
+ veor @t[3], @t[3], @t[7]
+ veor @XMM[5], @t[5], @t[6]
+ veor @XMM[6], @t[6], @y[6] @ t[6]=y[6]
+ veor @XMM[2], @t[2], @t[6]
+ veor @XMM[7], @t[7], @y[7] @ t[7]=y[7]
+
+ vmov @XMM[0], @t[0]
+ vmov @XMM[1], @t[1]
+ @ vmov @XMM[2], @t[2]
+ vmov @XMM[3], @t[3]
+ vmov @XMM[4], @t[4]
+ @ vmov @XMM[5], @t[5]
+ @ vmov @XMM[6], @t[6]
+ @ vmov @XMM[7], @t[7]
+___
+}
+
+sub InvMixColumns {
+my @x=@_[0..7];
+my @t=@_[8..15];
+
+# Thanks to Jussi Kivilinna for providing a pointer to
+#
+# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
+# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
+# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
+# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
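+#
+# i.e. the inverse matrix is the forward MixColumns matrix times the
+# 0x05/0x00/0x04/0x00 matrix, so the code below applies the latter first and
+# then simply reuses MixColumns().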
+
+$code.=<<___;
+ @ multiplication by 0x05-0x00-0x04-0x00
+ vext.8 @t[0], @x[0], @x[0], #8
+ vext.8 @t[6], @x[6], @x[6], #8
+ vext.8 @t[7], @x[7], @x[7], #8
+ veor @t[0], @t[0], @x[0]
+ vext.8 @t[1], @x[1], @x[1], #8
+ veor @t[6], @t[6], @x[6]
+ vext.8 @t[2], @x[2], @x[2], #8
+ veor @t[7], @t[7], @x[7]
+ vext.8 @t[3], @x[3], @x[3], #8
+ veor @t[1], @t[1], @x[1]
+ vext.8 @t[4], @x[4], @x[4], #8
+ veor @t[2], @t[2], @x[2]
+ vext.8 @t[5], @x[5], @x[5], #8
+ veor @t[3], @t[3], @x[3]
+ veor @t[4], @t[4], @x[4]
+ veor @t[5], @t[5], @x[5]
+
+ veor @x[0], @x[0], @t[6]
+ veor @x[1], @x[1], @t[6]
+ veor @x[2], @x[2], @t[0]
+ veor @x[4], @x[4], @t[2]
+ veor @x[3], @x[3], @t[1]
+ veor @x[1], @x[1], @t[7]
+ veor @x[2], @x[2], @t[7]
+ veor @x[4], @x[4], @t[6]
+ veor @x[5], @x[5], @t[3]
+ veor @x[3], @x[3], @t[6]
+ veor @x[6], @x[6], @t[4]
+ veor @x[4], @x[4], @t[7]
+ veor @x[5], @x[5], @t[7]
+ veor @x[7], @x[7], @t[5]
+___
+ &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
+}
+
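+# swapmove exchanges, between $a and $b, the bit groups selected by $mask at
+# distance $n (the classic SWAPMOVE step); swapmove2x does the same for two
+# register pairs at once, which interleaves better on the NEON pipeline.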
+sub swapmove {
+my ($a,$b,$n,$mask,$t)=@_;
+$code.=<<___;
+ vshr.u64 $t, $b, #$n
+ veor $t, $t, $a
+ vand $t, $t, $mask
+ veor $a, $a, $t
+ vshl.u64 $t, $t, #$n
+ veor $b, $b, $t
+___
+}
+sub swapmove2x {
+my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
+$code.=<<___;
+ vshr.u64 $t0, $b0, #$n
+ vshr.u64 $t1, $b1, #$n
+ veor $t0, $t0, $a0
+ veor $t1, $t1, $a1
+ vand $t0, $t0, $mask
+ vand $t1, $t1, $mask
+ veor $a0, $a0, $t0
+ vshl.u64 $t0, $t0, #$n
+ veor $a1, $a1, $t1
+ vshl.u64 $t1, $t1, #$n
+ veor $b0, $b0, $t0
+ veor $b1, $b1, $t1
+___
+}
+
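+# bitslice() transposes the 8x8 bit matrix formed by corresponding bits of the
+# eight input registers: three swapmove passes with masks 0x55, 0x33 and 0x0f
+# convert between the byte-oriented and the bit-sliced representation.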
+sub bitslice {
+my @x=reverse(@_[0..7]);
+my ($t0,$t1,$t2,$t3)=@_[8..11];
+$code.=<<___;
+ vmov.i8 $t0,#0x55 @ compose .LBS0
+ vmov.i8 $t1,#0x33 @ compose .LBS1
+___
+ &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
+ &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+$code.=<<___;
+ vmov.i8 $t0,#0x0f @ compose .LBS2
+___
+ &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
+ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+
+ &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
+ &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
+}
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+# define VFP_ABI_POP vldmia sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ 7
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_ARCH__>=7
+.text
+.syntax unified @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code 32
+#endif
+
+.fpu neon
+
+.type _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+ adr $const,_bsaes_decrypt8
+ vldmia $key!, {@XMM[9]} @ round 0 key
+ add $const,$const,#.LM0ISR-_bsaes_decrypt8
+
+ vldmia $const!, {@XMM[8]} @ .LM0ISR
+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+ veor @XMM[11], @XMM[1], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ veor @XMM[12], @XMM[2], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+ veor @XMM[13], @XMM[3], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+ veor @XMM[14], @XMM[4], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+ veor @XMM[15], @XMM[5], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+ veor @XMM[10], @XMM[6], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+ veor @XMM[11], @XMM[7], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+___
+ &bitslice (@XMM[0..7, 8..11]);
+$code.=<<___;
+ sub $rounds,$rounds,#1
+ b .Ldec_sbox
+.align 4
+.Ldec_loop:
+___
+ &ShiftRows (@XMM[0..7, 8..12]);
+$code.=".Ldec_sbox:\n";
+ &InvSbox (@XMM[0..7, 8..15]);
+$code.=<<___;
+ subs $rounds,$rounds,#1
+ bcc .Ldec_done
+___
+ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
+$code.=<<___;
+ vldmia $const, {@XMM[12]} @ .LISR
+ ite eq @ Thumb2 thing, sanity check in ARM
+ addeq $const,$const,#0x10
+ bne .Ldec_loop
+ vldmia $const, {@XMM[12]} @ .LISRM0
+ b .Ldec_loop
+.align 4
+.Ldec_done:
+___
+ &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
+$code.=<<___;
+ vldmia $key, {@XMM[8]} @ last round key
+ veor @XMM[6], @XMM[6], @XMM[8]
+ veor @XMM[4], @XMM[4], @XMM[8]
+ veor @XMM[2], @XMM[2], @XMM[8]
+ veor @XMM[7], @XMM[7], @XMM[8]
+ veor @XMM[3], @XMM[3], @XMM[8]
+ veor @XMM[5], @XMM[5], @XMM[8]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[1], @XMM[8]
+ bx lr
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+
+.type _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR: @ InvShiftRows constants
+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR: @ ShiftRows constants
+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:
+ .quad 0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align 6
+.size _bsaes_const,.-_bsaes_const
+
+.type _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+ adr $const,_bsaes_encrypt8
+ vldmia $key!, {@XMM[9]} @ round 0 key
+ sub $const,$const,#_bsaes_encrypt8-.LM0SR
+
+ vldmia $const!, {@XMM[8]} @ .LM0SR
+_bsaes_encrypt8_alt:
+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+ veor @XMM[11], @XMM[1], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ veor @XMM[12], @XMM[2], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+ veor @XMM[13], @XMM[3], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+ veor @XMM[14], @XMM[4], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+ veor @XMM[15], @XMM[5], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+ veor @XMM[10], @XMM[6], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+ veor @XMM[11], @XMM[7], @XMM[9]
+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+_bsaes_encrypt8_bitslice:
+___
+ &bitslice (@XMM[0..7, 8..11]);
+$code.=<<___;
+ sub $rounds,$rounds,#1
+ b .Lenc_sbox
+.align 4
+.Lenc_loop:
+___
+ &ShiftRows (@XMM[0..7, 8..12]);
+$code.=".Lenc_sbox:\n";
+ &Sbox (@XMM[0..7, 8..15]);
+$code.=<<___;
+ subs $rounds,$rounds,#1
+ bcc .Lenc_done
+___
+ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
+$code.=<<___;
+ vldmia $const, {@XMM[12]} @ .LSR
+	ite	eq				@ Thumb2 thing, sanity check in ARM
+ addeq $const,$const,#0x10
+ bne .Lenc_loop
+ vldmia $const, {@XMM[12]} @ .LSRM0
+ b .Lenc_loop
+.align 4
+.Lenc_done:
+___
+ # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
+ &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
+$code.=<<___;
+ vldmia $key, {@XMM[8]} @ last round key
+ veor @XMM[4], @XMM[4], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[8]
+ veor @XMM[3], @XMM[3], @XMM[8]
+ veor @XMM[7], @XMM[7], @XMM[8]
+ veor @XMM[2], @XMM[2], @XMM[8]
+ veor @XMM[5], @XMM[5], @XMM[8]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[1], @XMM[8]
+ bx lr
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+___
+}
+{
+my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6");
+
+sub bitslice_key {
+my @x=reverse(@_[0..7]);
+my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
+
+ &swapmove (@x[0,1],1,$bs0,$t2,$t3);
+$code.=<<___;
+ @ &swapmove(@x[2,3],1,$t0,$t2,$t3);
+ vmov @x[2], @x[0]
+ vmov @x[3], @x[1]
+___
+ #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+
+ &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
+$code.=<<___;
+ @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+ vmov @x[4], @x[0]
+ vmov @x[6], @x[2]
+ vmov @x[5], @x[1]
+ vmov @x[7], @x[3]
+___
+ &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
+ &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
+}
+
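+# _bsaes_key_convert turns a standard AES key schedule into its bit-sliced
+# form: each round key is byte-permuted (.LM0), split into eight mask
+# registers with vtst against the bit masks 0x01..0x80, and the slices for the
+# set bits of 0x63 are complemented, absorbing the S-box affine constant.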
+$code.=<<___;
+.type _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+ adr $const,_bsaes_key_convert
+ vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key
+ sub $const,$const,#_bsaes_key_convert-.LM0
+ vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key
+
+ vmov.i8 @XMM[8], #0x01 @ bit masks
+ vmov.i8 @XMM[9], #0x02
+ vmov.i8 @XMM[10], #0x04
+ vmov.i8 @XMM[11], #0x08
+ vmov.i8 @XMM[12], #0x10
+ vmov.i8 @XMM[13], #0x20
+ vldmia $const, {@XMM[14]} @ .LM0
+
+#ifdef __ARMEL__
+ vrev32.8 @XMM[7], @XMM[7]
+ vrev32.8 @XMM[15], @XMM[15]
+#endif
+ sub $rounds,$rounds,#1
+ vstmia $out!, {@XMM[7]} @ save round 0 key
+ b .Lkey_loop
+
+.align 4
+.Lkey_loop:
+ vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])`
+ vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])`
+ vmov.i8 @XMM[6], #0x40
+ vmov.i8 @XMM[15], #0x80
+
+ vtst.8 @XMM[0], @XMM[7], @XMM[8]
+ vtst.8 @XMM[1], @XMM[7], @XMM[9]
+ vtst.8 @XMM[2], @XMM[7], @XMM[10]
+ vtst.8 @XMM[3], @XMM[7], @XMM[11]
+ vtst.8 @XMM[4], @XMM[7], @XMM[12]
+ vtst.8 @XMM[5], @XMM[7], @XMM[13]
+ vtst.8 @XMM[6], @XMM[7], @XMM[6]
+ vtst.8 @XMM[7], @XMM[7], @XMM[15]
+ vld1.8 {@XMM[15]}, [$inp]! @ load next round key
+ vmvn @XMM[0], @XMM[0] @ "pnot"
+ vmvn @XMM[1], @XMM[1]
+ vmvn @XMM[5], @XMM[5]
+ vmvn @XMM[6], @XMM[6]
+#ifdef __ARMEL__
+ vrev32.8 @XMM[15], @XMM[15]
+#endif
+ subs $rounds,$rounds,#1
+ vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key
+ bne .Lkey_loop
+
+ vmov.i8 @XMM[7],#0x63 @ compose .L63
+ @ don't save last round key
+ bx lr
+.size _bsaes_key_convert,.-_bsaes_key_convert
+___
+}
+
+if (0) {	# following four functions are an unsupported interface
+ # used for benchmarking...
+$code.=<<___;
+.globl bsaes_enc_key_convert
+.type bsaes_enc_key_convert,%function
+.align 4
+bsaes_enc_key_convert:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ ldr r5,[$inp,#240] @ pass rounds
+ mov r4,$inp @ pass key
+ mov r12,$out @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
+
+.globl bsaes_encrypt_128
+.type bsaes_encrypt_128,%function
+.align 4
+bsaes_encrypt_128:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+.Lenc128_loop:
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+ mov r4,$key @ pass the key
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5,#10 @ pass rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+
+ bl _bsaes_encrypt8
+
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ subs $len,$len,#0x80
+ vst1.8 {@XMM[5]}, [$out]!
+ bhi .Lenc128_loop
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_encrypt_128,.-bsaes_encrypt_128
+
+.globl bsaes_dec_key_convert
+.type bsaes_dec_key_convert,%function
+.align 4
+bsaes_dec_key_convert:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ ldr r5,[$inp,#240] @ pass rounds
+ mov r4,$inp @ pass key
+ mov r12,$out @ pass key schedule
+ bl _bsaes_key_convert
+ vldmia $out, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia $out, {@XMM[7]}
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
+
+.globl bsaes_decrypt_128
+.type bsaes_decrypt_128,%function
+.align 4
+bsaes_decrypt_128:
+ stmdb sp!,{r4-r6,lr}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+.Ldec128_loop:
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+ mov r4,$key @ pass the key
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5,#10 @ pass rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+
+ bl _bsaes_decrypt8
+
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ subs $len,$len,#0x80
+ vst1.8 {@XMM[5]}, [$out]!
+ bhi .Ldec128_loop
+
+ vldmia sp!,{d8-d15}
+ ldmia sp!,{r4-r6,pc}
+.size bsaes_decrypt_128,.-bsaes_decrypt_128
+___
+}
+{
+my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
+my ($keysched)=("sp");
+
+$code.=<<___;
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+.global bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef __KERNEL__
+ cmp $len, #128
+#ifndef __thumb__
+ blo AES_cbc_encrypt
+#else
+ bhs 1f
+ b AES_cbc_encrypt
+1:
+#endif
+#endif
+
+ @ it is up to the caller to make sure we are called with enc == 0
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr $ivp, [ip] @ IV is 1st arg on the stack
+ mov $len, $len, lsr#4 @ len in 16 byte blocks
+ sub sp, #0x10 @ scratch space to carry over the IV
+ mov $fp, sp @ save sp
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+	add	r12, #`128-32`			@ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12 @ sp is $keysched
+ bl _bsaes_key_convert
+ vldmia $keysched, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia $keysched, {@XMM[7]}
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, $key, #248
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+
+.align 2
+0:
+#endif
+
+ vld1.8 {@XMM[15]}, [$ivp] @ load IV
+ b .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+ subs $len, $len, #0x8
+ bmi .Lcbc_dec_loop_finish
+
+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input
+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, $keysched @ pass the key
+#else
+ add r4, $key, #248
+#endif
+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]!
+ mov r5, $rounds
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]
+ sub $inp, $inp, #0x60
+ vstmia $fp, {@XMM[15]} @ put aside IV
+
+ bl _bsaes_decrypt8
+
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[3], @XMM[3], @XMM[13]
+ vst1.8 {@XMM[6]}, [$out]!
+ veor @XMM[5], @XMM[5], @XMM[14]
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ vst1.8 {@XMM[5]}, [$out]!
+
+ b .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+ adds $len, $len, #8
+ beq .Lcbc_dec_done
+
+ vld1.8 {@XMM[0]}, [$inp]! @ load input
+ cmp $len, #2
+ blo .Lcbc_dec_one
+ vld1.8 {@XMM[1]}, [$inp]!
+#ifndef BSAES_ASM_EXTENDED_KEY
+ mov r4, $keysched @ pass the key
+#else
+ add r4, $key, #248
+#endif
+ mov r5, $rounds
+ vstmia $fp, {@XMM[15]} @ put aside IV
+ beq .Lcbc_dec_two
+ vld1.8 {@XMM[2]}, [$inp]!
+ cmp $len, #4
+ blo .Lcbc_dec_three
+ vld1.8 {@XMM[3]}, [$inp]!
+ beq .Lcbc_dec_four
+ vld1.8 {@XMM[4]}, [$inp]!
+ cmp $len, #6
+ blo .Lcbc_dec_five
+ vld1.8 {@XMM[5]}, [$inp]!
+ beq .Lcbc_dec_six
+ vld1.8 {@XMM[6]}, [$inp]!
+ sub $inp, $inp, #0x70
+
+ bl _bsaes_decrypt8
+
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[3], @XMM[3], @XMM[13]
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ vst1.8 {@XMM[3]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+ sub $inp, $inp, #0x60
+ bl _bsaes_decrypt8
+ vldmia $fp,{@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[12]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[7], @XMM[7], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ vst1.8 {@XMM[7]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+ sub $inp, $inp, #0x50
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[2], @XMM[2], @XMM[11]
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ vst1.8 {@XMM[2]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+ sub $inp, $inp, #0x40
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[10]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[4], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ vst1.8 {@XMM[4]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+ sub $inp, $inp, #0x30
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[15]}, [$inp]!
+ veor @XMM[1], @XMM[1], @XMM[8]
+ veor @XMM[6], @XMM[6], @XMM[9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ vst1.8 {@XMM[6]}, [$out]!
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+ sub $inp, $inp, #0x20
+ bl _bsaes_decrypt8
+ vldmia $fp, {@XMM[14]} @ reload IV
+ vld1.8 {@XMM[8]}, [$inp]! @ reload input
+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
+ vld1.8 {@XMM[15]}, [$inp]! @ reload input
+ veor @XMM[1], @XMM[1], @XMM[8]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ b .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+ sub $inp, $inp, #0x10
+ mov $rounds, $out @ save original out pointer
+ mov $out, $fp @ use the iv scratch space as out buffer
+ mov r2, $key
+ vmov @XMM[4],@XMM[15] @ just in case ensure that IV
+ vmov @XMM[5],@XMM[0] @ and input are preserved
+ bl AES_decrypt
+ vld1.8 {@XMM[0]}, [$fp,:64] @ load result
+ veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
+ vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
+ vst1.8 {@XMM[0]}, [$rounds] @ write output
+
+.Lcbc_dec_done:
+#ifndef BSAES_ASM_EXTENDED_KEY
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+.Lcbc_dec_bzero: @ wipe key schedule [if any]
+ vstmia $keysched!, {q0-q1}
+ cmp $keysched, $fp
+ bne .Lcbc_dec_bzero
+#endif
+
+ mov sp, $fp
+ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
+ vst1.8 {@XMM[15]}, [$ivp] @ return IV
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc}
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+___
+}
+{
+my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
+my $const = "r6"; # shared with _bsaes_encrypt8_alt
+my $keysched = "sp";
+
+$code.=<<___;
+.extern AES_encrypt
+.global bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+ cmp $len, #8 @ use plain AES for
+ blo .Lctr_enc_short @ small sizes
+
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr}
+ VFP_ABI_PUSH
+ ldr $ctr, [ip] @ ctr is 1st arg on the stack
+ sub sp, sp, #0x10 @ scratch space to carry over the ctr
+ mov $fp, sp @ save sp
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ add r12, #`128-32` @ size of bit-sliced key schedule
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12 @ sp is $keysched
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+ vld1.8 {@XMM[0]}, [$ctr] @ load counter
+ add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr
+ vldmia $keysched, {@XMM[4]} @ load round0 key
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ @ populate the key schedule
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+
+.align 2
+0: add r12, $key, #248
+ vld1.8 {@XMM[0]}, [$ctr] @ load counter
+ adrl $ctr, .LREVM0SR @ borrow $ctr
+ vldmia r12, {@XMM[4]} @ load round0 key
+ sub sp, #0x10 @ place for adjusted round0 key
+#endif
+
+ vmov.i32 @XMM[8],#1 @ compose 1<<96
+ veor @XMM[9],@XMM[9],@XMM[9]
+ vrev32.8 @XMM[0],@XMM[0]
+ vext.8 @XMM[8],@XMM[9],@XMM[8],#4
+ vrev32.8 @XMM[4],@XMM[4]
+ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+ vstmia $keysched, {@XMM[4]} @ save adjusted round0 key
+ b .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+ vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96
+ vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1
+ vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2
+ vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3
+ vadd.u32 @XMM[4], @XMM[1], @XMM[10]
+ vadd.u32 @XMM[5], @XMM[2], @XMM[10]
+ vadd.u32 @XMM[6], @XMM[3], @XMM[10]
+ vadd.u32 @XMM[7], @XMM[4], @XMM[10]
+ vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter
+
+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+ @ to flip byte order in 32-bit counter
+
+ vldmia $keysched, {@XMM[9]} @ load round0 key
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, $keysched, #0x10 @ pass next round key
+#else
+ add r4, $key, #`248+16`
+#endif
+ vldmia $ctr, {@XMM[8]} @ .LREVM0SR
+ mov r5, $rounds @ pass rounds
+ vstmia $fp, {@XMM[10]} @ save next counter
+ sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants
+
+ bl _bsaes_encrypt8_alt
+
+ subs $len, $len, #8
+ blo .Lctr_enc_loop_done
+
+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input
+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
+ veor @XMM[0], @XMM[8]
+ veor @XMM[1], @XMM[9]
+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
+ veor @XMM[4], @XMM[10]
+ veor @XMM[6], @XMM[11]
+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+ veor @XMM[3], @XMM[12]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output
+ veor @XMM[7], @XMM[13]
+ veor @XMM[2], @XMM[14]
+ vst1.8 {@XMM[4]}, [$out]!
+ veor @XMM[5], @XMM[15]
+ vst1.8 {@XMM[6]}, [$out]!
+ vmov.i32 @XMM[8], #1 @ compose 1<<96
+ vst1.8 {@XMM[3]}, [$out]!
+ veor @XMM[9], @XMM[9], @XMM[9]
+ vst1.8 {@XMM[7]}, [$out]!
+ vext.8 @XMM[8], @XMM[9], @XMM[8], #4
+ vst1.8 {@XMM[2]}, [$out]!
+ vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+ vst1.8 {@XMM[5]}, [$out]!
+ vldmia $fp, {@XMM[0]} @ load counter
+
+ bne .Lctr_enc_loop
+ b .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+ add $len, $len, #8
+ vld1.8 {@XMM[8]}, [$inp]! @ load input
+ veor @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]! @ write output
+ cmp $len, #2
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[9]}, [$inp]!
+ veor @XMM[1], @XMM[9]
+ vst1.8 {@XMM[1]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[10]}, [$inp]!
+ veor @XMM[4], @XMM[10]
+ vst1.8 {@XMM[4]}, [$out]!
+ cmp $len, #4
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[11]}, [$inp]!
+ veor @XMM[6], @XMM[11]
+ vst1.8 {@XMM[6]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[12]}, [$inp]!
+ veor @XMM[3], @XMM[12]
+ vst1.8 {@XMM[3]}, [$out]!
+ cmp $len, #6
+ blo .Lctr_enc_done
+ vld1.8 {@XMM[13]}, [$inp]!
+ veor @XMM[7], @XMM[13]
+ vst1.8 {@XMM[7]}, [$out]!
+ beq .Lctr_enc_done
+ vld1.8 {@XMM[14]}, [$inp]
+ veor @XMM[2], @XMM[14]
+ vst1.8 {@XMM[2]}, [$out]!
+
+.Lctr_enc_done:
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifndef BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero: @ wipe key schedule [if any]
+ vstmia $keysched!, {q0-q1}
+ cmp $keysched, $fp
+ bne .Lctr_enc_bzero
+#else
+ vstmia $keysched, {q0-q1}
+#endif
+
+ mov sp, $fp
+ add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.align 4
+.Lctr_enc_short:
+ ldr ip, [sp] @ ctr pointer is passed on stack
+ stmdb sp!, {r4-r8, lr}
+
+ mov r4, $inp @ copy arguments
+ mov r5, $out
+ mov r6, $len
+ mov r7, $key
+ ldr r8, [ip, #12] @ load counter LSW
+ vld1.8 {@XMM[1]}, [ip] @ load whole counter value
+#ifdef __ARMEL__
+ rev r8, r8
+#endif
+ sub sp, sp, #0x10
+ vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value
+ sub sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+ add r0, sp, #0x10 @ input counter value
+ mov r1, sp @ output on the stack
+ mov r2, r7 @ key
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [r4]! @ load input
+ vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter
+ add r8, r8, #1
+#ifdef __ARMEL__
+ rev r0, r8
+ str r0, [sp, #0x1c] @ next counter value
+#else
+ str r8, [sp, #0x1c] @ next counter value
+#endif
+ veor @XMM[0],@XMM[0],@XMM[1]
+ vst1.8 {@XMM[0]}, [r5]! @ store output
+ subs r6, r6, #1
+ bne .Lctr_enc_short_loop
+
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+ vstmia sp!, {q0-q1}
+
+ ldmia sp!, {r4-r8, pc}
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+___
+}
+{
+######################################################################
+# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
+# const AES_KEY *key1, const AES_KEY *key2,
+# const unsigned char iv[16]);
+#
+my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3)));
+my $const="r6"; # returned by _bsaes_key_convert
+my $twmask=@XMM[5];
+my @T=@XMM[6..7];
+
+$code.=<<___;
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future $fp
+
+ mov $inp, r0
+ mov $out, r1
+ mov $len, r2
+ mov $key, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0,sp @ pointer to initial tweak
+#endif
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+ mov $fp, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #`128-32` @ size of bit-sliced key schedule
+ sub r12, #`32+16` @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]} @ save last round key
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key
+ vstmia r12, {@XMM[7]}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+
+ vld1.8 {@XMM[8]}, [r0] @ initial tweak
+ adr $magic, .Lxts_magic
+
+ subs $len, #0x80
+ blo .Lxts_enc_short
+ b .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
+ vst1.64 {@XMM[15]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ veor @XMM[8], @XMM[8], @T[0]
+ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ veor @XMM[7], @XMM[7], @XMM[15]
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[2], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ veor @XMM[13], @XMM[5], @XMM[15]
+ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ subs $len, #0x80
+ bpl .Lxts_enc_loop
+
+.Lxts_enc_short:
+ adds $len, #0x70
+ bmi .Lxts_enc_done
+
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+ subs $len, #0x10
+ bmi .Lxts_enc_`$i-9`
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ sub $len, #0x10
+ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vld1.64 {@XMM[14]}, [r0,:128]!
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[2], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ vst1.8 {@XMM[12]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
+
+ veor @XMM[4], @XMM[4], @XMM[12]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[5], @XMM[5], @XMM[13]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+ .quad 1, 0x87
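+	@ 0x87 encodes the XTS reduction polynomial x^128 + x^7 + x^2 + x + 1;
+	@ the tweak-generation loops double the tweak in GF(2^128) with a shift
+	@ and a conditional XOR against this constant.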
+
+.align 5
+.Lxts_enc_5:
+ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
+
+ veor @XMM[3], @XMM[3], @XMM[11]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[4], @XMM[4], @XMM[12]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ veor @XMM[10], @XMM[3], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ vst1.8 {@XMM[10]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
+
+ veor @XMM[2], @XMM[2], @XMM[10]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[3], @XMM[3], @XMM[11]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[6], @XMM[11]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
+
+ veor @XMM[1], @XMM[1], @XMM[9]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[2], @XMM[2], @XMM[10]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ vld1.64 {@XMM[10]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[4], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ vst1.8 {@XMM[8]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
+
+ veor @XMM[0], @XMM[0], @XMM[8]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[1], @XMM[1], @XMM[9]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_encrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]!
+ mov $fp, r4
+
+ vmov @XMM[8], @XMM[9] @ next round tweak
+
+.Lxts_enc_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds $len, #0x10
+ beq .Lxts_enc_ret
+ sub r6, $out, #0x10
+
+.Lxts_enc_steal:
+ ldrb r0, [$inp], #1
+ ldrb r1, [$out, #-0x10]
+ strb r0, [$out, #-0x10]
+ strb r1, [$out], #1
+
+ subs $len, #1
+ bhi .Lxts_enc_steal
+
+ vld1.8 {@XMM[0]}, [r6]
+ mov r0, sp
+ veor @XMM[0], @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_encrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [r6]
+ mov $fp, r4
+#endif
+
+.Lxts_enc_ret:
+ bic r0, $fp, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_enc_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_enc_bzero
+
+ mov sp, $fp
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {@XMM[8]}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+ mov ip, sp
+ stmdb sp!, {r4-r10, lr} @ 0x20
+ VFP_ABI_PUSH
+ mov r6, sp @ future $fp
+
+ mov $inp, r0
+ mov $out, r1
+ mov $len, r2
+ mov $key, r3
+
+ sub r0, sp, #0x10 @ 0x10
+ bic r0, #0xf @ align at 16 bytes
+ mov sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+ ldr r0, [ip] @ pointer to input tweak
+#else
+ @ generate initial tweak
+ ldr r0, [ip, #4] @ iv[]
+ mov r1, sp
+ ldr r2, [ip, #0] @ key2
+ bl AES_encrypt
+ mov r0, sp @ pointer to initial tweak
+#endif
+
+ ldr $rounds, [$key, #240] @ get # of rounds
+ mov $fp, r6
+#ifndef BSAES_ASM_EXTENDED_KEY
+ @ allocate the key schedule on the stack
+ sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key
+ @ add r12, #`128-32` @ size of bit-sliced key schedule
+ sub r12, #`32+16` @ place for tweak[9]
+
+ @ populate the key schedule
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ mov sp, r12
+ add r12, #0x90 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, sp, #0x90
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+#else
+ ldr r12, [$key, #244]
+ eors r12, #1
+ beq 0f
+
+ str r12, [$key, #244]
+ mov r4, $key @ pass key
+ mov r5, $rounds @ pass # of rounds
+ add r12, $key, #248 @ pass key schedule
+ bl _bsaes_key_convert
+ add r4, $key, #248
+ vldmia r4, {@XMM[6]}
+ vstmia r12, {@XMM[15]} @ save last round key
+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key
+ vstmia r4, {@XMM[7]}
+
+.align 2
+0: sub sp, #0x90 @ place for tweak[9]
+#endif
+ vld1.8 {@XMM[8]}, [r0] @ initial tweak
+ adr $magic, .Lxts_magic
+
+ tst $len, #0xf @ if not multiple of 16
+ it ne @ Thumb2 thing, sanity check in ARM
+ subne $len, #0x10 @ subtract another 16 bytes
+ subs $len, #0x80
+
+ blo .Lxts_dec_short
+ b .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ vadd.u64 @XMM[8], @XMM[15], @XMM[15]
+ vst1.64 {@XMM[15]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ veor @XMM[8], @XMM[8], @T[0]
+ vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ veor @XMM[7], @XMM[7], @XMM[15]
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]!
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[3], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ veor @XMM[13], @XMM[5], @XMM[15]
+ vst1.8 {@XMM[12]-@XMM[13]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+
+ subs $len, #0x80
+ bpl .Lxts_dec_loop
+
+.Lxts_dec_short:
+ adds $len, #0x70
+ bmi .Lxts_dec_done
+
+ vldmia $magic, {$twmask} @ load XTS magic
+ vshr.s64 @T[0], @XMM[8], #63
+ mov r0, sp
+ vand @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+ vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1]
+ vst1.64 {@XMM[$i-1]}, [r0,:128]!
+ vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+ vshr.s64 @T[1], @XMM[$i], #63
+ veor @XMM[$i], @XMM[$i], @T[0]
+ vand @T[1], @T[1], $twmask
+___
+ @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+ vld1.8 {@XMM[$i-10]}, [$inp]!
+ subs $len, #0x10
+ bmi .Lxts_dec_`$i-9`
+___
+$code.=<<___ if ($i>=11);
+ veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
+___
+}
+$code.=<<___;
+ sub $len, #0x10
+ vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak
+
+ vld1.8 {@XMM[6]}, [$inp]!
+ veor @XMM[5], @XMM[5], @XMM[13]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[6], @XMM[6], @XMM[14]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vld1.64 {@XMM[14]}, [r0,:128]!
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ veor @XMM[12], @XMM[3], @XMM[14]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+ vst1.8 {@XMM[12]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+ vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak
+
+ veor @XMM[4], @XMM[4], @XMM[12]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[5], @XMM[5], @XMM[13]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ veor @XMM[11], @XMM[7], @XMM[13]
+ vst1.8 {@XMM[10]-@XMM[11]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+ vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak
+
+ veor @XMM[3], @XMM[3], @XMM[11]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[4], @XMM[4], @XMM[12]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ vld1.64 {@XMM[12]}, [r0,:128]!
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ veor @XMM[10], @XMM[2], @XMM[12]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+ vst1.8 {@XMM[10]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+ vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak
+
+ veor @XMM[2], @XMM[2], @XMM[10]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[3], @XMM[3], @XMM[11]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
+ vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ veor @XMM[9], @XMM[4], @XMM[11]
+ vst1.8 {@XMM[8]-@XMM[9]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+ vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak
+
+ veor @XMM[1], @XMM[1], @XMM[9]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[2], @XMM[2], @XMM[10]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ vld1.64 {@XMM[10]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ veor @XMM[8], @XMM[6], @XMM[10]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+ vst1.8 {@XMM[8]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+ vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak
+
+ veor @XMM[0], @XMM[0], @XMM[8]
+#ifndef BSAES_ASM_EXTENDED_KEY
+ add r4, sp, #0x90 @ pass key schedule
+#else
+ add r4, $key, #248 @ pass key schedule
+#endif
+ veor @XMM[1], @XMM[1], @XMM[9]
+ mov r5, $rounds @ pass rounds
+ mov r0, sp
+
+ bl _bsaes_decrypt8
+
+ vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]!
+ veor @XMM[0], @XMM[0], @XMM[ 8]
+ veor @XMM[1], @XMM[1], @XMM[ 9]
+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]!
+
+ vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak
+ b .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+ mov r5, $magic @ preserve magic
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [$out]!
+ mov $fp, r4
+ mov $magic, r5
+
+ vmov @XMM[8], @XMM[9] @ next round tweak
+
+.Lxts_dec_done:
+#ifndef XTS_CHAIN_TWEAK
+ adds $len, #0x10
+ beq .Lxts_dec_ret
+
+ @ calculate one round of extra tweak for the stolen ciphertext
+ vldmia $magic, {$twmask}
+ vshr.s64 @XMM[6], @XMM[8], #63
+ vand @XMM[6], @XMM[6], $twmask
+ vadd.u64 @XMM[9], @XMM[8], @XMM[8]
+ vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
+ veor @XMM[9], @XMM[9], @XMM[6]
+
+ @ perform the final decryption with the last tweak value
+ vld1.8 {@XMM[0]}, [$inp]!
+ mov r0, sp
+ veor @XMM[0], @XMM[0], @XMM[9]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+ mov r4, $fp @ preserve fp
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[9]
+ vst1.8 {@XMM[0]}, [$out]
+
+ mov r6, $out
+.Lxts_dec_steal:
+ ldrb r1, [$out]
+ ldrb r0, [$inp], #1
+ strb r1, [$out, #0x10]
+ strb r0, [$out], #1
+
+ subs $len, #1
+ bhi .Lxts_dec_steal
+
+ vld1.8 {@XMM[0]}, [r6]
+ mov r0, sp
+ veor @XMM[0], @XMM[8]
+ mov r1, sp
+ vst1.8 {@XMM[0]}, [sp,:128]
+ mov r2, $key
+
+ bl AES_decrypt
+
+ vld1.8 {@XMM[0]}, [sp,:128]
+ veor @XMM[0], @XMM[0], @XMM[8]
+ vst1.8 {@XMM[0]}, [r6]
+ mov $fp, r4
+#endif
+
+.Lxts_dec_ret:
+ bic r0, $fp, #0xf
+ vmov.i32 q0, #0
+ vmov.i32 q1, #0
+#ifdef XTS_CHAIN_TWEAK
+ ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak
+#endif
+.Lxts_dec_bzero: @ wipe key schedule [if any]
+ vstmia sp!, {q0-q1}
+ cmp sp, r0
+ bne .Lxts_dec_bzero
+
+ mov sp, $fp
+#ifdef XTS_CHAIN_TWEAK
+ vst1.8 {@XMM[8]}, [r1]
+#endif
+ VFP_ABI_POP
+ ldmia sp!, {r4-r10, pc} @ return
+
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+___
+}
+$code.=<<___;
+#endif
+___
+
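+# expand the `...` constructs (embedded Perl such as &Dlo()/&Dhi() and the
+# #-immediate arithmetic) that were left in the assembly template above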
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
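+# prepend this file's own leading comment block to the output, converting
+# '#' comments into the assembler's '@' comments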
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+print $code;
+
+close STDOUT;
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
new file mode 100644
index 000000000000..50013c0e2864
--- /dev/null
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -0,0 +1,634 @@
+/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code 32
+.fpu neon
+
+.text
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define K1 0x5A827999
+#define K2 0x6ED9EBA1
+#define K3 0x8F1BBCDC
+#define K4 0xCA62C1D6
+.align 4
+.LK_VEC:
+.LK1: .long K1, K1, K1, K1
+.LK2: .long K2, K2, K2, K2
+.LK3: .long K3, K3, K3, K3
+.LK4: .long K4, K4, K4, K4
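+/* Each constant is replicated into all four 32-bit lanes so that a single
+ * vadd.u32 adds K to four expanded message words at a time. */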
+
+
+/* Register macros */
+
+#define RSTATE r0
+#define RDATA r1
+#define RNBLKS r2
+#define ROLDSTACK r3
+#define RWK lr
+
+#define _a r4
+#define _b r5
+#define _c r6
+#define _d r7
+#define _e r8
+
+#define RT0 r9
+#define RT1 r10
+#define RT2 r11
+#define RT3 r12
+
+#define W0 q0
+#define W1 q1
+#define W2 q2
+#define W3 q3
+#define W4 q4
+#define W5 q5
+#define W6 q6
+#define W7 q7
+
+#define tmp0 q8
+#define tmp1 q9
+#define tmp2 q10
+#define tmp3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round function macros. */
+
+#define WK_offs(i) (((i) & 15) * 4)
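+/* W[i]+K values live in a 16-slot ring buffer on the stack; WK_offs(i)
+ * returns the byte offset of slot i mod 16. */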
+
+#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ bic RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ and RT1, c, b; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add RT0, RT0, RT3; \
+ add e, e, RT1; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0;
+
+#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, d, b; \
+ add e, e, a, ror #(32 - 5); \
+ eor RT0, RT0, c; \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT3; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT0; \
+
+#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ ldr RT3, [sp, WK_offs(i)]; \
+ pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ eor RT0, b, c; \
+ and RT1, b, c; \
+ add e, e, a, ror #(32 - 5); \
+ pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ and RT0, RT0, d; \
+ add RT1, RT1, RT3; \
+ add e, e, RT0; \
+ ror b, #(32 - 30); \
+ pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+ add e, e, RT1;
+
+#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define R(a,b,c,d,e,f,i) \
+ _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
+ W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define dummy(...)
+
+
+/* Input expansion macros. */
+
+/********* Precalc macros for rounds 0-15 *************************************/
+
+#define W_PRECALC_00_15() \
+ add RWK, sp, #(WK_offs(0)); \
+ \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+ vrev32.8 W0, tmp0; /* big => little */ \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+ vadd.u32 tmp0, W0, curK; \
+ vrev32.8 W7, tmp1; /* big => little */ \
+ vrev32.8 W6, tmp2; /* big => little */ \
+ vadd.u32 tmp1, W7, curK; \
+ vrev32.8 W5, tmp3; /* big => little */ \
+ vadd.u32 tmp2, W6, curK; \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+ vadd.u32 tmp3, W5, curK; \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vld1.32 {tmp0, tmp1}, [RDATA]!; \
+
+#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(0)); \
+
+#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W0, tmp0; /* big => little */ \
+
+#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vld1.32 {tmp2, tmp3}, [RDATA]!; \
+
+#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W0, curK; \
+
+#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W7, tmp1; /* big => little */ \
+
+#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W6, tmp2; /* big => little */ \
+
+#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp1, W7, curK; \
+
+#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vrev32.8 W5, tmp3; /* big => little */ \
+
+#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp2, W6, curK; \
+
+#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0, tmp1}, [RWK]!; \
+
+#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp3, W5, curK; \
+
+#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp2, tmp3}, [RWK]; \
+
+
+/********* Precalc macros for rounds 16-31 ************************************/
+
+#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0; \
+ vext.8 W, W_m16, W_m12, #8; \
+
+#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(i)); \
+ vext.8 tmp0, W_m04, tmp0, #4; \
+
+#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0, W_m16; \
+ veor.32 W, W, W_m08; \
+
+#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp1, tmp1; \
+ veor W, W, tmp0; \
+
+#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp0, W, #1; \
+
+#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vext.8 tmp1, tmp1, W, #(16-12); \
+ vshr.u32 W, W, #31; \
+
+#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vorr tmp0, tmp0, W; \
+ vshr.u32 W, tmp1, #30; \
+
+#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp1, tmp1, #2; \
+
+#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor tmp0, tmp0, W; \
+
+#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, tmp0, tmp1; \
+
+#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/********* Precalc macros for rounds 32-79 ************************************/
+
+#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, W_m28; \
+
+#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vext.8 tmp0, W_m08, W_m04, #8; \
+
+#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, W_m16; \
+
+#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ veor W, tmp0; \
+
+#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ add RWK, sp, #(WK_offs(i&~3)); \
+
+#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshl.u32 tmp1, W, #2; \
+
+#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vshr.u32 tmp0, W, #30; \
+
+#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vorr W, tmp0, tmp1; \
+
+#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
+ vst1.32 {tmp0}, [RWK];
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * sha1_transform_neon (void *ctx, const unsigned char *data,
+ * unsigned int nblks)
+ */
+.align 3
+ENTRY(sha1_transform_neon)
+ /* input:
+ * r0: ctx, CTX
+ * r1: data (64*nblks bytes)
+ * r2: nblks
+ */
+
+ cmp RNBLKS, #0;
+ beq .Ldo_nothing;
+
+ push {r4-r12, lr};
+ /*vpush {q4-q7};*/
+
+ adr RT3, .LK_VEC;
+
+ mov ROLDSTACK, sp;
+
+ /* Align stack. */
+ sub RT0, sp, #(16*4);
+ and RT0, #(~(16-1));
+ mov sp, RT0;
+
+ vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
+
+ /* Get the values of the chaining variables. */
+ ldm RSTATE, {_a-_e};
+
+ vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
+
+#undef curK
+#define curK qK1
+ /* Precalc 0-15. */
+ W_PRECALC_00_15();
+
+.Loop:
+ /* Transform 0-15 + Precalc 16-31. */
+ _R( _a, _b, _c, _d, _e, F1, 0,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 1,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 2,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
+ W4, W5, W6, W7, W0, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 3,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
+ W4, W5, W6, W7, W0, _, _, _ );
+
+#undef curK
+#define curK qK2
+ _R( _b, _c, _d, _e, _a, F1, 4,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 5,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 6,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
+ W3, W4, W5, W6, W7, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F1, 7,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
+ W3, W4, W5, W6, W7, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F1, 8,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 9,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 10,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
+ W2, W3, W4, W5, W6, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F1, 11,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
+ W2, W3, W4, W5, W6, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F1, 12,
+ WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F1, 13,
+ WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F1, 14,
+ WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
+ W1, W2, W3, W4, W5, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F1, 15,
+ WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
+ W1, W2, W3, W4, W5, _, _, _ );
+
+ /* Transform 16-63 + Precalc 32-79. */
+ _R( _e, _a, _b, _c, _d, F1, 16,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _d, _e, _a, _b, _c, F1, 17,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _c, _d, _e, _a, _b, F1, 18,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F1, 19,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _a, _b, _c, _d, _e, F2, 20,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _e, _a, _b, _c, _d, F2, 21,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _d, _e, _a, _b, _c, F2, 22,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F2, 23,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+
+#undef curK
+#define curK qK3
+ _R( _b, _c, _d, _e, _a, F2, 24,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _a, _b, _c, _d, _e, F2, 25,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _e, _a, _b, _c, _d, F2, 26,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F2, 27,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+
+ _R( _c, _d, _e, _a, _b, F2, 28,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _b, _c, _d, _e, _a, F2, 29,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _a, _b, _c, _d, _e, F2, 30,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F2, 31,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+
+ _R( _d, _e, _a, _b, _c, F2, 32,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _c, _d, _e, _a, _b, F2, 33,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _b, _c, _d, _e, _a, F2, 34,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+ _R( _a, _b, _c, _d, _e, F2, 35,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48,
+ W4, W5, W6, W7, W0, W1, W2, W3);
+
+ _R( _e, _a, _b, _c, _d, F2, 36,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _d, _e, _a, _b, _c, F2, 37,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _c, _d, _e, _a, _b, F2, 38,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+ _R( _b, _c, _d, _e, _a, F2, 39,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52,
+ W3, W4, W5, W6, W7, W0, W1, W2);
+
+ _R( _a, _b, _c, _d, _e, F3, 40,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _e, _a, _b, _c, _d, F3, 41,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _d, _e, _a, _b, _c, F3, 42,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+ _R( _c, _d, _e, _a, _b, F3, 43,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56,
+ W2, W3, W4, W5, W6, W7, W0, W1);
+
+#undef curK
+#define curK qK4
+ _R( _b, _c, _d, _e, _a, F3, 44,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _a, _b, _c, _d, _e, F3, 45,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _e, _a, _b, _c, _d, F3, 46,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+ _R( _d, _e, _a, _b, _c, F3, 47,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60,
+ W1, W2, W3, W4, W5, W6, W7, W0);
+
+ _R( _c, _d, _e, _a, _b, F3, 48,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _b, _c, _d, _e, _a, F3, 49,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _a, _b, _c, _d, _e, F3, 50,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+ _R( _e, _a, _b, _c, _d, F3, 51,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64,
+ W0, W1, W2, W3, W4, W5, W6, W7);
+
+ _R( _d, _e, _a, _b, _c, F3, 52,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _c, _d, _e, _a, _b, F3, 53,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _b, _c, _d, _e, _a, F3, 54,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+ _R( _a, _b, _c, _d, _e, F3, 55,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68,
+ W7, W0, W1, W2, W3, W4, W5, W6);
+
+ _R( _e, _a, _b, _c, _d, F3, 56,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _d, _e, _a, _b, _c, F3, 57,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _c, _d, _e, _a, _b, F3, 58,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+ _R( _b, _c, _d, _e, _a, F3, 59,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72,
+ W6, W7, W0, W1, W2, W3, W4, W5);
+
+ subs RNBLKS, #1;
+
+ _R( _a, _b, _c, _d, _e, F4, 60,
+ WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _e, _a, _b, _c, _d, F4, 61,
+ WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _d, _e, _a, _b, _c, F4, 62,
+ WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+ _R( _c, _d, _e, _a, _b, F4, 63,
+ WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76,
+ W5, W6, W7, W0, W1, W2, W3, W4);
+
+ beq .Lend;
+
+ /* Transform 64-79 + Precalc 0-15 of next block. */
+#undef curK
+#define curK qK1
+ _R( _b, _c, _d, _e, _a, F4, 64,
+ WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 65,
+ WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 66,
+ WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 67,
+ WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _c, _d, _e, _a, _b, F4, 68,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 69,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 70,
+ WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _e, _a, _b, _c, _d, F4, 71,
+ WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _d, _e, _a, _b, _c, F4, 72,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 73,
+ dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 74,
+ WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _a, _b, _c, _d, _e, F4, 75,
+ WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+
+ _R( _e, _a, _b, _c, _d, F4, 76,
+ WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _d, _e, _a, _b, _c, F4, 77,
+ WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _c, _d, _e, _a, _b, F4, 78,
+ WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
+ _R( _b, _c, _d, _e, _a, F4, 79,
+ WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ b .Loop;
+
+.Lend:
+ /* Transform 64-79 */
+ R( _b, _c, _d, _e, _a, F4, 64 );
+ R( _a, _b, _c, _d, _e, F4, 65 );
+ R( _e, _a, _b, _c, _d, F4, 66 );
+ R( _d, _e, _a, _b, _c, F4, 67 );
+ R( _c, _d, _e, _a, _b, F4, 68 );
+ R( _b, _c, _d, _e, _a, F4, 69 );
+ R( _a, _b, _c, _d, _e, F4, 70 );
+ R( _e, _a, _b, _c, _d, F4, 71 );
+ R( _d, _e, _a, _b, _c, F4, 72 );
+ R( _c, _d, _e, _a, _b, F4, 73 );
+ R( _b, _c, _d, _e, _a, F4, 74 );
+ R( _a, _b, _c, _d, _e, F4, 75 );
+ R( _e, _a, _b, _c, _d, F4, 76 );
+ R( _d, _e, _a, _b, _c, F4, 77 );
+ R( _c, _d, _e, _a, _b, F4, 78 );
+ R( _b, _c, _d, _e, _a, F4, 79 );
+
+ mov sp, ROLDSTACK;
+
+ /* Update the chaining variables. */
+ ldm RSTATE, {RT0-RT3};
+ add _a, RT0;
+ ldr RT0, [RSTATE, #state_h4];
+ add _b, RT1;
+ add _c, RT2;
+ add _d, RT3;
+ /*vpop {q4-q7};*/
+ add _e, RT0;
+ stm RSTATE, {_a-_e};
+
+ pop {r4-r12, pc};
+
+.Ldo_nothing:
+ bx lr
+ENDPROC(sha1_transform_neon)
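
A note on the two message-schedule recurrences used above. Rounds 16-31 (the WPRECALC_16_31_* macros) evaluate the standard SHA-1 schedule W[i] = rol1(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]), with a fix-up for the W[i-3] term that falls inside the four-word group being computed. Rounds 32-79 (the WPRECALC_32_79_* macros) switch to the equivalent form W[i] = rol2(W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32]), whose shortest backward reference is six words, so a whole q register of four W values can be produced per step with no in-group dependency. The standalone C sketch below (not part of the patch; names are illustrative) checks that the two forms agree for i >= 32.

/*
 * Sketch only: verify that the "reach back 6" recurrence used for
 * rounds 32..79 matches the standard SHA-1 message schedule.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t rol32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

int main(void)
{
	uint32_t w[80];
	int i, ok = 1;

	/* Arbitrary 16-word input block. */
	for (i = 0; i < 16; i++)
		w[i] = 0x01234567u * (i + 1) ^ 0x89abcdefu;

	/* Standard recurrence, as handled by the 16..31 macros. */
	for (i = 16; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);

	/* Alternate recurrence, as handled by the 32..79 macros. */
	for (i = 32; i < 80; i++)
		ok &= (w[i] == rol32(w[i - 6] ^ w[i - 16] ^ w[i - 28] ^ w[i - 32], 2));

	printf("schedules %s\n", ok ? "match" : "differ");
	return 0;
}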
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index ace4cd67464c..e31b0440c613 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -23,32 +23,27 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
+#include <asm/crypto/sha1.h>
-struct SHA1_CTX {
- uint32_t h0,h1,h2,h3,h4;
- u64 count;
- u8 data[SHA1_BLOCK_SIZE];
-};
-asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
+asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
static int sha1_init(struct shash_desc *desc)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
- memset(sctx, 0, sizeof(*sctx));
- sctx->h0 = SHA1_H0;
- sctx->h1 = SHA1_H1;
- sctx->h2 = SHA1_H2;
- sctx->h3 = SHA1_H3;
- sctx->h4 = SHA1_H4;
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
return 0;
}
-static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
+static int __sha1_update(struct sha1_state *sctx, const u8 *data,
+ unsigned int len, unsigned int partial)
{
unsigned int done = 0;
@@ -56,43 +51,44 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->data + partial, data, done);
- sha1_block_data_order(sctx, sctx->data, 1);
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_block_data_order(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
- sha1_block_data_order(sctx, data + done, rounds);
+ sha1_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
- memcpy(sctx->data, data + done, len - done);
+ memcpy(sctx->buffer, data + done, len - done);
return 0;
}
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
- memcpy(sctx->data + partial, data, len);
+ memcpy(sctx->buffer + partial, data, len);
return 0;
}
res = __sha1_update(sctx, data, len, partial);
return res;
}
+EXPORT_SYMBOL_GPL(sha1_update_arm);
/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
@@ -106,7 +102,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* We need to fill a whole block for __sha1_update() */
if (padlen <= 56) {
sctx->count += padlen;
- memcpy(sctx->data + index, padding, padlen);
+ memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_update(sctx, padding, padlen, index);
}
@@ -114,7 +110,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* Store state in digest */
for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
+ dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
@@ -124,7 +120,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
static int sha1_export(struct shash_desc *desc, void *out)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
@@ -132,7 +128,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
static int sha1_import(struct shash_desc *desc, const void *in)
{
- struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+ struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
@@ -141,12 +137,12 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init,
- .update = sha1_update,
+ .update = sha1_update_arm,
.final = sha1_final,
.export = sha1_export,
.import = sha1_import,
- .descsize = sizeof(struct SHA1_CTX),
- .statesize = sizeof(struct SHA1_CTX),
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",
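
Both sha1_block_data_order() and sha1_transform_neon() share the same contract: they consume only whole 64-byte blocks, so __sha1_update() first tops up any buffered partial block, then hands all remaining whole blocks to the assembler routine in a single call, and finally stashes the tail in sctx->buffer for the next update or the final padding step. A minimal, self-contained C sketch of that bookkeeping follows (not the patch's code; "transform" and the sketch_* names are illustrative stand-ins).

/*
 * Sketch only: the block-walking bookkeeping performed by the glue code.
 * transform() stands in for the assembler routine and merely counts the
 * whole blocks it is handed.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define BLOCK_SIZE 64

struct sketch_state {
	uint32_t state[5];
	uint64_t count;			/* total bytes seen so far */
	uint8_t buffer[BLOCK_SIZE];	/* unconsumed tail bytes */
};

static void transform(uint32_t *state, const uint8_t *data, unsigned int nblks)
{
	state[0] += nblks;		/* a real transform would mix 'data' in */
	(void)data;
}

static void sketch_update(struct sketch_state *sctx, const uint8_t *data,
			  unsigned int len)
{
	unsigned int partial = sctx->count % BLOCK_SIZE;
	unsigned int done = 0;

	sctx->count += len;

	if (partial && partial + len >= BLOCK_SIZE) {
		/* Complete the buffered partial block first. */
		done = BLOCK_SIZE - partial;
		memcpy(sctx->buffer + partial, data, done);
		transform(sctx->state, sctx->buffer, 1);
		partial = 0;
	}

	if (len - done >= BLOCK_SIZE) {
		/* Hand every remaining whole block over in one call. */
		unsigned int nblks = (len - done) / BLOCK_SIZE;

		transform(sctx->state, data + done, nblks);
		done += nblks * BLOCK_SIZE;
	}

	/* Keep the tail for the next update/final call. */
	memcpy(sctx->buffer + partial, data + done, len - done);
}

int main(void)
{
	struct sketch_state s = { .state = { 1, 2, 3, 4, 5 } };
	uint8_t msg[200] = { 0 };

	sketch_update(&s, msg, sizeof(msg));	/* 3 whole blocks + 8 tail bytes */
	printf("blocks consumed: %u, tail bytes: %u\n",
	       s.state[0] - 1, (unsigned int)(s.count % BLOCK_SIZE));
	return 0;
}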
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
new file mode 100644
index 000000000000..6f1b411b1d55
--- /dev/null
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -0,0 +1,197 @@
+/*
+ * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
+ * ARM NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha1_generic.c and sha1_ssse3_glue.c:
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/crypto/sha1.h>
+
+
+asmlinkage void sha1_transform_neon(void *state_h, const char *data,
+ unsigned int rounds);
+
+
+static int sha1_neon_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+
+ return 0;
+}
+
+static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
+ if (partial) {
+ done = SHA1_BLOCK_SIZE - partial;
+ memcpy(sctx->buffer + partial, data, done);
+ sha1_transform_neon(sctx->state, sctx->buffer, 1);
+ }
+
+ if (len - done >= SHA1_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+ sha1_transform_neon(sctx->state, data + done, rounds);
+ done += rounds * SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buffer, data + done, len - done);
+
+ return 0;
+}
+
+static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA1_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buffer + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = sha1_update_arm(desc, data, len);
+ } else {
+ kernel_neon_begin();
+ res = __sha1_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA1_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+ if (!may_use_simd()) {
+ sha1_update_arm(desc, padding, padlen);
+ sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha1_neon_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buffer + index, padding, padlen);
+ } else {
+ __sha1_neon_update(desc, padding, padlen, index);
+ }
+ __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 5; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_neon_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_neon_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_neon_init,
+ .update = sha1_neon_update,
+ .final = sha1_neon_final,
+ .export = sha1_neon_export,
+ .import = sha1_neon_import,
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_neon_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_neon_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_neon_mod_init);
+module_exit(sha1_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
+MODULE_ALIAS("sha1");
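
The "pad out to 56 mod 64" step in sha1_final() and sha1_neon_final() picks padlen so that the 0x80 marker byte, the zero padding and the trailing 8-byte big-endian bit length end exactly on a 64-byte block boundary. The standalone C sketch below (not part of the patch) exercises the same expression the glue code uses and shows the total always coming out as whole blocks.

/*
 * Sketch only: SHA-1 padding-length arithmetic, as used by the final()
 * routines above.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int sha1_padlen(uint64_t count)
{
	unsigned int index = count % 64;

	/* Same expression as the glue code: pad (0x80 + zeroes) to 56 mod 64. */
	return (index < 56) ? (56 - index) : ((64 + 56) - index);
}

int main(void)
{
	uint64_t counts[] = { 0, 55, 56, 63, 64, 119 };
	unsigned int i;

	for (i = 0; i < sizeof(counts) / sizeof(counts[0]); i++) {
		uint64_t c = counts[i];
		unsigned int p = sha1_padlen(c);

		/* count + padding + 8-byte length must fill whole blocks. */
		printf("count=%3u padlen=%2u total=%3u (mod 64 = %u)\n",
		       (unsigned int)c, p, (unsigned int)(c + p + 8),
		       (unsigned int)((c + p + 8) % 64));
	}
	return 0;
}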
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..fac0533ea633
--- /dev/null
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -0,0 +1,716 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA256 block procedure for ARMv4. May 2007.
+
+# Performance is ~2x better than gcc 3.4 generated code and in "abso-
+# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+# byte [on single-issue Xscale PXA250 core].
+
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 22% improvement on
+# Cortex A8 core and ~20 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 16%
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that the latter performs sub-optimally; nothing was
+# done about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0"; $t0="r0";
+$inp="r1"; $t4="r1";
+$len="r2"; $t1="r2";
+$T1="r3"; $t3="r3";
+$A="r4";
+$B="r5";
+$C="r6";
+$D="r7";
+$E="r8";
+$F="r9";
+$G="r10";
+$H="r11";
+@V=($A,$B,$C,$D,$E,$F,$G,$H);
+$t2="r12";
+$Ktbl="r14";
+
+@Sigma0=( 2,13,22);
+@Sigma1=( 6,11,25);
+@sigma0=( 7,18, 3);
+@sigma1=(17,19,10);
+
+sub BODY_00_15 {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___ if ($i<16);
+#if __ARM_ARCH__>=7
+ @ ldr $t1,[$inp],#4 @ $i
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+# ifndef __ARMEB__
+ rev $t1,$t1
+# endif
+#else
+ @ ldrb $t1,[$inp,#3] @ $i
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ ldrb $t2,[$inp,#2]
+ ldrb $t0,[$inp,#1]
+ orr $t1,$t1,$t2,lsl#8
+ ldrb $t2,[$inp],#4
+ orr $t1,$t1,$t0,lsl#16
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ orr $t1,$t1,$t2,lsl#24
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+#endif
+___
+$code.=<<___;
+ ldr $t2,[$Ktbl],#4 @ *K256++
+ add $h,$h,$t1 @ h+=X[i]
+ str $t1,[sp,#`$i%16`*4]
+ eor $t1,$f,$g
+ add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
+ and $t1,$t1,$e
+ add $h,$h,$t2 @ h+=K256[i]
+ eor $t1,$t1,$g @ Ch(e,f,g)
+ eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
+ add $h,$h,$t1 @ h+=Ch(e,f,g)
+#if $i==31
+ and $t2,$t2,#0xff
+ cmp $t2,#0xf2 @ done?
+#endif
+#if $i<15
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4 @ prefetch
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t2,$a,$b @ a^b, b^c in next round
+#else
+ ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
+ eor $t2,$a,$b @ a^b, b^c in next round
+ ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
+#endif
+ eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
+ and $t3,$t3,$t2 @ (b^c)&=(a^b)
+ add $d,$d,$h @ d+=h
+ eor $t3,$t3,$b @ Maj(a,b,c)
+ add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
+ @ add $h,$h,$t3 @ h+=Maj(a,b,c)
+___
+ ($t2,$t3)=($t3,$t2);
+}
+
+sub BODY_16_XX {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___;
+ @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
+ @ ldr $t4,[sp,#`($i+14)%16`*4]
+ mov $t0,$t1,ror#$sigma0[0]
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ mov $t2,$t4,ror#$sigma1[0]
+ eor $t0,$t0,$t1,ror#$sigma0[1]
+ eor $t2,$t2,$t4,ror#$sigma1[1]
+ eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
+ ldr $t1,[sp,#`($i+0)%16`*4]
+ eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
+ ldr $t4,[sp,#`($i+9)%16`*4]
+
+ add $t2,$t2,$t0
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
+ add $t1,$t1,$t2
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+ add $t1,$t1,$t4 @ X[i]
+___
+ &BODY_00_15(@_);
+}
+
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
+
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha256_block_data_order
+#else
+ adr r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
+ ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+ sub $Ktbl,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t3,$B,$C @ magic
+ eor $t2,$t2,$t2
+___
+for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=".Lrounds_16_xx:\n";
+for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+#if __ARM_ARCH__>=7
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq $t3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t0,[$t3,#0]
+ ldr $t1,[$t3,#4]
+ ldr $t2,[$t3,#8]
+ add $A,$A,$t0
+ ldr $t0,[$t3,#12]
+ add $B,$B,$t1
+ ldr $t1,[$t3,#16]
+ add $C,$C,$t2
+ ldr $t2,[$t3,#20]
+ add $D,$D,$t0
+ ldr $t0,[$t3,#24]
+ add $E,$E,$t1
+ ldr $t1,[$t3,#28]
+ add $F,$F,$t2
+ ldr $inp,[sp,#17*4] @ pull inp
+ ldr $t2,[sp,#18*4] @ pull inp+len
+ add $G,$G,$t0
+ add $H,$H,$t1
+ stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
+ cmp $inp,$t2
+ sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#`16+3`*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+___
+######################################################################
+# NEON stuff
+#
+{{{
+my @X=map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
+my $Xfer=$t4;
+my $j=0;
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub AUTOLOAD() # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+ my $arg = pop;
+ $arg = "#$arg" if ($arg*1 eq $arg);
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Xupdate()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T2,$T0,$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T1,$T0,$sigma0[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T2,$T0,32-$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T3,$T0,$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T2);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T3,$T0,32-$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T3); # sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ while($#insns>=2) { eval(shift(@insns)); }
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vrev32_8 (@X[0],@X[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ foreach (@insns) { eval; } # remaining instructions
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub body_00_15 () {
+ (
+ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
+ '&eor ($t1,$f,$g)',
+ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
+ '&and ($t1,$t1,$e)',
+ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
+ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
+ '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
+ '&eor ($t2,$a,$b)', # a^b, b^c in next round
+ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
+ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
+ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
+ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
+ '&ldr ($t1,"[sp,#64]") if ($j==31)',
+ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
+ '&add ($d,$d,$h)', # d+=h
+ '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
+ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
+ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+ )
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub $H,sp,#16*4+16
+ adrl $Ktbl,K256
+ bic $H,$H,#15 @ align for 128-bit stores
+ mov $t2,sp
+ mov sp,$H @ alloca
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {@X[0]},[$inp]!
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ vld1.32 {$T0},[$Ktbl,:128]!
+ vld1.32 {$T1},[$Ktbl,:128]!
+ vld1.32 {$T2},[$Ktbl,:128]!
+ vld1.32 {$T3},[$Ktbl,:128]!
+ vrev32.8 @X[0],@X[0] @ yes, even on
+ str $ctx,[sp,#64]
+ vrev32.8 @X[1],@X[1] @ big-endian
+ str $inp,[sp,#68]
+ mov $Xfer,sp
+ vrev32.8 @X[2],@X[2]
+ str $len,[sp,#72]
+ vrev32.8 @X[3],@X[3]
+ str $t2,[sp,#76] @ save original sp
+ vadd.i32 $T0,$T0,@X[0]
+ vadd.i32 $T1,$T1,@X[1]
+ vst1.32 {$T0},[$Xfer,:128]!
+ vadd.i32 $T2,$T2,@X[2]
+ vst1.32 {$T1},[$Xfer,:128]!
+ vadd.i32 $T3,$T3,@X[3]
+ vst1.32 {$T2},[$Xfer,:128]!
+ vst1.32 {$T3},[$Xfer,:128]!
+
+ ldmia $ctx,{$A-$H}
+ sub $Xfer,$Xfer,#64
+ ldr $t1,[sp,#0]
+ eor $t2,$t2,$t2
+ eor $t3,$B,$C
+ b .L_00_48
+
+.align 4
+.L_00_48:
+___
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+$code.=<<___;
+ teq $t1,#0 @ check for K256 terminator
+ ldr $t1,[sp,#0]
+ sub $Xfer,$Xfer,#64
+ bne .L_00_48
+
+ ldr $inp,[sp,#68]
+ ldr $t0,[sp,#72]
+ sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
+ teq $inp,$t0
+ it eq
+ subeq $inp,$inp,#64 @ avoid SEGV
+ vld1.8 {@X[0]},[$inp]! @ load next input block
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ it ne
+ strne $inp,[sp,#68]
+ mov $Xfer,sp
+___
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+$code.=<<___;
+ ldr $t0,[$t1,#0]
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t2,[$t1,#4]
+ ldr $t3,[$t1,#8]
+ ldr $t4,[$t1,#12]
+ add $A,$A,$t0 @ accumulate
+ ldr $t0,[$t1,#16]
+ add $B,$B,$t2
+ ldr $t2,[$t1,#20]
+ add $C,$C,$t3
+ ldr $t3,[$t1,#24]
+ add $D,$D,$t4
+ ldr $t4,[$t1,#28]
+ add $E,$E,$t0
+ str $A,[$t1],#4
+ add $F,$F,$t2
+ str $B,[$t1],#4
+ add $G,$G,$t3
+ str $C,[$t1],#4
+ add $H,$H,$t4
+ str $D,[$t1],#4
+ stmia $t1,{$E-$H}
+
+ ittte ne
+ movne $Xfer,sp
+ ldrne $t1,[sp,#0]
+ eorne $t2,$t2,$t2
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne $t3,$B,$C
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+___
+}}}
+######################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
+my @MSG=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
+my $Ktbl="r3";
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {$ABCD,$EFGH},[$ctx]
+# ifdef __thumb2__
+ adr $Ktbl,.LARMv8
+ sub $Ktbl,$Ktbl,#.LARMv8-K256
+# else
+ adrl $Ktbl,K256
+# endif
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+
+.Loop_v8:
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
+ vld1.32 {$W0},[$Ktbl]!
+ vrev32.8 @MSG[0],@MSG[0]
+ vrev32.8 @MSG[1],@MSG[1]
+ vrev32.8 @MSG[2],@MSG[2]
+ vrev32.8 @MSG[3],@MSG[3]
+ vmov $ABCD_SAVE,$ABCD @ offload
+ vmov $EFGH_SAVE,$EFGH
+ teq $inp,$len
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ sha256su0 @MSG[0],@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vld1.32 {$W0},[$Ktbl]!
+ vadd.i32 $W1,$W1,@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vld1.32 {$W1},[$Ktbl]
+ vadd.i32 $W0,$W0,@MSG[2]
+ sub $Ktbl,$Ktbl,#256-16 @ rewind
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vadd.i32 $W1,$W1,@MSG[3]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
+ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {$ABCD,$EFGH},[$ctx]
+
+ ret @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
+___
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+{ my %opcode = (
+ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
+ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
+
+ sub unsha256 {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # ARMv7 instructions are always encoded little-endian, so emit the
+ # raw encoding as bytes; the correct solution would be the .inst
+ # directive, but older assemblers don't implement it:-(
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ }
+}
+
+foreach (split($/,$code)) {
+
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
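
Because sha256h, sha256h2, sha256su0 and sha256su1 are newer than the assemblers this code has to build with, unsha256() above encodes each instruction by hand: it ORs the q-register numbers into the base opcode from the %opcode table and emits the resulting 32-bit word as raw bytes through the INST() macro. The standalone C sketch below (not part of the patch) shows just the byte-ordering part, using the base opcodes with all register fields zero.

/*
 * Sketch only: how a 32-bit instruction word becomes the four .byte
 * operands a,b,c,d used by the INST() macro above.
 */
#include <stdint.h>
#include <stdio.h>

static void emit_inst(const char *what, uint32_t word)
{
	unsigned int a = word & 0xff;		/* little-endian byte order */
	unsigned int b = (word >> 8) & 0xff;
	unsigned int c = (word >> 16) & 0xff;
	unsigned int d = (word >> 24) & 0xff;

	/* ARM mode: INST(a,b,c,d) expands to .byte a,b,c,d */
	printf("@ %s (ARM)    .byte 0x%02x,0x%02x,0x%02x,0x%02x\n",
	       what, a, b, c, d);
	/* Thumb-2 mode: INST(a,b,c,d) expands to .byte c,d|0xc,a,b */
	printf("@ %s (Thumb2) .byte 0x%02x,0x%02x,0x%02x,0x%02x\n",
	       what, c, (d | 0xcu) & 0xff, a, b);
}

int main(void)
{
	/* Base opcodes from the %opcode table, register fields all zero. */
	emit_inst("sha256h   q0,q0,q0", 0xf3000c40);
	emit_inst("sha256su0 q0,q0   ", 0xf3ba03c0);
	return 0;
}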
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..555a1a8eec90
--- /dev/null
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -0,0 +1,2808 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA256 block procedure for ARMv4. May 2007.
+
+@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
+@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+@ byte [on single-issue Xscale PXA250 core].
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
+@ Cortex A8 core and ~20 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+@ September 2013.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process one
+@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+@ code (meaning that the latter performs sub-optimally; nothing was
+@ done about it).
+
+@ May 2014.
+@
+@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
+
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha256_block_data_order
+#else
+ adr r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{r0,r1,r2,r4-r11,lr}
+ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+ sub r14,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ magic
+ eor r12,r12,r12
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 0
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 0
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 0==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 0<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 1
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 1
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 1==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 1<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 2
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 2
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 2==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 2<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 3
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 3
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 3==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 3<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 4
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 4
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 4==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 4<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 5
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 5==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 5<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 6
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 6
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 6==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 6<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 7
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 7==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 7<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 8
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 8
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 8==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 8<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 9
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 9
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 9==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 9<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 10
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 10
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 10==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 10<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 11
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 11
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 11==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 11<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 12
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 12
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 12==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 12<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 13
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 13
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 13==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 13<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 14
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 14
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 14==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 14<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 15
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 15
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 15==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 15<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+.Lrounds_16_xx:
+ @ ldr r2,[sp,#1*4] @ 16
+ @ ldr r1,[sp,#14*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#0*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#9*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 16==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 16<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#2*4] @ 17
+ @ ldr r1,[sp,#15*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#1*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#10*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 17==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 17<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#3*4] @ 18
+ @ ldr r1,[sp,#0*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#2*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#11*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 18==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 18<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#4*4] @ 19
+ @ ldr r1,[sp,#1*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#3*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#12*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 19==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 19<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#5*4] @ 20
+ @ ldr r1,[sp,#2*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#4*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#13*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 20==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 20<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#6*4] @ 21
+ @ ldr r1,[sp,#3*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#5*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#14*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 21==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 21<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#7*4] @ 22
+ @ ldr r1,[sp,#4*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#6*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#15*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 22==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 22<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#8*4] @ 23
+ @ ldr r1,[sp,#5*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#7*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#0*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 23==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 23<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#9*4] @ 24
+ @ ldr r1,[sp,#6*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#8*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#1*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 24==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 24<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#10*4] @ 25
+ @ ldr r1,[sp,#7*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#9*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#2*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 25==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 25<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#11*4] @ 26
+ @ ldr r1,[sp,#8*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#10*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#3*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 26==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 26<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#12*4] @ 27
+ @ ldr r1,[sp,#9*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#11*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#4*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 27==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 27<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#13*4] @ 28
+ @ ldr r1,[sp,#10*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#12*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#5*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 28==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 28<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#14*4] @ 29
+ @ ldr r1,[sp,#11*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#13*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#6*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 29==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 29<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#15*4] @ 30
+ @ ldr r1,[sp,#12*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#14*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#7*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 30==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 30<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#0*4] @ 31
+ @ ldr r1,[sp,#13*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#15*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#8*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 31==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 31<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq r3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r0,[r3,#0]
+ ldr r2,[r3,#4]
+ ldr r12,[r3,#8]
+ add r4,r4,r0
+ ldr r0,[r3,#12]
+ add r5,r5,r2
+ ldr r2,[r3,#16]
+ add r6,r6,r12
+ ldr r12,[r3,#20]
+ add r7,r7,r0
+ ldr r0,[r3,#24]
+ add r8,r8,r2
+ ldr r2,[r3,#28]
+ add r9,r9,r12
+ ldr r1,[sp,#17*4] @ pull inp
+ ldr r12,[sp,#18*4] @ pull inp+len
+ add r10,r10,r0
+ add r11,r11,r2
+ stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+ cmp r1,r12
+ sub r14,r14,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#19*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub r11,sp,#16*4+16
+ adrl r14,K256
+ bic r11,r11,#15 @ align for 128-bit stores
+ mov r12,sp
+ mov sp,r11 @ alloca
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {q0},[r1]!
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ vld1.32 {q8},[r14,:128]!
+ vld1.32 {q9},[r14,:128]!
+ vld1.32 {q10},[r14,:128]!
+ vld1.32 {q11},[r14,:128]!
+ vrev32.8 q0,q0 @ yes, even on
+ str r0,[sp,#64]
+ vrev32.8 q1,q1 @ big-endian
+ str r1,[sp,#68]
+ mov r1,sp
+ vrev32.8 q2,q2
+ str r2,[sp,#72]
+ vrev32.8 q3,q3
+ str r12,[sp,#76] @ save original sp
+ vadd.i32 q8,q8,q0
+ vadd.i32 q9,q9,q1
+ vst1.32 {q8},[r1,:128]!
+ vadd.i32 q10,q10,q2
+ vst1.32 {q9},[r1,:128]!
+ vadd.i32 q11,q11,q3
+ vst1.32 {q10},[r1,:128]!
+ vst1.32 {q11},[r1,:128]!
+
+ ldmia r0,{r4-r11}
+ sub r1,r1,#64
+ ldr r2,[sp,#0]
+ eor r12,r12,r12
+ eor r3,r5,r6
+ b .L_00_48
+
+.align 4
+.L_00_48:
+ vext.8 q8,q0,q1,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q2,q3,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q0,q0,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d7,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d7,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d7,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q0,q0,q9
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d7,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d7,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d0,d0,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d0,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d0,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d0,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ vshr.u32 d24,d0,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d0,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d1,d1,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q0
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q1,q2,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q3,q0,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q1,q1,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d1,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d1,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d1,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q1,q1,q9
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d1,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d1,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d2,d2,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d2,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d2,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d2,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ vshr.u32 d24,d2,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d2,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d3,d3,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q1
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vext.8 q8,q2,q3,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q0,q1,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q2,q2,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d3,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d3,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d3,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q2,q2,q9
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d3,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d3,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d4,d4,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d4,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d4,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d4,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ vshr.u32 d24,d4,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d4,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d5,d5,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q2
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q3,q0,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q1,q2,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q3,q3,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d5,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d5,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d5,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q3,q3,q9
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d5,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d5,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d6,d6,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d6,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d6,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d6,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ vshr.u32 d24,d6,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d6,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d7,d7,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q3
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[r14]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ teq r2,#0 @ check for K256 terminator
+ ldr r2,[sp,#0]
+ sub r1,r1,#64
+ bne .L_00_48
+
+ ldr r1,[sp,#68]
+ ldr r0,[sp,#72]
+ sub r14,r14,#256 @ rewind r14
+ teq r1,r0
+ it eq
+ subeq r1,r1,#64 @ avoid SEGV
+ vld1.8 {q0},[r1]! @ load next input block
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ it ne
+ strne r1,[sp,#68]
+ mov r1,sp
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q0,q0
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q0
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q1,q1
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q1
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q2,q2
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q2
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q3,q3
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q3
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#64]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ ldr r0,[r2,#0]
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r12,[r2,#4]
+ ldr r3,[r2,#8]
+ ldr r1,[r2,#12]
+ add r4,r4,r0 @ accumulate
+ ldr r0,[r2,#16]
+ add r5,r5,r12
+ ldr r12,[r2,#20]
+ add r6,r6,r3
+ ldr r3,[r2,#24]
+ add r7,r7,r1
+ ldr r1,[r2,#28]
+ add r8,r8,r0
+ str r4,[r2],#4
+ add r9,r9,r12
+ str r5,[r2],#4
+ add r10,r10,r3
+ str r6,[r2],#4
+ add r11,r11,r1
+ str r7,[r2],#4
+ stmia r2,{r8-r11}
+
+ ittte ne
+ movne r1,sp
+ ldrne r2,[sp,#0]
+ eorne r12,r12,r12
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne r3,r5,r6
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
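+@ INST() above encodes the ARMv8 Crypto Extension SHA-256 instructions
+@ (sha256h, sha256h2, sha256su0, sha256su1) as raw opcode bytes, so the
+@ file still assembles with toolchains that predate ARMv8 support.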
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {q0,q1},[r0]
+# ifdef __thumb2__
+ adr r3,.LARMv8
+ sub r3,r3,#.LARMv8-K256
+# else
+ adrl r3,K256
+# endif
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+.Loop_v8:
+ vld1.8 {q8-q9},[r1]!
+ vld1.8 {q10-q11},[r1]!
+ vld1.32 {q12},[r3]!
+ vrev32.8 q8,q8
+ vrev32.8 q9,q9
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vmov q14,q0 @ offload
+ vmov q15,q1
+ teq r1,r2
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vld1.32 {q13},[r3]
+ vadd.i32 q12,q12,q10
+ sub r3,r3,#256-16 @ rewind
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vadd.i32 q13,q13,q11
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vadd.i32 q0,q0,q14
+ vadd.i32 q1,q1,q15
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {q0,q1},[r0]
+
+ bx lr @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
new file mode 100644
index 000000000000..bb03482e53e2
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.c
@@ -0,0 +1,246 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using optimized ARM assembler and NEON instructions.
+ *
+ * Copyright © 2015 Google Inc.
+ *
+ * This file is based on sha256_ssse3_glue.c:
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+#include "sha256_glue.h"
+
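+/*
+ * Core block transform, implemented in the accompanying generated
+ * assembly (sha256-core.S_shipped): processes num_blks 64-byte input
+ * blocks into the eight-word digest state.
+ */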
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+
+int sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
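+/*
+ * SHA-224 uses the same block transform as SHA-256; it differs only in
+ * the initial hash values and in truncating the output to 28 bytes.
+ */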
+int sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len,
+ unsigned int partial)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
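+	/*
+	 * Three phases: top up and flush a previously buffered partial
+	 * block, pass the remaining whole blocks to the assembly
+	 * transform, then buffer whatever is left for the next call.
+	 */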
+ if (partial) {
+ done = SHA256_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha256_block_data_order(sctx->state, sctx->buf, 1);
+ }
+
+ if (len - done >= SHA256_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+ sha256_block_data_order(sctx->state, data + done, rounds);
+ done += rounds * SHA256_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA256_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ return __sha256_update(desc, data, len, partial);
+}
+
+/* Add padding and return the message digest. */
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA256_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
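+	/*
+	 * e.g. index 10 gives padlen 46 (padding fits in this block),
+	 * while index 60 gives padlen 60 and spills into a second block.
+	 */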
+
+ /* We need to fill a whole block for __sha256_update */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha256_update(desc, padding, padlen, index);
+ }
+ __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ sha256_final(desc, D);
+
+ memcpy(out, D, SHA224_DIGEST_SIZE);
+ memset(D, 0, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+int sha256_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+int sha256_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-asm",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_init,
+ .update = sha256_update,
+ .final = sha224_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-asm",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha256_mod_init(void)
+{
+ int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+ if (res < 0)
+ return res;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
+ res = crypto_register_shashes(sha256_neon_algs,
+ ARRAY_SIZE(sha256_neon_algs));
+
+ if (res < 0)
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+ }
+
+ return res;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
+ crypto_unregister_shashes(sha256_neon_algs,
+ ARRAY_SIZE(sha256_neon_algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
+
+MODULE_ALIAS_CRYPTO("sha256");
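
The two algorithms registered above are not called directly; users reach them
through the kernel's generic shash API, which selects the registration with the
highest cra_priority (sha256-neon at 250 when usable, otherwise sha256-asm at
150, otherwise the generic C implementation). A minimal sketch of such a
caller follows, for illustration only; the helper name is invented and is not
part of the patch:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hypothetical helper: digest 'len' bytes of 'data' with whichever "sha256"
 * driver the crypto API picks (highest priority wins). */
static int example_sha256_digest(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}

	desc->tfm = tfm;
	err = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}
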
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
new file mode 100644
index 000000000000..0312f4ffe8cc
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.h
@@ -0,0 +1,23 @@
+#ifndef _CRYPTO_SHA256_GLUE_H
+#define _CRYPTO_SHA256_GLUE_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+extern struct shash_alg sha256_neon_algs[2];
+
+extern int sha256_init(struct shash_desc *desc);
+
+extern int sha224_init(struct shash_desc *desc);
+
+extern int __sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial);
+
+extern int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+extern int sha256_export(struct shash_desc *desc, void *out);
+
+extern int sha256_import(struct shash_desc *desc, const void *in);
+
+#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..3ff0a7f1d092
--- /dev/null
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -0,0 +1,172 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2015 Google Inc.
+ *
+ * This file is based on sha512_neon_glue.c:
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+
+static int __sha256_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
+ if (partial) {
+ done = SHA256_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha256_block_data_order_neon(sctx->state, sctx->buf, 1);
+ }
+
+ if (len - done >= SHA256_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+ sha256_block_data_order_neon(sctx->state, data + done, rounds);
+ done += rounds * SHA256_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA256_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = __sha256_update(desc, data, len, partial);
+ } else {
+ kernel_neon_begin();
+ res = __sha256_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+/* Add padding and return the message digest. */
+static int sha256_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA256_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
+
+ if (!may_use_simd()) {
+ sha256_update(desc, padding, padlen);
+ sha256_update(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha256_neon_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha256_neon_update(desc, padding, padlen, index);
+ }
+ __sha256_neon_update(desc, (const u8 *)&bits,
+ sizeof(bits), 56);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha224_neon_final(struct shash_desc *desc, u8 *out)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ sha256_neon_final(desc, D);
+
+ memcpy(out, D, SHA224_DIGEST_SIZE);
+ memset(D, 0, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+struct shash_alg sha256_neon_algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_init,
+ .update = sha256_neon_update,
+ .final = sha256_neon_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_init,
+ .update = sha256_neon_update,
+ .final = sha224_neon_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
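
Both final() routines, scalar and NEON, share the same "pad to 56 mod 64"
arithmetic. As a worked example: for a 100-byte message, index = 100 % 64 = 36,
so padlen = 56 - 36 = 20 and the 8-byte big-endian bit count fills the last
64-byte block exactly; for a 60-byte message, index = 60 >= 56, so
padlen = (64 + 56) - 60 = 60 and the padding spills into a second block. A
standalone rendering of that computation, shown only to spell out the
arithmetic (not part of the patch):

static inline unsigned int sha256_pad_len(u64 count)
{
	unsigned int index = count % SHA256_BLOCK_SIZE;

	return (index < 56) ? (56 - index)
			    : ((SHA256_BLOCK_SIZE + 56) - index);
}
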
diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S
new file mode 100644
index 000000000000..fe99472e507c
--- /dev/null
+++ b/arch/arm/crypto/sha512-armv7-neon.S
@@ -0,0 +1,455 @@
+/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
+ *
+ * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/linkage.h>
+
+
+.syntax unified
+.code 32
+.fpu neon
+
+.text
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RA d0
+#define RB d1
+#define RC d2
+#define RD d3
+#define RE d4
+#define RF d5
+#define RG d6
+#define RH d7
+
+#define RT0 d8
+#define RT1 d9
+#define RT2 d10
+#define RT3 d11
+#define RT4 d12
+#define RT5 d13
+#define RT6 d14
+#define RT7 d15
+
+#define RT01q q4
+#define RT23q q5
+#define RT45q q6
+#define RT67q q7
+
+#define RW0 d16
+#define RW1 d17
+#define RW2 d18
+#define RW3 d19
+#define RW4 d20
+#define RW5 d21
+#define RW6 d22
+#define RW7 d23
+#define RW8 d24
+#define RW9 d25
+#define RW10 d26
+#define RW11 d27
+#define RW12 d28
+#define RW13 d29
+#define RW14 d30
+#define RW15 d31
+
+#define RW01q q8
+#define RW23q q9
+#define RW45q q10
+#define RW67q q11
+#define RW89q q12
+#define RW1011q q13
+#define RW1213q q14
+#define RW1415q q15
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
+ rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3; \
+ \
+ /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
+ /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
+ \
+ /**** S0(w[1:2]) */ \
+ \
+ /* w[0:1] += w[9:10] */ \
+ /* RT23q = rw1:rw2 */ \
+ vext.u64 RT23q, rw01q, rw23q, #1; \
+ vadd.u64 rw0, rw9; \
+ vadd.u64 rg, rg, RT0; \
+ vadd.u64 rw1, rw10;\
+ vadd.u64 rg, rg, RT1; /* g+=t1; */ \
+ \
+ vshr.u64 RT45q, RT23q, #1; \
+ vshl.u64 RT67q, RT23q, #64 - 1; \
+ vshr.u64 RT01q, RT23q, #8; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ vshl.u64 RT67q, RT23q, #64 - 8; \
+ veor.u64 RT45q, RT45q, RT01q; \
+ vshr.u64 RT01q, RT23q, #7; \
+ veor.u64 RT45q, RT45q, RT67q; \
+ \
+ /**** S1(w[14:15]) */ \
+ vshr.u64 RT23q, rw1415q, #6; \
+ veor.u64 RT01q, RT01q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #19; \
+ vshl.u64 RT67q, rw1415q, #64 - 19; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT45q, rw1415q, #61; \
+ veor.u64 RT23q, RT23q, RT67q; \
+ vshl.u64 RT67q, rw1415q, #64 - 61; \
+ veor.u64 RT23q, RT23q, RT45q; \
+ vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
+ veor.u64 RT01q, RT23q, RT67q;
+#define vadd_RT01q(rw01q) \
+ /* w[0:1] += S(w[14:15]) */ \
+ vadd.u64 rw01q, RT01q;
+
+#define dummy(_) /*_*/
+
+#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
+ interleave_op1, arg1, interleave_op2, arg2) \
+ /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+ vshr.u64 RT2, re, #14; \
+ vshl.u64 RT3, re, #64 - 14; \
+ interleave_op1(arg1); \
+ vshr.u64 RT4, re, #18; \
+ vshl.u64 RT5, re, #64 - 18; \
+ interleave_op2(arg2); \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, re, #41; \
+ vshl.u64 RT5, re, #64 - 41; \
+ vadd.u64 RT0, RT0, rw0; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, re; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, rf, rg; \
+ \
+ vadd.u64 RT1, RT1, rh; \
+ vshr.u64 RT2, ra, #28; \
+ vshl.u64 RT3, ra, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, ra, #34; \
+ vshl.u64 RT5, ra, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* h = Sum0 (a) + Maj (a, b, c); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, ra, #39; \
+ vshl.u64 RT5, ra, #64 - 39; \
+ veor.64 RT0, ra, rb; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rc, rb; \
+ vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+ veor.64 rh, RT2, RT3; \
+ \
+ /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+ vshr.u64 RT2, rd, #14; \
+ vshl.u64 RT3, rd, #64 - 14; \
+ vadd.u64 rh, rh, RT0; \
+ vshr.u64 RT4, rd, #18; \
+ vshl.u64 RT5, rd, #64 - 18; \
+ vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+ vld1.64 {RT0}, [RK]!; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rd, #41; \
+ vshl.u64 RT5, rd, #64 - 41; \
+ vadd.u64 RT0, RT0, rw1; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vmov.64 RT7, rd; \
+ veor.64 RT1, RT2, RT3; \
+ vbsl.64 RT7, re, rf; \
+ \
+ vadd.u64 RT1, RT1, rg; \
+ vshr.u64 RT2, rh, #28; \
+ vshl.u64 RT3, rh, #64 - 28; \
+ vadd.u64 RT1, RT1, RT0; \
+ vshr.u64 RT4, rh, #34; \
+ vshl.u64 RT5, rh, #64 - 34; \
+ vadd.u64 RT1, RT1, RT7; \
+ \
+ /* g = Sum0 (h) + Maj (h, a, b); */ \
+ veor.64 RT23q, RT23q, RT45q; \
+ vshr.u64 RT4, rh, #39; \
+ vshl.u64 RT5, rh, #64 - 39; \
+ veor.64 RT0, rh, ra; \
+ veor.64 RT23q, RT23q, RT45q; \
+ vbsl.64 RT0, rb, ra; \
+ vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+ veor.64 rg, RT2, RT3;
+#define vadd_rg_RT0(rg) \
+ vadd.u64 rg, rg, RT0;
+#define vadd_rg_RT1(rg) \
+ vadd.u64 rg, rg, RT1; /* g+=t1; */
+
+.align 3
+ENTRY(sha512_transform_neon)
+ /* Input:
+ * %r0: SHA512_CONTEXT
+ * %r1: data
+ * %r2: u64 k[] constants
+ * %r3: nblks
+ */
+ push {%lr};
+
+ mov %lr, #0;
+
+ /* Load context to d0-d7 */
+ vld1.64 {RA-RD}, [%r0]!;
+ vld1.64 {RE-RH}, [%r0];
+ sub %r0, #(4*8);
+
+ /* Load input to w[16], d16-d31 */
+ /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
+ vld1.64 {RW0-RW3}, [%r1]!;
+ vld1.64 {RW4-RW7}, [%r1]!;
+ vld1.64 {RW8-RW11}, [%r1]!;
+ vld1.64 {RW12-RW15}, [%r1]!;
+#ifdef __ARMEL__
+ /* byteswap */
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ /* EABI says that d8-d15 must be preserved by callee. */
+ /*vpush {RT0-RT7};*/
+
+.Loop:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+ RW23q, RW1415q, RW9, RW10, dummy, _);
+ b .Lenter_rounds;
+
+.Loop_rounds:
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
+ RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
+.Lenter_rounds:
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
+ RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
+ RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
+ RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
+ rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
+ RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
+ rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
+ RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
+ add %lr, #16;
+ rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
+ RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
+ cmp %lr, #64;
+ rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
+ RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
+ bne .Loop_rounds;
+
+ subs %r3, #1;
+
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
+ vadd_RT01q, RW1415q, dummy, _);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ beq .Lhandle_tail;
+ vld1.64 {RW0-RW3}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW01q, RW01q;
+ vrev64.8 RW23q, RW23q;
+#endif
+ vld1.64 {RW4-RW7}, [%r1]!;
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+ vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+#ifdef __ARMEL__
+ vrev64.8 RW45q, RW45q;
+ vrev64.8 RW67q, RW67q;
+#endif
+ vld1.64 {RW8-RW11}, [%r1]!;
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+ vrev64.8 RW89q, RW89q;
+ vrev64.8 RW1011q, RW1011q;
+#endif
+ vld1.64 {RW12-RW15}, [%r1]!;
+ vadd_rg_RT0(RA);
+ vadd_rg_RT1(RA);
+
+ /* Load context */
+ vld1.64 {RT0-RT3}, [%r0]!;
+ vld1.64 {RT4-RT7}, [%r0];
+ sub %r0, #(4*8);
+
+#ifdef __ARMEL__
+ vrev64.8 RW1213q, RW1213q;
+ vrev64.8 RW1415q, RW1415q;
+#endif
+
+ vadd.u64 RA, RT0;
+ vadd.u64 RB, RT1;
+ vadd.u64 RC, RT2;
+ vadd.u64 RD, RT3;
+ vadd.u64 RE, RT4;
+ vadd.u64 RF, RT5;
+ vadd.u64 RG, RT6;
+ vadd.u64 RH, RT7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+ sub RK, $(8*80);
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ mov %lr, #0;
+ sub %r0, #(4*8);
+
+ b .Loop;
+
+.Lhandle_tail:
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+ rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
+ vadd_rg_RT0, RA, vadd_rg_RT1, RA);
+ rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
+ vadd_rg_RT0, RG, vadd_rg_RT1, RG);
+ rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
+ vadd_rg_RT0, RE, vadd_rg_RT1, RE);
+ rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
+ vadd_rg_RT0, RC, vadd_rg_RT1, RC);
+
+ /* Load context to d16-d23 */
+ vld1.64 {RW0-RW3}, [%r0]!;
+ vadd_rg_RT0(RA);
+ vld1.64 {RW4-RW7}, [%r0];
+ vadd_rg_RT1(RA);
+ sub %r0, #(4*8);
+
+ vadd.u64 RA, RW0;
+ vadd.u64 RB, RW1;
+ vadd.u64 RC, RW2;
+ vadd.u64 RD, RW3;
+ vadd.u64 RE, RW4;
+ vadd.u64 RF, RW5;
+ vadd.u64 RG, RW6;
+ vadd.u64 RH, RW7;
+
+ /* Store the first half of context */
+ vst1.64 {RA-RD}, [%r0]!;
+
+ /* Clear used registers */
+ /* d16-d31 */
+ veor.u64 RW01q, RW01q;
+ veor.u64 RW23q, RW23q;
+ veor.u64 RW45q, RW45q;
+ veor.u64 RW67q, RW67q;
+ vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+ veor.u64 RW89q, RW89q;
+ veor.u64 RW1011q, RW1011q;
+ veor.u64 RW1213q, RW1213q;
+ veor.u64 RW1415q, RW1415q;
+ /* d8-d15 */
+ /*vpop {RT0-RT7};*/
+ /* d0-d7 (q0-q3) */
+ veor.u64 %q0, %q0;
+ veor.u64 %q1, %q1;
+ veor.u64 %q2, %q2;
+ veor.u64 %q3, %q3;
+
+ pop {%pc};
+ENDPROC(sha512_transform_neon)
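
The rounds2_0_63/rounds2_64_79 macros above interleave two SHA-512 rounds and
build each 64-bit rotate out of a vshr/vshl/veor pair, with vbsl supplying the
Ch and Maj terms. For reference, a plain C rendering of a single round is
sketched below; it is an illustration only, following the standard FIPS 180-4
round definitions and using the kernel's ror64() helper, and is not part of
the patch:

#include <linux/bitops.h>	/* ror64() */

/* One SHA-512 round on state s[8] = {a,b,c,d,e,f,g,h} with round constant k
 * and message word w. */
static inline void sha512_round(u64 s[8], u64 k, u64 w)
{
	u64 S1  = ror64(s[4], 14) ^ ror64(s[4], 18) ^ ror64(s[4], 41);
	u64 ch  = (s[4] & s[5]) ^ (~s[4] & s[6]);
	u64 t1  = s[7] + S1 + ch + k + w;
	u64 S0  = ror64(s[0], 28) ^ ror64(s[0], 34) ^ ror64(s[0], 39);
	u64 maj = (s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]);
	u64 t2  = S0 + maj;

	s[7] = s[6]; s[6] = s[5]; s[5] = s[4];
	s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0];
	s[0] = t1 + t2;
}
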
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c
new file mode 100644
index 000000000000..0d2758ff5e12
--- /dev/null
+++ b/arch/arm/crypto/sha512_neon_glue.c
@@ -0,0 +1,305 @@
+/*
+ * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is based on sha512_ssse3_glue.c:
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+
+static const u64 sha512_k[] = {
+ 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+ 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+ 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+ 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+ 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+ 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+ 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+ 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+ 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+ 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+ 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+ 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+ 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+ 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+ 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+ 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+ 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+ 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+ 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+ 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+ 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+ 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+ 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+ 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+ 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+ 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+ 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+ 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+ 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+ 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+ 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+ 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+ 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+ 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+ 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+ 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+ 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+ 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+ 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+ 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+
+asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
+ const u64 k[], unsigned int num_blks);
+
+
+static int sha512_neon_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA512_H0;
+ sctx->state[1] = SHA512_H1;
+ sctx->state[2] = SHA512_H2;
+ sctx->state[3] = SHA512_H3;
+ sctx->state[4] = SHA512_H4;
+ sctx->state[5] = SHA512_H5;
+ sctx->state[6] = SHA512_H6;
+ sctx->state[7] = SHA512_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count[0] += len;
+ if (sctx->count[0] < len)
+ sctx->count[1]++;
+
+ if (partial) {
+ done = SHA512_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
+ }
+
+ if (len - done >= SHA512_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+
+ sha512_transform_neon(sctx->state, data + done, sha512_k,
+ rounds);
+
+ done += rounds * SHA512_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA512_BLOCK_SIZE) {
+ sctx->count[0] += len;
+ if (sctx->count[0] < len)
+ sctx->count[1]++;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = crypto_sha512_update(desc, data, len);
+ } else {
+ kernel_neon_begin();
+ res = __sha512_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha512_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be64 *dst = (__be64 *)out;
+ __be64 bits[2];
+ static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits[1] = cpu_to_be64(sctx->count[0] << 3);
+ bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+ /* Pad out to 112 mod 128 and append length */
+ index = sctx->count[0] & 0x7f;
+ padlen = (index < 112) ? (112 - index) : ((128+112) - index);
+
+ if (!may_use_simd()) {
+ crypto_sha512_update(desc, padding, padlen);
+ crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha512_neon_update() */
+ if (padlen <= 112) {
+ sctx->count[0] += padlen;
+ if (sctx->count[0] < padlen)
+ sctx->count[1]++;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha512_neon_update(desc, padding, padlen, index);
+ }
+ __sha512_neon_update(desc, (const u8 *)&bits,
+ sizeof(bits), 112);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be64(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_neon_export(struct shash_desc *desc, void *out)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha512_neon_import(struct shash_desc *desc, const void *in)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha384_neon_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA384_H0;
+ sctx->state[1] = SHA384_H1;
+ sctx->state[2] = SHA384_H2;
+ sctx->state[3] = SHA384_H3;
+ sctx->state[4] = SHA384_H4;
+ sctx->state[5] = SHA384_H5;
+ sctx->state[6] = SHA384_H6;
+ sctx->state[7] = SHA384_H7;
+
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA512_DIGEST_SIZE];
+
+ sha512_neon_final(desc, D);
+
+ memcpy(hash, D, SHA384_DIGEST_SIZE);
+ memset(D, 0, SHA512_DIGEST_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = sha512_neon_init,
+ .update = sha512_neon_update,
+ .final = sha512_neon_final,
+ .export = sha512_neon_export,
+ .import = sha512_neon_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "sha512-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_neon_init,
+ .update = sha512_neon_update,
+ .final = sha384_neon_final,
+ .export = sha512_neon_export,
+ .import = sha512_neon_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha512_neon_mod_init(void)
+{
+ if (!cpu_has_neon())
+ return -ENODEV;
+
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_neon_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha512_neon_mod_init);
+module_exit(sha512_neon_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
+
+MODULE_ALIAS("sha512");
+MODULE_ALIAS("sha384");
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index d3db39860b9c..6577b8aeb711 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -24,6 +24,7 @@ generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
generic-y += siginfo.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 60f15e274e6d..2f59f7443396 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -59,6 +59,21 @@
#define smp_wmb() dmb(ishst)
#endif
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ___p1; \
+})
+
#define read_barrier_depends() do { } while(0)
#define smp_read_barrier_depends() do { } while(0)
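
The smp_store_release()/smp_load_acquire() helpers added above implement
release/acquire ordering by pairing a full smp_mb() with a plain access. A
hypothetical producer/consumer pair, shown only to illustrate the intended
usage (not part of the patch):

static int payload;
static int ready;

/* Producer: the release orders the payload store before the flag store. */
static void publish(void)
{
	payload = 42;
	smp_store_release(&ready, 1);
}

/* Consumer: the acquire orders the flag load before the payload load, so a
 * reader that sees ready == 1 is guaranteed to see payload == 42. */
static int consume(void)
{
	if (smp_load_acquire(&ready))
		return payload;
	return -1;
}
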
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 2059f019bef4..455e6637c881 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -16,6 +16,7 @@
#include <asm/shmparam.h>
#include <asm/cachetype.h>
#include <asm/outercache.h>
+#include <asm/rodata.h>
#define CACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/include/asm/crypto/sha1.h
new file mode 100644
index 000000000000..75e6a417416b
--- /dev/null
+++ b/arch/arm/include/asm/crypto/sha1.h
@@ -0,0 +1,10 @@
+#ifndef ASM_ARM_CRYPTO_SHA1_H
+#define ASM_ARM_CRYPTO_SHA1_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+#endif
diff --git a/arch/arm/include/asm/fiq_glue.h b/arch/arm/include/asm/fiq_glue.h
new file mode 100644
index 000000000000..a9e244f9f197
--- /dev/null
+++ b/arch/arm/include/asm/fiq_glue.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __ASM_FIQ_GLUE_H
+#define __ASM_FIQ_GLUE_H
+
+struct fiq_glue_handler {
+ void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp);
+ void (*resume)(struct fiq_glue_handler *h);
+};
+typedef void (*fiq_return_handler_t)(void);
+
+int fiq_glue_register_handler(struct fiq_glue_handler *handler);
+int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return);
+int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return);
+
+#ifdef CONFIG_FIQ_GLUE
+void fiq_glue_resume(void);
+#else
+static inline void fiq_glue_resume(void) {}
+#endif
+
+#endif
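
A hypothetical client of this interface would look roughly as follows; the
names are invented for illustration, and the .fiq callback runs in FIQ mode,
so it must not sleep, take normal spinlocks, or call printk:

static void my_fiq(struct fiq_glue_handler *h, void *regs, void *svc_sp)
{
	/* FIQ context: poke hardware, latch state for later processing. */
}

static void my_fiq_resume(struct fiq_glue_handler *h)
{
	/* Re-arm the FIQ source after the FIQ mode registers are restored. */
}

static struct fiq_glue_handler my_fiq_handler = {
	.fiq	= my_fiq,
	.resume	= my_fiq_resume,
};

static int __init my_fiq_init(void)
{
	return fiq_glue_register_handler(&my_fiq_handler);
}
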
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 3d7351c844aa..fe3ea776dc34 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 7
+#define NR_IPI 8
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 3b2c40b5bfa2..0ca0f5a7c84b 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -66,6 +66,7 @@
#define L2X0_STNDBY_MODE_EN (1 << 0)
/* Registers shifts and masks */
+#define L2X0_CACHE_ID_REV_MASK (0x3f)
#define L2X0_CACHE_ID_PART_MASK (0xf << 6)
#define L2X0_CACHE_ID_PART_L210 (1 << 6)
#define L2X0_CACHE_ID_PART_L310 (3 << 6)
@@ -106,6 +107,8 @@
#define L2X0_WAY_SIZE_SHIFT 3
+#define REV_PL310_R2P0 4
+
#ifndef __ASSEMBLY__
extern void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask);
#if defined(CONFIG_CACHE_L2X0) && defined(CONFIG_OF)
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index 53c15dec7af6..809203a4b71b 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -35,6 +35,9 @@ extern void (*handle_arch_irq)(struct pt_regs *);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
#endif
+void arch_trigger_all_cpu_backtrace(void);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+
#endif
#endif
diff --git a/arch/arm/include/asm/mach/mmc.h b/arch/arm/include/asm/mach/mmc.h
new file mode 100644
index 000000000000..bca864ac945f
--- /dev/null
+++ b/arch/arm/include/asm/mach/mmc.h
@@ -0,0 +1,28 @@
+/*
+ * arch/arm/include/asm/mach/mmc.h
+ */
+#ifndef ASMARM_MACH_MMC_H
+#define ASMARM_MACH_MMC_H
+
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+
+struct embedded_sdio_data {
+ struct sdio_cis cis;
+ struct sdio_cccr cccr;
+ struct sdio_embedded_func *funcs;
+ int num_funcs;
+};
+
+struct mmc_platform_data {
+ unsigned int ocr_mask; /* available voltages */
+ int built_in; /* built-in device flag */
+ int card_present; /* card detect state */
+ u32 (*translate_vdd)(struct device *, unsigned int);
+ unsigned int (*status)(struct device *);
+ struct embedded_sdio_data *embedded_sdio;
+ int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id);
+};
+
+#endif
diff --git a/arch/arm/include/asm/neon.h b/arch/arm/include/asm/neon.h
new file mode 100644
index 000000000000..8f730fe70093
--- /dev/null
+++ b/arch/arm/include/asm/neon.h
@@ -0,0 +1,36 @@
+/*
+ * linux/arch/arm/include/asm/neon.h
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+
+#define cpu_has_neon() (!!(elf_hwcap & HWCAP_NEON))
+
+#ifdef __ARM_NEON__
+
+/*
+ * If you are affected by the BUILD_BUG below, it probably means that you are
+ * using NEON code /and/ calling the kernel_neon_begin() function from the same
+ * compilation unit. To prevent issues that may arise from GCC reordering or
+ * generating(1) NEON instructions outside of these begin/end functions, the
+ * only supported way of using NEON code in the kernel is by isolating it in a
+ * separate compilation unit, and calling it from another unit from inside a
+ * kernel_neon_begin/kernel_neon_end pair.
+ *
+ * (1) Current GCC (4.7) might generate NEON instructions at O3 level if
+ * -mfpu=neon is set.
+ */
+
+#define kernel_neon_begin() \
+ BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")
+
+#else
+void kernel_neon_begin(void);
+#endif
+void kernel_neon_end(void);
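
The comment above implies a specific file layout: the NEON body lives in its
own compilation unit, built with NEON enabled, and ordinary kernel code only
reaches it between kernel_neon_begin() and kernel_neon_end(). A sketch of that
split, with hypothetical file and function names:

/* foo_neon.c: compiled with NEON enabled, contains only the NEON loop. */
void foo_neon_core(u32 *dst, const u32 *src, int n);

/* foo_glue.c: ordinary kernel code, never emits NEON instructions itself. */
#include <asm/neon.h>

static void foo(u32 *dst, const u32 *src, int n)
{
	kernel_neon_begin();
	foo_neon_core(dst, src, n);
	kernel_neon_end();
}
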
diff --git a/arch/arm/include/asm/rodata.h b/arch/arm/include/asm/rodata.h
new file mode 100644
index 000000000000..8c8add87bbc5
--- /dev/null
+++ b/arch/arm/include/asm/rodata.h
@@ -0,0 +1,32 @@
+/*
+ * arch/arm/include/asm/rodata.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASMARM_RODATA_H
+#define _ASMARM_RODATA_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_DEBUG_RODATA
+
+int set_memory_rw(unsigned long virt, int numpages);
+int set_memory_ro(unsigned long virt, int numpages);
+
+void mark_rodata_ro(void);
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
+#endif
+
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 610ccf33f5e7..67a18a5ed9fa 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -82,6 +82,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
extern int register_ipi_completion(struct completion *completion, int cpu);
+extern void smp_send_all_cpu_backtrace(void);
struct smp_operations {
#ifdef CONFIG_SMP
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 73ddd7239b33..ed805f1d3785 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -103,8 +103,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
/* ARM tasks don't change audit architectures on the fly. */
return AUDIT_ARCH_ARM;
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index cbd61977c996..43876245fc57 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
#include <uapi/asm/unistd.h>
-#define __NR_syscalls (380)
+#define __NR_syscalls (384)
#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
#define __ARCH_WANT_STAT64
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index af33b44990ed..17407c92c0da 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,6 +406,12 @@
#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377)
#define __NR_kcmp (__NR_SYSCALL_BASE+378)
#define __NR_finit_module (__NR_SYSCALL_BASE+379)
+/* Reserve for later
+#define __NR_sched_setattr (__NR_SYSCALL_BASE+380)
+#define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
+#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
+*/
+#define __NR_seccomp (__NR_SYSCALL_BASE+383)
/*
* This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index c6ca7e376773..725f844926ea 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,6 +389,11 @@
CALL(sys_process_vm_writev)
CALL(sys_kcmp)
CALL(sys_finit_module)
+/* 380 */ CALL(sys_ni_syscall) /* reserved sys_sched_setattr */
+ CALL(sys_ni_syscall) /* reserved sys_sched_getattr */
+ CALL(sys_ni_syscall) /* reserved sys_renameat2 */
+ CALL(sys_seccomp)
+
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 34e56647dcee..6a740a93f4bb 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -13,6 +13,7 @@
*/
#include <linux/ftrace.h>
+#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
@@ -63,6 +64,20 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
}
#endif
+int ftrace_arch_code_modify_prepare(void)
+{
+ set_kernel_text_rw();
+ set_all_modules_text_rw();
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ set_all_modules_text_ro();
+ set_kernel_text_ro();
+ return 0;
+}
+
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
{
return arm_gen_branch_link(pc, addr);
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 778c2f7024ff..b321c8fbb87d 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -144,6 +144,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
{
+ if (user_mode(regs))
+ return -1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return 0;
@@ -151,6 +153,8 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
{
+ if (user_mode(regs))
+ return -1;
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index ac4c2e5e17e4..978002e5b406 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -32,6 +32,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/cpuidle.h>
#include <linux/leds.h>
+#include <linux/console.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
@@ -57,9 +58,46 @@ static const char *isa_modes[] = {
"ARM" , "Thumb" , "Jazelle", "ThumbEE"
};
+#ifdef CONFIG_SMP
+void arch_trigger_all_cpu_backtrace(void)
+{
+ smp_send_all_cpu_backtrace();
+}
+#else
+void arch_trigger_all_cpu_backtrace(void)
+{
+ dump_stack();
+}
+#endif
+
extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
typedef void (*phys_reset_t)(unsigned long);
+#ifdef CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART
+void arm_machine_flush_console(void)
+{
+ printk("\n");
+ pr_emerg("Restarting %s\n", linux_banner);
+ if (console_trylock()) {
+ console_unlock();
+ return;
+ }
+
+ mdelay(50);
+
+ local_irq_disable();
+ if (!console_trylock())
+ pr_emerg("arm_restart: Console was locked! Busting\n");
+ else
+ pr_emerg("arm_restart: Console was locked!\n");
+ console_unlock();
+}
+#else
+void arm_machine_flush_console(void)
+{
+}
+#endif
+
/*
* A temporary stack to use for CPU reset. This is static so that we
* don't clobber it with the identity mapping. When running with this
@@ -147,6 +185,7 @@ void arch_cpu_idle_prepare(void)
void arch_cpu_idle_enter(void)
{
+ idle_notifier_call_chain(IDLE_START);
ledtrig_cpu(CPU_LED_IDLE_START);
#ifdef CONFIG_PL310_ERRATA_769419
wmb();
@@ -156,6 +195,7 @@ void arch_cpu_idle_enter(void)
void arch_cpu_idle_exit(void)
{
ledtrig_cpu(CPU_LED_IDLE_END);
+ idle_notifier_call_chain(IDLE_END);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -195,6 +235,16 @@ __setup("reboot=", reboot_setup);
*/
void machine_shutdown(void)
{
+#ifdef CONFIG_SMP
+ /*
+ * Disable preemption so we're guaranteed to
+ * run to power off or reboot and prevent
+ * the possibility of switching to another
+ * thread that might wind up blocking on
+ * one of the stopped CPUs.
+ */
+ preempt_disable();
+#endif
disable_nonboot_cpus();
}
@@ -240,6 +290,10 @@ void machine_restart(char *cmd)
{
smp_send_stop();
+ /* Flush the console to make sure all the relevant messages make it
+ * out to the console drivers */
+ arm_machine_flush_console();
+
arm_pm_restart(reboot_mode, cmd);
/* Give a grace period for failure to restart of 1s */
@@ -251,6 +305,77 @@ void machine_restart(char *cmd)
while (1);
}
+/*
+ * dump a block of kernel memory from around the given address
+ */
+static void show_data(unsigned long addr, int nbytes, const char *name)
+{
+ int i, j;
+ int nlines;
+ u32 *p;
+
+ /*
+ * don't attempt to dump non-kernel addresses or
+ * values that are probably just small negative numbers
+ */
+ if (addr < PAGE_OFFSET || addr > -256UL)
+ return;
+
+ printk("\n%s: %#lx:\n", name, addr);
+
+ /*
+ * round address down to a 32 bit boundary
+ * and always dump a multiple of 32 bytes
+ */
+ p = (u32 *)(addr & ~(sizeof(u32) - 1));
+ nbytes += (addr & (sizeof(u32) - 1));
+ nlines = (nbytes + 31) / 32;
+
+
+ for (i = 0; i < nlines; i++) {
+ /*
+ * just display low 16 bits of address to keep
+ * each line of the dump < 80 characters
+ */
+ printk("%04lx ", (unsigned long)p & 0xffff);
+ for (j = 0; j < 8; j++) {
+ u32 data;
+ if (probe_kernel_address(p, data)) {
+ printk(" ********");
+ } else {
+ printk(" %08x", data);
+ }
+ ++p;
+ }
+ printk("\n");
+ }
+}
+
+static void show_extra_register_data(struct pt_regs *regs, int nbytes)
+{
+ mm_segment_t fs;
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC");
+ show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR");
+ show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP");
+ show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP");
+ show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP");
+ show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0");
+ show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1");
+ show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2");
+ show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3");
+ show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4");
+ show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5");
+ show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6");
+ show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7");
+ show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8");
+ show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9");
+ show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10");
+ set_fs(fs);
+}
+
void __show_regs(struct pt_regs *regs)
{
unsigned long flags;
@@ -307,6 +432,8 @@ void __show_regs(struct pt_regs *regs)
printk("Control: %08x%s\n", ctrl, buf);
}
#endif
+
+ show_extra_register_data(regs, 128);
}
void show_regs(struct pt_regs * regs)
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 03deeffd9f6d..394424b25254 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -916,7 +916,7 @@ enum ptrace_syscall_dir {
PTRACE_SYSCALL_EXIT,
};
-static int tracehook_report_syscall(struct pt_regs *regs,
+static void tracehook_report_syscall(struct pt_regs *regs,
enum ptrace_syscall_dir dir)
{
unsigned long ip;
@@ -934,7 +934,6 @@ static int tracehook_report_syscall(struct pt_regs *regs,
current_thread_info()->syscall = -1;
regs->ARM_ip = ip;
- return current_thread_info()->syscall;
}
asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
@@ -946,7 +945,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
return -1;
if (test_thread_flag(TIF_SYSCALL_TRACE))
- scno = tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+ scno = current_thread_info()->syscall;
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, scno);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index d6e3d1ca4ddf..f2724e475b96 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -70,6 +70,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_COMPLETION,
+ IPI_CPU_BACKTRACE,
};
static DECLARE_COMPLETION(cpu_running);
@@ -468,6 +469,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_COMPLETION, "completion interrupts"),
+ S(IPI_CPU_BACKTRACE, "CPU backtrace"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -606,6 +608,58 @@ static void ipi_complete(unsigned int cpu)
complete(per_cpu(cpu_completion, cpu));
}
+static cpumask_t backtrace_mask;
+static DEFINE_RAW_SPINLOCK(backtrace_lock);
+
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
+void smp_send_all_cpu_backtrace(void)
+{
+ unsigned int this_cpu = smp_processor_id();
+ int i;
+
+ if (test_and_set_bit(0, &backtrace_flag))
+ /*
+ * If there is already a trigger_all_cpu_backtrace() in progress
+ * (backtrace_flag == 1), don't output duplicate CPU dump info.
+ */
+ return;
+
+ cpumask_copy(&backtrace_mask, cpu_online_mask);
+ cpu_clear(this_cpu, backtrace_mask);
+
+ pr_info("Backtrace for cpu %d (current):\n", this_cpu);
+ dump_stack();
+
+ pr_info("\nsending IPI to all other CPUs:\n");
+ smp_cross_call(&backtrace_mask, IPI_CPU_BACKTRACE);
+
+ /* Wait for up to 10 seconds for all other CPUs to do the backtrace */
+ for (i = 0; i < 10 * 1000; i++) {
+ if (cpumask_empty(&backtrace_mask))
+ break;
+ mdelay(1);
+ }
+
+ clear_bit(0, &backtrace_flag);
+ smp_mb__after_clear_bit();
+}
+
+/*
+ * ipi_cpu_backtrace - handle IPI from smp_send_all_cpu_backtrace()
+ */
+static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs)
+{
+ if (cpu_isset(cpu, backtrace_mask)) {
+ raw_spin_lock(&backtrace_lock);
+ pr_warning("IPI backtrace for cpu %d\n", cpu);
+ show_regs(regs);
+ raw_spin_unlock(&backtrace_lock);
+ cpu_clear(cpu, backtrace_mask);
+ }
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -663,6 +717,10 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_CPU_BACKTRACE:
+ ipi_cpu_backtrace(cpu, regs);
+ break;
+
default:
printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
cpu, ipinr);
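
For context, and not part of the patch: generic code reaches this IPI path
through trigger_all_cpu_backtrace() in <linux/nmi.h>, which expands to the
arch_trigger_all_cpu_backtrace() hook defined in arch/arm/kernel/process.c
above (the sysrq 'l' handler is one such caller). A hypothetical debug hook:

#include <linux/kernel.h>
#include <linux/nmi.h>

static void report_stall(void)
{
	pr_err("stall detected, dumping all CPUs\n");
	trigger_all_cpu_backtrace();
}
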
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 224a9cc09877..2d861a210ac5 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -7,6 +7,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \
mmap.o pgd.o mmu.o
+obj-$(CONFIG_DEBUG_RODATA) += rodata.o
ifneq ($(CONFIG_MMU),y)
obj-y += nommu.o
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c465faca51b0..90a130f98acf 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -33,6 +33,9 @@ static void __iomem *l2x0_base;
static DEFINE_RAW_SPINLOCK(l2x0_lock);
static u32 l2x0_way_mask; /* Bitmask of active ways */
static u32 l2x0_size;
+static u32 l2x0_cache_id;
+static unsigned int l2x0_sets;
+static unsigned int l2x0_ways;
static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
/* Aurora don't have the cache ID register available, so we have to
@@ -49,6 +52,13 @@ struct l2x0_of_data {
static bool of_init = false;
+static inline bool is_pl310_rev(int rev)
+{
+ return (l2x0_cache_id &
+ (L2X0_CACHE_ID_PART_MASK | L2X0_CACHE_ID_REV_MASK)) ==
+ (L2X0_CACHE_ID_PART_L310 | rev);
+}
+
static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
{
/* wait for cache operation by line or way to complete */
@@ -137,6 +147,23 @@ static void l2x0_cache_sync(void)
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
+#ifdef CONFIG_PL310_ERRATA_727915
+static void l2x0_for_each_set_way(void __iomem *reg)
+{
+ int set;
+ int way;
+ unsigned long flags;
+
+ for (way = 0; way < l2x0_ways; way++) {
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ for (set = 0; set < l2x0_sets; set++)
+ writel_relaxed((way << 28) | (set << 5), reg);
+ cache_sync();
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+ }
+}
+#endif
+
static void __l2x0_flush_all(void)
{
debug_writel(0x03);
@@ -150,6 +177,13 @@ static void l2x0_flush_all(void)
{
unsigned long flags;
+#ifdef CONFIG_PL310_ERRATA_727915
+ if (is_pl310_rev(REV_PL310_R2P0)) {
+ l2x0_for_each_set_way(l2x0_base + L2X0_CLEAN_INV_LINE_IDX);
+ return;
+ }
+#endif
+
/* clean all ways */
raw_spin_lock_irqsave(&l2x0_lock, flags);
__l2x0_flush_all();
@@ -160,11 +194,20 @@ static void l2x0_clean_all(void)
{
unsigned long flags;
+#ifdef CONFIG_PL310_ERRATA_727915
+ if (is_pl310_rev(REV_PL310_R2P0)) {
+ l2x0_for_each_set_way(l2x0_base + L2X0_CLEAN_LINE_IDX);
+ return;
+ }
+#endif
+
/* clean all ways */
raw_spin_lock_irqsave(&l2x0_lock, flags);
+ debug_writel(0x03);
writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
cache_sync();
+ debug_writel(0x00);
raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
@@ -323,65 +366,64 @@ static void l2x0_unlock(u32 cache_id)
void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
{
u32 aux;
- u32 cache_id;
u32 way_size = 0;
- int ways;
int way_size_shift = L2X0_WAY_SIZE_SHIFT;
const char *type;
l2x0_base = base;
if (cache_id_part_number_from_dt)
- cache_id = cache_id_part_number_from_dt;
+ l2x0_cache_id = cache_id_part_number_from_dt;
else
- cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+ l2x0_cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
aux &= aux_mask;
aux |= aux_val;
/* Determine the number of ways */
- switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+ switch (l2x0_cache_id & L2X0_CACHE_ID_PART_MASK) {
case L2X0_CACHE_ID_PART_L310:
if (aux & (1 << 16))
- ways = 16;
+ l2x0_ways = 16;
else
- ways = 8;
+ l2x0_ways = 8;
type = "L310";
#ifdef CONFIG_PL310_ERRATA_753970
/* Unmapped register. */
sync_reg_offset = L2X0_DUMMY_REG;
#endif
- if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0)
+ if ((l2x0_cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0)
outer_cache.set_debug = pl310_set_debug;
break;
case L2X0_CACHE_ID_PART_L210:
- ways = (aux >> 13) & 0xf;
+ l2x0_ways = (aux >> 13) & 0xf;
type = "L210";
break;
case AURORA_CACHE_ID:
sync_reg_offset = AURORA_SYNC_REG;
- ways = (aux >> 13) & 0xf;
- ways = 2 << ((ways + 1) >> 2);
+ l2x0_ways = (aux >> 13) & 0xf;
+ l2x0_ways = 2 << ((l2x0_ways + 1) >> 2);
way_size_shift = AURORA_WAY_SIZE_SHIFT;
type = "Aurora";
break;
default:
/* Assume unknown chips have 8 ways */
- ways = 8;
+ l2x0_ways = 8;
type = "L2x0 series";
break;
}
- l2x0_way_mask = (1 << ways) - 1;
+ l2x0_way_mask = (1 << l2x0_ways) - 1;
/*
* L2 cache Size = Way size * Number of ways
*/
way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17;
- way_size = 1 << (way_size + way_size_shift);
+ way_size = SZ_1K << (way_size + way_size_shift);
- l2x0_size = ways * way_size * SZ_1K;
+ l2x0_size = l2x0_ways * way_size;
+ l2x0_sets = way_size / CACHE_LINE_SIZE;
/*
* Check if l2x0 controller is already enabled.
@@ -390,7 +432,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
*/
if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
/* Make sure that I&D is not locked down when starting */
- l2x0_unlock(cache_id);
+ l2x0_unlock(l2x0_cache_id);
/* l2x0 controller is disabled */
writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
@@ -419,7 +461,7 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
printk(KERN_INFO "%s cache controller enabled\n", type);
printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
- ways, cache_id, aux, l2x0_size);
+ l2x0_ways, l2x0_cache_id, aux, l2x0_size);
}
#ifdef CONFIG_OF
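
For reference, the value that l2x0_for_each_set_way() writes above packs the
way number into the top bits of the index register and the set index starting
at bit 5, with bits [4:0] left as the 32-byte line offset. A worked value,
assuming that layout: way 3, set 10 encodes as (3 << 28) | (10 << 5) =
0x30000140.
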
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index d8fd4d4bd3d4..7a3d3d8d98d7 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -270,6 +270,11 @@ v6_dma_clean_range:
* - end - virtual end address of region
*/
ENTRY(v6_dma_flush_range)
+#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT
+ sub r2, r1, r0
+ cmp r2, #CONFIG_CACHE_FLUSH_RANGE_LIMIT
+ bhi v6_dma_flush_dcache_all
+#endif
#ifdef CONFIG_DMA_CACHE_RWFO
ldrb r2, [r0] @ read for ownership
strb r2, [r0] @ write for ownership
@@ -292,6 +297,18 @@ ENTRY(v6_dma_flush_range)
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
+#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT
+v6_dma_flush_dcache_all:
+ mov r0, #0
+#ifdef HARVARD_CACHE
+ mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate
+#else
+ mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate
+#endif
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+#endif
+
/*
* dma_map_area(start, size, dir)
* - start - kernel virtual start address
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 54fcddafec15..9820ad4b80c0 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -274,10 +274,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
local_irq_enable();
/*
- * If we're in an interrupt or have no user
+ * If we're in an interrupt, or have no irqs, or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || irqs_disabled() || !mm)
goto no_context;
if (user_mode(regs))
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4c7d5cddef35..a4e5cd5c3067 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -605,11 +605,25 @@ static void __init *early_alloc(unsigned long sz)
return early_alloc_aligned(sz, sz);
}
-static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
+static pte_t * __init early_pte_alloc(pmd_t *pmd)
+{
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
+ return early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
+ return pmd_page_vaddr(*pmd);
+}
+
+static void __init early_pte_install(pmd_t *pmd, pte_t *pte, unsigned long prot)
+{
+ __pmd_populate(pmd, __pa(pte), prot);
+ BUG_ON(pmd_bad(*pmd));
+}
+
+static pte_t * __init early_pte_alloc_and_install(pmd_t *pmd,
+ unsigned long addr, unsigned long prot)
{
if (pmd_none(*pmd)) {
- pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
- __pmd_populate(pmd, __pa(pte), prot);
+ pte_t *pte = early_pte_alloc(pmd);
+ early_pte_install(pmd, pte, prot);
}
BUG_ON(pmd_bad(*pmd));
return pte_offset_kernel(pmd, addr);
@@ -619,11 +633,17 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
const struct mem_type *type)
{
- pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
+ pte_t *start_pte = early_pte_alloc(pmd);
+ pte_t *pte = start_pte + pte_index(addr);
+
+ /* If replacing a section mapping, the whole section must be replaced */
+ BUG_ON(!pmd_none(*pmd) && pmd_bad(*pmd) && ((addr | end) & ~PMD_MASK));
+
do {
set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
+ early_pte_install(pmd, start_pte, type->prot_l1);
}
static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
@@ -655,7 +675,8 @@ static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
unsigned long end, phys_addr_t phys,
- const struct mem_type *type)
+ const struct mem_type *type,
+ bool force_pages)
{
pmd_t *pmd = pmd_offset(pud, addr);
unsigned long next;
@@ -672,7 +693,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
* aligned to a section boundary.
*/
if (type->prot_sect &&
- ((addr | next | phys) & ~SECTION_MASK) == 0) {
+ ((addr | next | phys) & ~SECTION_MASK) == 0 &&
+ !force_pages) {
__map_init_section(pmd, addr, next, phys, type);
} else {
alloc_init_pte(pmd, addr, next,
@@ -686,14 +708,15 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
unsigned long end, phys_addr_t phys,
- const struct mem_type *type)
+ const struct mem_type *type,
+ bool force_pages)
{
pud_t *pud = pud_offset(pgd, addr);
unsigned long next;
do {
next = pud_addr_end(addr, end);
- alloc_init_pmd(pud, addr, next, phys, type);
+ alloc_init_pmd(pud, addr, next, phys, type, force_pages);
phys += next - addr;
} while (pud++, addr = next, addr != end);
}
@@ -767,7 +790,7 @@ static void __init create_36bit_mapping(struct map_desc *md,
* offsets, and we take full advantage of sections and
* supersections.
*/
-static void __init create_mapping(struct map_desc *md)
+static void __init create_mapping(struct map_desc *md, bool force_pages)
{
unsigned long addr, length, end;
phys_addr_t phys;
@@ -817,7 +840,7 @@ static void __init create_mapping(struct map_desc *md)
do {
unsigned long next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys, type);
+ alloc_init_pud(pgd, addr, next, phys, type, force_pages);
phys += next - addr;
addr = next;
@@ -839,7 +862,7 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm));
for (md = io_desc; nr; md++, nr--) {
- create_mapping(md);
+ create_mapping(md, false);
vm = &svm->vm;
vm->addr = (void *)(md->virtual & PAGE_MASK);
@@ -960,7 +983,7 @@ void __init debug_ll_io_init(void)
map.virtual &= PAGE_MASK;
map.length = PAGE_SIZE;
map.type = MT_DEVICE;
- create_mapping(&map);
+ create_mapping(&map, false);
}
#endif
@@ -1005,6 +1028,28 @@ void __init sanity_check_meminfo(void)
struct membank *bank = &meminfo.bank[j];
*bank = meminfo.bank[i];
+#ifdef CONFIG_SPARSEMEM
+ if (pfn_to_section_nr(bank_pfn_start(bank)) !=
+ pfn_to_section_nr(bank_pfn_end(bank) - 1)) {
+ phys_addr_t sz;
+ unsigned long start_pfn = bank_pfn_start(bank);
+ unsigned long end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
+ sz = ((phys_addr_t)(end_pfn - start_pfn) << PAGE_SHIFT);
+
+ if (meminfo.nr_banks >= NR_BANKS) {
+ pr_crit("NR_BANKS too low, ignoring %lld bytes of memory\n",
+ (unsigned long long)(bank->size - sz));
+ } else {
+ memmove(bank + 1, bank,
+ (meminfo.nr_banks - i) * sizeof(*bank));
+ meminfo.nr_banks++;
+ bank[1].size -= sz;
+ bank[1].start = __pfn_to_phys(end_pfn);
+ }
+ bank->size = sz;
+ }
+#endif
+
if (bank->start > ULONG_MAX)
highmem = 1;
@@ -1202,7 +1247,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
map.virtual = MODULES_VADDR;
map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
map.type = MT_ROM;
- create_mapping(&map);
+ create_mapping(&map, false);
#endif
/*
@@ -1213,14 +1258,14 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
map.virtual = FLUSH_BASE;
map.length = SZ_1M;
map.type = MT_CACHECLEAN;
- create_mapping(&map);
+ create_mapping(&map, false);
#endif
#ifdef FLUSH_BASE_MINICACHE
map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
map.virtual = FLUSH_BASE_MINICACHE;
map.length = SZ_1M;
map.type = MT_MINICLEAN;
- create_mapping(&map);
+ create_mapping(&map, false);
#endif
/*
@@ -1236,13 +1281,13 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
#else
map.type = MT_LOW_VECTORS;
#endif
- create_mapping(&map);
+ create_mapping(&map, false);
if (!vectors_high()) {
map.virtual = 0;
map.length = PAGE_SIZE * 2;
map.type = MT_LOW_VECTORS;
- create_mapping(&map);
+ create_mapping(&map, false);
}
/* Now create a kernel read-only mapping */
@@ -1250,7 +1295,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
map.virtual = 0xffff0000 + PAGE_SIZE;
map.length = PAGE_SIZE;
map.type = MT_LOW_VECTORS;
- create_mapping(&map);
+ create_mapping(&map, false);
/*
* Ask the machine support to map in the statically mapped devices.
@@ -1275,20 +1320,23 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
- pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
+ pkmap_page_table = early_pte_alloc_and_install(pmd_off_k(PKMAP_BASE),
PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif
}
+
static void __init map_lowmem(void)
{
struct memblock_region *reg;
+ phys_addr_t start;
+ phys_addr_t end;
+ struct map_desc map;
/* Map all the lowmem memory banks. */
for_each_memblock(memory, reg) {
- phys_addr_t start = reg->base;
- phys_addr_t end = start + reg->size;
- struct map_desc map;
+ start = reg->base;
+ end = start + reg->size;
if (end > arm_lowmem_limit)
end = arm_lowmem_limit;
@@ -1300,8 +1348,20 @@ static void __init map_lowmem(void)
map.length = end - start;
map.type = MT_MEMORY;
- create_mapping(&map);
+ create_mapping(&map, false);
}
+
+#ifdef CONFIG_DEBUG_RODATA
+ start = __pa(_stext) & PMD_MASK;
+ end = ALIGN(__pa(__end_rodata), PMD_SIZE);
+
+ map.pfn = __phys_to_pfn(start);
+ map.virtual = __phys_to_virt(start);
+ map.length = end - start;
+ map.type = MT_MEMORY;
+
+ create_mapping(&map, true);
+#endif
}
/*
diff --git a/arch/arm/mm/rodata.c b/arch/arm/mm/rodata.c
new file mode 100644
index 000000000000..9a8eb841c428
--- /dev/null
+++ b/arch/arm/mm/rodata.c
@@ -0,0 +1,159 @@
+/*
+ * linux/arch/arm/mm/rodata.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * Based on x86 implementation in arch/x86/mm/init_32.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+#include <asm/rodata.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+
+#include "mm.h"
+
+static int kernel_set_to_readonly __read_mostly;
+
+#ifdef CONFIG_DEBUG_RODATA_TEST
+static const int rodata_test_data = 0xC3;
+
+static noinline void rodata_test(void)
+{
+ int result;
+
+ pr_info("%s: attempting to write to read-only section:\n", __func__);
+
+ if (*(volatile int *)&rodata_test_data != 0xC3) {
+ pr_err("read only data changed before test\n");
+ return;
+ }
+
+ /*
+ * Attempt to write to rodata_test_data, trapping the expected
+ * data abort. If the trap executed, result will be 1. If it didn't,
+ * result will be 0xFF.
+ */
+ asm volatile(
+ "0: str %[zero], [%[rodata_test_data]]\n"
+ " mov %[result], #0xFF\n"
+ " b 2f\n"
+ "1: mov %[result], #1\n"
+ "2:\n"
+
+ /* Exception fixup - if store at label 0 faults, jumps to 1 */
+ ".pushsection __ex_table, \"a\"\n"
+ " .long 0b, 1b\n"
+ ".popsection\n"
+
+ : [result] "=r" (result)
+ : [rodata_test_data] "r" (&rodata_test_data), [zero] "r" (0)
+ : "memory"
+ );
+
+ if (result == 1)
+ pr_info("write to read-only section trapped, success\n");
+ else
+ pr_err("write to read-only section NOT trapped, test failed\n");
+
+ if (*(volatile int *)&rodata_test_data != 0xC3)
+ pr_err("read only data changed during write\n");
+}
+#else
+static inline void rodata_test(void) { }
+#endif
+
+static int set_page_attributes(unsigned long virt, int numpages,
+ pte_t (*f)(pte_t))
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long start = virt;
+ unsigned long end = virt + (numpages << PAGE_SHIFT);
+ unsigned long pmd_end;
+
+ while (virt < end) {
+ pmd = pmd_off_k(virt);
+ pmd_end = min(ALIGN(virt + 1, PMD_SIZE), end);
+
+ if ((pmd_val(*pmd) & PMD_TYPE_MASK) != PMD_TYPE_TABLE) {
+ pr_err("%s: pmd %p=%08lx for %08lx not page table\n",
+ __func__, pmd, pmd_val(*pmd), virt);
+ virt = pmd_end;
+ continue;
+ }
+
+ while (virt < pmd_end) {
+ pte = pte_offset_kernel(pmd, virt);
+ set_pte_ext(pte, f(*pte), 0);
+ virt += PAGE_SIZE;
+ }
+ }
+
+ flush_tlb_kernel_range(start, end);
+
+ return 0;
+}
+
+int set_memory_ro(unsigned long virt, int numpages)
+{
+ return set_page_attributes(virt, numpages, pte_wrprotect);
+}
+EXPORT_SYMBOL(set_memory_ro);
+
+int set_memory_rw(unsigned long virt, int numpages)
+{
+ return set_page_attributes(virt, numpages, pte_mkwrite);
+}
+EXPORT_SYMBOL(set_memory_rw);
+
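A hedged usage sketch of the two helpers exported above: a caller that patches kernel text would drop the write protection around the update and restore it afterwards. The function name, its arguments and the use of flush_icache_range() here are illustrative only, not part of this patch.

	/* Illustrative only: make one page writable, patch a word, re-protect. */
	static void patch_text_sketch(unsigned long addr, u32 insn)
	{
		set_memory_rw(addr & PAGE_MASK, 1);
		*(u32 *)addr = insn;
		flush_icache_range(addr, addr + sizeof(insn));
		set_memory_ro(addr & PAGE_MASK, 1);
	}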
+void set_kernel_text_rw(void)
+{
+ unsigned long start = PAGE_ALIGN((unsigned long)_text);
+ unsigned long size = PAGE_ALIGN((unsigned long)__end_rodata) - start;
+
+ if (!kernel_set_to_readonly)
+ return;
+
+ pr_debug("Set kernel text: %lx - %lx to read-write\n",
+ start, start + size);
+
+ set_memory_rw(start, size >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+ unsigned long start = PAGE_ALIGN((unsigned long)_text);
+ unsigned long size = PAGE_ALIGN((unsigned long)__end_rodata) - start;
+
+ if (!kernel_set_to_readonly)
+ return;
+
+ pr_info_once("Write protecting the kernel text section %lx - %lx\n",
+ start, start + size);
+
+ pr_debug("Set kernel text: %lx - %lx to read only\n",
+ start, start + size);
+
+ set_memory_ro(start, size >> PAGE_SHIFT);
+}
+
+void mark_rodata_ro(void)
+{
+ kernel_set_to_readonly = 1;
+
+ set_kernel_text_ro();
+
+ rodata_test();
+}
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 5dfbb0b8e7f4..452fb3ad68aa 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/user.h>
+#include <linux/export.h>
#include <asm/cp15.h>
#include <asm/cputype.h>
@@ -648,6 +649,52 @@ static int vfp_hotplug(struct notifier_block *b, unsigned long action,
return NOTIFY_OK;
}
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Kernel-side NEON support functions
+ */
+void kernel_neon_begin(void)
+{
+ struct thread_info *thread = current_thread_info();
+ unsigned int cpu;
+ u32 fpexc;
+
+ /*
+ * Kernel mode NEON is only allowed outside of interrupt context
+ * with preemption disabled. This will make sure that the kernel
+ * mode NEON register contents never need to be preserved.
+ */
+ BUG_ON(in_interrupt());
+ cpu = get_cpu();
+
+ fpexc = fmrx(FPEXC) | FPEXC_EN;
+ fmxr(FPEXC, fpexc);
+
+ /*
+ * Save the userland NEON/VFP state. Under UP,
+ * the owner could be a task other than 'current'
+ */
+ if (vfp_state_in_hw(cpu, thread))
+ vfp_save_state(&thread->vfpstate, fpexc);
+#ifndef CONFIG_SMP
+ else if (vfp_current_hw_state[cpu] != NULL)
+ vfp_save_state(vfp_current_hw_state[cpu], fpexc);
+#endif
+ vfp_current_hw_state[cpu] = NULL;
+}
+EXPORT_SYMBOL(kernel_neon_begin);
+
+void kernel_neon_end(void)
+{
+ /* Disable the NEON/VFP unit. */
+ fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
+ put_cpu();
+}
+EXPORT_SYMBOL(kernel_neon_end);
+
+#endif /* CONFIG_KERNEL_MODE_NEON */
+
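A hedged usage sketch for the interface added above: a caller brackets its NEON code with kernel_neon_begin()/kernel_neon_end(), stays out of interrupt context, and keeps the region short since preemption is disabled in between. The function below is an illustrative name, not part of this patch.

	#include <asm/neon.h>

	/* Illustrative only: run NEON-accelerated work from process context. */
	static void do_neon_work_sketch(u8 *dst, const u8 *src, int len)
	{
		kernel_neon_begin();
		/* ... NEON instructions/intrinsics operating on dst and src ... */
		kernel_neon_end();
	}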
/*
* VFP support code initialisation.
*/
@@ -731,4 +778,4 @@ static int __init vfp_init(void)
return 0;
}
-late_initcall(vfp_init);
+core_initcall(vfp_init);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9742106cb51c..58ed097828b6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -15,6 +15,7 @@ config ARM64
select ARM_GIC
select ARM_GIC_V3
select BUILDTIME_EXTABLE_SORT
+ select AUDIT_ARCH_COMPAT_GENERIC
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if (SUSPEND || CPU_IDLE)
@@ -34,6 +35,8 @@ config ARM64
select HARDIRQS_SW_RESEND
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_C_RECORDMCOUNT
select HAVE_CC_STACKPROTECTOR
@@ -336,6 +339,31 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+config SWP_EMULATE
+ bool "Emulate SWP/SWPB instructions"
+ help
+ ARMv6 architecture deprecates use of the SWP/SWPB instructions. ARMv8
+ obsoletes the use of SWP/SWPB instructions. ARMv7 multiprocessing
+ extensions introduce the ability to disable these instructions,
+ triggering an undefined instruction exception when executed. Say Y
+ here to enable software emulation of these instructions for userspace
+ (not kernel) using LDREX/STREX. Also creates /proc/cpu/swp_emulation
+ for statistics.
+
+ In some older versions of glibc [<=2.8] SWP is used during futex
+ trylock() operations with the assumption that the code will not
+ be preempted. This invalid assumption may be more likely to fail
+ with SWP emulation enabled, leading to deadlock of the user
+ application.
+
+ NOTE: when accessing uncached shared regions, LDREX/STREX rely
+ on an external transaction monitoring block called a global
+ monitor to maintain update atomicity. If your system does not
+ implement a global monitor, this option can cause programs that
+ perform SWP operations to uncached memory to deadlock.
+
+ If unsure, say Y.
+
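A hedged sketch of the emulation idea described in the help text above (this is not the kernel's actual trap handler): the faulting SWP is replaced by an LDREX/STREX retry loop. The function and its AArch32 inline assembly are illustrative only.

	/* Illustrative only: atomic swap implemented with exclusives. */
	static unsigned long swp_emulate_sketch(unsigned long newval,
						unsigned long *addr)
	{
		unsigned long oldval;
		unsigned int failed;

		do {
			asm volatile(
			"	ldrex	%0, [%2]\n"	/* read the old value exclusively */
			"	strex	%1, %3, [%2]\n"	/* try to store the new value     */
			: "=&r" (oldval), "=&r" (failed)
			: "r" (addr), "r" (newval)
			: "memory");
		} while (failed);	/* retry if the exclusive store was beaten */

		return oldval;
	}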
source kernel/Kconfig.preempt
config HZ
@@ -378,6 +406,27 @@ config ARCH_WANT_HUGE_PMD_SHARE
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y
+config ARMV7_COMPAT
+ bool "Kernel support for ARMv7 applications"
+ depends on COMPAT
+ select SWP_EMULATE
+ help
+ This option enables features that allow applications that ran on an ARMv7 or older
+ processor to continue functioning.
+
+ If you want to execute ARMv7 applications, say Y
+
+config ARMV7_COMPAT_CPUINFO
+ bool "Report backwards compatible cpu features in /proc/cpuinfo"
+ depends on ARMV7_COMPAT
+ default y
+ help
+ This option makes /proc/cpuinfo list CPU features that an ARMv7 or
+ earlier kernel would report but that are no longer optional on an ARMv8 or later
+ processor.
+
+ If you want to execute ARMv7 applications, say Y
+
source "mm/Kconfig"
config FORCE_MAX_ZONEORDER
@@ -385,6 +434,19 @@ config FORCE_MAX_ZONEORDER
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
+config SECCOMP
+ bool "Enable seccomp to safely compute untrusted bytecode"
+ ---help---
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
endmenu
menu "Boot options"
@@ -397,6 +459,23 @@ config CMDLINE
entering them here. As a minimum, you should specify the
root device (e.g. root=/dev/nfs).
+choice
+ prompt "Kernel command line type" if CMDLINE != ""
+ default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+ bool "Use bootloader kernel arguments if available"
+ help
+ Uses the command-line options passed by the boot loader. If
+ the boot loader doesn't provide any, the default kernel command
+ string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+ bool "Extend bootloader kernel arguments"
+ help
+ The command-line arguments provided by the boot loader will be
+ appended to the default kernel command string.
+
config CMDLINE_FORCE
bool "Always use the default kernel command string"
help
@@ -404,6 +483,22 @@ config CMDLINE_FORCE
loader passes other arguments to the kernel.
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
+endchoice
+
+config BUILD_ARM64_APPENDED_DTB_IMAGE
+ bool "Build a concatenated Image.gz/dtb by default"
+ depends on OF
+ help
+ Enabling this option will cause a concatenated Image.gz and list of
+ DTBs to be built by default (instead of a standalone Image.gz).
+ The image will be built in arch/arm64/boot/Image.gz-dtb
+
+config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES
+ string "Default dtb names"
+ depends on BUILD_ARM64_APPENDED_DTB_IMAGE
+ help
+ Space-separated list of names of dtbs to append when
+ building a concatenated Image.gz-dtb.
config EFI
bool "UEFI runtime support"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 3e7882c4e034..9e64836cb256 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -20,6 +20,7 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
KBUILD_DEFCONFIG := defconfig
KBUILD_CFLAGS += -mgeneral-regs-only
+KBUILD_CFLAGS += -fno-pic
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
AS += -EB
@@ -53,7 +54,12 @@ libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)
# Default target when executing plain make
+ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y)
+KBUILD_IMAGE := Image.gz-dtb
+else
KBUILD_IMAGE := Image.gz
+endif
+
KBUILD_DTBS := dtbs
all: $(KBUILD_IMAGE) $(KBUILD_DTBS)
@@ -72,6 +78,9 @@ zinstall install: vmlinux
dtbs: scripts
$(Q)$(MAKE) $(build)=$(boot)/dts dtbs
+Image.gz-dtb: vmlinux scripts dtbs
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore
index 8dab0bb6ae66..eb3551131b1e 100644
--- a/arch/arm64/boot/.gitignore
+++ b/arch/arm64/boot/.gitignore
@@ -1,2 +1,3 @@
Image
Image.gz
+Image.gz-dtb
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index 5a0e3ab854a5..df519849fa00 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -14,14 +14,27 @@
# Based on the ia64 boot/Makefile.
#
+include $(srctree)/arch/arm64/boot/dts/Makefile
+
targets := Image Image.gz
+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST))
+
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
$(obj)/Image.gz: $(obj)/Image FORCE
$(call if_changed,gzip)
+$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE
+ $(call if_changed,cat)
+
install: $(obj)/Image
$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
$(obj)/Image System.map "$(INSTALL_PATH)"
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index 82db93061fcc..1304cd1e4fdd 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,12 +1,19 @@
-dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \
- fvp-base-gicv2-psci.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += fvp-base-gicv2-psci.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += fvp-foundation-gicv2-psci.dtb
dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb
dtb-$(CONFIG_ARCH_VEXPRESS) += juno-r1.dtb
dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
targets += dtbs
-targets += $(dtb-y)
-dtbs: $(addprefix $(obj)/, $(dtb-y))
+DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES))
+ifneq ($(DTB_NAMES),)
+DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES))
+else
+DTB_LIST := $(dtb-y)
+endif
+targets += $(DTB_LIST)
-clean-files := *.dtb
+dtbs: $(addprefix $(obj)/, $(DTB_LIST))
+
+clean-files := dts/*.dtb *.dtb
diff --git a/arch/arm64/boot/dts/clcd-panels.dtsi b/arch/arm64/boot/dts/clcd-panels.dtsi
deleted file mode 100644
index 0b0ff6ead4b2..000000000000
--- a/arch/arm64/boot/dts/clcd-panels.dtsi
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * ARM Ltd. Versatile Express
- *
- */
-
-/ {
- panels {
- panel@0 {
- compatible = "panel";
- mode = "VGA";
- refresh = <60>;
- xres = <640>;
- yres = <480>;
- pixclock = <39721>;
- left_margin = <40>;
- right_margin = <24>;
- upper_margin = <32>;
- lower_margin = <11>;
- hsync_len = <96>;
- vsync_len = <2>;
- sync = <0>;
- vmode = "FB_VMODE_NONINTERLACED";
-
- tim2 = "TIM2_BCD", "TIM2_IPC";
- cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
- caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
- bpp = <16>;
- };
-
- panel@1 {
- compatible = "panel";
- mode = "XVGA";
- refresh = <60>;
- xres = <1024>;
- yres = <768>;
- pixclock = <15748>;
- left_margin = <152>;
- right_margin = <48>;
- upper_margin = <23>;
- lower_margin = <3>;
- hsync_len = <104>;
- vsync_len = <4>;
- sync = <0>;
- vmode = "FB_VMODE_NONINTERLACED";
-
- tim2 = "TIM2_BCD", "TIM2_IPC";
- cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
- caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
- bpp = <16>;
- };
- };
-};
diff --git a/arch/arm64/boot/dts/foundation-v8.dts b/arch/arm64/boot/dts/foundation-v8.dts
deleted file mode 100644
index 519c4b2c0687..000000000000
--- a/arch/arm64/boot/dts/foundation-v8.dts
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * ARM Ltd.
- *
- * ARMv8 Foundation model DTS
- */
-
-/dts-v1/;
-
-/memreserve/ 0x80000000 0x00010000;
-
-/ {
- model = "Foundation-v8A";
- compatible = "arm,foundation-aarch64", "arm,vexpress";
- interrupt-parent = <&gic>;
- #address-cells = <2>;
- #size-cells = <2>;
-
- chosen { };
-
- aliases {
- serial0 = &v2m_serial0;
- serial1 = &v2m_serial1;
- serial2 = &v2m_serial2;
- serial3 = &v2m_serial3;
- };
-
- cpus {
- #address-cells = <2>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x0>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
- };
- cpu@1 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x1>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
- };
- cpu@2 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x2>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
- };
- cpu@3 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x3>;
- enable-method = "spin-table";
- cpu-release-addr = <0x0 0x8000fff8>;
- };
- };
-
- memory@80000000 {
- device_type = "memory";
- reg = <0x00000000 0x80000000 0 0x80000000>,
- <0x00000008 0x80000000 0 0x80000000>;
- };
-
- gic: interrupt-controller@2c001000 {
- compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
- #interrupt-cells = <3>;
- #address-cells = <0>;
- interrupt-controller;
- reg = <0x0 0x2c001000 0 0x1000>,
- <0x0 0x2c002000 0 0x1000>,
- <0x0 0x2c004000 0 0x2000>,
- <0x0 0x2c006000 0 0x2000>;
- interrupts = <1 9 0xf04>;
- };
-
- timer {
- compatible = "arm,armv8-timer";
- interrupts = <1 13 0xff01>,
- <1 14 0xff01>,
- <1 11 0xff01>,
- <1 10 0xff01>;
- clock-frequency = <100000000>;
- };
-
- pmu {
- compatible = "arm,armv8-pmuv3";
- interrupts = <0 60 4>,
- <0 61 4>,
- <0 62 4>,
- <0 63 4>;
- };
-
- smb {
- compatible = "arm,vexpress,v2m-p1", "simple-bus";
- arm,v2m-memory-map = "rs1";
- #address-cells = <2>; /* SMB chipselect number and offset */
- #size-cells = <1>;
-
- ranges = <0 0 0 0x08000000 0x04000000>,
- <1 0 0 0x14000000 0x04000000>,
- <2 0 0 0x18000000 0x04000000>,
- <3 0 0 0x1c000000 0x04000000>,
- <4 0 0 0x0c000000 0x04000000>,
- <5 0 0 0x10000000 0x04000000>;
-
- #interrupt-cells = <1>;
- interrupt-map-mask = <0 0 63>;
- interrupt-map = <0 0 0 &gic 0 0 4>,
- <0 0 1 &gic 0 1 4>,
- <0 0 2 &gic 0 2 4>,
- <0 0 3 &gic 0 3 4>,
- <0 0 4 &gic 0 4 4>,
- <0 0 5 &gic 0 5 4>,
- <0 0 6 &gic 0 6 4>,
- <0 0 7 &gic 0 7 4>,
- <0 0 8 &gic 0 8 4>,
- <0 0 9 &gic 0 9 4>,
- <0 0 10 &gic 0 10 4>,
- <0 0 11 &gic 0 11 4>,
- <0 0 12 &gic 0 12 4>,
- <0 0 13 &gic 0 13 4>,
- <0 0 14 &gic 0 14 4>,
- <0 0 15 &gic 0 15 4>,
- <0 0 16 &gic 0 16 4>,
- <0 0 17 &gic 0 17 4>,
- <0 0 18 &gic 0 18 4>,
- <0 0 19 &gic 0 19 4>,
- <0 0 20 &gic 0 20 4>,
- <0 0 21 &gic 0 21 4>,
- <0 0 22 &gic 0 22 4>,
- <0 0 23 &gic 0 23 4>,
- <0 0 24 &gic 0 24 4>,
- <0 0 25 &gic 0 25 4>,
- <0 0 26 &gic 0 26 4>,
- <0 0 27 &gic 0 27 4>,
- <0 0 28 &gic 0 28 4>,
- <0 0 29 &gic 0 29 4>,
- <0 0 30 &gic 0 30 4>,
- <0 0 31 &gic 0 31 4>,
- <0 0 32 &gic 0 32 4>,
- <0 0 33 &gic 0 33 4>,
- <0 0 34 &gic 0 34 4>,
- <0 0 35 &gic 0 35 4>,
- <0 0 36 &gic 0 36 4>,
- <0 0 37 &gic 0 37 4>,
- <0 0 38 &gic 0 38 4>,
- <0 0 39 &gic 0 39 4>,
- <0 0 40 &gic 0 40 4>,
- <0 0 41 &gic 0 41 4>,
- <0 0 42 &gic 0 42 4>;
-
- ethernet@2,02000000 {
- compatible = "smsc,lan91c111";
- reg = <2 0x02000000 0x10000>;
- interrupts = <15>;
- };
-
- v2m_clk24mhz: clk24mhz {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <24000000>;
- clock-output-names = "v2m:clk24mhz";
- };
-
- v2m_refclk1mhz: refclk1mhz {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <1000000>;
- clock-output-names = "v2m:refclk1mhz";
- };
-
- v2m_refclk32khz: refclk32khz {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <32768>;
- clock-output-names = "v2m:refclk32khz";
- };
-
- iofpga@3,00000000 {
- compatible = "arm,amba-bus", "simple-bus";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 3 0 0x200000>;
-
- v2m_sysreg: sysreg@010000 {
- compatible = "arm,vexpress-sysreg";
- reg = <0x010000 0x1000>;
- };
-
- v2m_serial0: uart@090000 {
- compatible = "arm,pl011", "arm,primecell";
- reg = <0x090000 0x1000>;
- interrupts = <5>;
- clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
- clock-names = "uartclk", "apb_pclk";
- };
-
- v2m_serial1: uart@0a0000 {
- compatible = "arm,pl011", "arm,primecell";
- reg = <0x0a0000 0x1000>;
- interrupts = <6>;
- clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
- clock-names = "uartclk", "apb_pclk";
- };
-
- v2m_serial2: uart@0b0000 {
- compatible = "arm,pl011", "arm,primecell";
- reg = <0x0b0000 0x1000>;
- interrupts = <7>;
- clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
- clock-names = "uartclk", "apb_pclk";
- };
-
- v2m_serial3: uart@0c0000 {
- compatible = "arm,pl011", "arm,primecell";
- reg = <0x0c0000 0x1000>;
- interrupts = <8>;
- clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>;
- clock-names = "uartclk", "apb_pclk";
- };
-
- virtio_block@0130000 {
- compatible = "virtio,mmio";
- reg = <0x130000 0x1000>;
- interrupts = <42>;
- };
- };
- };
-};
diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
index ed55571e06dd..bb4b775b7c45 100644
--- a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
+++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, ARM Limited. All rights reserved.
+ * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,6 +30,8 @@
/dts-v1/;
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
/memreserve/ 0x80000000 0x00010000;
/ {
@@ -37,7 +39,7 @@
/ {
model = "FVP Base";
- compatible = "arm,vfp-base", "arm,vexpress";
+ compatible = "arm,fvp-base", "arm,vexpress";
interrupt-parent = <&gic>;
#address-cells = <2>;
#size-cells = <2>;
@@ -52,23 +54,58 @@
};
psci {
- compatible = "arm,psci";
+ compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
method = "smc";
- cpu_suspend = <0x84000001>;
+ cpu_suspend = <0xc4000001>;
cpu_off = <0x84000002>;
cpu_on = <0xc4000003>;
+ sys_poweroff = <0x84000008>;
+ sys_reset = <0x84000009>;
};
cpus {
#address-cells = <2>;
#size-cells = <0>;
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&CPU0>;
+ };
+ core1 {
+ cpu = <&CPU1>;
+ };
+ core2 {
+ cpu = <&CPU2>;
+ };
+ core3 {
+ cpu = <&CPU3>;
+ };
+ };
+
+ cluster1 {
+ core0 {
+ cpu = <&CPU4>;
+ };
+ core1 {
+ cpu = <&CPU5>;
+ };
+ core2 {
+ cpu = <&CPU6>;
+ };
+ core3 {
+ cpu = <&CPU7>;
+ };
+ };
+ };
+
idle-states {
entry-method = "arm,psci";
CPU_SLEEP_0: cpu-sleep-0 {
compatible = "arm,idle-state";
- entry-method-param = <0x0010000>;
+ local-timer-stop;
+ arm,psci-suspend-param = <0x0010000>;
entry-latency-us = <40>;
exit-latency-us = <100>;
min-residency-us = <150>;
@@ -76,113 +113,82 @@
CLUSTER_SLEEP_0: cluster-sleep-0 {
compatible = "arm,idle-state";
- entry-method-param = <0x1010000>;
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
entry-latency-us = <500>;
exit-latency-us = <1000>;
min-residency-us = <2500>;
};
};
- big0: cpu@0 {
+ CPU0:cpu@0 {
device_type = "cpu";
- compatible = "arm,cortex-a57", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x0>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- big1: cpu@1 {
+
+ CPU1:cpu@1 {
device_type = "cpu";
- compatible = "arm,cortex-a57", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x1>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- big2: cpu@2 {
+
+ CPU2:cpu@2 {
device_type = "cpu";
- compatible = "arm,cortex-a57", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x2>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- big3: cpu@3 {
+
+ CPU3:cpu@3 {
device_type = "cpu";
- compatible = "arm,cortex-a57", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x3>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- little0: cpu@100 {
+
+ CPU4:cpu@100 {
device_type = "cpu";
- compatible = "arm,cortex-a53", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x100>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- little1: cpu@101 {
+
+ CPU5:cpu@101 {
device_type = "cpu";
- compatible = "arm,cortex-a53", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x101>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- little2: cpu@102 {
+
+ CPU6:cpu@102 {
device_type = "cpu";
- compatible = "arm,cortex-a53", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x102>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
- little3: cpu@103 {
+
+ CPU7:cpu@103 {
device_type = "cpu";
- compatible = "arm,cortex-a53", "arm,armv8";
+ compatible = "arm,armv8";
reg = <0x0 0x103>;
enable-method = "psci";
- clock-frequency = <1000000>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
};
-
- cpu-map {
- cluster0 {
- core0 {
- cpu = <&big0>;
- };
- core1 {
- cpu = <&big1>;
- };
- core2 {
- cpu = <&big2>;
- };
- core3 {
- cpu = <&big3>;
- };
- };
- cluster1 {
- core0 {
- cpu = <&little0>;
- };
- core1 {
- cpu = <&little1>;
- };
- core2 {
- cpu = <&little2>;
- };
- core3 {
- cpu = <&little3>;
- };
- };
- };
};
memory@80000000 {
device_type = "memory";
- reg = <0x00000000 0x80000000 0 0x80000000>,
+ reg = <0x00000000 0x80000000 0 0x7F000000>,
<0x00000008 0x80000000 0 0x80000000>;
};
@@ -195,15 +201,15 @@
<0x0 0x2c000000 0 0x2000>,
<0x0 0x2c010000 0 0x2000>,
<0x0 0x2c02F000 0 0x2000>;
- interrupts = <1 9 0xf04>;
+ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_HIGH)>;
};
timer {
compatible = "arm,armv8-timer";
- interrupts = <1 13 0xff01>,
- <1 14 0xff01>,
- <1 11 0xff01>,
- <1 10 0xff01>;
+ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(8) | IRQ_TYPE_LEVEL_LOW)>;
clock-frequency = <100000000>;
};
@@ -214,19 +220,19 @@
#address-cells = <2>;
#size-cells = <2>;
ranges;
- frame@2a820000 {
- frame-number = <0>;
- interrupts = <0 25 4>;
- reg = <0x0 0x2a820000 0x0 0x10000>;
+ frame@2a830000 {
+ frame-number = <1>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0x0 0x2a830000 0x0 0x10000>;
};
};
pmu {
compatible = "arm,armv8-pmuv3";
- interrupts = <0 60 4>,
- <0 61 4>,
- <0 62 4>,
- <0 63 4>;
+ interrupts = <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
};
smb {
@@ -289,6 +295,27 @@
/include/ "rtsm_ve-motherboard.dtsi"
};
-};
-/include/ "clcd-panels.dtsi"
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/fvp-foundation-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-foundation-gicv2-psci.dts
new file mode 100644
index 000000000000..6e1ce79a87c3
--- /dev/null
+++ b/arch/arm64/boot/dts/fvp-foundation-gicv2-psci.dts
@@ -0,0 +1,51 @@
+/* Foundation model is Base model with bits removed */
+
+#include "fvp-base-gicv2-psci.dts"
+
+&CPU4 {
+ status = "disabled";
+};
+
+&CPU5 {
+ status = "disabled";
+};
+
+&CPU6 {
+ status = "disabled";
+};
+
+&CPU7 {
+ status = "disabled";
+};
+
+&flash {
+ status = "disabled";
+};
+
+&v2m_video_ram {
+ status = "disabled";
+};
+
+&aaci {
+ status = "disabled";
+};
+
+&mmci {
+ status = "disabled";
+};
+
+&kmi0 {
+ status = "disabled";
+};
+
+&kmi1 {
+ status = "disabled";
+};
+
+&clcd {
+ status = "disabled";
+};
+
+&v2m_oscclk1 {
+ status = "disabled";
+};
diff --git a/arch/arm64/boot/dts/juno-clocks.dtsi b/arch/arm64/boot/dts/juno-clocks.dtsi
index ea2b5666a16f..4e44ee229b4a 100644
--- a/arch/arm64/boot/dts/juno-clocks.dtsi
+++ b/arch/arm64/boot/dts/juno-clocks.dtsi
@@ -42,3 +42,34 @@
clock-frequency = <533000000>;
clock-output-names = "faxi_clk";
};
+
+ scpi {
+ compatible = "arm,scpi";
+ mboxes = <&mailbox 1>;
+ shmem = <&cpu_scp_hpri>;
+
+ clocks {
+ compatible = "arm,scpi-clocks";
+
+ scpi_dvfs: scpi_clocks@0 {
+ compatible = "arm,scpi-dvfs";
+ #clock-cells = <1>;
+ clock-indices = <0>, <1>, <2>;
+ clock-output-names = "vbig", "vlittle", "vgpu";
+ };
+
+ pixel_clk: scpi_clocks@3 {
+ compatible = "arm,scpi-clk";
+ #clock-cells = <1>;
+ clock-indices = <3>, <4>;
+ clock-output-names = "pxlclk0", "pxlclk1";
+ };
+
+ audio_clk: scpi_clocks@5 {
+ compatible = "arm,scpi-clk";
+ #clock-cells = <1>;
+ clock-indices = <5>;
+ clock-output-names = "i2s_clock";
+ };
+ };
+ };
diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts
index b8baba186b1e..e0f6c5f9bbab 100644
--- a/arch/arm64/boot/dts/juno.dts
+++ b/arch/arm64/boot/dts/juno.dts
@@ -9,6 +9,7 @@
/dts-v1/;
#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/thermal/thermal.h>
/ {
model = "ARM Juno development board (r0)";
@@ -35,7 +36,9 @@
#size-cells = <0>;
cpu-map {
- cluster0 {
+ cluster0: cluster0 {
+ #cooling-cells = <2>; /* min followed by max */
+
core0 {
cpu = <&A57_0>;
};
@@ -44,7 +47,9 @@
};
};
- cluster1 {
+ cluster1: cluster1 {
+ #cooling-cells = <2>; /* min followed by max */
+
core0 {
cpu = <&A53_0>;
};
@@ -60,11 +65,36 @@
};
};
+ idle-states {
+ entry-method = "arm,psci";
+
+ CPU_SLEEP_0: cpu-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ local-timer-stop;
+ entry-latency-us = <40>;
+ exit-latency-us = <100>;
+ min-residency-us = <2000>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x1010000>;
+ local-timer-stop;
+ entry-latency-us = <500>;
+ exit-latency-us = <1000>;
+ min-residency-us = <2500>;
+ };
+ };
+
A57_0: cpu@0 {
compatible = "arm,cortex-a57","arm,armv8";
reg = <0x0 0x0>;
device_type = "cpu";
enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ clocks = <&scpi_dvfs 0>;
+ clock-names = "vbig";
};
A57_1: cpu@1 {
@@ -72,6 +102,9 @@
reg = <0x0 0x1>;
device_type = "cpu";
enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ clocks = <&scpi_dvfs 0>;
+ clock-names = "vbig";
};
A53_0: cpu@100 {
@@ -79,6 +112,9 @@
reg = <0x0 0x100>;
device_type = "cpu";
enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ clocks = <&scpi_dvfs 1>;
+ clock-names = "vlittle";
};
A53_1: cpu@101 {
@@ -86,6 +122,9 @@
reg = <0x0 0x101>;
device_type = "cpu";
enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ clocks = <&scpi_dvfs 1>;
+ clock-names = "vlittle";
};
A53_2: cpu@102 {
@@ -93,6 +132,9 @@
reg = <0x0 0x102>;
device_type = "cpu";
enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ clocks = <&scpi_dvfs 1>;
+ clock-names = "vlittle";
};
A53_3: cpu@103 {
@@ -100,6 +142,9 @@
reg = <0x0 0x103>;
device_type = "cpu";
enable-method = "psci";
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ clocks = <&scpi_dvfs 1>;
+ clock-names = "vlittle";
};
};
@@ -110,6 +155,16 @@
<0x00000008 0x80000000 0x1 0x80000000>;
};
+ mailbox: mhu@2b1f0000 {
+ compatible = "arm,mhu";
+ reg = <0x0 0x2b1f0000 0x0 0x1000>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "mhu_lpri_rx",
+ "mhu_hpri_rx";
+ #mbox-cells = <1>;
+ };
+
gic: interrupt-controller@2c001000 {
compatible = "arm,gic-400", "arm,cortex-a15-gic";
reg = <0x0 0x2c010000 0 0x1000>,
@@ -130,6 +185,21 @@
<GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>;
};
+ timer@2a810000 {
+ compatible = "arm,armv7-timer-mem";
+ reg = <0x0 0x2a810000 0x0 0x10000>;
+ arm,cnttidr = <0x1>;
+ clock-frequency = <50000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ frame@2a830000 {
+ frame-number = <1>;
+ interrupts = <0 60 4>;
+ reg = <0x0 0x2a830000 0x0 0x10000>;
+ };
+ };
+
pmu {
compatible = "arm,armv8-pmuv3";
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
@@ -140,8 +210,78 @@
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
};
+ sram: sram@2e000000 {
+ compatible = "arm,juno-sram-ns", "mmio-sram";
+ reg = <0x0 0x2e000000 0x0 0x8000>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x0 0x2e000000 0x8000>;
+
+ cpu_scp_lpri: scp-shmem@0 {
+ compatible = "arm,juno-scp-shmem";
+ reg = <0x0 0x200>;
+ };
+
+ cpu_scp_hpri: scp-shmem@200 {
+ compatible = "arm,juno-scp-shmem";
+ reg = <0x200 0x200>;
+ };
+ };
+
+ scpi_sensor0: scpi-sensor@0 {
+ compatible = "arm,scpi-thermal";
+ #thermal-sensor-cells = <1>;
+ };
+
+ thermal-zones {
+ soc_thermal {
+ polling-delay = <1000>;
+ polling-delay-passive = <100>;
+ sustainable-power = <2500>;
+
+ thermal-sensors = <&scpi_sensor0 3>;
+
+ trips {
+ threshold: trip-point@0 {
+ temperature = <55000>;
+ hysteresis = <1000>;
+ type = "passive";
+ };
+ target: trip-point@1 {
+ temperature = <65000>;
+ hysteresis = <1000>;
+ type = "passive";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&target>;
+ cooling-device = <&cluster0 0 4>;
+ contribution = <1024>;
+ };
+ map1 {
+ trip = <&target>;
+ cooling-device = <&cluster1 0 4>;
+ contribution = <2048>;
+ };
+ map2 {
+ trip = <&target>;
+ cooling-device = <&gpu 0 4>;
+ contribution = <1024>;
+ };
+ };
+ };
+ };
+
/include/ "juno-clocks.dtsi"
+ hwmon@1c010000 {
+ compatible = "arm,v2m-juno-meters";
+ reg = <0x0 0x1c010000 0x0 0x1000>;
+ };
+
dma@7ff00000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x0 0x7ff00000 0 0x1000>;
@@ -174,7 +314,7 @@
#address-cells = <1>;
#size-cells = <0>;
interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>;
- clock-frequency = <400000>;
+ clock-frequency = <100000>;
i2c-sda-hold-time-ns = <500>;
clocks = <&soc_smc50mhz>;
@@ -219,11 +359,64 @@
clock-names = "apb_pclk";
};
- gpu@0x2d000000 {
+ hdlcd@7ff60000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff60000 0 0x1000>;
+ interrupts = <0 85 4>;
+ clocks = <&pixel_clk 0>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi0>;
+
+ /* display-timings {
+ native-mode = <&timing0>;
+ timing0: timing@0 {
+ /* 1024 x 768 framebuffer, standard VGA timings * /
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ }; */
+ };
+
+ /* hdlcd@7ff50000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff50000 0 0x1000>;
+ interrupts = <0 93 4>;
+ clocks = <&pixel_clk 1>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi1>;
+
+ display-timings {
+ native-mode = <&timing1>;
+ timing1: timing@1 {
+ /* 1024 x 768 framebuffer, standard VGA timings * /
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ };
+ }; */
+
+ gpu: gpu@0x2d000000 {
compatible = "arm,malit6xx", "arm,mali";
+ #cooling-cells = <2>; /* min followed by max */
reg = <0x0 0x2d000000 0x0 0x4000>;
interrupts = <0 33 4>, <0 34 4>, <0 32 4>;
interrupt-names = "JOB", "MMU", "GPU";
+ clocks = <&scpi_dvfs 2>;
+ clock-names = "clk_mali";
};
smb {
diff --git a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
deleted file mode 100644
index 28ed4ba3391a..000000000000
--- a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * ARM Ltd. Fast Models
- *
- * Architecture Envelope Model (AEM) ARMv8-A
- * ARMAEMv8AMPCT
- *
- * RTSM_VE_AEMv8A.lisa
- */
-
-/dts-v1/;
-
-/memreserve/ 0x80000000 0x00010000;
-
-/ {
- model = "RTSM_VE_AEMv8A";
- compatible = "arm,rtsm_ve,aemv8a", "arm,vexpress";
- interrupt-parent = <&gic>;
- #address-cells = <2>;
- #size-cells = <2>;
-
- chosen { };
-
- aliases {
- serial0 = &v2m_serial0;
- serial1 = &v2m_serial1;
- serial2 = &v2m_serial2;
- serial3 = &v2m_serial3;
- };
-
- psci {
- compatible = "arm,psci";
- method = "smc";
- /*
- * Function IDs usage and compliancy with PSCI v0.2 still
- * under discussion. Current IDs should be considered
- * temporary for demonstration purposes.
- */
- cpu_suspend = <0x84000001>;
- cpu_off = <0x84000002>;
- cpu_on = <0x84000003>;
- };
-
- cpus {
- #address-cells = <2>;
- #size-cells = <0>;
-
- idle-states {
- entry-method = "arm,psci";
-
- CPU_SLEEP_0: cpu-sleep-0 {
- compatible = "arm,idle-state";
- entry-method-param = <0x0010000>;
- entry-latency-us = <40>;
- exit-latency-us = <100>;
- min-residency-us = <150>;
- };
-
- CLUSTER_SLEEP_0: cluster-sleep-0 {
- compatible = "arm,idle-state";
- entry-method-param = <0x1010000>;
- entry-latency-us = <500>;
- exit-latency-us = <1000>;
- min-residency-us = <2500>;
- };
- };
-
- cpu@0 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x0>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
- };
- cpu@1 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x1>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
- };
- cpu@2 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x2>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
- };
- cpu@3 {
- device_type = "cpu";
- compatible = "arm,armv8";
- reg = <0x0 0x3>;
- enable-method = "psci";
- cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
- };
- };
-
- memory@80000000 {
- device_type = "memory";
- reg = <0x00000000 0x80000000 0 0x80000000>,
- <0x00000008 0x80000000 0 0x80000000>;
- };
-
- gic: interrupt-controller@2c001000 {
- compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
- #interrupt-cells = <3>;
- #address-cells = <0>;
- interrupt-controller;
- reg = <0x0 0x2c001000 0 0x1000>,
- <0x0 0x2c002000 0 0x1000>,
- <0x0 0x2c004000 0 0x2000>,
- <0x0 0x2c006000 0 0x2000>;
- interrupts = <1 9 0xf04>;
- };
-
- timer {
- compatible = "arm,armv8-timer";
- interrupts = <1 13 0xff01>,
- <1 14 0xff01>,
- <1 11 0xff01>,
- <1 10 0xff01>;
- clock-frequency = <100000000>;
- };
-
- pmu {
- compatible = "arm,armv8-pmuv3";
- interrupts = <0 60 4>,
- <0 61 4>,
- <0 62 4>,
- <0 63 4>;
- };
-
- smb {
- compatible = "simple-bus";
-
- #address-cells = <2>;
- #size-cells = <1>;
- ranges = <0 0 0 0x08000000 0x04000000>,
- <1 0 0 0x14000000 0x04000000>,
- <2 0 0 0x18000000 0x04000000>,
- <3 0 0 0x1c000000 0x04000000>,
- <4 0 0 0x0c000000 0x04000000>,
- <5 0 0 0x10000000 0x04000000>;
-
- #interrupt-cells = <1>;
- interrupt-map-mask = <0 0 63>;
- interrupt-map = <0 0 0 &gic 0 0 4>,
- <0 0 1 &gic 0 1 4>,
- <0 0 2 &gic 0 2 4>,
- <0 0 3 &gic 0 3 4>,
- <0 0 4 &gic 0 4 4>,
- <0 0 5 &gic 0 5 4>,
- <0 0 6 &gic 0 6 4>,
- <0 0 7 &gic 0 7 4>,
- <0 0 8 &gic 0 8 4>,
- <0 0 9 &gic 0 9 4>,
- <0 0 10 &gic 0 10 4>,
- <0 0 11 &gic 0 11 4>,
- <0 0 12 &gic 0 12 4>,
- <0 0 13 &gic 0 13 4>,
- <0 0 14 &gic 0 14 4>,
- <0 0 15 &gic 0 15 4>,
- <0 0 16 &gic 0 16 4>,
- <0 0 17 &gic 0 17 4>,
- <0 0 18 &gic 0 18 4>,
- <0 0 19 &gic 0 19 4>,
- <0 0 20 &gic 0 20 4>,
- <0 0 21 &gic 0 21 4>,
- <0 0 22 &gic 0 22 4>,
- <0 0 23 &gic 0 23 4>,
- <0 0 24 &gic 0 24 4>,
- <0 0 25 &gic 0 25 4>,
- <0 0 26 &gic 0 26 4>,
- <0 0 27 &gic 0 27 4>,
- <0 0 28 &gic 0 28 4>,
- <0 0 29 &gic 0 29 4>,
- <0 0 30 &gic 0 30 4>,
- <0 0 31 &gic 0 31 4>,
- <0 0 32 &gic 0 32 4>,
- <0 0 33 &gic 0 33 4>,
- <0 0 34 &gic 0 34 4>,
- <0 0 35 &gic 0 35 4>,
- <0 0 36 &gic 0 36 4>,
- <0 0 37 &gic 0 37 4>,
- <0 0 38 &gic 0 38 4>,
- <0 0 39 &gic 0 39 4>,
- <0 0 40 &gic 0 40 4>,
- <0 0 41 &gic 0 41 4>,
- <0 0 42 &gic 0 42 4>;
-
- /include/ "rtsm_ve-motherboard.dtsi"
- };
-};
-
-/include/ "clcd-panels.dtsi"
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index b683d4703582..1df48b8e86d2 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -15,14 +15,14 @@
#interrupt-cells = <1>;
ranges;
- flash@0,00000000 {
+ flash: flash@0,00000000 {
compatible = "arm,vexpress-flash", "cfi-flash";
reg = <0 0x00000000 0x04000000>,
<4 0x00000000 0x04000000>;
bank-width = <4>;
};
- vram@2,00000000 {
+ v2m_video_ram: vram@2,00000000 {
compatible = "arm,vexpress-vram";
reg = <2 0x00000000 0x00800000>;
};
@@ -76,7 +76,7 @@
clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
};
- aaci@040000 {
+ aaci: aaci@040000 {
compatible = "arm,pl041", "arm,primecell";
reg = <0x040000 0x1000>;
interrupts = <11>;
@@ -84,7 +84,7 @@
clock-names = "apb_pclk";
};
- mmci@050000 {
+ mmci: mmci@050000 {
compatible = "arm,pl180", "arm,primecell";
reg = <0x050000 0x1000>;
interrupts = <9 10>;
@@ -96,7 +96,7 @@
clock-names = "mclk", "apb_pclk";
};
- kmi@060000 {
+ kmi0: kmi@060000 {
compatible = "arm,pl050", "arm,primecell";
reg = <0x060000 0x1000>;
interrupts = <12>;
@@ -104,7 +104,7 @@
clock-names = "KMIREFCLK", "apb_pclk";
};
- kmi@070000 {
+ kmi1: kmi@070000 {
compatible = "arm,pl050", "arm,primecell";
reg = <0x070000 0x1000>;
interrupts = <13>;
@@ -176,9 +176,10 @@
clock-names = "apb_pclk";
};
- clcd@1f0000 {
+ clcd: clcd@1f0000 {
compatible = "arm,pl111", "arm,primecell";
reg = <0x1f0000 0x1000>;
+ interrupt-names = "combined";
interrupts = <14>;
clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
clock-names = "clcdclk", "apb_pclk";
@@ -203,7 +204,7 @@
};
mcc {
- compatible = "arm,vexpress,config-bus", "simple-bus";
+ compatible = "arm,vexpress,config-bus";
arm,vexpress,config-bridge = <&v2m_sysreg>;
v2m_oscclk1: osc@1 {
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 2070a56ecc46..a3f935fde975 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -35,4 +35,4 @@ AFLAGS_aes-neon.o := -DINTERLEAVE=4
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
- $(call if_changed_dep,cc_o_c)
+ $(call if_changed_rule,cc_o_c)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 60f2f4c12256..79cd911ef88c 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, rounds, blocks, first);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
@@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, rounds, blocks, first);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
@@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
first);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
@@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
first);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
return err;
@@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_enc, rounds, blocks,
(u8 *)ctx->key2.key_enc, walk.iv, first);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
@@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key1.key_dec, rounds, blocks,
(u8 *)ctx->key2.key_enc, walk.iv, first);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index b9e6eaf41c9b..607928ad1fdf 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -19,13 +19,15 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
- DATA .req v0
- SHASH .req v1
- IN1 .req v2
+ SHASH .req v0
+ SHASH2 .req v1
T1 .req v2
T2 .req v3
- T3 .req v4
- VZR .req v5
+ MASK .req v4
+ XL .req v5
+ XM .req v6
+ XH .req v7
+ IN1 .req v7
.text
.arch armv8-a+crypto
@@ -35,61 +37,51 @@
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
- ld1 {DATA.16b}, [x1]
ld1 {SHASH.16b}, [x3]
- eor VZR.16b, VZR.16b, VZR.16b
+ ld1 {XL.16b}, [x1]
+ movi MASK.16b, #0xe1
+ ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
+ shl MASK.2d, MASK.2d, #57
+ eor SHASH2.16b, SHASH2.16b, SHASH.16b
/* do the head block first, if supplied */
cbz x4, 0f
- ld1 {IN1.2d}, [x4]
+ ld1 {T1.2d}, [x4]
b 1f
-0: ld1 {IN1.2d}, [x2], #16
+0: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1
-1: ext IN1.16b, IN1.16b, IN1.16b, #8
-CPU_LE( rev64 IN1.16b, IN1.16b )
- eor DATA.16b, DATA.16b, IN1.16b
- /* multiply DATA by SHASH in GF(2^128) */
- ext T2.16b, DATA.16b, DATA.16b, #8
- ext T3.16b, SHASH.16b, SHASH.16b, #8
- eor T2.16b, T2.16b, DATA.16b
- eor T3.16b, T3.16b, SHASH.16b
+1: /* multiply XL by SHASH in GF(2^128) */
+CPU_LE( rev64 T1.16b, T1.16b )
- pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1
- pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0
- pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0)
- eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0)
- eor T2.16b, T2.16b, DATA.16b
+ ext T2.16b, XL.16b, XL.16b, #8
+ ext IN1.16b, T1.16b, T1.16b, #8
+ eor T1.16b, T1.16b, T2.16b
+ eor XL.16b, XL.16b, IN1.16b
- ext T3.16b, VZR.16b, T2.16b, #8
- ext T2.16b, T2.16b, VZR.16b, #8
- eor DATA.16b, DATA.16b, T3.16b
- eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of
- // carry-less multiplication
+ pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
+ eor T1.16b, T1.16b, XL.16b
+ pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
+ pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
- /* first phase of the reduction */
- shl T3.2d, DATA.2d, #1
- eor T3.16b, T3.16b, DATA.16b
- shl T3.2d, T3.2d, #5
- eor T3.16b, T3.16b, DATA.16b
- shl T3.2d, T3.2d, #57
- ext T2.16b, VZR.16b, T3.16b, #8
- ext T3.16b, T3.16b, VZR.16b, #8
- eor DATA.16b, DATA.16b, T2.16b
- eor T1.16b, T1.16b, T3.16b
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor T2.16b, XL.16b, XH.16b
+ eor XM.16b, XM.16b, T1.16b
+ eor XM.16b, XM.16b, T2.16b
+ pmull T2.1q, XL.1d, MASK.1d
- /* second phase of the reduction */
- ushr T2.2d, DATA.2d, #5
- eor T2.16b, T2.16b, DATA.16b
- ushr T2.2d, T2.2d, #1
- eor T2.16b, T2.16b, DATA.16b
- ushr T2.2d, T2.2d, #1
- eor T1.16b, T1.16b, T2.16b
- eor DATA.16b, DATA.16b, T1.16b
+ mov XH.d[0], XM.d[1]
+ mov XM.d[1], XL.d[0]
+
+ eor XL.16b, XM.16b, T2.16b
+ ext T2.16b, XL.16b, XL.16b, #8
+ pmull XL.1q, XL.1d, MASK.1d
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b
cbnz w0, 0b
- st1 {DATA.16b}, [x1]
+ st1 {XL.16b}, [x1]
ret
ENDPROC(pmull_ghash_update)
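For reference, a hedged sketch of the algebra the three PMULL instructions above implement; the operands are split as a = a1*x^64 + a0 and b = b1*x^64 + b0, with every addition an XOR in GF(2)[x]:

	// a * b = a1*b1 * x^128
	//       + ((a1 ^ a0)*(b1 ^ b0) ^ a1*b1 ^ a0*b0) * x^64
	//       + a0*b0
	//
	// XH holds a1*b1, XL holds a0*b0 and XM the middle term, before the
	// two-step reduction modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1.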
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index b92baf3f68c7..833ec1e3f3e9 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -67,11 +67,12 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;
- kernel_neon_begin_partial(6);
+ kernel_neon_begin_partial(8);
pmull_ghash_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);
kernel_neon_end();
src += blocks * GHASH_BLOCK_SIZE;
+ partial = 0;
}
if (len)
memcpy(ctx->buf + partial, src, len);
@@ -88,7 +89,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
- kernel_neon_begin_partial(6);
+ kernel_neon_begin_partial(8);
pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
kernel_neon_end();
}
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index a84d4c8acbbe..ddb9d7830558 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -168,8 +168,8 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
- __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
+ __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
+ sizeof(*(ptr))); \
__ret; \
})
@@ -177,8 +177,8 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
- __cmpxchg((ptr), (unsigned long)(o), \
- (unsigned long)(n), sizeof(*(ptr))); \
+ __cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
__ret; \
})
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 56de5aadede2..e94e8dde78b4 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -205,6 +205,13 @@ typedef struct compat_siginfo {
compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */
int _fd;
} _sigpoll;
+
+ /* SIGSYS */
+ struct {
+ compat_uptr_t _call_addr; /* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } _sigsys;
} _sifields;
} compat_siginfo_t;
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac13008..50f559f574fe 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,8 +37,21 @@ struct fpsimd_state {
u32 fpcr;
};
};
+ /* the id of the last cpu to have restored this state */
+ unsigned int cpu;
};
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+ u32 fpsr;
+ u32 fpcr;
+ u32 num_regs;
+ __uint128_t vregs[32];
+};
+
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
@@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
+extern void fpsimd_update_current_state(struct fpsimd_state *state);
+
+extern void fpsimd_flush_task_state(struct task_struct *target);
+
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+ u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
#endif
#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599c96bd..768414d55e64 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -62,3 +62,38 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
msr fpcr, x\tmpnr
.endm
+
+.altmacro
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+ mrs x\tmpnr1, fpsr
+ str w\numnr, [\state, #8]
+ mrs x\tmpnr2, fpcr
+ stp w\tmpnr1, w\tmpnr2, [\state]
+ adr x\tmpnr1, 0f
+ add \state, \state, x\numnr, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+ br x\tmpnr1
+ .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+ .irp qb, %(qa + 1)
+ stp q\qa, q\qb, [\state, # -16 * \qa - 16]
+ .endr
+ .endr
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+ ldp w\tmpnr1, w\tmpnr2, [\state]
+ msr fpsr, x\tmpnr1
+ msr fpcr, x\tmpnr2
+ adr x\tmpnr1, 0f
+ ldr w\tmpnr2, [\state, #8]
+ add \state, \state, x\tmpnr2, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+ br x\tmpnr1
+ .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+ .irp qb, %(qa + 1)
+ ldp q\qa, q\qb, [\state, # -16 * \qa - 16]
+ .endr
+ .endr
+0:
+.endm
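
The two macros above stack only the bottom num_regs Q registers by branching into an unrolled run of stp/ldp instructions: each instruction covers a pair of registers and encodes to four bytes, so jumping to `0f - (num_regs << 1)` bytes executes exactly num_regs/2 of them. A small worked example of that arithmetic (plain user-space C, purely illustrative):

/*
 * Worked example of the jump-into-unrolled-block arithmetic used by
 * fpsimd_save_partial/fpsimd_restore_partial above.
 */
#include <stdio.h>

#define INSN_BYTES	4	/* one stp or ldp instruction */

int main(void)
{
	unsigned int num_regs;

	for (num_regs = 2; num_regs <= 32; num_regs += 2) {
		unsigned int back_bytes = num_regs << 1;	/* sub ..., lsl #1 */
		unsigned int insns = back_bytes / INSN_BYTES;

		printf("num_regs=%2u -> branch %3u bytes before 0f -> %2u stp/ldp pairs (q0..q%u)\n",
		       num_regs, back_bytes, insns, num_regs - 1);
	}
	return 0;
}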
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..101a42bde728 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
+ /*
+ * init_mm.pgd does not contain any user mappings and it is always
+ * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+ */
+ if (next == &init_mm) {
+ cpu_set_reserved_ttbr0();
+ return;
+ }
+
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
check_and_switch_context(next, tsk);
}
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
new file mode 100644
index 000000000000..13ce4cc18e26
--- /dev/null
+++ b/arch/arm64/include/asm/neon.h
@@ -0,0 +1,18 @@
+/*
+ * linux/arch/arm64/include/asm/neon.h
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+
+#define cpu_has_neon() (1)
+
+#define kernel_neon_begin() kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
+void kernel_neon_end(void);
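
kernel_neon_begin_partial() lets a caller preserve only as many Q registers as its assembler routine actually touches, with kernel_neon_begin() as the save-everything case. A kernel-context sketch of the intended usage pattern; my_neon_transform is an illustrative name, not a real symbol, and the register count is an assumption made for the example.

#include <asm/neon.h>
#include <linux/linkage.h>
#include <linux/types.h>

asmlinkage void my_neon_transform(u8 *dst, const u8 *src, int blocks);

static void do_transform(u8 *dst, const u8 *src, int blocks)
{
	kernel_neon_begin_partial(4);	/* assumption: the asm only uses q0-q3 */
	my_neon_transform(dst, src, blocks);
	kernel_neon_end();
}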
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
new file mode 100644
index 000000000000..fd189a522aee
--- /dev/null
+++ b/arch/arm64/include/asm/opcodes.h
@@ -0,0 +1,231 @@
+/*
+ * Copied from arch/arm/include/asm/opcodes.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_ARM_OPCODES_H
+#define __ASM_ARM_OPCODES_H
+
+#ifndef __ASSEMBLY__
+#include <linux/linkage.h>
+extern asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr);
+#endif
+
+#define ARM_OPCODE_CONDTEST_FAIL 0
+#define ARM_OPCODE_CONDTEST_PASS 1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
+
+
+/*
+ * Assembler opcode byteswap helpers.
+ * These are only intended for use by this header: don't use them directly,
+ * because they will be suboptimal in most cases.
+ */
+#define ___asm_opcode_swab32(x) ( \
+ (((x) << 24) & 0xFF000000) \
+ | (((x) << 8) & 0x00FF0000) \
+ | (((x) >> 8) & 0x0000FF00) \
+ | (((x) >> 24) & 0x000000FF) \
+)
+#define ___asm_opcode_swab16(x) ( \
+ (((x) << 8) & 0xFF00) \
+ | (((x) >> 8) & 0x00FF) \
+)
+#define ___asm_opcode_swahb32(x) ( \
+ (((x) << 8) & 0xFF00FF00) \
+ | (((x) >> 8) & 0x00FF00FF) \
+)
+#define ___asm_opcode_swahw32(x) ( \
+ (((x) << 16) & 0xFFFF0000) \
+ | (((x) >> 16) & 0x0000FFFF) \
+)
+#define ___asm_opcode_identity32(x) ((x) & 0xFFFFFFFF)
+#define ___asm_opcode_identity16(x) ((x) & 0xFFFF)
+
+
+/*
+ * Opcode byteswap helpers
+ *
+ * These macros help with converting instructions between a canonical integer
+ * format and in-memory representation, in an endianness-agnostic manner.
+ *
+ * __mem_to_opcode_*() convert from in-memory representation to canonical form.
+ * __opcode_to_mem_*() convert from canonical form to in-memory representation.
+ *
+ *
+ * Canonical instruction representation:
+ *
+ * ARM: 0xKKLLMMNN
+ * Thumb 16-bit: 0x0000KKLL, where KK < 0xE8
+ * Thumb 32-bit: 0xKKLLMMNN, where KK >= 0xE8
+ *
+ * There is no way to distinguish an ARM instruction in canonical representation
+ * from a Thumb instruction (just as these cannot be distinguished in memory).
+ * Where this distinction is important, it needs to be tracked separately.
+ *
+ * Note that values in the range 0x0000E800..0xE7FFFFFF intentionally do not
+ * represent any valid Thumb-2 instruction. For this range,
+ * __opcode_is_thumb32() and __opcode_is_thumb16() will both be false.
+ *
+ * The ___asm variants are intended only for use by this header, in situations
+ * involving inline assembler. For .S files, the normal __opcode_*() macros
+ * should do the right thing.
+ */
+#ifdef __ASSEMBLY__
+
+#define ___opcode_swab32(x) ___asm_opcode_swab32(x)
+#define ___opcode_swab16(x) ___asm_opcode_swab16(x)
+#define ___opcode_swahb32(x) ___asm_opcode_swahb32(x)
+#define ___opcode_swahw32(x) ___asm_opcode_swahw32(x)
+#define ___opcode_identity32(x) ___asm_opcode_identity32(x)
+#define ___opcode_identity16(x) ___asm_opcode_identity16(x)
+
+#else /* ! __ASSEMBLY__ */
+
+#include <linux/types.h>
+#include <linux/swab.h>
+
+#define ___opcode_swab32(x) swab32(x)
+#define ___opcode_swab16(x) swab16(x)
+#define ___opcode_swahb32(x) swahb32(x)
+#define ___opcode_swahw32(x) swahw32(x)
+#define ___opcode_identity32(x) ((u32)(x))
+#define ___opcode_identity16(x) ((u16)(x))
+
+#endif /* ! __ASSEMBLY__ */
+
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+
+#define __opcode_to_mem_arm(x) ___opcode_swab32(x)
+#define __opcode_to_mem_thumb16(x) ___opcode_swab16(x)
+#define __opcode_to_mem_thumb32(x) ___opcode_swahb32(x)
+#define ___asm_opcode_to_mem_arm(x) ___asm_opcode_swab32(x)
+#define ___asm_opcode_to_mem_thumb16(x) ___asm_opcode_swab16(x)
+#define ___asm_opcode_to_mem_thumb32(x) ___asm_opcode_swahb32(x)
+
+#else /* ! CONFIG_CPU_ENDIAN_BE8 */
+
+#define __opcode_to_mem_arm(x) ___opcode_identity32(x)
+#define __opcode_to_mem_thumb16(x) ___opcode_identity16(x)
+#define ___asm_opcode_to_mem_arm(x) ___asm_opcode_identity32(x)
+#define ___asm_opcode_to_mem_thumb16(x) ___asm_opcode_identity16(x)
+#ifndef CONFIG_CPU_ENDIAN_BE32
+/*
+ * On BE32 systems, using 32-bit accesses to store Thumb instructions will not
+ * work in all cases, due to alignment constraints. For now, a correct
+ * version is not provided for BE32.
+ */
+#define __opcode_to_mem_thumb32(x) ___opcode_swahw32(x)
+#define ___asm_opcode_to_mem_thumb32(x) ___asm_opcode_swahw32(x)
+#endif
+
+#endif /* ! CONFIG_CPU_ENDIAN_BE8 */
+
+#define __mem_to_opcode_arm(x) __opcode_to_mem_arm(x)
+#define __mem_to_opcode_thumb16(x) __opcode_to_mem_thumb16(x)
+#ifndef CONFIG_CPU_ENDIAN_BE32
+#define __mem_to_opcode_thumb32(x) __opcode_to_mem_thumb32(x)
+#endif
+
+/* Operations specific to Thumb opcodes */
+
+/* Instruction size checks: */
+#define __opcode_is_thumb32(x) ( \
+ ((x) & 0xF8000000) == 0xE8000000 \
+ || ((x) & 0xF0000000) == 0xF0000000 \
+)
+#define __opcode_is_thumb16(x) ( \
+ ((x) & 0xFFFF0000) == 0 \
+ && !(((x) & 0xF800) == 0xE800 || ((x) & 0xF000) == 0xF000) \
+)
+
+/* Operations to construct or split 32-bit Thumb instructions: */
+#define __opcode_thumb32_first(x) (___opcode_identity16((x) >> 16))
+#define __opcode_thumb32_second(x) (___opcode_identity16(x))
+#define __opcode_thumb32_compose(first, second) ( \
+ (___opcode_identity32(___opcode_identity16(first)) << 16) \
+ | ___opcode_identity32(___opcode_identity16(second)) \
+)
+#define ___asm_opcode_thumb32_first(x) (___asm_opcode_identity16((x) >> 16))
+#define ___asm_opcode_thumb32_second(x) (___asm_opcode_identity16(x))
+#define ___asm_opcode_thumb32_compose(first, second) ( \
+ (___asm_opcode_identity32(___asm_opcode_identity16(first)) << 16) \
+ | ___asm_opcode_identity32(___asm_opcode_identity16(second)) \
+)
+
+/*
+ * Opcode injection helpers
+ *
+ * In rare cases it is necessary to assemble an opcode which the
+ * assembler does not support directly, or which would normally be
+ * rejected because of the CFLAGS or AFLAGS used to build the affected
+ * file.
+ *
+ * Before using these macros, consider carefully whether it is feasible
+ * instead to change the build flags for your file, or whether it really
+ * makes sense to support old assembler versions when building that
+ * particular kernel feature.
+ *
+ * The macros defined here should only be used where there is no viable
+ * alternative.
+ *
+ *
+ * __inst_arm(x): emit the specified ARM opcode
+ * __inst_thumb16(x): emit the specified 16-bit Thumb opcode
+ * __inst_thumb32(x): emit the specified 32-bit Thumb opcode
+ *
+ * __inst_arm_thumb16(arm, thumb): emit either the specified arm or
+ * 16-bit Thumb opcode, depending on whether an ARM or Thumb-2
+ * kernel is being built
+ *
+ * __inst_arm_thumb32(arm, thumb): emit either the specified arm or
+ * 32-bit Thumb opcode, depending on whether an ARM or Thumb-2
+ * kernel is being built
+ *
+ *
+ * Note that using these macros directly is poor practice. Instead, you
+ * should use them to define human-readable wrapper macros to encode the
+ * instructions that you care about. In code which might run on ARMv7 or
+ * above, you can usually use the __inst_arm_thumb{16,32} macros to
+ * specify the ARM and Thumb alternatives at the same time. This ensures
+ * that the correct opcode gets emitted depending on the instruction set
+ * used for the kernel build.
+ *
+ * Look at opcodes-virt.h for an example of how to use these macros.
+ */
+#include <linux/stringify.h>
+
+#define __inst_arm(x) ___inst_arm(___asm_opcode_to_mem_arm(x))
+#define __inst_thumb32(x) ___inst_thumb32( \
+ ___asm_opcode_to_mem_thumb16(___asm_opcode_thumb32_first(x)), \
+ ___asm_opcode_to_mem_thumb16(___asm_opcode_thumb32_second(x)) \
+)
+#define __inst_thumb16(x) ___inst_thumb16(___asm_opcode_to_mem_thumb16(x))
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define __inst_arm_thumb16(arm_opcode, thumb_opcode) \
+ __inst_thumb16(thumb_opcode)
+#define __inst_arm_thumb32(arm_opcode, thumb_opcode) \
+ __inst_thumb32(thumb_opcode)
+#else
+#define __inst_arm_thumb16(arm_opcode, thumb_opcode) __inst_arm(arm_opcode)
+#define __inst_arm_thumb32(arm_opcode, thumb_opcode) __inst_arm(arm_opcode)
+#endif
+
+/* Helpers for the helpers. Don't use these directly. */
+#ifdef __ASSEMBLY__
+#define ___inst_arm(x) .long x
+#define ___inst_thumb16(x) .short x
+#define ___inst_thumb32(first, second) .short first, second
+#else
+#define ___inst_arm(x) ".long " __stringify(x) "\n\t"
+#define ___inst_thumb16(x) ".short " __stringify(x) "\n\t"
+#define ___inst_thumb32(first, second) \
+ ".short " __stringify(first) ", " __stringify(second) "\n\t"
+#endif
+
+#endif /* __ASM_ARM_OPCODES_H */
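
The header's comment above defines a canonical integer form for ARM and Thumb opcodes plus helpers to move between that form and the in-memory byte order. The following stand-alone program re-types two of those helpers (it does not include the kernel header) to show the classification rules and the BE8 byte swap on concrete encodings.

#include <stdint.h>
#include <stdio.h>

#define opcode_swab32(x) ((((x) << 24) & 0xFF000000u) | \
			  (((x) <<  8) & 0x00FF0000u) | \
			  (((x) >>  8) & 0x0000FF00u) | \
			  (((x) >> 24) & 0x000000FFu))

#define opcode_is_thumb32(x) ((((x) & 0xF8000000u) == 0xE8000000u) || \
			      (((x) & 0xF0000000u) == 0xF0000000u))

#define opcode_is_thumb16(x) ((((x) & 0xFFFF0000u) == 0) && \
			      !((((x) & 0xF800u) == 0xE800u) || \
				(((x) & 0xF000u) == 0xF000u)))

int main(void)
{
	uint32_t arm_nop   = 0xe1a00000;	/* mov r0, r0 */
	uint32_t thumb_nop = 0x0000bf00;	/* 16-bit Thumb nop */
	uint32_t thumb_bl  = 0xf000f800;	/* 32-bit Thumb-2 bl-class encoding */

	printf("ARM nop, BE8 in-memory form: 0x%08x\n", opcode_swab32(arm_nop));
	printf("0x%08x is thumb16: %d, thumb32: %d\n",
	       thumb_nop, opcode_is_thumb16(thumb_nop), opcode_is_thumb32(thumb_nop));
	printf("0x%08x is thumb16: %d, thumb32: %d\n",
	       thumb_bl, opcode_is_thumb16(thumb_bl), opcode_is_thumb32(thumb_bl));
	return 0;
}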
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 5db016e6d065..c29d88526405 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -61,6 +61,15 @@
#define COMPAT_PT_TEXT_ADDR 0x10000
#define COMPAT_PT_DATA_ADDR 0x10004
#define COMPAT_PT_TEXT_END_ADDR 0x10008
+
+/*
+ * used to skip a system call when tracer changes its number to -1
+ * with ptrace(PTRACE_SET_SYSCALL)
+ */
+#define RET_SKIP_SYSCALL -1
+#define RET_SKIP_SYSCALL_TRACE -2
+#define IS_SKIP_SYSCALL(no) ((int)(no & 0xffffffff) == -1)
+
#ifndef __ASSEMBLY__
/* sizeof(struct user) for AArch32 */
@@ -178,5 +187,13 @@ extern unsigned long profile_pc(struct pt_regs *regs);
#define profile_pc(regs) instruction_pointer(regs)
#endif
+/*
+ * True if instr is a 32-bit thumb instruction. This works if instr
+ * is the first or only half-word of a thumb instruction. It also works
+ * when instr holds all 32-bits of a wide thumb instruction if stored
+ * in the form (first_half<<16)|(second_half)
+ */
+#define is_wide_instruction(instr) ((unsigned)(instr) >= 0xe800)
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
new file mode 100644
index 000000000000..bec3a43f7b17
--- /dev/null
+++ b/arch/arm64/include/asm/seccomp.h
@@ -0,0 +1,25 @@
+/*
+ * arch/arm64/include/asm/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd.h>
+
+#ifdef CONFIG_COMPAT
+#define __NR_seccomp_read_32 __NR_compat_read
+#define __NR_seccomp_write_32 __NR_compat_write
+#define __NR_seccomp_exit_32 __NR_compat_exit
+#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn
+#endif /* CONFIG_COMPAT */
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
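
With this header in place, arm64 gains the constants that a classic seccomp-BPF filter checks first. A minimal user-space sketch for a native (64-bit) AArch64 process: validate seccomp_data.arch against AUDIT_ARCH_AARCH64, then deny a single syscall; error handling is omitted for brevity.

#include <errno.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct sock_filter filter[] = {
		/* load seccomp_data.arch and refuse foreign architectures */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, arch)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_AARCH64, 1, 0),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
		/* load the syscall number and deny getpid() with EPERM */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(filter) / sizeof(filter[0]),
		.filter = filter,
	};

	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);

	printf("getpid() -> %ld (errno %d)\n", (long)syscall(SYS_getpid), errno);
	return 0;
}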
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index e9c149c042e0..456d67c1f0fa 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -21,6 +21,7 @@ struct sleep_save_sp {
phys_addr_t save_ptr_stash_phys;
};
+extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
extern int cpu_suspend(unsigned long);
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 383771eb0b87..709a574468f0 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -16,6 +16,8 @@
#ifndef __ASM_SYSCALL_H
#define __ASM_SYSCALL_H
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
#include <linux/err.h>
extern const void *sys_call_table[];
@@ -105,4 +107,16 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->regs[i], args, n * sizeof(args[0]));
}
+/*
+ * We don't care about endianness (__AUDIT_ARCH_LE bit) here because
+ * AArch64 has the same system calls both on little- and big- endian.
+ */
+static inline int syscall_get_arch(void)
+{
+ if (is_compat_task())
+ return AUDIT_ARCH_ARM;
+
+ return AUDIT_ARCH_AARCH64;
+}
+
#endif /* __ASM_SYSCALL_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 70dd1ac01f16..e4a98b4c2020 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -114,6 +114,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
#define TIF_SYSCALL_TRACEPOINT 10
@@ -133,10 +134,15 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME)
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 10ca8ff93cc2..75f51eadb9eb 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -18,6 +18,19 @@
#ifndef __ASM_TRAP_H
#define __ASM_TRAP_H
+#include <linux/list.h>
+
+struct undef_hook {
+ struct list_head node;
+ u32 instr_mask;
+ u32 instr_val;
+ u32 pstate_mask;
+ u32 pstate_val;
+ int (*fn)(struct pt_regs *regs, unsigned int instr);
+};
+
+void register_undef_hook(struct undef_hook *hook);
+
static inline int in_exception_text(unsigned long ptr)
{
extern char __exception_text_start[];
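
register_undef_hook() lets other code claim and emulate undefined instructions that match an encoding and PSTATE pattern. The fragment below is a kernel-context sketch of a hook registration; the mask/value pair and the "0 means handled" return convention are assumptions modelled on the arm32 hooks, not taken from this patch.

#include <asm/ptrace.h>
#include <asm/traps.h>
#include <linux/init.h>

static int emulate_example(struct pt_regs *regs, unsigned int instr)
{
	/* decode 'instr', update 'regs', then step past the faulting instruction */
	regs->pc += 4;
	return 0;	/* 0 is taken to mean "handled", as in the arm32 hooks */
}

static struct undef_hook example_hook = {
	.instr_mask	= 0x0fb00ff0,	/* illustrative: matches SWP/SWPB on AArch32 */
	.instr_val	= 0x01000090,
	.pstate_mask	= COMPAT_PSR_MODE_MASK,
	.pstate_val	= COMPAT_PSR_MODE_USR,
	.fn		= emulate_example,
};

static int __init example_hook_init(void)
{
	register_undef_hook(&example_hook);
	return 0;
}
core_initcall(example_hook_init);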
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index c335479c2638..f67d0ec20f94 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -25,7 +25,27 @@
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
+
+/*
+ * Compat syscall numbers used by the AArch64 kernel.
+ */
+#define __NR_compat_restart_syscall 0
+#define __NR_compat_exit 1
+#define __NR_compat_read 3
+#define __NR_compat_write 4
+#define __NR_compat_sigreturn 119
+#define __NR_compat_rt_sigreturn 173
+
+/*
+ * The following SVCs are ARM private.
+ */
+#define __ARM_NR_COMPAT_BASE 0x0f0000
+#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
+#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
+
+#define __NR_compat_syscalls 384
#endif
+
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 58125bf008d3..76d094565090 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -21,399 +21,771 @@
#define __SYSCALL(x, y)
#endif
-__SYSCALL(0, sys_restart_syscall)
-__SYSCALL(1, sys_exit)
-__SYSCALL(2, sys_fork)
-__SYSCALL(3, sys_read)
-__SYSCALL(4, sys_write)
-__SYSCALL(5, compat_sys_open)
-__SYSCALL(6, sys_close)
-__SYSCALL(7, sys_ni_syscall) /* 7 was sys_waitpid */
-__SYSCALL(8, sys_creat)
-__SYSCALL(9, sys_link)
-__SYSCALL(10, sys_unlink)
-__SYSCALL(11, compat_sys_execve)
-__SYSCALL(12, sys_chdir)
-__SYSCALL(13, sys_ni_syscall) /* 13 was sys_time */
-__SYSCALL(14, sys_mknod)
-__SYSCALL(15, sys_chmod)
-__SYSCALL(16, sys_lchown16)
-__SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */
-__SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */
-__SYSCALL(19, compat_sys_lseek)
-__SYSCALL(20, sys_getpid)
-__SYSCALL(21, compat_sys_mount)
-__SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */
-__SYSCALL(23, sys_setuid16)
-__SYSCALL(24, sys_getuid16)
-__SYSCALL(25, sys_ni_syscall) /* 25 was sys_stime */
-__SYSCALL(26, compat_sys_ptrace)
-__SYSCALL(27, sys_ni_syscall) /* 27 was sys_alarm */
-__SYSCALL(28, sys_ni_syscall) /* 28 was sys_fstat */
-__SYSCALL(29, sys_pause)
-__SYSCALL(30, sys_ni_syscall) /* 30 was sys_utime */
-__SYSCALL(31, sys_ni_syscall) /* 31 was sys_stty */
-__SYSCALL(32, sys_ni_syscall) /* 32 was sys_gtty */
-__SYSCALL(33, sys_access)
-__SYSCALL(34, sys_nice)
-__SYSCALL(35, sys_ni_syscall) /* 35 was sys_ftime */
-__SYSCALL(36, sys_sync)
-__SYSCALL(37, sys_kill)
-__SYSCALL(38, sys_rename)
-__SYSCALL(39, sys_mkdir)
-__SYSCALL(40, sys_rmdir)
-__SYSCALL(41, sys_dup)
-__SYSCALL(42, sys_pipe)
-__SYSCALL(43, compat_sys_times)
-__SYSCALL(44, sys_ni_syscall) /* 44 was sys_prof */
-__SYSCALL(45, sys_brk)
-__SYSCALL(46, sys_setgid16)
-__SYSCALL(47, sys_getgid16)
-__SYSCALL(48, sys_ni_syscall) /* 48 was sys_signal */
-__SYSCALL(49, sys_geteuid16)
-__SYSCALL(50, sys_getegid16)
-__SYSCALL(51, sys_acct)
-__SYSCALL(52, sys_umount)
-__SYSCALL(53, sys_ni_syscall) /* 53 was sys_lock */
-__SYSCALL(54, compat_sys_ioctl)
-__SYSCALL(55, compat_sys_fcntl)
-__SYSCALL(56, sys_ni_syscall) /* 56 was sys_mpx */
-__SYSCALL(57, sys_setpgid)
-__SYSCALL(58, sys_ni_syscall) /* 58 was sys_ulimit */
-__SYSCALL(59, sys_ni_syscall) /* 59 was sys_olduname */
-__SYSCALL(60, sys_umask)
-__SYSCALL(61, sys_chroot)
-__SYSCALL(62, compat_sys_ustat)
-__SYSCALL(63, sys_dup2)
-__SYSCALL(64, sys_getppid)
-__SYSCALL(65, sys_getpgrp)
-__SYSCALL(66, sys_setsid)
-__SYSCALL(67, compat_sys_sigaction)
-__SYSCALL(68, sys_ni_syscall) /* 68 was sys_sgetmask */
-__SYSCALL(69, sys_ni_syscall) /* 69 was sys_ssetmask */
-__SYSCALL(70, sys_setreuid16)
-__SYSCALL(71, sys_setregid16)
-__SYSCALL(72, sys_sigsuspend)
-__SYSCALL(73, compat_sys_sigpending)
-__SYSCALL(74, sys_sethostname)
-__SYSCALL(75, compat_sys_setrlimit)
-__SYSCALL(76, sys_ni_syscall) /* 76 was compat_sys_getrlimit */
-__SYSCALL(77, compat_sys_getrusage)
-__SYSCALL(78, compat_sys_gettimeofday)
-__SYSCALL(79, compat_sys_settimeofday)
-__SYSCALL(80, sys_getgroups16)
-__SYSCALL(81, sys_setgroups16)
-__SYSCALL(82, sys_ni_syscall) /* 82 was compat_sys_select */
-__SYSCALL(83, sys_symlink)
-__SYSCALL(84, sys_ni_syscall) /* 84 was sys_lstat */
-__SYSCALL(85, sys_readlink)
-__SYSCALL(86, sys_uselib)
-__SYSCALL(87, sys_swapon)
-__SYSCALL(88, sys_reboot)
-__SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */
-__SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */
-__SYSCALL(91, sys_munmap)
-__SYSCALL(92, compat_sys_truncate)
-__SYSCALL(93, compat_sys_ftruncate)
-__SYSCALL(94, sys_fchmod)
-__SYSCALL(95, sys_fchown16)
-__SYSCALL(96, sys_getpriority)
-__SYSCALL(97, sys_setpriority)
-__SYSCALL(98, sys_ni_syscall) /* 98 was sys_profil */
-__SYSCALL(99, compat_sys_statfs)
-__SYSCALL(100, compat_sys_fstatfs)
-__SYSCALL(101, sys_ni_syscall) /* 101 was sys_ioperm */
-__SYSCALL(102, sys_ni_syscall) /* 102 was sys_socketcall */
-__SYSCALL(103, sys_syslog)
-__SYSCALL(104, compat_sys_setitimer)
-__SYSCALL(105, compat_sys_getitimer)
-__SYSCALL(106, compat_sys_newstat)
-__SYSCALL(107, compat_sys_newlstat)
-__SYSCALL(108, compat_sys_newfstat)
-__SYSCALL(109, sys_ni_syscall) /* 109 was sys_uname */
-__SYSCALL(110, sys_ni_syscall) /* 110 was sys_iopl */
-__SYSCALL(111, sys_vhangup)
-__SYSCALL(112, sys_ni_syscall) /* 112 was sys_idle */
-__SYSCALL(113, sys_ni_syscall) /* 113 was sys_syscall */
-__SYSCALL(114, compat_sys_wait4)
-__SYSCALL(115, sys_swapoff)
-__SYSCALL(116, compat_sys_sysinfo)
-__SYSCALL(117, sys_ni_syscall) /* 117 was sys_ipc */
-__SYSCALL(118, sys_fsync)
-__SYSCALL(119, compat_sys_sigreturn_wrapper)
-__SYSCALL(120, sys_clone)
-__SYSCALL(121, sys_setdomainname)
-__SYSCALL(122, sys_newuname)
-__SYSCALL(123, sys_ni_syscall) /* 123 was sys_modify_ldt */
-__SYSCALL(124, compat_sys_adjtimex)
-__SYSCALL(125, sys_mprotect)
-__SYSCALL(126, compat_sys_sigprocmask)
-__SYSCALL(127, sys_ni_syscall) /* 127 was sys_create_module */
-__SYSCALL(128, sys_init_module)
-__SYSCALL(129, sys_delete_module)
-__SYSCALL(130, sys_ni_syscall) /* 130 was sys_get_kernel_syms */
-__SYSCALL(131, sys_quotactl)
-__SYSCALL(132, sys_getpgid)
-__SYSCALL(133, sys_fchdir)
-__SYSCALL(134, sys_bdflush)
-__SYSCALL(135, sys_sysfs)
-__SYSCALL(136, sys_personality)
-__SYSCALL(137, sys_ni_syscall) /* 137 was sys_afs_syscall */
-__SYSCALL(138, sys_setfsuid16)
-__SYSCALL(139, sys_setfsgid16)
-__SYSCALL(140, sys_llseek)
-__SYSCALL(141, compat_sys_getdents)
-__SYSCALL(142, compat_sys_select)
-__SYSCALL(143, sys_flock)
-__SYSCALL(144, sys_msync)
-__SYSCALL(145, compat_sys_readv)
-__SYSCALL(146, compat_sys_writev)
-__SYSCALL(147, sys_getsid)
-__SYSCALL(148, sys_fdatasync)
-__SYSCALL(149, compat_sys_sysctl)
-__SYSCALL(150, sys_mlock)
-__SYSCALL(151, sys_munlock)
-__SYSCALL(152, sys_mlockall)
-__SYSCALL(153, sys_munlockall)
-__SYSCALL(154, sys_sched_setparam)
-__SYSCALL(155, sys_sched_getparam)
-__SYSCALL(156, sys_sched_setscheduler)
-__SYSCALL(157, sys_sched_getscheduler)
-__SYSCALL(158, sys_sched_yield)
-__SYSCALL(159, sys_sched_get_priority_max)
-__SYSCALL(160, sys_sched_get_priority_min)
-__SYSCALL(161, compat_sys_sched_rr_get_interval)
-__SYSCALL(162, compat_sys_nanosleep)
-__SYSCALL(163, sys_mremap)
-__SYSCALL(164, sys_setresuid16)
-__SYSCALL(165, sys_getresuid16)
-__SYSCALL(166, sys_ni_syscall) /* 166 was sys_vm86 */
-__SYSCALL(167, sys_ni_syscall) /* 167 was sys_query_module */
-__SYSCALL(168, sys_poll)
-__SYSCALL(169, sys_ni_syscall)
-__SYSCALL(170, sys_setresgid16)
-__SYSCALL(171, sys_getresgid16)
-__SYSCALL(172, sys_prctl)
-__SYSCALL(173, compat_sys_rt_sigreturn_wrapper)
-__SYSCALL(174, compat_sys_rt_sigaction)
-__SYSCALL(175, compat_sys_rt_sigprocmask)
-__SYSCALL(176, compat_sys_rt_sigpending)
-__SYSCALL(177, compat_sys_rt_sigtimedwait)
-__SYSCALL(178, compat_sys_rt_sigqueueinfo)
-__SYSCALL(179, compat_sys_rt_sigsuspend)
-__SYSCALL(180, compat_sys_pread64_wrapper)
-__SYSCALL(181, compat_sys_pwrite64_wrapper)
-__SYSCALL(182, sys_chown16)
-__SYSCALL(183, sys_getcwd)
-__SYSCALL(184, sys_capget)
-__SYSCALL(185, sys_capset)
-__SYSCALL(186, compat_sys_sigaltstack)
-__SYSCALL(187, compat_sys_sendfile)
-__SYSCALL(188, sys_ni_syscall) /* 188 reserved */
-__SYSCALL(189, sys_ni_syscall) /* 189 reserved */
-__SYSCALL(190, sys_vfork)
-__SYSCALL(191, compat_sys_getrlimit) /* SuS compliant getrlimit */
-__SYSCALL(192, sys_mmap_pgoff)
-__SYSCALL(193, compat_sys_truncate64_wrapper)
-__SYSCALL(194, compat_sys_ftruncate64_wrapper)
-__SYSCALL(195, sys_stat64)
-__SYSCALL(196, sys_lstat64)
-__SYSCALL(197, sys_fstat64)
-__SYSCALL(198, sys_lchown)
-__SYSCALL(199, sys_getuid)
-__SYSCALL(200, sys_getgid)
-__SYSCALL(201, sys_geteuid)
-__SYSCALL(202, sys_getegid)
-__SYSCALL(203, sys_setreuid)
-__SYSCALL(204, sys_setregid)
-__SYSCALL(205, sys_getgroups)
-__SYSCALL(206, sys_setgroups)
-__SYSCALL(207, sys_fchown)
-__SYSCALL(208, sys_setresuid)
-__SYSCALL(209, sys_getresuid)
-__SYSCALL(210, sys_setresgid)
-__SYSCALL(211, sys_getresgid)
-__SYSCALL(212, sys_chown)
-__SYSCALL(213, sys_setuid)
-__SYSCALL(214, sys_setgid)
-__SYSCALL(215, sys_setfsuid)
-__SYSCALL(216, sys_setfsgid)
-__SYSCALL(217, compat_sys_getdents64)
-__SYSCALL(218, sys_pivot_root)
-__SYSCALL(219, sys_mincore)
-__SYSCALL(220, sys_madvise)
-__SYSCALL(221, compat_sys_fcntl64)
-__SYSCALL(222, sys_ni_syscall) /* 222 for tux */
-__SYSCALL(223, sys_ni_syscall) /* 223 is unused */
-__SYSCALL(224, sys_gettid)
-__SYSCALL(225, compat_sys_readahead_wrapper)
-__SYSCALL(226, sys_setxattr)
-__SYSCALL(227, sys_lsetxattr)
-__SYSCALL(228, sys_fsetxattr)
-__SYSCALL(229, sys_getxattr)
-__SYSCALL(230, sys_lgetxattr)
-__SYSCALL(231, sys_fgetxattr)
-__SYSCALL(232, sys_listxattr)
-__SYSCALL(233, sys_llistxattr)
-__SYSCALL(234, sys_flistxattr)
-__SYSCALL(235, sys_removexattr)
-__SYSCALL(236, sys_lremovexattr)
-__SYSCALL(237, sys_fremovexattr)
-__SYSCALL(238, sys_tkill)
-__SYSCALL(239, sys_sendfile64)
-__SYSCALL(240, compat_sys_futex)
-__SYSCALL(241, compat_sys_sched_setaffinity)
-__SYSCALL(242, compat_sys_sched_getaffinity)
-__SYSCALL(243, compat_sys_io_setup)
-__SYSCALL(244, sys_io_destroy)
-__SYSCALL(245, compat_sys_io_getevents)
-__SYSCALL(246, compat_sys_io_submit)
-__SYSCALL(247, sys_io_cancel)
-__SYSCALL(248, sys_exit_group)
-__SYSCALL(249, compat_sys_lookup_dcookie)
-__SYSCALL(250, sys_epoll_create)
-__SYSCALL(251, sys_epoll_ctl)
-__SYSCALL(252, sys_epoll_wait)
-__SYSCALL(253, sys_remap_file_pages)
-__SYSCALL(254, sys_ni_syscall) /* 254 for set_thread_area */
-__SYSCALL(255, sys_ni_syscall) /* 255 for get_thread_area */
-__SYSCALL(256, sys_set_tid_address)
-__SYSCALL(257, compat_sys_timer_create)
-__SYSCALL(258, compat_sys_timer_settime)
-__SYSCALL(259, compat_sys_timer_gettime)
-__SYSCALL(260, sys_timer_getoverrun)
-__SYSCALL(261, sys_timer_delete)
-__SYSCALL(262, compat_sys_clock_settime)
-__SYSCALL(263, compat_sys_clock_gettime)
-__SYSCALL(264, compat_sys_clock_getres)
-__SYSCALL(265, compat_sys_clock_nanosleep)
-__SYSCALL(266, compat_sys_statfs64_wrapper)
-__SYSCALL(267, compat_sys_fstatfs64_wrapper)
-__SYSCALL(268, sys_tgkill)
-__SYSCALL(269, compat_sys_utimes)
-__SYSCALL(270, compat_sys_fadvise64_64_wrapper)
-__SYSCALL(271, sys_pciconfig_iobase)
-__SYSCALL(272, sys_pciconfig_read)
-__SYSCALL(273, sys_pciconfig_write)
-__SYSCALL(274, compat_sys_mq_open)
-__SYSCALL(275, sys_mq_unlink)
-__SYSCALL(276, compat_sys_mq_timedsend)
-__SYSCALL(277, compat_sys_mq_timedreceive)
-__SYSCALL(278, compat_sys_mq_notify)
-__SYSCALL(279, compat_sys_mq_getsetattr)
-__SYSCALL(280, compat_sys_waitid)
-__SYSCALL(281, sys_socket)
-__SYSCALL(282, sys_bind)
-__SYSCALL(283, sys_connect)
-__SYSCALL(284, sys_listen)
-__SYSCALL(285, sys_accept)
-__SYSCALL(286, sys_getsockname)
-__SYSCALL(287, sys_getpeername)
-__SYSCALL(288, sys_socketpair)
-__SYSCALL(289, sys_send)
-__SYSCALL(290, sys_sendto)
-__SYSCALL(291, compat_sys_recv)
-__SYSCALL(292, compat_sys_recvfrom)
-__SYSCALL(293, sys_shutdown)
-__SYSCALL(294, compat_sys_setsockopt)
-__SYSCALL(295, compat_sys_getsockopt)
-__SYSCALL(296, compat_sys_sendmsg)
-__SYSCALL(297, compat_sys_recvmsg)
-__SYSCALL(298, sys_semop)
-__SYSCALL(299, sys_semget)
-__SYSCALL(300, compat_sys_semctl)
-__SYSCALL(301, compat_sys_msgsnd)
-__SYSCALL(302, compat_sys_msgrcv)
-__SYSCALL(303, sys_msgget)
-__SYSCALL(304, compat_sys_msgctl)
-__SYSCALL(305, compat_sys_shmat)
-__SYSCALL(306, sys_shmdt)
-__SYSCALL(307, sys_shmget)
-__SYSCALL(308, compat_sys_shmctl)
-__SYSCALL(309, sys_add_key)
-__SYSCALL(310, sys_request_key)
-__SYSCALL(311, compat_sys_keyctl)
-__SYSCALL(312, compat_sys_semtimedop)
-__SYSCALL(313, sys_ni_syscall)
-__SYSCALL(314, sys_ioprio_set)
-__SYSCALL(315, sys_ioprio_get)
-__SYSCALL(316, sys_inotify_init)
-__SYSCALL(317, sys_inotify_add_watch)
-__SYSCALL(318, sys_inotify_rm_watch)
-__SYSCALL(319, compat_sys_mbind)
-__SYSCALL(320, compat_sys_get_mempolicy)
-__SYSCALL(321, compat_sys_set_mempolicy)
-__SYSCALL(322, compat_sys_openat)
-__SYSCALL(323, sys_mkdirat)
-__SYSCALL(324, sys_mknodat)
-__SYSCALL(325, sys_fchownat)
-__SYSCALL(326, compat_sys_futimesat)
-__SYSCALL(327, sys_fstatat64)
-__SYSCALL(328, sys_unlinkat)
-__SYSCALL(329, sys_renameat)
-__SYSCALL(330, sys_linkat)
-__SYSCALL(331, sys_symlinkat)
-__SYSCALL(332, sys_readlinkat)
-__SYSCALL(333, sys_fchmodat)
-__SYSCALL(334, sys_faccessat)
-__SYSCALL(335, compat_sys_pselect6)
-__SYSCALL(336, compat_sys_ppoll)
-__SYSCALL(337, sys_unshare)
-__SYSCALL(338, compat_sys_set_robust_list)
-__SYSCALL(339, compat_sys_get_robust_list)
-__SYSCALL(340, sys_splice)
-__SYSCALL(341, compat_sys_sync_file_range2_wrapper)
-__SYSCALL(342, sys_tee)
-__SYSCALL(343, compat_sys_vmsplice)
-__SYSCALL(344, compat_sys_move_pages)
-__SYSCALL(345, sys_getcpu)
-__SYSCALL(346, compat_sys_epoll_pwait)
-__SYSCALL(347, compat_sys_kexec_load)
-__SYSCALL(348, compat_sys_utimensat)
-__SYSCALL(349, compat_sys_signalfd)
-__SYSCALL(350, sys_timerfd_create)
-__SYSCALL(351, sys_eventfd)
-__SYSCALL(352, compat_sys_fallocate_wrapper)
-__SYSCALL(353, compat_sys_timerfd_settime)
-__SYSCALL(354, compat_sys_timerfd_gettime)
-__SYSCALL(355, compat_sys_signalfd4)
-__SYSCALL(356, sys_eventfd2)
-__SYSCALL(357, sys_epoll_create1)
-__SYSCALL(358, sys_dup3)
-__SYSCALL(359, sys_pipe2)
-__SYSCALL(360, sys_inotify_init1)
-__SYSCALL(361, compat_sys_preadv)
-__SYSCALL(362, compat_sys_pwritev)
-__SYSCALL(363, compat_sys_rt_tgsigqueueinfo)
-__SYSCALL(364, sys_perf_event_open)
-__SYSCALL(365, compat_sys_recvmmsg)
-__SYSCALL(366, sys_accept4)
-__SYSCALL(367, sys_fanotify_init)
-__SYSCALL(368, compat_sys_fanotify_mark)
-__SYSCALL(369, sys_prlimit64)
-__SYSCALL(370, sys_name_to_handle_at)
-__SYSCALL(371, compat_sys_open_by_handle_at)
-__SYSCALL(372, compat_sys_clock_adjtime)
-__SYSCALL(373, sys_syncfs)
-__SYSCALL(374, compat_sys_sendmmsg)
-__SYSCALL(375, sys_setns)
-__SYSCALL(376, compat_sys_process_vm_readv)
-__SYSCALL(377, compat_sys_process_vm_writev)
-__SYSCALL(378, sys_ni_syscall) /* 378 for kcmp */
-
-#define __NR_compat_syscalls 379
-
-/*
- * Compat syscall numbers used by the AArch64 kernel.
- */
-#define __NR_compat_restart_syscall 0
-#define __NR_compat_sigreturn 119
-#define __NR_compat_rt_sigreturn 173
-
-
-/*
- * The following SVCs are ARM private.
- */
-#define __ARM_NR_COMPAT_BASE 0x0f0000
-#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
-#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
+#define __NR_restart_syscall 0
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_exit 1
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_fork 2
+__SYSCALL(__NR_fork, sys_fork)
+#define __NR_read 3
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 4
+__SYSCALL(__NR_write, sys_write)
+#define __NR_open 5
+__SYSCALL(__NR_open, compat_sys_open)
+#define __NR_close 6
+__SYSCALL(__NR_close, sys_close)
+ /* 7 was sys_waitpid */
+__SYSCALL(7, sys_ni_syscall)
+#define __NR_creat 8
+__SYSCALL(__NR_creat, sys_creat)
+#define __NR_link 9
+__SYSCALL(__NR_link, sys_link)
+#define __NR_unlink 10
+__SYSCALL(__NR_unlink, sys_unlink)
+#define __NR_execve 11
+__SYSCALL(__NR_execve, compat_sys_execve)
+#define __NR_chdir 12
+__SYSCALL(__NR_chdir, sys_chdir)
+ /* 13 was sys_time */
+__SYSCALL(13, sys_ni_syscall)
+#define __NR_mknod 14
+__SYSCALL(__NR_mknod, sys_mknod)
+#define __NR_chmod 15
+__SYSCALL(__NR_chmod, sys_chmod)
+#define __NR_lchown 16
+__SYSCALL(__NR_lchown, sys_lchown16)
+ /* 17 was sys_break */
+__SYSCALL(17, sys_ni_syscall)
+ /* 18 was sys_stat */
+__SYSCALL(18, sys_ni_syscall)
+#define __NR_lseek 19
+__SYSCALL(__NR_lseek, compat_sys_lseek)
+#define __NR_getpid 20
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_mount 21
+__SYSCALL(__NR_mount, compat_sys_mount)
+ /* 22 was sys_umount */
+__SYSCALL(22, sys_ni_syscall)
+#define __NR_setuid 23
+__SYSCALL(__NR_setuid, sys_setuid16)
+#define __NR_getuid 24
+__SYSCALL(__NR_getuid, sys_getuid16)
+ /* 25 was sys_stime */
+__SYSCALL(25, sys_ni_syscall)
+#define __NR_ptrace 26
+__SYSCALL(__NR_ptrace, compat_sys_ptrace)
+ /* 27 was sys_alarm */
+__SYSCALL(27, sys_ni_syscall)
+ /* 28 was sys_fstat */
+__SYSCALL(28, sys_ni_syscall)
+#define __NR_pause 29
+__SYSCALL(__NR_pause, sys_pause)
+ /* 30 was sys_utime */
+__SYSCALL(30, sys_ni_syscall)
+ /* 31 was sys_stty */
+__SYSCALL(31, sys_ni_syscall)
+ /* 32 was sys_gtty */
+__SYSCALL(32, sys_ni_syscall)
+#define __NR_access 33
+__SYSCALL(__NR_access, sys_access)
+#define __NR_nice 34
+__SYSCALL(__NR_nice, sys_nice)
+ /* 35 was sys_ftime */
+__SYSCALL(35, sys_ni_syscall)
+#define __NR_sync 36
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_kill 37
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_rename 38
+__SYSCALL(__NR_rename, sys_rename)
+#define __NR_mkdir 39
+__SYSCALL(__NR_mkdir, sys_mkdir)
+#define __NR_rmdir 40
+__SYSCALL(__NR_rmdir, sys_rmdir)
+#define __NR_dup 41
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_pipe 42
+__SYSCALL(__NR_pipe, sys_pipe)
+#define __NR_times 43
+__SYSCALL(__NR_times, compat_sys_times)
+ /* 44 was sys_prof */
+__SYSCALL(44, sys_ni_syscall)
+#define __NR_brk 45
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_setgid 46
+__SYSCALL(__NR_setgid, sys_setgid16)
+#define __NR_getgid 47
+__SYSCALL(__NR_getgid, sys_getgid16)
+ /* 48 was sys_signal */
+__SYSCALL(48, sys_ni_syscall)
+#define __NR_geteuid 49
+__SYSCALL(__NR_geteuid, sys_geteuid16)
+#define __NR_getegid 50
+__SYSCALL(__NR_getegid, sys_getegid16)
+#define __NR_acct 51
+__SYSCALL(__NR_acct, sys_acct)
+#define __NR_umount2 52
+__SYSCALL(__NR_umount2, sys_umount)
+ /* 53 was sys_lock */
+__SYSCALL(53, sys_ni_syscall)
+#define __NR_ioctl 54
+__SYSCALL(__NR_ioctl, compat_sys_ioctl)
+#define __NR_fcntl 55
+__SYSCALL(__NR_fcntl, compat_sys_fcntl)
+ /* 56 was sys_mpx */
+__SYSCALL(56, sys_ni_syscall)
+#define __NR_setpgid 57
+__SYSCALL(__NR_setpgid, sys_setpgid)
+ /* 58 was sys_ulimit */
+__SYSCALL(58, sys_ni_syscall)
+ /* 59 was sys_olduname */
+__SYSCALL(59, sys_ni_syscall)
+#define __NR_umask 60
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_chroot 61
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_ustat 62
+__SYSCALL(__NR_ustat, compat_sys_ustat)
+#define __NR_dup2 63
+__SYSCALL(__NR_dup2, sys_dup2)
+#define __NR_getppid 64
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getpgrp 65
+__SYSCALL(__NR_getpgrp, sys_getpgrp)
+#define __NR_setsid 66
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_sigaction 67
+__SYSCALL(__NR_sigaction, compat_sys_sigaction)
+ /* 68 was sys_sgetmask */
+__SYSCALL(68, sys_ni_syscall)
+ /* 69 was sys_ssetmask */
+__SYSCALL(69, sys_ni_syscall)
+#define __NR_setreuid 70
+__SYSCALL(__NR_setreuid, sys_setreuid16)
+#define __NR_setregid 71
+__SYSCALL(__NR_setregid, sys_setregid16)
+#define __NR_sigsuspend 72
+__SYSCALL(__NR_sigsuspend, sys_sigsuspend)
+#define __NR_sigpending 73
+__SYSCALL(__NR_sigpending, compat_sys_sigpending)
+#define __NR_sethostname 74
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setrlimit 75
+__SYSCALL(__NR_setrlimit, compat_sys_setrlimit)
+ /* 76 was compat_sys_getrlimit */
+__SYSCALL(76, sys_ni_syscall)
+#define __NR_getrusage 77
+__SYSCALL(__NR_getrusage, compat_sys_getrusage)
+#define __NR_gettimeofday 78
+__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday)
+#define __NR_settimeofday 79
+__SYSCALL(__NR_settimeofday, compat_sys_settimeofday)
+#define __NR_getgroups 80
+__SYSCALL(__NR_getgroups, sys_getgroups16)
+#define __NR_setgroups 81
+__SYSCALL(__NR_setgroups, sys_setgroups16)
+ /* 82 was compat_sys_select */
+__SYSCALL(82, sys_ni_syscall)
+#define __NR_symlink 83
+__SYSCALL(__NR_symlink, sys_symlink)
+ /* 84 was sys_lstat */
+__SYSCALL(84, sys_ni_syscall)
+#define __NR_readlink 85
+__SYSCALL(__NR_readlink, sys_readlink)
+#define __NR_uselib 86
+__SYSCALL(__NR_uselib, sys_uselib)
+#define __NR_swapon 87
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_reboot 88
+__SYSCALL(__NR_reboot, sys_reboot)
+ /* 89 was sys_readdir */
+__SYSCALL(89, sys_ni_syscall)
+ /* 90 was sys_mmap */
+__SYSCALL(90, sys_ni_syscall)
+#define __NR_munmap 91
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_truncate 92
+__SYSCALL(__NR_truncate, compat_sys_truncate)
+#define __NR_ftruncate 93
+__SYSCALL(__NR_ftruncate, compat_sys_ftruncate)
+#define __NR_fchmod 94
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchown 95
+__SYSCALL(__NR_fchown, sys_fchown16)
+#define __NR_getpriority 96
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_setpriority 97
+__SYSCALL(__NR_setpriority, sys_setpriority)
+ /* 98 was sys_profil */
+__SYSCALL(98, sys_ni_syscall)
+#define __NR_statfs 99
+__SYSCALL(__NR_statfs, compat_sys_statfs)
+#define __NR_fstatfs 100
+__SYSCALL(__NR_fstatfs, compat_sys_fstatfs)
+ /* 101 was sys_ioperm */
+__SYSCALL(101, sys_ni_syscall)
+ /* 102 was sys_socketcall */
+__SYSCALL(102, sys_ni_syscall)
+#define __NR_syslog 103
+__SYSCALL(__NR_syslog, sys_syslog)
+#define __NR_setitimer 104
+__SYSCALL(__NR_setitimer, compat_sys_setitimer)
+#define __NR_getitimer 105
+__SYSCALL(__NR_getitimer, compat_sys_getitimer)
+#define __NR_stat 106
+__SYSCALL(__NR_stat, compat_sys_newstat)
+#define __NR_lstat 107
+__SYSCALL(__NR_lstat, compat_sys_newlstat)
+#define __NR_fstat 108
+__SYSCALL(__NR_fstat, compat_sys_newfstat)
+ /* 109 was sys_uname */
+__SYSCALL(109, sys_ni_syscall)
+ /* 110 was sys_iopl */
+__SYSCALL(110, sys_ni_syscall)
+#define __NR_vhangup 111
+__SYSCALL(__NR_vhangup, sys_vhangup)
+ /* 112 was sys_idle */
+__SYSCALL(112, sys_ni_syscall)
+ /* 113 was sys_syscall */
+__SYSCALL(113, sys_ni_syscall)
+#define __NR_wait4 114
+__SYSCALL(__NR_wait4, compat_sys_wait4)
+#define __NR_swapoff 115
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_sysinfo 116
+__SYSCALL(__NR_sysinfo, compat_sys_sysinfo)
+ /* 117 was sys_ipc */
+__SYSCALL(117, sys_ni_syscall)
+#define __NR_fsync 118
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_sigreturn 119
+__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper)
+#define __NR_clone 120
+__SYSCALL(__NR_clone, sys_clone)
+#define __NR_setdomainname 121
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_uname 122
+__SYSCALL(__NR_uname, sys_newuname)
+ /* 123 was sys_modify_ldt */
+__SYSCALL(123, sys_ni_syscall)
+#define __NR_adjtimex 124
+__SYSCALL(__NR_adjtimex, compat_sys_adjtimex)
+#define __NR_mprotect 125
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_sigprocmask 126
+__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask)
+ /* 127 was sys_create_module */
+__SYSCALL(127, sys_ni_syscall)
+#define __NR_init_module 128
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 129
+__SYSCALL(__NR_delete_module, sys_delete_module)
+ /* 130 was sys_get_kernel_syms */
+__SYSCALL(130, sys_ni_syscall)
+#define __NR_quotactl 131
+__SYSCALL(__NR_quotactl, sys_quotactl)
+#define __NR_getpgid 132
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_fchdir 133
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_bdflush 134
+__SYSCALL(__NR_bdflush, sys_bdflush)
+#define __NR_sysfs 135
+__SYSCALL(__NR_sysfs, sys_sysfs)
+#define __NR_personality 136
+__SYSCALL(__NR_personality, sys_personality)
+ /* 137 was sys_afs_syscall */
+__SYSCALL(137, sys_ni_syscall)
+#define __NR_setfsuid 138
+__SYSCALL(__NR_setfsuid, sys_setfsuid16)
+#define __NR_setfsgid 139
+__SYSCALL(__NR_setfsgid, sys_setfsgid16)
+#define __NR__llseek 140
+__SYSCALL(__NR__llseek, sys_llseek)
+#define __NR_getdents 141
+__SYSCALL(__NR_getdents, compat_sys_getdents)
+#define __NR__newselect 142
+__SYSCALL(__NR__newselect, compat_sys_select)
+#define __NR_flock 143
+__SYSCALL(__NR_flock, sys_flock)
+#define __NR_msync 144
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_readv 145
+__SYSCALL(__NR_readv, compat_sys_readv)
+#define __NR_writev 146
+__SYSCALL(__NR_writev, compat_sys_writev)
+#define __NR_getsid 147
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_fdatasync 148
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#define __NR__sysctl 149
+__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+#define __NR_mlock 150
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 151
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 152
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 153
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_sched_setparam 154
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_getparam 155
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setscheduler 156
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 157
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_yield 158
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 159
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 160
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 161
+__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval)
+#define __NR_nanosleep 162
+__SYSCALL(__NR_nanosleep, compat_sys_nanosleep)
+#define __NR_mremap 163
+__SYSCALL(__NR_mremap, sys_mremap)
+#define __NR_setresuid 164
+__SYSCALL(__NR_setresuid, sys_setresuid16)
+#define __NR_getresuid 165
+__SYSCALL(__NR_getresuid, sys_getresuid16)
+ /* 166 was sys_vm86 */
+__SYSCALL(166, sys_ni_syscall)
+ /* 167 was sys_query_module */
+__SYSCALL(167, sys_ni_syscall)
+#define __NR_poll 168
+__SYSCALL(__NR_poll, sys_poll)
+#define __NR_nfsservctl 169
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
+#define __NR_setresgid 170
+__SYSCALL(__NR_setresgid, sys_setresgid16)
+#define __NR_getresgid 171
+__SYSCALL(__NR_getresgid, sys_getresgid16)
+#define __NR_prctl 172
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_rt_sigreturn 173
+__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper)
+#define __NR_rt_sigaction 174
+__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction)
+#define __NR_rt_sigprocmask 175
+__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask)
+#define __NR_rt_sigpending 176
+__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 177
+__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 178
+__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo)
+#define __NR_rt_sigsuspend 179
+__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend)
+#define __NR_pread64 180
+__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper)
+#define __NR_pwrite64 181
+__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper)
+#define __NR_chown 182
+__SYSCALL(__NR_chown, sys_chown16)
+#define __NR_getcwd 183
+__SYSCALL(__NR_getcwd, sys_getcwd)
+#define __NR_capget 184
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 185
+__SYSCALL(__NR_capset, sys_capset)
+#define __NR_sigaltstack 186
+__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack)
+#define __NR_sendfile 187
+__SYSCALL(__NR_sendfile, compat_sys_sendfile)
+ /* 188 reserved */
+__SYSCALL(188, sys_ni_syscall)
+ /* 189 reserved */
+__SYSCALL(189, sys_ni_syscall)
+#define __NR_vfork 190
+__SYSCALL(__NR_vfork, sys_vfork)
+#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
+__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */
+#define __NR_mmap2 192
+__SYSCALL(__NR_mmap2, sys_mmap_pgoff)
+#define __NR_truncate64 193
+__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
+#define __NR_ftruncate64 194
+__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper)
+#define __NR_stat64 195
+__SYSCALL(__NR_stat64, sys_stat64)
+#define __NR_lstat64 196
+__SYSCALL(__NR_lstat64, sys_lstat64)
+#define __NR_fstat64 197
+__SYSCALL(__NR_fstat64, sys_fstat64)
+#define __NR_lchown32 198
+__SYSCALL(__NR_lchown32, sys_lchown)
+#define __NR_getuid32 199
+__SYSCALL(__NR_getuid32, sys_getuid)
+#define __NR_getgid32 200
+__SYSCALL(__NR_getgid32, sys_getgid)
+#define __NR_geteuid32 201
+__SYSCALL(__NR_geteuid32, sys_geteuid)
+#define __NR_getegid32 202
+__SYSCALL(__NR_getegid32, sys_getegid)
+#define __NR_setreuid32 203
+__SYSCALL(__NR_setreuid32, sys_setreuid)
+#define __NR_setregid32 204
+__SYSCALL(__NR_setregid32, sys_setregid)
+#define __NR_getgroups32 205
+__SYSCALL(__NR_getgroups32, sys_getgroups)
+#define __NR_setgroups32 206
+__SYSCALL(__NR_setgroups32, sys_setgroups)
+#define __NR_fchown32 207
+__SYSCALL(__NR_fchown32, sys_fchown)
+#define __NR_setresuid32 208
+__SYSCALL(__NR_setresuid32, sys_setresuid)
+#define __NR_getresuid32 209
+__SYSCALL(__NR_getresuid32, sys_getresuid)
+#define __NR_setresgid32 210
+__SYSCALL(__NR_setresgid32, sys_setresgid)
+#define __NR_getresgid32 211
+__SYSCALL(__NR_getresgid32, sys_getresgid)
+#define __NR_chown32 212
+__SYSCALL(__NR_chown32, sys_chown)
+#define __NR_setuid32 213
+__SYSCALL(__NR_setuid32, sys_setuid)
+#define __NR_setgid32 214
+__SYSCALL(__NR_setgid32, sys_setgid)
+#define __NR_setfsuid32 215
+__SYSCALL(__NR_setfsuid32, sys_setfsuid)
+#define __NR_setfsgid32 216
+__SYSCALL(__NR_setfsgid32, sys_setfsgid)
+#define __NR_getdents64 217
+__SYSCALL(__NR_getdents64, compat_sys_getdents64)
+#define __NR_pivot_root 218
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+#define __NR_mincore 219
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 220
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_fcntl64 221
+__SYSCALL(__NR_fcntl64, compat_sys_fcntl64)
+ /* 222 for tux */
+__SYSCALL(222, sys_ni_syscall)
+ /* 223 is unused */
+__SYSCALL(223, sys_ni_syscall)
+#define __NR_gettid 224
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_readahead 225
+__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper)
+#define __NR_setxattr 226
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 227
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 228
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 229
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 230
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 231
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 232
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 233
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 234
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 235
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 236
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 237
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+#define __NR_tkill 238
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_sendfile64 239
+__SYSCALL(__NR_sendfile64, sys_sendfile64)
+#define __NR_futex 240
+__SYSCALL(__NR_futex, compat_sys_futex)
+#define __NR_sched_setaffinity 241
+__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity)
+#define __NR_sched_getaffinity 242
+__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity)
+#define __NR_io_setup 243
+__SYSCALL(__NR_io_setup, compat_sys_io_setup)
+#define __NR_io_destroy 244
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_getevents 245
+__SYSCALL(__NR_io_getevents, compat_sys_io_getevents)
+#define __NR_io_submit 246
+__SYSCALL(__NR_io_submit, compat_sys_io_submit)
+#define __NR_io_cancel 247
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_exit_group 248
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_lookup_dcookie 249
+__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie)
+#define __NR_epoll_create 250
+__SYSCALL(__NR_epoll_create, sys_epoll_create)
+#define __NR_epoll_ctl 251
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_wait 252
+__SYSCALL(__NR_epoll_wait, sys_epoll_wait)
+#define __NR_remap_file_pages 253
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+ /* 254 for set_thread_area */
+__SYSCALL(254, sys_ni_syscall)
+ /* 255 for get_thread_area */
+__SYSCALL(255, sys_ni_syscall)
+#define __NR_set_tid_address 256
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_timer_create 257
+__SYSCALL(__NR_timer_create, compat_sys_timer_create)
+#define __NR_timer_settime 258
+__SYSCALL(__NR_timer_settime, compat_sys_timer_settime)
+#define __NR_timer_gettime 259
+__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime)
+#define __NR_timer_getoverrun 260
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_delete 261
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 262
+__SYSCALL(__NR_clock_settime, compat_sys_clock_settime)
+#define __NR_clock_gettime 263
+__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime)
+#define __NR_clock_getres 264
+__SYSCALL(__NR_clock_getres, compat_sys_clock_getres)
+#define __NR_clock_nanosleep 265
+__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep)
+#define __NR_statfs64 266
+__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper)
+#define __NR_fstatfs64 267
+__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper)
+#define __NR_tgkill 268
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_utimes 269
+__SYSCALL(__NR_utimes, compat_sys_utimes)
+#define __NR_arm_fadvise64_64 270
+__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper)
+#define __NR_pciconfig_iobase 271
+__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase)
+#define __NR_pciconfig_read 272
+__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read)
+#define __NR_pciconfig_write 273
+__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write)
+#define __NR_mq_open 274
+__SYSCALL(__NR_mq_open, compat_sys_mq_open)
+#define __NR_mq_unlink 275
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 276
+__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend)
+#define __NR_mq_timedreceive 277
+__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive)
+#define __NR_mq_notify 278
+__SYSCALL(__NR_mq_notify, compat_sys_mq_notify)
+#define __NR_mq_getsetattr 279
+__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr)
+#define __NR_waitid 280
+__SYSCALL(__NR_waitid, compat_sys_waitid)
+#define __NR_socket 281
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_bind 282
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_connect 283
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_listen 284
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 285
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_getsockname 286
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 287
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_socketpair 288
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_send 289
+__SYSCALL(__NR_send, sys_send)
+#define __NR_sendto 290
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recv 291
+__SYSCALL(__NR_recv, compat_sys_recv)
+#define __NR_recvfrom 292
+__SYSCALL(__NR_recvfrom, compat_sys_recvfrom)
+#define __NR_shutdown 293
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_setsockopt 294
+__SYSCALL(__NR_setsockopt, compat_sys_setsockopt)
+#define __NR_getsockopt 295
+__SYSCALL(__NR_getsockopt, compat_sys_getsockopt)
+#define __NR_sendmsg 296
+__SYSCALL(__NR_sendmsg, compat_sys_sendmsg)
+#define __NR_recvmsg 297
+__SYSCALL(__NR_recvmsg, compat_sys_recvmsg)
+#define __NR_semop 298
+__SYSCALL(__NR_semop, sys_semop)
+#define __NR_semget 299
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 300
+__SYSCALL(__NR_semctl, compat_sys_semctl)
+#define __NR_msgsnd 301
+__SYSCALL(__NR_msgsnd, compat_sys_msgsnd)
+#define __NR_msgrcv 302
+__SYSCALL(__NR_msgrcv, compat_sys_msgrcv)
+#define __NR_msgget 303
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 304
+__SYSCALL(__NR_msgctl, compat_sys_msgctl)
+#define __NR_shmat 305
+__SYSCALL(__NR_shmat, compat_sys_shmat)
+#define __NR_shmdt 306
+__SYSCALL(__NR_shmdt, sys_shmdt)
+#define __NR_shmget 307
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 308
+__SYSCALL(__NR_shmctl, compat_sys_shmctl)
+#define __NR_add_key 309
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 310
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 311
+__SYSCALL(__NR_keyctl, compat_sys_keyctl)
+#define __NR_semtimedop 312
+__SYSCALL(__NR_semtimedop, compat_sys_semtimedop)
+#define __NR_vserver 313
+__SYSCALL(__NR_vserver, sys_ni_syscall)
+#define __NR_ioprio_set 314
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 315
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+#define __NR_inotify_init 316
+__SYSCALL(__NR_inotify_init, sys_inotify_init)
+#define __NR_inotify_add_watch 317
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 318
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+#define __NR_mbind 319
+__SYSCALL(__NR_mbind, compat_sys_mbind)
+#define __NR_get_mempolicy 320
+__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
+#define __NR_set_mempolicy 321
+__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
+#define __NR_openat 322
+__SYSCALL(__NR_openat, compat_sys_openat)
+#define __NR_mkdirat 323
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_mknodat 324
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_fchownat 325
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_futimesat 326
+__SYSCALL(__NR_futimesat, compat_sys_futimesat)
+#define __NR_fstatat64 327
+__SYSCALL(__NR_fstatat64, sys_fstatat64)
+#define __NR_unlinkat 328
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_renameat 329
+__SYSCALL(__NR_renameat, sys_renameat)
+#define __NR_linkat 330
+__SYSCALL(__NR_linkat, sys_linkat)
+#define __NR_symlinkat 331
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_readlinkat 332
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#define __NR_fchmodat 333
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_faccessat 334
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_pselect6 335
+__SYSCALL(__NR_pselect6, compat_sys_pselect6)
+#define __NR_ppoll 336
+__SYSCALL(__NR_ppoll, compat_sys_ppoll)
+#define __NR_unshare 337
+__SYSCALL(__NR_unshare, sys_unshare)
+#define __NR_set_robust_list 338
+__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list)
+#define __NR_get_robust_list 339
+__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list)
+#define __NR_splice 340
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_sync_file_range2 341
+__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper)
+#define __NR_tee 342
+__SYSCALL(__NR_tee, sys_tee)
+#define __NR_vmsplice 343
+__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+#define __NR_move_pages 344
+__SYSCALL(__NR_move_pages, compat_sys_move_pages)
+#define __NR_getcpu 345
+__SYSCALL(__NR_getcpu, sys_getcpu)
+#define __NR_epoll_pwait 346
+__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
+#define __NR_kexec_load 347
+__SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
+#define __NR_utimensat 348
+__SYSCALL(__NR_utimensat, compat_sys_utimensat)
+#define __NR_signalfd 349
+__SYSCALL(__NR_signalfd, compat_sys_signalfd)
+#define __NR_timerfd_create 350
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_eventfd 351
+__SYSCALL(__NR_eventfd, sys_eventfd)
+#define __NR_fallocate 352
+__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper)
+#define __NR_timerfd_settime 353
+__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime)
+#define __NR_timerfd_gettime 354
+__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime)
+#define __NR_signalfd4 355
+__SYSCALL(__NR_signalfd4, compat_sys_signalfd4)
+#define __NR_eventfd2 356
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+#define __NR_epoll_create1 357
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_dup3 358
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR_pipe2 359
+__SYSCALL(__NR_pipe2, sys_pipe2)
+#define __NR_inotify_init1 360
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_preadv 361
+__SYSCALL(__NR_preadv, compat_sys_preadv)
+#define __NR_pwritev 362
+__SYSCALL(__NR_pwritev, compat_sys_pwritev)
+#define __NR_rt_tgsigqueueinfo 363
+__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo)
+#define __NR_perf_event_open 364
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_recvmmsg 365
+__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg)
+#define __NR_accept4 366
+__SYSCALL(__NR_accept4, sys_accept4)
+#define __NR_fanotify_init 367
+__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
+#define __NR_fanotify_mark 368
+__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark)
+#define __NR_prlimit64 369
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 370
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 371
+__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at)
+#define __NR_clock_adjtime 372
+__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime)
+#define __NR_syncfs 373
+__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_sendmmsg 374
+__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
+#define __NR_setns 375
+__SYSCALL(__NR_setns, sys_setns)
+#define __NR_process_vm_readv 376
+__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 377
+__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+#define __NR_kcmp 378
+__SYSCALL(__NR_kcmp, sys_kcmp)
+#define __NR_finit_module 379
+__SYSCALL(__NR_finit_module, sys_finit_module)
+/* #define __NR_sched_setattr 380 */
+__SYSCALL(380, sys_ni_syscall)
+/* #define __NR_sched_getattr 381 */
+__SYSCALL(381, sys_ni_syscall)
+/* #define __NR_renameat2 382 */
+__SYSCALL(382, sys_ni_syscall)
+#define __NR_seccomp 383
+__SYSCALL(__NR_seccomp, sys_seccomp)
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 6913643bbe54..49c61746297d 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -23,6 +23,7 @@
#include <asm/hwcap.h>
+#define PTRACE_SET_SYSCALL 23
/*
* PSR bits
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 12618cbd1bab..b52313653742 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -15,9 +15,10 @@ CFLAGS_REMOVE_return_address.o = -pg
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o return_address.o
+ hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o return_address.o \
+ opcodes.o
-arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
+arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
@@ -34,6 +35,8 @@ arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o
arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
+obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o
+
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
head-y := head.o
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..d358ccacfc00 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+ fpsimd_save_partial x0, 1, 8, 9
+ ret
+ENDPROC(fpsimd_save_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+ fpsimd_restore_partial x0, 8, 9
+ ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1ee38bd61f03..bd30119fd0c3 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -25,9 +25,9 @@
#include <asm/asm-offsets.h>
#include <asm/errno.h>
#include <asm/esr.h>
+#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
-#include <asm/unistd32.h>
/*
* Bad Abort numbers
@@ -601,7 +601,7 @@ fast_work_pending:
str x0, [sp, #S_X0] // returned x0
work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched
- /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
+ /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
ldr x2, [sp, #S_PSTATE]
mov x0, sp // 'regs'
tst x2, #PSR_MODE_MASK // user mode regs?
@@ -677,6 +677,10 @@ __sys_trace:
mov x0, sp
bl syscall_trace_enter
adr lr, __sys_trace_return // return address
+ cmp w0, #RET_SKIP_SYSCALL_TRACE // skip syscall and tracing?
+ b.eq ret_to_user
+ cmp w0, #RET_SKIP_SYSCALL // skip syscall?
+ b.eq __sys_trace_return_skipped
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
cmp scno, sc_nr // check upper syscall limit
@@ -690,6 +694,7 @@ __sys_trace:
__sys_trace_return:
str x0, [sp] // save returned x0
+__sys_trace_return_skipped: // x0 already in regs[0]
mov x0, sp
bl syscall_trace_exit
b ret_to_user
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 522df9c7f3a4..5ba0217df39b 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -34,6 +34,60 @@
#define FPEXC_IDF (1 << 7)
/*
+ * In order to reduce the number of times the FPSIMD state is needlessly saved
+ * and restored, we need to keep track of two things:
+ * (a) for each task, we need to remember which CPU was the last one to have
+ * the task's FPSIMD state loaded into its FPSIMD registers;
+ * (b) for each CPU, we need to remember which task's userland FPSIMD state has
+ * been loaded into its FPSIMD registers most recently, or whether it has
+ * been used to perform kernel mode NEON in the meantime.
+ *
+ * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * the id of the current CPU every time the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was most recently
+ * loaded onto the CPU, or NULL if kernel mode NEON has been performed after that.
+ *
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+ * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+ * indicate whether or not the userland FPSIMD state of the current task is
+ * present in the registers. The flag is set unless the FPSIMD registers of this
+ * CPU currently contain the most recent userland FPSIMD state of the current
+ * task.
+ *
+ * For a certain task, the sequence may look something like this:
+ * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ * cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ * userland FPSIMD state is copied from memory to the registers, the task's
+ * fpsimd_state.cpu field is set to the id of the current CPU, the current
+ * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ * TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+ * - the task executes an ordinary syscall; upon return to userland, the
+ * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
+ * restored;
+ *
+ * - the task executes a syscall which executes some NEON instructions; this is
+ * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
+ * register contents to memory, clears the fpsimd_last_state per-cpu variable
+ * and sets the TIF_FOREIGN_FPSTATE flag;
+ *
+ * - the task gets preempted after kernel_neon_end() is called; as we have not
+ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
+ * whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+
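
The tracking described in the comment above reduces to a two-sided check: the task records which CPU last held its state, and the CPU records which task's state it holds. A minimal restatement of that rule in C (illustrative only, not part of the patch; the names mirror the fields introduced above):

	/* The register contents may be reused only if both links still agree. */
	static bool fpsimd_state_is_live_here(struct task_struct *tsk)
	{
		struct fpsimd_state *st = &tsk->thread.fpsimd_state;

		return __this_cpu_read(fpsimd_last_state) == st &&
		       st->cpu == smp_processor_id();
	}

This is exactly the condition fpsimd_thread_switch() below evaluates before deciding whether to clear or set TIF_FOREIGN_FPSTATE.
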
+/*
* Trapped FP/ASIMD access.
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
@@ -71,44 +125,140 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next)
{
- /* check if not kernel threads */
- if (current->mm)
+ /*
+ * Save the current FPSIMD state to memory, but only if whatever is in
+ * the registers is in fact the most recent userland FPSIMD state of
+ * 'current'.
+ */
+ if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
- if (next->mm)
- fpsimd_load_state(&next->thread.fpsimd_state);
+
+ if (next->mm) {
+ /*
+ * If we are switching to a task whose most recent userland
+ * FPSIMD state is already in the registers of *this* cpu,
+ * we can skip loading the state from memory. Otherwise, set
+ * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+ * upon the next return to userland.
+ */
+ struct fpsimd_state *st = &next->thread.fpsimd_state;
+
+ if (__this_cpu_read(fpsimd_last_state) == st
+ && st->cpu == smp_processor_id())
+ clear_ti_thread_flag(task_thread_info(next),
+ TIF_FOREIGN_FPSTATE);
+ else
+ set_ti_thread_flag(task_thread_info(next),
+ TIF_FOREIGN_FPSTATE);
+ }
}
void fpsimd_flush_thread(void)
{
preempt_disable();
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
- fpsimd_load_state(&current->thread.fpsimd_state);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
preempt_enable();
}
-#ifdef CONFIG_KERNEL_MODE_NEON
+/*
+ * Save the userland FPSIMD state of 'current' to memory, but only if the state
+ * currently held in the registers does in fact belong to 'current'
+ */
+void fpsimd_preserve_current_state(void)
+{
+ preempt_disable();
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ preempt_enable();
+}
/*
- * Kernel-side NEON support functions
+ * Load the userland FPSIMD state of 'current' from memory, but only if the
+ * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
+ * state of 'current'
*/
-void kernel_neon_begin(void)
+void fpsimd_restore_current_state(void)
{
- /* Avoid using the NEON in interrupt context */
- BUG_ON(in_interrupt());
preempt_disable();
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
- if (current->mm)
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_load_state(st);
+ this_cpu_write(fpsimd_last_state, st);
+ st->cpu = smp_processor_id();
+ }
+ preempt_enable();
}
-EXPORT_SYMBOL(kernel_neon_begin);
-void kernel_neon_end(void)
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory and set the
+ * flag that indicates that the FPSIMD register contents are the most recent
+ * FPSIMD state of 'current'
+ */
+void fpsimd_update_current_state(struct fpsimd_state *state)
{
- if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
+ preempt_disable();
+ fpsimd_load_state(state);
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+ this_cpu_write(fpsimd_last_state, st);
+ st->cpu = smp_processor_id();
+ }
preempt_enable();
}
+
+/*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+void fpsimd_flush_task_state(struct task_struct *t)
+{
+ t->thread.fpsimd_state.cpu = NR_CPUS;
+}
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+
+/*
+ * Kernel-side NEON support functions
+ */
+void kernel_neon_begin_partial(u32 num_regs)
+{
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+
+ BUG_ON(num_regs > 32);
+ fpsimd_save_partial_state(s, roundup(num_regs, 2));
+ } else {
+ /*
+ * Save the userland FPSIMD state if we have one and if we
+ * haven't done so already. Clear fpsimd_last_state to indicate
+ * that there is no longer userland FPSIMD state in the
+ * registers.
+ */
+ preempt_disable();
+ if (current->mm &&
+ !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ this_cpu_write(fpsimd_last_state, NULL);
+ }
+}
+EXPORT_SYMBOL(kernel_neon_begin_partial);
+
+void kernel_neon_end(void)
+{
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+ fpsimd_load_partial_state(s);
+ } else {
+ preempt_enable();
+ }
+}
EXPORT_SYMBOL(kernel_neon_end);
#endif /* CONFIG_KERNEL_MODE_NEON */
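
With kernel_neon_begin_partial()/kernel_neon_end() in place, process-context code brackets its NEON usage roughly as follows. This is a hypothetical caller, assuming the kernel_neon_begin_partial() declaration that this series adds in <asm/neon.h>:

	#include <asm/neon.h>

	static void xor_block_neon(void *dst, const void *src)
	{
		kernel_neon_begin_partial(4);	/* caller will use at most 4 NEON registers */
		/* ... NEON implementation of the block operation ... */
		kernel_neon_end();
	}

In process context the pair lazily saves the task's FPSIMD state and marks it foreign; in interrupt context it saves and restores only num_regs registers through the per-cpu partial-state areas defined above.
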
@@ -119,12 +269,13 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
- if (current->mm)
+ if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
+ this_cpu_write(fpsimd_last_state, NULL);
break;
case CPU_PM_EXIT:
if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
break;
case CPU_PM_ENTER_FAILED:
default:
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 7787208e8cc6..997e6b27ff6a 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -28,7 +28,7 @@
* See Documentation/arm/kernel_user_helpers.txt for formal definitions.
*/
-#include <asm/unistd32.h>
+#include <asm/unistd.h>
.align 5
.globl __kuser_helper_start
diff --git a/arch/arm64/kernel/opcodes.c b/arch/arm64/kernel/opcodes.c
new file mode 100644
index 000000000000..ceb5a04a1e12
--- /dev/null
+++ b/arch/arm64/kernel/opcodes.c
@@ -0,0 +1,72 @@
+/*
+ * Copied from linux/arch/arm/kernel/opcodes.c
+ *
+ * A32 condition code lookup feature moved from nwfpe/fpopcode.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <asm/opcodes.h>
+
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+/*
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+ 0xF0F0, /* EQ == Z set */
+ 0x0F0F, /* NE */
+ 0xCCCC, /* CS == C set */
+ 0x3333, /* CC */
+ 0xFF00, /* MI == N set */
+ 0x00FF, /* PL */
+ 0xAAAA, /* VS == V set */
+ 0x5555, /* VC */
+ 0x0C0C, /* HI == C set && Z clear */
+ 0xF3F3, /* LS == C clear || Z set */
+ 0xAA55, /* GE == (N==V) */
+ 0x55AA, /* LT == (N!=V) */
+ 0x0A05, /* GT == (!Z && (N==V)) */
+ 0xF5FA, /* LE == (Z || (N!=V)) */
+ 0xFFFF, /* AL always */
+ 0 /* NV */
+};
+
+/*
+ * Returns:
+ * ARM_OPCODE_CONDTEST_FAIL - if condition fails
+ * ARM_OPCODE_CONDTEST_PASS - if condition passes (including AL)
+ * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional
+ * opcode space from v5 onwards
+ *
+ * Code that tests whether a conditional instruction would pass its condition
+ * check should check that return value == ARM_OPCODE_CONDTEST_PASS.
+ *
+ * Code that tests if a condition means that the instruction would be executed
+ * (regardless of conditional or unconditional) should instead check that the
+ * return value != ARM_OPCODE_CONDTEST_FAIL.
+ */
+asmlinkage unsigned int arm_check_condition(u32 opcode, u64 psr)
+{
+ u32 cc_bits = opcode >> 28;
+ u32 psr_cond = (u32)(psr & 0xffffffff) >> 28;
+ unsigned int ret;
+
+ if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+ if ((cc_map[cc_bits] >> (psr_cond)) & 1)
+ ret = ARM_OPCODE_CONDTEST_PASS;
+ else
+ ret = ARM_OPCODE_CONDTEST_FAIL;
+ } else {
+ ret = ARM_OPCODE_CONDTEST_UNCOND;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(arm_check_condition);
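
A worked example of the cc_map lookup: take EQ (cc_bits == 0) with PSTATE flags NZCV == 0b0100, i.e. only Z set (values chosen purely for illustration):

	u32 psr_cond = 0x4;                      /* N=0 Z=1 C=0 V=0 */
	int pass = (cc_map[0] >> psr_cond) & 1;  /* (0xF0F0 >> 4) & 1 == 1 */
	/* pass != 0, so arm_check_condition() returns ARM_OPCODE_CONDTEST_PASS */

Each 16-bit row of cc_map therefore records, for all sixteen possible NZCV combinations, whether that condition code passes.
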
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a82af49bcc51..8e084d9cd7ec 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -141,6 +141,70 @@ void machine_restart(char *cmd)
while (1);
}
+/*
+ * dump a block of kernel memory from around the given address
+ */
+static void show_data(unsigned long addr, int nbytes, const char *name)
+{
+ int i, j;
+ int nlines;
+ u32 *p;
+
+ /*
+ * don't attempt to dump non-kernel addresses or
+ * values that are probably just small negative numbers
+ */
+ if (addr < PAGE_OFFSET || addr > -256UL)
+ return;
+
+ printk("\n%s: %#lx:\n", name, addr);
+
+ /*
+ * round address down to a 32 bit boundary
+ * and always dump a multiple of 32 bytes
+ */
+ p = (u32 *)(addr & ~(sizeof(u32) - 1));
+ nbytes += (addr & (sizeof(u32) - 1));
+ nlines = (nbytes + 31) / 32;
+
+ for (i = 0; i < nlines; i++) {
+ /*
+ * just display low 16 bits of address to keep
+ * each line of the dump < 80 characters
+ */
+ printk("%04lx ", (unsigned long)p & 0xffff);
+ for (j = 0; j < 8; j++) {
+ u32 data;
+ if (probe_kernel_address(p, data)) {
+ printk(" ********");
+ } else {
+ printk(" %08x", data);
+ }
+ ++p;
+ }
+ printk("\n");
+ }
+}
+
+static void show_extra_register_data(struct pt_regs *regs, int nbytes)
+{
+ mm_segment_t fs;
+ unsigned int i;
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ show_data(regs->pc - nbytes, nbytes * 2, "PC");
+ show_data(regs->regs[30] - nbytes, nbytes * 2, "LR");
+ show_data(regs->sp - nbytes, nbytes * 2, "SP");
+ for (i = 0; i < 30; i++) {
+ char name[4];
+ snprintf(name, sizeof(name), "X%u", i);
+ show_data(regs->regs[i] - nbytes, nbytes * 2, name);
+ }
+ set_fs(fs);
+}
+
void __show_regs(struct pt_regs *regs)
{
int i, top_reg;
@@ -167,6 +231,8 @@ void __show_regs(struct pt_regs *regs)
if (i % 2 == 0)
printk("\n");
}
+ if (!user_mode(regs))
+ show_extra_register_data(regs, 128);
printk("\n");
}
@@ -213,7 +279,7 @@ void release_thread(struct task_struct *dead_task)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_preserve_current_state();
*dst = *src;
return 0;
}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index fc457d0fee9c..8c3c6f085cd9 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -23,6 +23,7 @@
#include <linux/reboot.h>
#include <linux/pm.h>
#include <linux/delay.h>
+#include <linux/slab.h>
#include <uapi/linux/psci.h>
#include <asm/compiler.h>
@@ -104,12 +105,13 @@ static u32 psci_power_state_pack(struct psci_power_state state)
static void psci_power_state_unpack(u32 power_state,
struct psci_power_state *state)
{
- state->id = (power_state >> PSCI_0_2_POWER_STATE_ID_SHIFT)
- & PSCI_0_2_POWER_STATE_ID_MASK;
- state->type = (power_state >> PSCI_0_2_POWER_STATE_TYPE_SHIFT)
- & PSCI_0_2_POWER_STATE_TYPE_MASK;
- state->affinity_level = (power_state >> PSCI_0_2_POWER_STATE_AFFL_SHIFT)
- & PSCI_0_2_POWER_STATE_AFFL_MASK;
+ state->id = (power_state & PSCI_0_2_POWER_STATE_ID_MASK) >>
+ PSCI_0_2_POWER_STATE_ID_SHIFT;
+ state->type = (power_state & PSCI_0_2_POWER_STATE_TYPE_MASK) >>
+ PSCI_0_2_POWER_STATE_TYPE_SHIFT;
+ state->affinity_level =
+ (power_state & PSCI_0_2_POWER_STATE_AFFL_MASK) >>
+ PSCI_0_2_POWER_STATE_AFFL_SHIFT;
}
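
The reordering above matters because the PSCI 0.2 constants in <uapi/linux/psci.h> are mask values that are already shifted into position, so each field must be masked first and only then shifted down. A small worked example, assuming the standard PSCI 0.2 power_state layout (state ID in bits [15:0], type in bit 16, affinity level in bits [25:24]):

	u32 ps   = 0x01010002;               /* illustrative power_state value */
	u32 id   = ps & 0x0000ffff;          /* 0x0002 */
	u32 type = (ps & 0x00010000) >> 16;  /* 1: powerdown-type state */
	u32 affl = (ps & 0x03000000) >> 24;  /* 1: affinity level 1 */

Shifting first, as the old code did, applied the in-place masks to already right-aligned values and always yielded zero for the type and affinity-level fields.
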
static int psci_get_version(void)
@@ -184,6 +186,63 @@ static int psci_migrate_info_type(void)
return err;
}
+static int __maybe_unused cpu_psci_cpu_init_idle(struct device_node *cpu_node,
+ unsigned int cpu)
+{
+ int i, ret, count = 0;
+ struct psci_power_state *psci_states;
+ struct device_node *state_node;
+
+ /*
+	 * If the PSCI cpu_suspend function hook has not been initialized,
+ * idle states must not be enabled, so bail out
+ */
+ if (!psci_ops.cpu_suspend)
+ return -EOPNOTSUPP;
+
+ /* Count idle states */
+ while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+ count))) {
+ count++;
+ of_node_put(state_node);
+ }
+
+ if (!count)
+ return -ENODEV;
+
+ psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+ if (!psci_states)
+ return -ENOMEM;
+
+ for (i = 0; i < count; i++) {
+ u32 psci_power_state;
+
+ state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+
+ ret = of_property_read_u32(state_node,
+ "arm,psci-suspend-param",
+ &psci_power_state);
+ if (ret) {
+ pr_warn(" * %s missing arm,psci-suspend-param property\n",
+ state_node->full_name);
+ of_node_put(state_node);
+ goto free_mem;
+ }
+
+ of_node_put(state_node);
+ pr_debug("psci-power-state %#x index %d\n", psci_power_state,
+ i);
+ psci_power_state_unpack(psci_power_state, &psci_states[i]);
+ }
+ /* Idle states parsed correctly, initialize per-cpu pointer */
+ per_cpu(psci_power_state, cpu) = psci_states;
+ return 0;
+
+free_mem:
+ kfree(psci_states);
+ return ret;
+}
+
static int get_set_conduit_method(struct device_node *np)
{
const char *method;
@@ -421,20 +480,41 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
#endif
#endif
-#ifdef CONFIG_ARM64_CPU_SUSPEND
-static int cpu_psci_cpu_suspend(unsigned long index)
+static int psci_suspend_finisher(unsigned long index)
{
struct psci_power_state *state = __get_cpu_var(psci_power_state);
- if (!state)
- return -EOPNOTSUPP;
+ return psci_ops.cpu_suspend(state[index - 1],
+ virt_to_phys(cpu_resume));
+}
+
+static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
+{
+ int ret;
+ struct psci_power_state *state = __get_cpu_var(psci_power_state);
+ /*
+	 * idle state index 0 corresponds to wfi and should never be called
+ * from the cpu_suspend operations
+ */
+ if (WARN_ON_ONCE(!index))
+ return -EINVAL;
+
+ if (state[index - 1].type == PSCI_POWER_STATE_TYPE_STANDBY)
+ ret = psci_ops.cpu_suspend(state[index - 1], 0);
+ else
+ ret = __cpu_suspend(index, psci_suspend_finisher);
- return psci_ops.cpu_suspend(state[index], virt_to_phys(cpu_resume));
+ return ret;
}
-#endif
const struct cpu_operations cpu_psci_ops = {
.name = "psci",
+#ifdef CONFIG_CPU_IDLE
+ .cpu_init_idle = cpu_psci_cpu_init_idle,
+#endif
+#ifdef CONFIG_ARM64_CPU_SUSPEND
+ .cpu_suspend = cpu_psci_cpu_suspend,
+#endif
#ifdef CONFIG_SMP
.cpu_init = cpu_psci_cpu_init,
.cpu_prepare = cpu_psci_cpu_prepare,
@@ -445,8 +525,5 @@ const struct cpu_operations cpu_psci_ops = {
.cpu_kill = cpu_psci_cpu_kill,
#endif
#endif
-#ifdef CONFIG_ARM64_CPU_SUSPEND
- .cpu_suspend = cpu_psci_cpu_suspend,
-#endif
};
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 5cafe0e40cfa..1267de080feb 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -19,6 +19,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -26,6 +27,7 @@
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/user.h>
+#include <linux/seccomp.h>
#include <linux/security.h>
#include <linux/init.h>
#include <linux/signal.h>
@@ -39,6 +41,7 @@
#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/pgtable.h>
+#include <asm/syscall.h>
#include <asm/traps.h>
#include <asm/system_misc.h>
@@ -522,6 +525,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
return ret;
target->thread.fpsimd_state.user_fpsimd = newstate;
+ fpsimd_flush_task_state(target);
return ret;
}
@@ -783,6 +787,7 @@ static int compat_vfp_set(struct task_struct *target,
uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
}
+ fpsimd_flush_task_state(target);
return ret;
}
@@ -1078,7 +1083,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
- return ptrace_request(child, request, addr, data);
+ int ret;
+
+ switch (request) {
+ case PTRACE_SET_SYSCALL:
+ task_pt_regs(child)->syscallno = data;
+ ret = 0;
+ break;
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+
+ return ret;
}
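
For reference, a tracer would drive the new request roughly as below; this is a user-space sketch with error handling omitted, and the request value simply mirrors the ptrace.h hunk above:

	#include <sys/types.h>
	#include <sys/ptrace.h>

	#ifndef PTRACE_SET_SYSCALL
	#define PTRACE_SET_SYSCALL 23
	#endif

	/* While the child is stopped at a syscall-entry trap, rewrite the
	 * pending syscall number (for example to redirect or cancel the call). */
	static long set_traced_syscall(pid_t child, long nr)
	{
		return ptrace(PTRACE_SET_SYSCALL, child, 0, (void *)nr);
	}

This mirrors the request of the same name on 32-bit ARM, which syscall-rewriting tracers already rely on.
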
enum ptrace_syscall_dir {
@@ -1116,6 +1133,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, regs->syscallno);
+ audit_syscall_entry(syscall_get_arch(), regs->syscallno,
+ regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]);
+
return regs->syscallno;
}
@@ -1124,6 +1144,8 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_exit(regs, regs_return_value(regs));
+ audit_syscall_exit(regs);
+
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 9d704df65a6a..2812c5a5540a 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -547,13 +547,19 @@ static int c_show(struct seq_file *m, void *v)
seq_puts(m, "\n");
seq_printf(m, "CPU implementer\t: 0x%02x\n", (midr >> 24));
- seq_printf(m, "CPU architecture: 8\n");
+ seq_printf(m, "CPU architecture: %s\n",
+#if IS_ENABLED(CONFIG_ARMV7_COMPAT_CPUINFO)
+ is_compat_task() ? "8" :
+#endif
+ "AArch64");
seq_printf(m, "CPU variant\t: 0x%x\n", ((midr >> 20) & 0xf));
seq_printf(m, "CPU part\t: 0x%03x\n", ((midr >> 4) & 0xfff));
seq_printf(m, "CPU revision\t: %d\n\n", (midr & 0xf));
}
- return 0;
+ seq_printf(m, "Hardware\t: %s\n", machine_name);
+
+ return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e3cf09626245..bbc1aad21ce6 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
int err;
/* dump the hardware registers to the fpsimd_state structure */
- fpsimd_save_state(fpsimd);
+ fpsimd_preserve_current_state();
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
/* load the hardware registers from the fpsimd_state structure */
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
return err ? -EFAULT : 0;
}
@@ -423,4 +420,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+
+ if (thread_flags & _TIF_FOREIGN_FPSTATE)
+ fpsimd_restore_current_state();
+
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 96f545cfbb70..b2b945dcbf60 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -26,7 +26,7 @@
#include <asm/fpsimd.h>
#include <asm/signal32.h>
#include <asm/uaccess.h>
-#include <asm/unistd32.h>
+#include <asm/unistd.h>
struct compat_sigcontext {
/* We always set these two fields to 0 */
@@ -182,6 +182,14 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
err |= __put_user(from->si_uid, &to->si_uid);
err |= __put_user(from->si_int, &to->si_int);
break;
+#ifdef __ARCH_SIGSYS
+ case __SI_SYS:
+ err |= __put_user((compat_uptr_t)(unsigned long)
+ from->si_call_addr, &to->si_call_addr);
+ err |= __put_user(from->si_syscall, &to->si_syscall);
+ err |= __put_user(from->si_arch, &to->si_arch);
+ break;
+#endif
default: /* this is just in case for now ... */
err |= __put_user(from->si_pid, &to->si_pid);
err |= __put_user(from->si_uid, &to->si_uid);
@@ -218,7 +226,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_save_state(fpsimd);
+ fpsimd_preserve_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -281,11 +289,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
return err ? -EFAULT : 0;
}
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index b1925729c692..a564b440416a 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -49,28 +49,39 @@
orr \dst, \dst, \mask // dst|=(aff3>>rs3)
.endm
/*
- * Save CPU state for a suspend. This saves callee registers, and allocates
- * space on the kernel stack to save the CPU specific registers + some
- * other data for resume.
+ * Save CPU state for a suspend and execute the suspend finisher.
+ * On success it will return 0 through cpu_resume - ie through a CPU
+ * soft/hard reboot from the reset vector.
+ * On failure it returns the suspend finisher return value, or forces
+ * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
+ * is not allowed to return; if it does, this must be considered a failure).
+ * It saves callee registers, and allocates space on the kernel stack
+ * to save the CPU specific registers + some other data for resume.
*
* x0 = suspend finisher argument
+ * x1 = suspend finisher function pointer
*/
-ENTRY(__cpu_suspend)
+ENTRY(__cpu_suspend_enter)
stp x29, lr, [sp, #-96]!
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
+ /*
+ * Stash suspend finisher and its argument in x20 and x19
+ */
+ mov x19, x0
+ mov x20, x1
mov x2, sp
sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
- mov x1, sp
+ mov x0, sp
/*
- * x1 now points to struct cpu_suspend_ctx allocated on the stack
+ * x0 now points to struct cpu_suspend_ctx allocated on the stack
*/
- str x2, [x1, #CPU_CTX_SP]
- ldr x2, =sleep_save_sp
- ldr x2, [x2, #SLEEP_SAVE_SP_VIRT]
+ str x2, [x0, #CPU_CTX_SP]
+ ldr x1, =sleep_save_sp
+ ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
#ifdef CONFIG_SMP
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
@@ -82,11 +93,21 @@ ENTRY(__cpu_suspend)
ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS]
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
- add x2, x2, x8, lsl #3
+ add x1, x1, x8, lsl #3
#endif
- bl __cpu_suspend_finisher
+ bl __cpu_suspend_save
+ /*
+ * Grab suspend finisher in x20 and its argument in x19
+ */
+ mov x0, x19
+ mov x1, x20
+ /*
+ * We are ready for power down, fire off the suspend finisher
+ * in x1, with argument in x0
+ */
+ blr x1
/*
- * Never gets here, unless suspend fails.
+ * Never gets here, unless suspend finisher fails.
* Successful cpu_suspend should return from cpu_resume, returning
* through this code path is considered an error
* If the return value is set to 0 force x0 = -EOPNOTSUPP
@@ -103,7 +124,7 @@ ENTRY(__cpu_suspend)
ldp x27, x28, [sp, #80]
ldp x29, lr, [sp], #96
ret
-ENDPROC(__cpu_suspend)
+ENDPROC(__cpu_suspend_enter)
.ltorg
/*
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 1fa9ce4afd8f..872da2873d18 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -9,22 +9,19 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-extern int __cpu_suspend(unsigned long);
+extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
/*
- * This is called by __cpu_suspend() to save the state, and do whatever
+ * This is called by __cpu_suspend_enter() to save the state, and do whatever
* flushing is required to ensure that when the CPU goes to sleep we have
* the necessary data available when the caches are not searched.
*
- * @arg: Argument to pass to suspend operations
- * @ptr: CPU context virtual address
- * @save_ptr: address of the location where the context physical address
- * must be saved
+ * ptr: CPU context virtual address
+ * save_ptr: address of the location where the context physical address
+ * must be saved
*/
-int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
- phys_addr_t *save_ptr)
+void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
+ phys_addr_t *save_ptr)
{
- int cpu = smp_processor_id();
-
*save_ptr = virt_to_phys(ptr);
cpu_do_suspend(ptr);
@@ -35,8 +32,6 @@ int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr,
*/
__flush_dcache_area(ptr, sizeof(*ptr));
__flush_dcache_area(save_ptr, sizeof(*save_ptr));
-
- return cpu_ops[cpu]->cpu_suspend(arg);
}
/*
@@ -56,15 +51,15 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
}
/**
- * cpu_suspend
+ * cpu_suspend() - function to enter a low-power state
+ * @arg: argument to pass to CPU suspend operations
*
- * @arg: argument to pass to the finisher function
+ * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
+ * operations back-end error code otherwise.
*/
int cpu_suspend(unsigned long arg)
{
- struct mm_struct *mm = current->active_mm;
- int ret, cpu = smp_processor_id();
- unsigned long flags;
+ int cpu = smp_processor_id();
/*
* If cpu_ops have not been registered or suspend
@@ -72,6 +67,21 @@ int cpu_suspend(unsigned long arg)
*/
if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
return -EOPNOTSUPP;
+ return cpu_ops[cpu]->cpu_suspend(arg);
+}
+
+/*
+ * __cpu_suspend
+ *
+ * arg: argument to pass to the finisher function
+ * fn: finisher function pointer
+ *
+ */
+int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+ struct mm_struct *mm = current->active_mm;
+ int ret;
+ unsigned long flags;
/*
* From this point debug exceptions are disabled to prevent
@@ -86,7 +96,7 @@ int cpu_suspend(unsigned long arg)
* page tables, so that the thread address space is properly
* set-up on function return.
*/
- ret = __cpu_suspend(arg);
+ ret = __cpu_suspend_enter(arg, fn);
if (ret == 0) {
cpu_switch_mm(mm->pgd, mm);
flush_tlb_all();
@@ -95,7 +105,7 @@ int cpu_suspend(unsigned long arg)
* Restore per-cpu offset before any kernel
* subsystem relying on it has a chance to run.
*/
- set_my_cpu_offset(per_cpu_offset(cpu));
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
/*
* Restore HW breakpoint registers to sane values
diff --git a/arch/arm64/kernel/swp_emulate.c b/arch/arm64/kernel/swp_emulate.c
new file mode 100644
index 000000000000..508fd2edb8ab
--- /dev/null
+++ b/arch/arm64/kernel/swp_emulate.c
@@ -0,0 +1,223 @@
+/*
+ * Derived from linux/arch/arm/kernel/swp_emulate.c
+ *
+ * Copyright (C) 2009 ARM Limited
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ * store-exclusive for processors that have them disabled (or future ones that
+ * might not implement them).
+ *
+ * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ * Where: Rt = destination
+ * Rt2 = source
+ * Rn = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/perf_event.h>
+
+#include <asm/opcodes.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/system_misc.h>
+#include <linux/debugfs.h>
+
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+
+static int swpb(u8 in, u8 *out, u8 *addr)
+{
+ u8 _out;
+ int res;
+ int err;
+
+ do {
+ __asm__ __volatile__(
+ "0: ldxrb %w1, %4\n"
+ "1: stxrb %w0, %w3, %4\n"
+ " mov %w2, #0\n"
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ " .align 2\n"
+ "3: mov %w2, %5\n"
+ " b 2b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .align 3\n"
+ " .quad 0b, 3b\n"
+ " .quad 1b, 3b\n"
+ " .previous"
+ : "=&r" (res), "=r" (_out), "=r" (err)
+ : "r" (in), "Q" (*addr), "i" (-EFAULT)
+ : "cc", "memory");
+ } while (err == 0 && res != 0);
+
+ if (err == 0)
+ *out = _out;
+ return err;
+}
+
+static int swp(u32 in, u32 *out, u32 *addr)
+{
+ u32 _out;
+ int res;
+ int err = 0;
+
+ do {
+ __asm__ __volatile__(
+ "0: ldxr %w1, %4\n"
+ "1: stxr %w0, %w3, %4\n"
+ " mov %w2, #0\n"
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ " .align 2\n"
+ "3: mov %w2, %5\n"
+ " b 2b\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .align 3\n"
+ " .quad 0b, 3b\n"
+ " .quad 1b, 3b\n"
+ " .previous"
+ : "=&r" (res), "=r" (_out), "=r" (err)
+ : "r" (in), "Q" (*addr), "i" (-EFAULT)
+ : "cc", "memory");
+ } while (err == 0 && res != 0);
+
+ if (err == 0)
+ *out = _out;
+ return err;
+}
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+#define EXTRACT_REG_NUM(instruction, offset) \
+ (((instruction) & (0xf << (offset))) >> (offset))
+#define RN_OFFSET 16
+#define RT_OFFSET 12
+#define RT2_OFFSET 0
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
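
Decoding a concrete encoding with these macros, using a hypothetical instruction word chosen only to show the field positions: 0xe1012093 is SWP r2, r3, [r1] (condition AL, bit 22 clear):

	u32 instr = 0xe1012093;
	u32 rn  = EXTRACT_REG_NUM(instr, RN_OFFSET);   /* 1: address register r1 */
	u32 rt  = EXTRACT_REG_NUM(instr, RT_OFFSET);   /* 2: destination r2      */
	u32 rt2 = EXTRACT_REG_NUM(instr, RT2_OFFSET);  /* 3: source r3           */
	u32 is_swpb = instr & TYPE_SWPB;               /* 0: word-sized SWP      */

swp_handler() below performs exactly this extraction before validating and emulating the access.
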
+static pid_t previous_pid;
+
+u64 swpb_count = 0;
+u64 swp_count = 0;
+
+/*
+ * swp_handler logs the id of the calling process, dissects the instruction,
+ * sanity checks the memory location, calls swp/swpb for the actual operation
+ * and deals with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, unsigned int instr)
+{
+ u32 destreg, data, type;
+ uintptr_t address;
+ unsigned int res = 0;
+ int err;
+ u32 temp32;
+ u8 temp8;
+
+ perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+ res = arm_check_condition(instr, regs->pstate);
+ switch (res) {
+ case ARM_OPCODE_CONDTEST_PASS:
+ break;
+ case ARM_OPCODE_CONDTEST_FAIL:
+ /* Condition failed - return to next instruction */
+ regs->pc += 4;
+ return 0;
+ case ARM_OPCODE_CONDTEST_UNCOND:
+ /* If unconditional encoding - not a SWP, undef */
+ return -EFAULT;
+ default:
+ return -EINVAL;
+ }
+
+ if (current->pid != previous_pid) {
+ pr_warn("\"%s\" (%ld) uses obsolete SWP{B} instruction\n",
+ current->comm, (unsigned long)current->pid);
+ previous_pid = current->pid;
+ }
+
+ address = regs->regs[EXTRACT_REG_NUM(instr, RN_OFFSET)] & 0xffffffff;
+ data = regs->regs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
+ destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
+
+ type = instr & TYPE_SWPB;
+
+ /* Check access in reasonable access range for both SWP and SWPB */
+ if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+ pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+ (void *)address);
+		return -EFAULT;
+ }
+ if (type == TYPE_SWPB) {
+ err = swpb((u8) data, &temp8, (u8 *) address);
+ if (err)
+ return err;
+ regs->regs[destreg] = temp8;
+ regs->pc += 4;
+ swpb_count++;
+ } else if (address & 0x3) {
+ /* SWP to unaligned address not permitted */
+ pr_debug("SWP instruction on unaligned pointer!\n");
+ return -EFAULT;
+ } else {
+ err = swp((u32) data, &temp32, (u32 *) address);
+ if (err)
+ return err;
+ regs->regs[destreg] = temp32;
+ regs->pc += 4;
+ swp_count++;
+ }
+
+ return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
+ */
+static struct undef_hook swp_hook = {
+ .instr_mask = 0x0fb00ff0,
+ .instr_val = 0x01000090,
+ .pstate_mask = COMPAT_PSR_MODE_MASK | COMPAT_PSR_T_BIT,
+ .pstate_val = COMPAT_PSR_MODE_USR,
+ .fn = swp_handler
+};
+
+/*
+ * Register handler and create status file in /proc/cpu
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init swp_emulation_init(void)
+{
+ struct dentry *dir;
+ dir = debugfs_create_dir("swp_emulate", NULL);
+ debugfs_create_u64("swp_count", S_IRUGO | S_IWUSR, dir, &swp_count);
+ debugfs_create_u64("swpb_count", S_IRUGO | S_IWUSR, dir, &swpb_count);
+
+ pr_notice("Registering SWP/SWPB emulation handler\n");
+ register_undef_hook(&swp_hook);
+
+ return 0;
+}
+
+late_initcall(swp_emulation_init);
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 78039927c807..dc47e53e9e28 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -26,7 +26,7 @@
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
-#include <asm/unistd32.h>
+#include <asm/unistd.h>
static inline void
do_compat_cache_op(unsigned long start, unsigned long end, int flags)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 12cd34ff35d0..2589ef53973a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1995-2009 Russell King
* Copyright (C) 2012 ARM Ltd.
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -256,15 +257,58 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
die(str, regs, err);
}
+static LIST_HEAD(undef_hook);
+
+void register_undef_hook(struct undef_hook *hook)
+{
+ list_add(&hook->node, &undef_hook);
+}
+
+static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+{
+ struct undef_hook *hook;
+ int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL;
+
+ list_for_each_entry(hook, &undef_hook, node)
+ if ((instr & hook->instr_mask) == hook->instr_val &&
+ (regs->pstate & hook->pstate_mask) == hook->pstate_val)
+ fn = hook->fn;
+
+ return fn ? fn(regs, instr) : 1;
+}
+
asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
{
+ u32 instr;
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
/* check for AArch32 breakpoint instructions */
if (!aarch32_break_handler(regs))
return;
+ if (user_mode(regs)) {
+ if (compat_thumb_mode(regs)) {
+ if (get_user(instr, (u16 __user *)pc))
+ goto die_sig;
+ if (is_wide_instruction(instr)) {
+ u32 instr2;
+ if (get_user(instr2, (u16 __user *)pc+1))
+ goto die_sig;
+ instr <<= 16;
+ instr |= instr2;
+ }
+ } else if (get_user(instr, (u32 __user *)pc)) {
+ goto die_sig;
+ }
+ } else {
+ /* kernel mode */
+ instr = *((u32 *)pc);
+ }
+
+ if (call_undef_hook(regs, instr) == 0)
+ return;
+die_sig:
if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
printk_ratelimit()) {
pr_info("%s[%d]: undefined instruction: pc=%p\n",
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f2bded0254c3..9a9eaadb8634 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -119,9 +119,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
}
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
+#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1)
+
int pfn_valid(unsigned long pfn)
{
- return memblock_is_memory(pfn << PAGE_SHIFT);
+ return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT);
}
EXPORT_SYMBOL(pfn_valid);
#endif
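
The extra test guards against page frame numbers whose left shift would overflow the physical address. With 4K pages (PAGE_SHIFT == 12, assumed here only for the example) PFN_MASK is (1UL << 52) - 1, so a pfn with any higher bit set is rejected before memblock is consulted:

	unsigned long pfn = 1UL << 52;			/* bogus pfn, e.g. from a corrupted PTE */
	bool in_range = (pfn & PFN_MASK) == pfn;	/* false: pfn_valid() now returns 0 */

Previously the bogus high bits were simply shifted away, and the truncated address could spuriously match a memblock region.
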
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c133ae53ed86..1cf94513b37f 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -240,6 +240,20 @@ ENTRY(__cpu_setup)
ret // return to head.S
ENDPROC(__cpu_setup)
+#ifdef CONFIG_ARMV7_COMPAT
+ /*
+ * n n T
+ * U E WT T UD US IHBS
+ * CE0 XWHW CZ ME TEEA S
+ * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
+ * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved
+ * .... .100 .... 01.1 11.1 ..01 0011 1101 < software settings
+ */
+ .type crval, #object
+crval:
+ .word 0x030802e2 // clear
+ .word 0x0405d03d // set
+#else
/*
* n n T
* U E WT T UD US IHBS
@@ -252,3 +266,4 @@ ENDPROC(__cpu_setup)
crval:
.word 0x000802e2 // clear
.word 0x0405d11d // set
+#endif
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 60576e06b6fb..d0a69aa35e27 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -45,14 +45,37 @@
# define smp_rmb() rmb()
# define smp_wmb() wmb()
# define smp_read_barrier_depends() read_barrier_depends()
+
#else
+
# define smp_mb() barrier()
# define smp_rmb() barrier()
# define smp_wmb() barrier()
# define smp_read_barrier_depends() do { } while(0)
+
#endif
/*
+ * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq,
+ * so there is no need for asm trickery!
+ */
+
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
+/*
* XXX check on this ---I suspect what Linus really wants here is
* acquire vs release semantics but we can't discuss this stuff with
* Linus just yet. Grrr...
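
Every architecture touched below gains the same pair with the same usage pattern: the writer publishes its data and then the flag with smp_store_release(), and the reader checks the flag with smp_load_acquire() before touching the data. A minimal producer/consumer sketch (generic illustration, not taken from the patch):

	static int payload;
	static int ready;

	/* producer */
	payload = 42;
	smp_store_release(&ready, 1);	/* payload is visible before the flag */

	/* consumer */
	if (smp_load_acquire(&ready))	/* flag is read before the payload */
		consume(payload);	/* consume() is a placeholder */

The release/acquire pairing guarantees the consumer can never observe ready == 1 together with a stale payload.
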
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index e355a4c10968..2d6f0de77325 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -85,4 +85,19 @@ static inline void fence(void)
#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ___p1; \
+})
+
#endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 314ab5532019..52c5b61d7aba 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -180,4 +180,19 @@
#define nudge_writes() mb()
#endif
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ___p1; \
+})
+
#endif /* __ASM_BARRIER_H */
diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c
index 838b479a42c4..88d0962de65a 100644
--- a/arch/parisc/hpux/fs.c
+++ b/arch/parisc/hpux/fs.c
@@ -60,6 +60,7 @@ struct hpux_dirent {
};
struct getdents_callback {
+ struct dir_context ctx;
struct hpux_dirent __user *current_dir;
struct hpux_dirent __user *previous;
int count;
@@ -110,24 +111,23 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i
{
struct fd arg;
struct hpux_dirent __user * lastdirent;
- struct getdents_callback buf;
+ struct getdents_callback buf = {
+ .ctx.actor = filldir,
+ .current_dir = dirent,
+ .count = count
+ };
int error;
arg = fdget(fd);
if (!arg.file)
return -EBADF;
- buf.current_dir = dirent;
- buf.previous = NULL;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(arg.file, filldir, &buf);
+ error = iterate_dir(arg.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- if (put_user(arg.file->f_pos, &lastdirent->d_off))
+ if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
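
The conversion follows the then-new readdir interface: the caller embeds a struct dir_context in its private buffer, initialises .actor with its filldir callback, hands the context to iterate_dir(), and reads the final offset back from ctx.pos instead of file->f_pos. A minimal sketch of the pattern under those assumptions:

	struct my_getdents_buf {
		struct dir_context ctx;		/* embedded context, actor set at init */
		/* caller-specific cursor and count fields ... */
	};

	struct my_getdents_buf buf = { .ctx.actor = filldir };
	int error = iterate_dir(file, &buf.ctx);
	/* on success, the next directory offset is buf.ctx.pos */
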
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ae782254e731..f89da808ce31 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -45,11 +45,15 @@
# define SMPWMB eieio
#endif
+#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+
#define smp_mb() mb()
-#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define smp_rmb() __lwsync()
#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
#define smp_read_barrier_depends() read_barrier_depends()
#else
+#define __lwsync() barrier()
+
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
@@ -65,4 +69,19 @@
#define data_barrier(x) \
asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ __lwsync(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ __lwsync(); \
+ ___p1; \
+})
+
#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index cd29d2f4e4f3..bebc0bd8abc2 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr2 = args[0];
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
+ if (test_tsk_thread_flag(current, TIF_31BIT))
return AUDIT_ARCH_S390;
#endif
return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 95d45986f908..b5aad964558e 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
#define smp_read_barrier_depends() do { } while(0)
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
#endif /* !(__SPARC64_BARRIER_H) */
diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h
index e584e40ee832..f2ca5702a4e2 100644
--- a/arch/um/include/shared/frame_kern.h
+++ b/arch/um/include/shared/frame_kern.h
@@ -6,13 +6,13 @@
#ifndef __FRAME_KERN_H_
#define __FRAME_KERN_H_
-extern int setup_signal_stack_sc(unsigned long stack_top, int sig,
+extern int setup_signal_stack_sc(unsigned long stack_top, int sig,
struct k_sigaction *ka,
- struct pt_regs *regs,
+ struct pt_regs *regs,
sigset_t *mask);
-extern int setup_signal_stack_si(unsigned long stack_top, int sig,
+extern int setup_signal_stack_si(unsigned long stack_top, int sig,
struct k_sigaction *ka,
- struct pt_regs *regs, siginfo_t *info,
+ struct pt_regs *regs, struct siginfo *info,
sigset_t *mask);
#endif
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 3e831b3fd07b..f57e02e7910f 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -19,7 +19,7 @@ EXPORT_SYMBOL(unblock_signals);
* OK, we're invoking a handler
*/
static void handle_signal(struct pt_regs *regs, unsigned long signr,
- struct k_sigaction *ka, siginfo_t *info)
+ struct k_sigaction *ka, struct siginfo *info)
{
sigset_t *oldset = sigmask_to_save();
int singlestep = 0;
@@ -71,7 +71,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
static int kern_do_signal(struct pt_regs *regs)
{
struct k_sigaction ka_copy;
- siginfo_t info;
+ struct siginfo info;
int sig, handled_sig = 0;
while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 9d9f1b4bf826..905924b773d3 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -25,7 +25,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
[SIGIO] = sigio_handler,
[SIGVTALRM] = timer_handler };
-static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
+static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
{
struct uml_pt_regs r;
int save_errno = errno;
@@ -61,7 +61,7 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
static int signals_enabled;
static unsigned int signals_pending;
-void sig_handler(int sig, siginfo_t *si, mcontext_t *mc)
+void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{
int enabled;
@@ -120,7 +120,7 @@ void set_sigstack(void *sig_stack, int size)
panic("enabling signal stack failed, errno = %d\n", errno);
}
-static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = {
+static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
[SIGSEGV] = sig_handler,
[SIGBUS] = sig_handler,
[SIGILL] = sig_handler,
@@ -162,7 +162,7 @@ static void hard_handler(int sig, siginfo_t *si, void *p)
while ((sig = ffs(pending)) != 0){
sig--;
pending &= ~(1 << sig);
- (*handlers[sig])(sig, si, mc);
+ (*handlers[sig])(sig, (struct siginfo *)si, mc);
}
/*
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 4625949bf1e4..908579f2b0ab 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -409,7 +409,7 @@ void userspace(struct uml_pt_regs *regs)
if (WIFSTOPPED(status)) {
int sig = WSTOPSIG(status);
- ptrace(PTRACE_GETSIGINFO, pid, 0, &si);
+ ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
switch (sig) {
case SIGSEGV:
@@ -417,7 +417,7 @@ void userspace(struct uml_pt_regs *regs)
!ptrace_faultinfo) {
get_skas_faultinfo(pid,
&regs->faultinfo);
- (*sig_info[SIGSEGV])(SIGSEGV, &si,
+ (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
regs);
}
else handle_segv(pid, regs);
@@ -426,14 +426,14 @@ void userspace(struct uml_pt_regs *regs)
handle_trap(pid, regs, local_using_sysemu);
break;
case SIGTRAP:
- relay_signal(SIGTRAP, &si, regs);
+ relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
break;
case SIGVTALRM:
now = os_nsecs();
if (now < nsecs)
break;
block_signals();
- (*sig_info[sig])(sig, &si, regs);
+ (*sig_info[sig])(sig, (struct siginfo *)&si, regs);
unblock_signals();
nsecs = timer.it_value.tv_sec *
UM_NSEC_PER_SEC +
@@ -447,7 +447,7 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE:
case SIGWINCH:
block_signals();
- (*sig_info[sig])(sig, &si, regs);
+ (*sig_info[sig])(sig, (struct siginfo *)&si, regs);
unblock_signals();
break;
default:
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index c6cd358a1eec..04a48903b2eb 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -92,12 +92,53 @@
#endif
#define smp_read_barrier_depends() read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
+#else /* !SMP */
#define smp_mb() barrier()
#define smp_rmb() barrier()
#define smp_wmb() barrier()
#define smp_read_barrier_depends() do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif /* SMP */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+/*
+ * For either of these options x86 doesn't have a strong TSO memory
+ * model and we should fall back to full barriers.
+ */
+
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ___p1; \
+})
+
+#else /* regular x86 TSO memory ordering */
+
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
#endif
/*
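
For illustration, a minimal kernel-side sketch (not part of this patch) of how the new acquire/release primitives pair up; the producer/consumer names and the two globals are hypothetical. On regular TSO x86 both macros compile down to a compiler barrier plus a plain access, yet they still document and enforce the intended ordering:

    /* Sketch only: one-way handoff between two CPUs using the new macros. */
    static int data;
    static int ready;

    void producer(void)
    {
            data = 42;                      /* plain store                  */
            smp_store_release(&ready, 1);   /* barrier() + store on TSO x86 */
    }

    void consumer(void)
    {
            if (smp_load_acquire(&ready))   /* load + barrier() on TSO x86  */
                    BUG_ON(data != 42);     /* ordered by acquire/release   */
    }
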
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index c5d1785373ed..02bab09707f2 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -1,13 +1,6 @@
#ifndef _ASM_X86_IDLE_H
#define _ASM_X86_IDLE_H
-#define IDLE_START 1
-#define IDLE_END 2
-
-struct notifier_block;
-void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
-
#ifdef CONFIG_X86_64
void enter_idle(void);
void exit_idle(void);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2e188d68397c..f106908a12ec 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -90,8 +90,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
return AUDIT_ARCH_I386;
}
@@ -220,8 +219,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
#ifdef CONFIG_IA32_EMULATION
/*
@@ -233,7 +231,7 @@ static inline int syscall_get_arch(struct task_struct *task,
*
* x32 tasks should be considered AUDIT_ARCH_X86_64.
*/
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task_thread_info(current)->status & TS_COMPAT)
return AUDIT_ARCH_I386;
#endif
/* Both x32 and x86_64 are considered "64-bit". */
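
A hedged sketch of how a caller adapts to the new zero-argument form: code such as the seccomp filter path can fill in the audit arch for the current task without threading task/regs through, since the helper now answers for current only. The function name and the seccomp_data use here are illustrative, not taken from this patch:

    /* Sketch only: populate the arch field after the signature change. */
    static void populate_seccomp_data_sketch(struct seccomp_data *sd,
                                             struct pt_regs *regs)
    {
            sd->nr   = syscall_get_nr(current, regs);
            sd->arch = syscall_get_arch();  /* previously (task, regs) */
    }
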
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 59b90379cb6a..48f439953436 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -40,19 +40,6 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif
struct kmem_cache *task_xstate_cachep;
@@ -257,14 +244,14 @@ static inline void play_dead(void)
void enter_idle(void)
{
this_cpu_write(is_idle, 1);
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ idle_notifier_call_chain(IDLE_START);
}
static void __exit_idle(void)
{
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ idle_notifier_call_chain(IDLE_END);
}
/* Called from interrupts to signify idle end */
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index b8f6172f4174..9dc0de1e4466 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -20,7 +20,7 @@
__entry->unsync = sp->unsync;
#define KVM_MMU_PAGE_PRINTK() ({ \
- const char *ret = p->buffer + p->len; \
+ const char *ret = trace_seq_buffer_ptr(p); \
static const char *access_str[] = { \
"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
}; \
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index aabfb8380a1c..01ed50255473 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -357,3 +357,7 @@
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
349 i386 kcmp sys_kcmp
350 i386 finit_module sys_finit_module
+# 351 i386 sched_setattr sys_sched_setattr
+# 352 i386 sched_getattr sys_sched_getattr
+# 353 i386 renameat2 sys_renameat2
+354 i386 seccomp sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 63a899304d27..c7b4ac76cd37 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,10 @@
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
+# 314 common sched_setattr sys_sched_setattr
+# 315 common sched_getattr sys_sched_getattr
+# 316 common renameat2 sys_renameat2
+317 common seccomp sys_seccomp
#
# x32-specific system call numbers start at 512 to avoid cache impact
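
The sched_setattr/sched_getattr/renameat2 slots are only reserved (commented out) here; seccomp is the one entry actually wired up. A hedged userspace sketch of invoking the new syscall directly, assuming the uapi headers do not yet export the number (the fallback define mirrors the x86_64 slot added above; SECCOMP_SET_MODE_STRICT is 0):

    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_seccomp
    #define __NR_seccomp 317                /* x86_64 slot wired above */
    #endif
    #define SECCOMP_SET_MODE_STRICT 0

    int enter_strict_seccomp(void)
    {
            /* Equivalent to prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT). */
            return syscall(__NR_seccomp, SECCOMP_SET_MODE_STRICT, 0, NULL);
    }
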
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index ae7319db18ee..5e04a1c899fa 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -508,7 +508,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
{
struct rt_sigframe __user *frame;
int err = 0;
- struct task_struct *me = current;
frame = (struct rt_sigframe __user *)
round_down(stack_top - sizeof(struct rt_sigframe), 16);
diff --git a/block/genhd.c b/block/genhd.c
index ea49f8631c22..2d350aae2eb6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1116,6 +1116,22 @@ static void disk_release(struct device *dev)
blk_put_queue(disk->queue);
kfree(disk);
}
+
+static int disk_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+ int cnt = 0;
+
+ disk_part_iter_init(&piter, disk, 0);
+ while((part = disk_part_iter_next(&piter)))
+ cnt++;
+ disk_part_iter_exit(&piter);
+ add_uevent_var(env, "NPARTS=%u", cnt);
+ return 0;
+}
+
struct class block_class = {
.name = "block",
};
@@ -1135,6 +1151,7 @@ static struct device_type disk_type = {
.groups = disk_attr_groups,
.release = disk_release,
.devnode = block_devnode,
+ .uevent = disk_uevent,
};
#ifdef CONFIG_PROC_FS
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 0d9e5f97f0a8..47284e712650 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -217,10 +217,21 @@ static void part_release(struct device *dev)
kfree(p);
}
+static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct hd_struct *part = dev_to_part(dev);
+
+ add_uevent_var(env, "PARTN=%u", part->partno);
+ if (part->info && part->info->volname[0])
+ add_uevent_var(env, "PARTNAME=%s", part->info->volname);
+ return 0;
+}
+
struct device_type part_type = {
.name = "partition",
.groups = part_attr_groups,
.release = part_release,
+ .uevent = part_uevent,
};
static void delete_partition_rcu_cb(struct rcu_head *head)
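
Taken together, the two uevent callbacks export NPARTS on disk events and PARTN/PARTNAME on partition events, both in hotplug messages and in the sysfs uevent file. A small userspace sketch (the helper name and buffer sizes are illustrative) showing how the new NPARTS key can be read back:

    #include <stdio.h>

    /* Sketch only: return NPARTS for e.g. "sda", or -1 on error. */
    static int read_nparts(const char *dev)
    {
            char path[128], line[128];
            int nparts = -1;
            FILE *f;

            snprintf(path, sizeof(path), "/sys/class/block/%s/uevent", dev);
            f = fopen(path, "r");
            if (!f)
                    return -1;
            while (fgets(line, sizeof(line), f))
                    if (sscanf(line, "NPARTS=%d", &nparts) == 1)
                            break;
            fclose(f);
            return nparts;
    }
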
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a1eba1845367..fa7e2a8cc608 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -533,6 +533,17 @@ config CRYPTO_SHA1_PPC
This is the powerpc hardware accelerated implementation of the
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
+config CRYPTO_SHA1_ARM_NEON
+ tristate "SHA1 digest algorithm (ARM NEON)"
+ depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+ select CRYPTO_SHA1_ARM
+ select CRYPTO_SHA1
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+ using optimized ARM NEON assembly, when NEON instructions are
+ available.
+
config CRYPTO_SHA256
tristate "SHA224 and SHA256 digest algorithm"
select CRYPTO_HASH
@@ -545,6 +556,13 @@ config CRYPTO_SHA256
This code also includes SHA-224, a 224 bit hash with 112 bits
of security against collision attacks.
+config CRYPTO_SHA256_ARM
+ tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+ select CRYPTO_HASH
+ help
+ SHA-256 secure hash standard (DFIPS 180-2) implemented
+ using optimized ARM assembler and NEON, when available.
+
config CRYPTO_SHA256_SPARC64
tristate "SHA224 and SHA256 digest algorithm (SPARC64)"
depends on SPARC64
@@ -566,6 +584,21 @@ config CRYPTO_SHA512
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
+config CRYPTO_SHA512_ARM_NEON
+ tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+ depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+ select CRYPTO_SHA512
+ select CRYPTO_HASH
+ help
+ SHA-512 secure hash standard (DFIPS 180-2) implemented
+ using ARM NEON instructions, when available.
+
+ This version of SHA implements a 512 bit hash with 256 bits of
+ security against collision attacks.
+
+ This code also includes SHA-384, a 384 bit hash with 192 bits
+ of security against collision attacks.
+
config CRYPTO_SHA512_SPARC64
tristate "SHA384 and SHA512 digest algorithm (SPARC64)"
depends on SPARC64
@@ -761,6 +794,22 @@ config CRYPTO_AES_ARM
See <http://csrc.nist.gov/encryption/aes/> for more information.
+config CRYPTO_AES_ARM_BS
+ tristate "Bit sliced AES using NEON instructions"
+ depends on ARM && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES_ARM
+ select CRYPTO_ABLK_HELPER
+ help
+	  Use a faster and more secure NEON-based implementation of AES in
+	  CBC, CTR and XTS modes.
+
+	  Bit-sliced AES gives around a 45% speedup on Cortex-A15 for CTR and
+	  XTS mode encryption; CBC and XTS mode decryption speedup is around
+	  25%. (CBC encryption speed is not affected by this driver.)
+	  This implementation does not rely on any lookup tables, so it is
+	  believed to be invulnerable to cache timing attacks.
+
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
select CRYPTO_ALGAPI
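
None of these new options change how in-kernel users request the algorithms: callers still ask for "sha1", "sha256", "sha512" or the AES modes by name, and the crypto core picks the highest-priority (here NEON-accelerated) implementation that registered. A hedged kernel-side sketch, with a hypothetical helper name, assuming the standard shash API:

    #include <crypto/hash.h>
    #include <linux/err.h>
    #include <linux/slab.h>

    /* Sketch only: one-shot SHA-256; the NEON variant is selected
     * transparently when CRYPTO_SHA256_ARM is enabled. */
    static int sha256_digest_sketch(const u8 *data, unsigned int len, u8 *out)
    {
            struct crypto_shash *tfm;
            struct shash_desc *desc;
            int ret;

            tfm = crypto_alloc_shash("sha256", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm),
                           GFP_KERNEL);
            if (!desc) {
                    crypto_free_shash(tfm);
                    return -ENOMEM;
            }
            desc->tfm = tfm;

            ret = crypto_shash_digest(desc, data, len, out);

            kfree(desc);
            crypto_free_shash(tfm);
            return ret;
    }
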
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
index 62568b1fc885..ffe7278d4bd8 100644
--- a/crypto/ablk_helper.c
+++ b/crypto/ablk_helper.c
@@ -75,7 +75,7 @@ int ablk_encrypt(struct ablkcipher_request *req)
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
+ *cryptd_req = *req;
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_ablkcipher_encrypt(cryptd_req);
@@ -94,7 +94,7 @@ int ablk_decrypt(struct ablkcipher_request *req)
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
+ *cryptd_req = *req;
ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_ablkcipher_decrypt(cryptd_req);
diff --git a/drivers/Kconfig b/drivers/Kconfig
index d27feb5460f3..4f990ce9515a 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -100,6 +100,8 @@ source "drivers/memstick/Kconfig"
source "drivers/leds/Kconfig"
+source "drivers/switch/Kconfig"
+
source "drivers/accessibility/Kconfig"
source "drivers/infiniband/Kconfig"
@@ -168,4 +170,6 @@ source "drivers/reset/Kconfig"
source "drivers/gator/Kconfig"
+source "drivers/android/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 292d528e22f1..3774b56c5d80 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle/
obj-y += mmc/
obj-$(CONFIG_MEMSTICK) += memstick/
obj-y += leds/
+obj-$(CONFIG_SWITCH) += switch/
obj-$(CONFIG_INFINIBAND) += infiniband/
obj-$(CONFIG_SGI_SN) += sn/
obj-y += firmware/
@@ -155,3 +156,4 @@ obj-$(CONFIG_NTB) += ntb/
obj-$(CONFIG_GATOR) += gator/
obj-$(CONFIG_CORESIGHT) += coresight/
+obj-$(CONFIG_ANDROID) += android/
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index a33821ca3895..d672d13de70b 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -811,7 +811,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
result =
thermal_zone_bind_cooling_device
(thermal, trip, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
else
result =
thermal_zone_unbind_cooling_device
@@ -835,7 +836,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
if (bind)
result = thermal_zone_bind_cooling_device
(thermal, trip, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
else
result = thermal_zone_unbind_cooling_device
(thermal, trip, cdev);
@@ -852,7 +854,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
result = thermal_zone_bind_cooling_device
(thermal, -1, cdev,
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
else
result = thermal_zone_unbind_cooling_device
(thermal, -1, cdev);
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
new file mode 100644
index 000000000000..bdfc6c6f4f5a
--- /dev/null
+++ b/drivers/android/Kconfig
@@ -0,0 +1,37 @@
+menu "Android"
+
+config ANDROID
+ bool "Android Drivers"
+ ---help---
+ Enable support for various drivers needed on the Android platform
+
+if ANDROID
+
+config ANDROID_BINDER_IPC
+ bool "Android Binder IPC Driver"
+ depends on MMU
+ default n
+ ---help---
+	  Binder is used in Android both for communication between
+	  processes and for remote method invocation.
+
+ This means one Android process can call a method/routine in another
+ Android process, using Binder to identify, invoke and pass arguments
+ between said processes.
+
+config ANDROID_BINDER_IPC_32BIT
+ bool
+ depends on !64BIT && ANDROID_BINDER_IPC
+ default y
+ ---help---
+	  The Binder API has been changed to support both 32-bit and 64-bit
+	  applications in a mixed environment.
+
+ Enable this to support an old 32-bit Android user-space (v4.4 and
+ earlier).
+
+ Note that enabling this will break newer Android user-space.
+
+endif # if ANDROID
+
+endmenu
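
With CONFIG_ANDROID and CONFIG_ANDROID_BINDER_IPC enabled, userspace talks to the driver through /dev/binder. A hedged userspace sketch of the usual open/version-check/mmap sequence (the function name and the mapping size, which mirrors the libbinder default, are illustrative; the uapi header comes from later in this series):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/android/binder.h>

    /* Sketch only: returns an fd ready for BINDER_WRITE_READ, or -1. */
    int binder_open_sketch(void)
    {
            struct binder_version vers;
            int fd = open("/dev/binder", O_RDWR | O_CLOEXEC);

            if (fd < 0)
                    return -1;
            if (ioctl(fd, BINDER_VERSION, &vers) < 0 ||
                vers.protocol_version != BINDER_CURRENT_PROTOCOL_VERSION)
                    goto fail;
            /* 1 MB minus two pages, the conventional libbinder size. */
            if (mmap(NULL, 1024 * 1024 - 4096 * 2, PROT_READ,
                     MAP_PRIVATE | MAP_NORESERVE, fd, 0) == MAP_FAILED)
                    goto fail;
            return fd;
    fail:
            close(fd);
            return -1;
    }
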
diff --git a/drivers/android/Makefile b/drivers/android/Makefile
new file mode 100644
index 000000000000..3b7e4b072c58
--- /dev/null
+++ b/drivers/android/Makefile
@@ -0,0 +1,3 @@
+ccflags-y += -I$(src) # needed for trace events
+
+obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o
diff --git a/drivers/staging/android/binder.c b/drivers/android/binder.c
index 0fce5fc9923b..c048624fc6b7 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/android/binder.c
@@ -20,11 +20,13 @@
#include <asm/cacheflush.h>
#include <linux/fdtable.h>
#include <linux/file.h>
+#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/rtmutex.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/poll.h>
@@ -36,11 +38,16 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
+#include <linux/security.h>
-#include "binder.h"
+#ifdef CONFIG_ANDROID_BINDER_IPC_32BIT
+#define BINDER_IPC_32BIT 1
+#endif
+
+#include <uapi/linux/android/binder.h>
#include "binder_trace.h"
-static DEFINE_MUTEX(binder_main_lock);
+static DEFINE_RT_MUTEX(binder_main_lock);
static DEFINE_MUTEX(binder_deferred_lock);
static DEFINE_MUTEX(binder_mmap_lock);
@@ -117,6 +124,7 @@ static int binder_set_stop_on_user_error(const char *val,
struct kernel_param *kp)
{
int ret;
+
ret = param_set_int(val, kp);
if (binder_stop_on_user_error < 2)
wake_up(&binder_user_error_wait);
@@ -193,6 +201,7 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
struct binder_transaction_log *log)
{
struct binder_transaction_log_entry *e;
+
e = &log->entry[log->next];
memset(e, 0, sizeof(*e));
log->next++;
@@ -227,8 +236,8 @@ struct binder_node {
int internal_strong_refs;
int local_weak_refs;
int local_strong_refs;
- void __user *ptr;
- void __user *cookie;
+ binder_uintptr_t ptr;
+ binder_uintptr_t cookie;
unsigned has_strong_ref:1;
unsigned pending_strong_ref:1;
unsigned has_weak_ref:1;
@@ -241,7 +250,7 @@ struct binder_node {
struct binder_ref_death {
struct binder_work work;
- void __user *cookie;
+ binder_uintptr_t cookie;
};
struct binder_ref {
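
The void __user pointers in these structures become binder_uintptr_t so that a 64-bit kernel and a 32-bit userspace agree on field sizes and layout. Roughly, the uapi header this series switches to defines the type as below, with BINDER_IPC_32BIT coming from CONFIG_ANDROID_BINDER_IPC_32BIT as set up at the top of binder.c:

    /* Sketch of the typedefs in uapi/linux/android/binder.h: fixed 64-bit
     * wide unless the legacy 32-bit-only ABI is explicitly requested. */
    #ifdef BINDER_IPC_32BIT
    typedef __u32 binder_size_t;
    typedef __u32 binder_uintptr_t;
    #else
    typedef __u64 binder_size_t;
    typedef __u64 binder_uintptr_t;
    #endif
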
@@ -418,19 +427,20 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
static inline void binder_lock(const char *tag)
{
trace_binder_lock(tag);
- mutex_lock(&binder_main_lock);
+ rt_mutex_lock(&binder_main_lock);
trace_binder_locked(tag);
}
static inline void binder_unlock(const char *tag)
{
trace_binder_unlock(tag);
- mutex_unlock(&binder_main_lock);
+ rt_mutex_unlock(&binder_main_lock);
}
static void binder_set_nice(long nice)
{
long min_nice;
+
if (can_nice(current, nice)) {
set_user_nice(current, nice);
return;
@@ -450,9 +460,8 @@ static size_t binder_buffer_size(struct binder_proc *proc,
{
if (list_is_last(&buffer->entry, &proc->buffers))
return proc->buffer + proc->buffer_size - (void *)buffer->data;
- else
- return (size_t)list_entry(buffer->entry.next,
- struct binder_buffer, entry) - (size_t)buffer->data;
+ return (size_t)list_entry(buffer->entry.next,
+ struct binder_buffer, entry) - (size_t)buffer->data;
}
static void binder_insert_free_buffer(struct binder_proc *proc,
@@ -514,14 +523,14 @@ static void binder_insert_allocated_buffer(struct binder_proc *proc,
}
static struct binder_buffer *binder_buffer_lookup(struct binder_proc *proc,
- void __user *user_ptr)
+ uintptr_t user_ptr)
{
struct rb_node *n = proc->allocated_buffers.rb_node;
struct binder_buffer *buffer;
struct binder_buffer *kern_ptr;
- kern_ptr = user_ptr - proc->user_buffer_offset
- - offsetof(struct binder_buffer, data);
+ kern_ptr = (struct binder_buffer *)(user_ptr - proc->user_buffer_offset
+ - offsetof(struct binder_buffer, data));
while (n) {
buffer = rb_entry(n, struct binder_buffer, rb_node);
@@ -583,6 +592,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
int ret;
struct page **page_array_ptr;
+
page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
BUG_ON(*page);
@@ -725,6 +735,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc,
binder_insert_allocated_buffer(proc, buffer);
if (buffer_size != size) {
struct binder_buffer *new_buffer = (void *)buffer->data + size;
+
list_add(&new_buffer->entry, &buffer->entry);
new_buffer->free = 1;
binder_insert_free_buffer(proc, new_buffer);
@@ -790,7 +801,7 @@ static void binder_delete_free_buffer(struct binder_proc *proc,
list_del(&buffer->entry);
if (free_page_start || free_page_end) {
binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
- "%d: merge free, buffer %p do not share page%s%s with with %p or %p\n",
+ "%d: merge free, buffer %p do not share page%s%s with %p or %p\n",
proc->pid, buffer, free_page_start ? "" : " end",
free_page_end ? "" : " start", prev, next);
binder_update_page_range(proc, 0, free_page_start ?
@@ -837,6 +848,7 @@ static void binder_free_buf(struct binder_proc *proc,
if (!list_is_last(&buffer->entry, &proc->buffers)) {
struct binder_buffer *next = list_entry(buffer->entry.next,
struct binder_buffer, entry);
+
if (next->free) {
rb_erase(&next->rb_node, &proc->free_buffers);
binder_delete_free_buffer(proc, next);
@@ -845,6 +857,7 @@ static void binder_free_buf(struct binder_proc *proc,
if (proc->buffers.next != &buffer->entry) {
struct binder_buffer *prev = list_entry(buffer->entry.prev,
struct binder_buffer, entry);
+
if (prev->free) {
binder_delete_free_buffer(proc, buffer);
rb_erase(&prev->rb_node, &proc->free_buffers);
@@ -855,7 +868,7 @@ static void binder_free_buf(struct binder_proc *proc,
}
static struct binder_node *binder_get_node(struct binder_proc *proc,
- void __user *ptr)
+ binder_uintptr_t ptr)
{
struct rb_node *n = proc->nodes.rb_node;
struct binder_node *node;
@@ -874,8 +887,8 @@ static struct binder_node *binder_get_node(struct binder_proc *proc,
}
static struct binder_node *binder_new_node(struct binder_proc *proc,
- void __user *ptr,
- void __user *cookie)
+ binder_uintptr_t ptr,
+ binder_uintptr_t cookie)
{
struct rb_node **p = &proc->nodes.rb_node;
struct rb_node *parent = NULL;
@@ -907,9 +920,9 @@ static struct binder_node *binder_new_node(struct binder_proc *proc,
INIT_LIST_HEAD(&node->work.entry);
INIT_LIST_HEAD(&node->async_todo);
binder_debug(BINDER_DEBUG_INTERNAL_REFS,
- "%d:%d node %d u%p c%p created\n",
+ "%d:%d node %d u%016llx c%016llx created\n",
proc->pid, current->pid, node->debug_id,
- node->ptr, node->cookie);
+ (u64)node->ptr, (u64)node->cookie);
return node;
}
@@ -1106,6 +1119,7 @@ static int binder_inc_ref(struct binder_ref *ref, int strong,
struct list_head *target_list)
{
int ret;
+
if (strong) {
if (ref->strong == 0) {
ret = binder_inc_node(ref->node, 1, 1, target_list);
@@ -1137,6 +1151,7 @@ static int binder_dec_ref(struct binder_ref *ref, int strong)
ref->strong--;
if (ref->strong == 0) {
int ret;
+
ret = binder_dec_node(ref->node, strong, 1);
if (ret)
return ret;
@@ -1176,6 +1191,8 @@ static void binder_send_failed_reply(struct binder_transaction *t,
uint32_t error_code)
{
struct binder_thread *target_thread;
+ struct binder_transaction *next;
+
BUG_ON(t->flags & TF_ONE_WAY);
while (1) {
target_thread = t->from;
@@ -1189,7 +1206,8 @@ static void binder_send_failed_reply(struct binder_transaction *t,
if (target_thread->return_error == BR_OK) {
binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
"send failed reply for transaction %d to %d:%d\n",
- t->debug_id, target_thread->proc->pid,
+ t->debug_id,
+ target_thread->proc->pid,
target_thread->pid);
binder_pop_transaction(target_thread, t);
@@ -1202,32 +1220,31 @@ static void binder_send_failed_reply(struct binder_transaction *t,
target_thread->return_error);
}
return;
- } else {
- struct binder_transaction *next = t->from_parent;
+ }
+ next = t->from_parent;
- binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
- "send failed reply for transaction %d, target dead\n",
- t->debug_id);
+ binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
+ "send failed reply for transaction %d, target dead\n",
+ t->debug_id);
- binder_pop_transaction(target_thread, t);
- if (next == NULL) {
- binder_debug(BINDER_DEBUG_DEAD_BINDER,
- "reply failed, no target thread at root\n");
- return;
- }
- t = next;
+ binder_pop_transaction(target_thread, t);
+ if (next == NULL) {
binder_debug(BINDER_DEBUG_DEAD_BINDER,
- "reply failed, no target thread -- retry %d\n",
- t->debug_id);
+ "reply failed, no target thread at root\n");
+ return;
}
+ t = next;
+ binder_debug(BINDER_DEBUG_DEAD_BINDER,
+ "reply failed, no target thread -- retry %d\n",
+ t->debug_id);
}
}
static void binder_transaction_buffer_release(struct binder_proc *proc,
struct binder_buffer *buffer,
- size_t *failed_at)
+ binder_size_t *failed_at)
{
- size_t *offp, *off_end;
+ binder_size_t *offp, *off_end;
int debug_id = buffer->debug_id;
binder_debug(BINDER_DEBUG_TRANSACTION,
@@ -1238,18 +1255,20 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
if (buffer->target_node)
binder_dec_node(buffer->target_node, 1, 0);
- offp = (size_t *)(buffer->data + ALIGN(buffer->data_size, sizeof(void *)));
+ offp = (binder_size_t *)(buffer->data +
+ ALIGN(buffer->data_size, sizeof(void *)));
if (failed_at)
off_end = failed_at;
else
off_end = (void *)offp + buffer->offsets_size;
for (; offp < off_end; offp++) {
struct flat_binder_object *fp;
+
if (*offp > buffer->data_size - sizeof(*fp) ||
buffer->data_size < sizeof(*fp) ||
- !IS_ALIGNED(*offp, sizeof(void *))) {
- pr_err("transaction release %d bad offset %zd, size %zd\n",
- debug_id, *offp, buffer->data_size);
+ !IS_ALIGNED(*offp, sizeof(u32))) {
+ pr_err("transaction release %d bad offset %lld, size %zd\n",
+ debug_id, (u64)*offp, buffer->data_size);
continue;
}
fp = (struct flat_binder_object *)(buffer->data + *offp);
@@ -1257,21 +1276,23 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
case BINDER_TYPE_BINDER:
case BINDER_TYPE_WEAK_BINDER: {
struct binder_node *node = binder_get_node(proc, fp->binder);
+
if (node == NULL) {
- pr_err("transaction release %d bad node %p\n",
- debug_id, fp->binder);
+ pr_err("transaction release %d bad node %016llx\n",
+ debug_id, (u64)fp->binder);
break;
}
binder_debug(BINDER_DEBUG_TRANSACTION,
- " node %d u%p\n",
- node->debug_id, node->ptr);
+ " node %d u%016llx\n",
+ node->debug_id, (u64)node->ptr);
binder_dec_node(node, fp->type == BINDER_TYPE_BINDER, 0);
} break;
case BINDER_TYPE_HANDLE:
case BINDER_TYPE_WEAK_HANDLE: {
struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+
if (ref == NULL) {
- pr_err("transaction release %d bad handle %ld\n",
+ pr_err("transaction release %d bad handle %d\n",
debug_id, fp->handle);
break;
}
@@ -1283,13 +1304,13 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
case BINDER_TYPE_FD:
binder_debug(BINDER_DEBUG_TRANSACTION,
- " fd %ld\n", fp->handle);
+ " fd %d\n", fp->handle);
if (failed_at)
task_close_fd(proc, fp->handle);
break;
default:
- pr_err("transaction release %d bad object type %lx\n",
+ pr_err("transaction release %d bad object type %x\n",
debug_id, fp->type);
break;
}
@@ -1302,7 +1323,8 @@ static void binder_transaction(struct binder_proc *proc,
{
struct binder_transaction *t;
struct binder_work *tcomplete;
- size_t *offp, *off_end;
+ binder_size_t *offp, *off_end;
+ binder_size_t off_min;
struct binder_proc *target_proc;
struct binder_thread *target_thread = NULL;
struct binder_node *target_node = NULL;
@@ -1361,6 +1383,7 @@ static void binder_transaction(struct binder_proc *proc,
} else {
if (tr->target.handle) {
struct binder_ref *ref;
+
ref = binder_get_ref(proc, tr->target.handle);
if (ref == NULL) {
binder_user_error("%d:%d got transaction to invalid handle\n",
@@ -1382,8 +1405,13 @@ static void binder_transaction(struct binder_proc *proc,
return_error = BR_DEAD_REPLY;
goto err_dead_binder;
}
+ if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) {
+ return_error = BR_FAILED_REPLY;
+ goto err_invalid_target_handle;
+ }
if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) {
struct binder_transaction *tmp;
+
tmp = thread->transaction_stack;
if (tmp->to_thread != thread) {
binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n",
@@ -1431,24 +1459,26 @@ static void binder_transaction(struct binder_proc *proc,
if (reply)
binder_debug(BINDER_DEBUG_TRANSACTION,
- "%d:%d BC_REPLY %d -> %d:%d, data %p-%p size %zd-%zd\n",
+ "%d:%d BC_REPLY %d -> %d:%d, data %016llx-%016llx size %lld-%lld\n",
proc->pid, thread->pid, t->debug_id,
target_proc->pid, target_thread->pid,
- tr->data.ptr.buffer, tr->data.ptr.offsets,
- tr->data_size, tr->offsets_size);
+ (u64)tr->data.ptr.buffer,
+ (u64)tr->data.ptr.offsets,
+ (u64)tr->data_size, (u64)tr->offsets_size);
else
binder_debug(BINDER_DEBUG_TRANSACTION,
- "%d:%d BC_TRANSACTION %d -> %d - node %d, data %p-%p size %zd-%zd\n",
+ "%d:%d BC_TRANSACTION %d -> %d - node %d, data %016llx-%016llx size %lld-%lld\n",
proc->pid, thread->pid, t->debug_id,
target_proc->pid, target_node->debug_id,
- tr->data.ptr.buffer, tr->data.ptr.offsets,
- tr->data_size, tr->offsets_size);
+ (u64)tr->data.ptr.buffer,
+ (u64)tr->data.ptr.offsets,
+ (u64)tr->data_size, (u64)tr->offsets_size);
if (!reply && !(tr->flags & TF_ONE_WAY))
t->from = thread;
else
t->from = NULL;
- t->sender_euid = proc->tsk->cred->euid;
+ t->sender_euid = task_euid(proc->tsk);
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
@@ -1471,43 +1501,54 @@ static void binder_transaction(struct binder_proc *proc,
if (target_node)
binder_inc_node(target_node, 1, 0, NULL);
- offp = (size_t *)(t->buffer->data + ALIGN(tr->data_size, sizeof(void *)));
+ offp = (binder_size_t *)(t->buffer->data +
+ ALIGN(tr->data_size, sizeof(void *)));
- if (copy_from_user(t->buffer->data, tr->data.ptr.buffer, tr->data_size)) {
+ if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t)
+ tr->data.ptr.buffer, tr->data_size)) {
binder_user_error("%d:%d got transaction with invalid data ptr\n",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
goto err_copy_data_failed;
}
- if (copy_from_user(offp, tr->data.ptr.offsets, tr->offsets_size)) {
+ if (copy_from_user(offp, (const void __user *)(uintptr_t)
+ tr->data.ptr.offsets, tr->offsets_size)) {
binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
goto err_copy_data_failed;
}
- if (!IS_ALIGNED(tr->offsets_size, sizeof(size_t))) {
- binder_user_error("%d:%d got transaction with invalid offsets size, %zd\n",
- proc->pid, thread->pid, tr->offsets_size);
+ if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) {
+ binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n",
+ proc->pid, thread->pid, (u64)tr->offsets_size);
return_error = BR_FAILED_REPLY;
goto err_bad_offset;
}
off_end = (void *)offp + tr->offsets_size;
+ off_min = 0;
for (; offp < off_end; offp++) {
struct flat_binder_object *fp;
+
if (*offp > t->buffer->data_size - sizeof(*fp) ||
+ *offp < off_min ||
t->buffer->data_size < sizeof(*fp) ||
- !IS_ALIGNED(*offp, sizeof(void *))) {
- binder_user_error("%d:%d got transaction with invalid offset, %zd\n",
- proc->pid, thread->pid, *offp);
+ !IS_ALIGNED(*offp, sizeof(u32))) {
+ binder_user_error("%d:%d got transaction with invalid offset, %lld (min %lld, max %lld)\n",
+ proc->pid, thread->pid, (u64)*offp,
+ (u64)off_min,
+ (u64)(t->buffer->data_size -
+ sizeof(*fp)));
return_error = BR_FAILED_REPLY;
goto err_bad_offset;
}
fp = (struct flat_binder_object *)(t->buffer->data + *offp);
+ off_min = *offp + sizeof(struct flat_binder_object);
switch (fp->type) {
case BINDER_TYPE_BINDER:
case BINDER_TYPE_WEAK_BINDER: {
struct binder_ref *ref;
struct binder_node *node = binder_get_node(proc, fp->binder);
+
if (node == NULL) {
node = binder_new_node(proc, fp->binder, fp->cookie);
if (node == NULL) {
@@ -1518,10 +1559,15 @@ static void binder_transaction(struct binder_proc *proc,
node->accept_fds = !!(fp->flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
}
if (fp->cookie != node->cookie) {
- binder_user_error("%d:%d sending u%p node %d, cookie mismatch %p != %p\n",
+ binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n",
proc->pid, thread->pid,
- fp->binder, node->debug_id,
- fp->cookie, node->cookie);
+ (u64)fp->binder, node->debug_id,
+ (u64)fp->cookie, (u64)node->cookie);
+ return_error = BR_FAILED_REPLY;
+ goto err_binder_get_ref_for_node_failed;
+ }
+ if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
+ return_error = BR_FAILED_REPLY;
goto err_binder_get_ref_for_node_failed;
}
ref = binder_get_ref_for_node(target_proc, node);
@@ -1539,20 +1585,25 @@ static void binder_transaction(struct binder_proc *proc,
trace_binder_transaction_node_to_ref(t, node, ref);
binder_debug(BINDER_DEBUG_TRANSACTION,
- " node %d u%p -> ref %d desc %d\n",
- node->debug_id, node->ptr, ref->debug_id,
- ref->desc);
+ " node %d u%016llx -> ref %d desc %d\n",
+ node->debug_id, (u64)node->ptr,
+ ref->debug_id, ref->desc);
} break;
case BINDER_TYPE_HANDLE:
case BINDER_TYPE_WEAK_HANDLE: {
struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+
if (ref == NULL) {
- binder_user_error("%d:%d got transaction with invalid handle, %ld\n",
+ binder_user_error("%d:%d got transaction with invalid handle, %d\n",
proc->pid,
thread->pid, fp->handle);
return_error = BR_FAILED_REPLY;
goto err_binder_get_ref_failed;
}
+ if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
+ return_error = BR_FAILED_REPLY;
+ goto err_binder_get_ref_failed;
+ }
if (ref->node->proc == target_proc) {
if (fp->type == BINDER_TYPE_HANDLE)
fp->type = BINDER_TYPE_BINDER;
@@ -1563,11 +1614,12 @@ static void binder_transaction(struct binder_proc *proc,
binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL);
trace_binder_transaction_ref_to_node(t, ref);
binder_debug(BINDER_DEBUG_TRANSACTION,
- " ref %d desc %d -> node %d u%p\n",
+ " ref %d desc %d -> node %d u%016llx\n",
ref->debug_id, ref->desc, ref->node->debug_id,
- ref->node->ptr);
+ (u64)ref->node->ptr);
} else {
struct binder_ref *new_ref;
+
new_ref = binder_get_ref_for_node(target_proc, ref->node);
if (new_ref == NULL) {
return_error = BR_FAILED_REPLY;
@@ -1590,13 +1642,13 @@ static void binder_transaction(struct binder_proc *proc,
if (reply) {
if (!(in_reply_to->flags & TF_ACCEPT_FDS)) {
- binder_user_error("%d:%d got reply with fd, %ld, but target does not allow fds\n",
+ binder_user_error("%d:%d got reply with fd, %d, but target does not allow fds\n",
proc->pid, thread->pid, fp->handle);
return_error = BR_FAILED_REPLY;
goto err_fd_not_allowed;
}
} else if (!target_node->accept_fds) {
- binder_user_error("%d:%d got transaction with fd, %ld, but target does not allow fds\n",
+ binder_user_error("%d:%d got transaction with fd, %d, but target does not allow fds\n",
proc->pid, thread->pid, fp->handle);
return_error = BR_FAILED_REPLY;
goto err_fd_not_allowed;
@@ -1604,11 +1656,16 @@ static void binder_transaction(struct binder_proc *proc,
file = fget(fp->handle);
if (file == NULL) {
- binder_user_error("%d:%d got transaction with invalid fd, %ld\n",
+ binder_user_error("%d:%d got transaction with invalid fd, %d\n",
proc->pid, thread->pid, fp->handle);
return_error = BR_FAILED_REPLY;
goto err_fget_failed;
}
+ if (security_binder_transfer_file(proc->tsk, target_proc->tsk, file) < 0) {
+ fput(file);
+ return_error = BR_FAILED_REPLY;
+ goto err_get_unused_fd_failed;
+ }
target_fd = task_get_unused_fd_flags(target_proc, O_CLOEXEC);
if (target_fd < 0) {
fput(file);
@@ -1618,13 +1675,13 @@ static void binder_transaction(struct binder_proc *proc,
task_fd_install(target_proc, target_fd, file);
trace_binder_transaction_fd(t, fp->handle, target_fd);
binder_debug(BINDER_DEBUG_TRANSACTION,
- " fd %ld -> %d\n", fp->handle, target_fd);
+ " fd %d -> %d\n", fp->handle, target_fd);
/* TODO: fput? */
fp->handle = target_fd;
} break;
default:
- binder_user_error("%d:%d got transaction with invalid object type, %lx\n",
+ binder_user_error("%d:%d got transaction with invalid object type, %x\n",
proc->pid, thread->pid, fp->type);
return_error = BR_FAILED_REPLY;
goto err_bad_object_type;
@@ -1681,12 +1738,13 @@ err_dead_binder:
err_invalid_target_handle:
err_no_context_mgr_node:
binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
- "%d:%d transaction failed %d, size %zd-%zd\n",
+ "%d:%d transaction failed %d, size %lld-%lld\n",
proc->pid, thread->pid, return_error,
- tr->data_size, tr->offsets_size);
+ (u64)tr->data_size, (u64)tr->offsets_size);
{
struct binder_transaction_log_entry *fe;
+
fe = binder_transaction_log_add(&binder_transaction_log_failed);
*fe = *e;
}
@@ -1699,10 +1757,13 @@ err_no_context_mgr_node:
thread->return_error = return_error;
}
-int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
- void __user *buffer, int size, signed long *consumed)
+static int binder_thread_write(struct binder_proc *proc,
+ struct binder_thread *thread,
+ binder_uintptr_t binder_buffer, size_t size,
+ binder_size_t *consumed)
{
uint32_t cmd;
+ void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
void __user *ptr = buffer + *consumed;
void __user *end = buffer + size;
@@ -1771,33 +1832,33 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
}
case BC_INCREFS_DONE:
case BC_ACQUIRE_DONE: {
- void __user *node_ptr;
- void *cookie;
+ binder_uintptr_t node_ptr;
+ binder_uintptr_t cookie;
struct binder_node *node;
- if (get_user(node_ptr, (void * __user *)ptr))
+ if (get_user(node_ptr, (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
- if (get_user(cookie, (void * __user *)ptr))
+ ptr += sizeof(binder_uintptr_t);
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(binder_uintptr_t);
node = binder_get_node(proc, node_ptr);
if (node == NULL) {
- binder_user_error("%d:%d %s u%p no match\n",
+ binder_user_error("%d:%d %s u%016llx no match\n",
proc->pid, thread->pid,
cmd == BC_INCREFS_DONE ?
"BC_INCREFS_DONE" :
"BC_ACQUIRE_DONE",
- node_ptr);
+ (u64)node_ptr);
break;
}
if (cookie != node->cookie) {
- binder_user_error("%d:%d %s u%p node %d cookie mismatch %p != %p\n",
+ binder_user_error("%d:%d %s u%016llx node %d cookie mismatch %016llx != %016llx\n",
proc->pid, thread->pid,
cmd == BC_INCREFS_DONE ?
"BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
- node_ptr, node->debug_id,
- cookie, node->cookie);
+ (u64)node_ptr, node->debug_id,
+ (u64)cookie, (u64)node->cookie);
break;
}
if (cmd == BC_ACQUIRE_DONE) {
@@ -1833,27 +1894,28 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
return -EINVAL;
case BC_FREE_BUFFER: {
- void __user *data_ptr;
+ binder_uintptr_t data_ptr;
struct binder_buffer *buffer;
- if (get_user(data_ptr, (void * __user *)ptr))
+ if (get_user(data_ptr, (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(binder_uintptr_t);
buffer = binder_buffer_lookup(proc, data_ptr);
if (buffer == NULL) {
- binder_user_error("%d:%d BC_FREE_BUFFER u%p no match\n",
- proc->pid, thread->pid, data_ptr);
+ binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
+ proc->pid, thread->pid, (u64)data_ptr);
break;
}
if (!buffer->allow_user_free) {
- binder_user_error("%d:%d BC_FREE_BUFFER u%p matched unreturned buffer\n",
- proc->pid, thread->pid, data_ptr);
+ binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
+ proc->pid, thread->pid, (u64)data_ptr);
break;
}
binder_debug(BINDER_DEBUG_FREE_BUFFER,
- "%d:%d BC_FREE_BUFFER u%p found buffer %d for %s transaction\n",
- proc->pid, thread->pid, data_ptr, buffer->debug_id,
+ "%d:%d BC_FREE_BUFFER u%016llx found buffer %d for %s transaction\n",
+ proc->pid, thread->pid, (u64)data_ptr,
+ buffer->debug_id,
buffer->transaction ? "active" : "finished");
if (buffer->transaction) {
@@ -1923,16 +1985,16 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
case BC_REQUEST_DEATH_NOTIFICATION:
case BC_CLEAR_DEATH_NOTIFICATION: {
uint32_t target;
- void __user *cookie;
+ binder_uintptr_t cookie;
struct binder_ref *ref;
struct binder_ref_death *death;
if (get_user(target, (uint32_t __user *)ptr))
return -EFAULT;
ptr += sizeof(uint32_t);
- if (get_user(cookie, (void __user * __user *)ptr))
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(binder_uintptr_t);
ref = binder_get_ref(proc, target);
if (ref == NULL) {
binder_user_error("%d:%d %s invalid ref %d\n",
@@ -1945,12 +2007,12 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
}
binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
- "%d:%d %s %p ref %d desc %d s %d w %d for node %d\n",
+ "%d:%d %s %016llx ref %d desc %d s %d w %d for node %d\n",
proc->pid, thread->pid,
cmd == BC_REQUEST_DEATH_NOTIFICATION ?
"BC_REQUEST_DEATH_NOTIFICATION" :
"BC_CLEAR_DEATH_NOTIFICATION",
- cookie, ref->debug_id, ref->desc,
+ (u64)cookie, ref->debug_id, ref->desc,
ref->strong, ref->weak, ref->node->debug_id);
if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
@@ -1988,9 +2050,10 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
}
death = ref->death;
if (death->cookie != cookie) {
- binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %p != %p\n",
+ binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %016llx != %016llx\n",
proc->pid, thread->pid,
- death->cookie, cookie);
+ (u64)death->cookie,
+ (u64)cookie);
break;
}
ref->death = NULL;
@@ -2010,25 +2073,28 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
} break;
case BC_DEAD_BINDER_DONE: {
struct binder_work *w;
- void __user *cookie;
+ binder_uintptr_t cookie;
struct binder_ref_death *death = NULL;
- if (get_user(cookie, (void __user * __user *)ptr))
+
+ if (get_user(cookie, (binder_uintptr_t __user *)ptr))
return -EFAULT;
ptr += sizeof(void *);
list_for_each_entry(w, &proc->delivered_death, entry) {
struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
+
if (tmp_death->cookie == cookie) {
death = tmp_death;
break;
}
}
binder_debug(BINDER_DEBUG_DEAD_BINDER,
- "%d:%d BC_DEAD_BINDER_DONE %p found %p\n",
- proc->pid, thread->pid, cookie, death);
+ "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n",
+ proc->pid, thread->pid, (u64)cookie,
+ death);
if (death == NULL) {
- binder_user_error("%d:%d BC_DEAD_BINDER_DONE %p not found\n",
- proc->pid, thread->pid, cookie);
+ binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n",
+ proc->pid, thread->pid, (u64)cookie);
break;
}
@@ -2054,8 +2120,8 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
return 0;
}
-void binder_stat_br(struct binder_proc *proc, struct binder_thread *thread,
- uint32_t cmd)
+static void binder_stat_br(struct binder_proc *proc,
+ struct binder_thread *thread, uint32_t cmd)
{
trace_binder_return(cmd);
if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) {
@@ -2080,9 +2146,10 @@ static int binder_has_thread_work(struct binder_thread *thread)
static int binder_thread_read(struct binder_proc *proc,
struct binder_thread *thread,
- void __user *buffer, int size,
- signed long *consumed, int non_block)
+ binder_uintptr_t binder_buffer, size_t size,
+ binder_size_t *consumed, int non_block)
{
+ void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
void __user *ptr = buffer + *consumed;
void __user *end = buffer + size;
@@ -2140,13 +2207,13 @@ retry:
if (!binder_has_proc_work(proc, thread))
ret = -EAGAIN;
} else
- ret = wait_event_interruptible_exclusive(proc->wait, binder_has_proc_work(proc, thread));
+ ret = wait_event_freezable_exclusive(proc->wait, binder_has_proc_work(proc, thread));
} else {
if (non_block) {
if (!binder_has_thread_work(thread))
ret = -EAGAIN;
} else
- ret = wait_event_interruptible(thread->wait, binder_has_thread_work(thread));
+ ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread));
}
binder_lock(__func__);
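
Switching to the freezable wait variants matters for suspend: a binder thread parked waiting for work behaves like an interruptible sleeper but also calls try_to_freeze(), so it no longer holds up system suspend. A minimal sketch of the pattern (the wait queue, flag and helper name are hypothetical):

    #include <linux/freezer.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(demo_wait);
    static int demo_work_pending;

    /* Returns 0 once work is pending, or -ERESTARTSYS on a signal. */
    static int wait_for_demo_work(void)
    {
            return wait_event_freezable(demo_wait, demo_work_pending);
    }
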
@@ -2164,12 +2231,16 @@ retry:
struct binder_work *w;
struct binder_transaction *t = NULL;
- if (!list_empty(&thread->todo))
- w = list_first_entry(&thread->todo, struct binder_work, entry);
- else if (!list_empty(&proc->todo) && wait_for_proc_work)
- w = list_first_entry(&proc->todo, struct binder_work, entry);
- else {
- if (ptr - buffer == 4 && !(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN)) /* no data added */
+ if (!list_empty(&thread->todo)) {
+ w = list_first_entry(&thread->todo, struct binder_work,
+ entry);
+ } else if (!list_empty(&proc->todo) && wait_for_proc_work) {
+ w = list_first_entry(&proc->todo, struct binder_work,
+ entry);
+ } else {
+ /* no data added */
+ if (ptr - buffer == 4 &&
+ !(thread->looper & BINDER_LOOPER_STATE_NEED_RETURN))
goto retry;
break;
}
@@ -2202,6 +2273,7 @@ retry:
const char *cmd_name;
int strong = node->internal_strong_refs || node->local_strong_refs;
int weak = !hlist_empty(&node->refs) || node->local_weak_refs || strong;
+
if (weak && !node->has_weak_ref) {
cmd = BR_INCREFS;
cmd_name = "BR_INCREFS";
@@ -2227,32 +2299,40 @@ retry:
if (put_user(cmd, (uint32_t __user *)ptr))
return -EFAULT;
ptr += sizeof(uint32_t);
- if (put_user(node->ptr, (void * __user *)ptr))
+ if (put_user(node->ptr,
+ (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
- if (put_user(node->cookie, (void * __user *)ptr))
+ ptr += sizeof(binder_uintptr_t);
+ if (put_user(node->cookie,
+ (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(binder_uintptr_t);
binder_stat_br(proc, thread, cmd);
binder_debug(BINDER_DEBUG_USER_REFS,
- "%d:%d %s %d u%p c%p\n",
- proc->pid, thread->pid, cmd_name, node->debug_id, node->ptr, node->cookie);
+ "%d:%d %s %d u%016llx c%016llx\n",
+ proc->pid, thread->pid, cmd_name,
+ node->debug_id,
+ (u64)node->ptr, (u64)node->cookie);
} else {
list_del_init(&w->entry);
if (!weak && !strong) {
binder_debug(BINDER_DEBUG_INTERNAL_REFS,
- "%d:%d node %d u%p c%p deleted\n",
- proc->pid, thread->pid, node->debug_id,
- node->ptr, node->cookie);
+ "%d:%d node %d u%016llx c%016llx deleted\n",
+ proc->pid, thread->pid,
+ node->debug_id,
+ (u64)node->ptr,
+ (u64)node->cookie);
rb_erase(&node->rb_node, &proc->nodes);
kfree(node);
binder_stats_deleted(BINDER_STAT_NODE);
} else {
binder_debug(BINDER_DEBUG_INTERNAL_REFS,
- "%d:%d node %d u%p c%p state unchanged\n",
- proc->pid, thread->pid, node->debug_id, node->ptr,
- node->cookie);
+ "%d:%d node %d u%016llx c%016llx state unchanged\n",
+ proc->pid, thread->pid,
+ node->debug_id,
+ (u64)node->ptr,
+ (u64)node->cookie);
}
}
} break;
@@ -2270,17 +2350,18 @@ retry:
if (put_user(cmd, (uint32_t __user *)ptr))
return -EFAULT;
ptr += sizeof(uint32_t);
- if (put_user(death->cookie, (void * __user *)ptr))
+ if (put_user(death->cookie,
+ (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(binder_uintptr_t);
binder_stat_br(proc, thread, cmd);
binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
- "%d:%d %s %p\n",
+ "%d:%d %s %016llx\n",
proc->pid, thread->pid,
cmd == BR_DEAD_BINDER ?
"BR_DEAD_BINDER" :
"BR_CLEAR_DEATH_NOTIFICATION_DONE",
- death->cookie);
+ (u64)death->cookie);
if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) {
list_del(&w->entry);
@@ -2299,6 +2380,7 @@ retry:
BUG_ON(t->buffer == NULL);
if (t->buffer->target_node) {
struct binder_node *target_node = t->buffer->target_node;
+
tr.target.ptr = target_node->ptr;
tr.cookie = target_node->cookie;
t->saved_priority = task_nice(current);
@@ -2310,8 +2392,8 @@ retry:
binder_set_nice(target_node->min_priority);
cmd = BR_TRANSACTION;
} else {
- tr.target.ptr = NULL;
- tr.cookie = NULL;
+ tr.target.ptr = 0;
+ tr.cookie = 0;
cmd = BR_REPLY;
}
tr.code = t->code;
@@ -2320,6 +2402,7 @@ retry:
if (t->from) {
struct task_struct *sender = t->from->proc->tsk;
+
tr.sender_pid = task_tgid_nr_ns(sender,
task_active_pid_ns(current));
} else {
@@ -2328,8 +2411,9 @@ retry:
tr.data_size = t->buffer->data_size;
tr.offsets_size = t->buffer->offsets_size;
- tr.data.ptr.buffer = (void *)t->buffer->data +
- proc->user_buffer_offset;
+ tr.data.ptr.buffer = (binder_uintptr_t)(
+ (uintptr_t)t->buffer->data +
+ proc->user_buffer_offset);
tr.data.ptr.offsets = tr.data.ptr.buffer +
ALIGN(t->buffer->data_size,
sizeof(void *));
@@ -2344,14 +2428,14 @@ retry:
trace_binder_transaction_received(t);
binder_stat_br(proc, thread, cmd);
binder_debug(BINDER_DEBUG_TRANSACTION,
- "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %p-%p\n",
+ "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %016llx-%016llx\n",
proc->pid, thread->pid,
(cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
"BR_REPLY",
t->debug_id, t->from ? t->from->proc->pid : 0,
t->from ? t->from->pid : 0, cmd,
t->buffer->data_size, t->buffer->offsets_size,
- tr.data.ptr.buffer, tr.data.ptr.offsets);
+ (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets);
list_del(&t->work.entry);
t->buffer->allow_user_free = 1;
@@ -2389,6 +2473,7 @@ done:
static void binder_release_work(struct list_head *list)
{
struct binder_work *w;
+
while (!list_empty(list)) {
w = list_first_entry(list, struct binder_work, entry);
list_del_init(&w->entry);
@@ -2421,8 +2506,8 @@ static void binder_release_work(struct list_head *list)
death = container_of(w, struct binder_ref_death, work);
binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
- "undelivered death notification, %p\n",
- death->cookie);
+ "undelivered death notification, %016llx\n",
+ (u64)death->cookie);
kfree(death);
binder_stats_deleted(BINDER_STAT_DEATH);
} break;
@@ -2543,6 +2628,109 @@ static unsigned int binder_poll(struct file *filp,
return 0;
}
+static int binder_ioctl_write_read(struct file *filp,
+ unsigned int cmd, unsigned long arg,
+ struct binder_thread *thread)
+{
+ int ret = 0;
+ struct binder_proc *proc = filp->private_data;
+ unsigned int size = _IOC_SIZE(cmd);
+ void __user *ubuf = (void __user *)arg;
+ struct binder_write_read bwr;
+
+ if (size != sizeof(struct binder_write_read)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ binder_debug(BINDER_DEBUG_READ_WRITE,
+ "%d:%d write %lld at %016llx, read %lld at %016llx\n",
+ proc->pid, thread->pid,
+ (u64)bwr.write_size, (u64)bwr.write_buffer,
+ (u64)bwr.read_size, (u64)bwr.read_buffer);
+
+ if (bwr.write_size > 0) {
+ ret = binder_thread_write(proc, thread,
+ bwr.write_buffer,
+ bwr.write_size,
+ &bwr.write_consumed);
+ trace_binder_write_done(ret);
+ if (ret < 0) {
+ bwr.read_consumed = 0;
+ if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+ if (bwr.read_size > 0) {
+ ret = binder_thread_read(proc, thread, bwr.read_buffer,
+ bwr.read_size,
+ &bwr.read_consumed,
+ filp->f_flags & O_NONBLOCK);
+ trace_binder_read_done(ret);
+ if (!list_empty(&proc->todo))
+ wake_up_interruptible(&proc->wait);
+ if (ret < 0) {
+ if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+ binder_debug(BINDER_DEBUG_READ_WRITE,
+ "%d:%d wrote %lld of %lld, read return %lld of %lld\n",
+ proc->pid, thread->pid,
+ (u64)bwr.write_consumed, (u64)bwr.write_size,
+ (u64)bwr.read_consumed, (u64)bwr.read_size);
+ if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
+ ret = -EFAULT;
+ goto out;
+ }
+out:
+ return ret;
+}
+
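
For reference, a hedged userspace sketch of the BINDER_WRITE_READ path this helper now serves; the helper name is illustrative. The same struct binder_write_read works for 32- and 64-bit callers because every field is a fixed-width binder_size_t or binder_uintptr_t:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/android/binder.h>

    /* Sketch only: write a single BC_ENTER_LOOPER command, read nothing. */
    static int binder_enter_looper_sketch(int fd)
    {
            uint32_t cmd = BC_ENTER_LOOPER;
            struct binder_write_read bwr = {
                    .write_buffer = (binder_uintptr_t)(uintptr_t)&cmd,
                    .write_size   = sizeof(cmd),
            };

            return ioctl(fd, BINDER_WRITE_READ, &bwr);
    }
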
+static int binder_ioctl_set_ctx_mgr(struct file *filp)
+{
+ int ret = 0;
+ struct binder_proc *proc = filp->private_data;
+ kuid_t curr_euid = current_euid();
+
+ if (binder_context_mgr_node != NULL) {
+ pr_err("BINDER_SET_CONTEXT_MGR already set\n");
+ ret = -EBUSY;
+ goto out;
+ }
+ ret = security_binder_set_context_mgr(proc->tsk);
+ if (ret < 0)
+ goto out;
+ if (uid_valid(binder_context_mgr_uid)) {
+ if (!uid_eq(binder_context_mgr_uid, curr_euid)) {
+ pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n",
+ from_kuid(&init_user_ns, curr_euid),
+ from_kuid(&init_user_ns,
+ binder_context_mgr_uid));
+ ret = -EPERM;
+ goto out;
+ }
+ } else {
+ binder_context_mgr_uid = curr_euid;
+ }
+ binder_context_mgr_node = binder_new_node(proc, 0, 0);
+ if (binder_context_mgr_node == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ binder_context_mgr_node->local_weak_refs++;
+ binder_context_mgr_node->local_strong_refs++;
+ binder_context_mgr_node->has_strong_ref = 1;
+ binder_context_mgr_node->has_weak_ref = 1;
+out:
+ return ret;
+}
+
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int ret;
@@ -2551,7 +2739,8 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
unsigned int size = _IOC_SIZE(cmd);
void __user *ubuf = (void __user *)arg;
- /*pr_info("binder_ioctl: %d:%d %x %lx\n", proc->pid, current->pid, cmd, arg);*/
+ /*pr_info("binder_ioctl: %d:%d %x %lx\n",
+ proc->pid, current->pid, cmd, arg);*/
trace_binder_ioctl(cmd, arg);
@@ -2567,52 +2756,11 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
switch (cmd) {
- case BINDER_WRITE_READ: {
- struct binder_write_read bwr;
- if (size != sizeof(struct binder_write_read)) {
- ret = -EINVAL;
+ case BINDER_WRITE_READ:
+ ret = binder_ioctl_write_read(filp, cmd, arg, thread);
+ if (ret)
goto err;
- }
- if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
- ret = -EFAULT;
- goto err;
- }
- binder_debug(BINDER_DEBUG_READ_WRITE,
- "%d:%d write %ld at %08lx, read %ld at %08lx\n",
- proc->pid, thread->pid, bwr.write_size,
- bwr.write_buffer, bwr.read_size, bwr.read_buffer);
-
- if (bwr.write_size > 0) {
- ret = binder_thread_write(proc, thread, (void __user *)bwr.write_buffer, bwr.write_size, &bwr.write_consumed);
- trace_binder_write_done(ret);
- if (ret < 0) {
- bwr.read_consumed = 0;
- if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
- ret = -EFAULT;
- goto err;
- }
- }
- if (bwr.read_size > 0) {
- ret = binder_thread_read(proc, thread, (void __user *)bwr.read_buffer, bwr.read_size, &bwr.read_consumed, filp->f_flags & O_NONBLOCK);
- trace_binder_read_done(ret);
- if (!list_empty(&proc->todo))
- wake_up_interruptible(&proc->wait);
- if (ret < 0) {
- if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
- ret = -EFAULT;
- goto err;
- }
- }
- binder_debug(BINDER_DEBUG_READ_WRITE,
- "%d:%d wrote %ld of %ld, read return %ld of %ld\n",
- proc->pid, thread->pid, bwr.write_consumed, bwr.write_size,
- bwr.read_consumed, bwr.read_size);
- if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
- ret = -EFAULT;
- goto err;
- }
break;
- }
case BINDER_SET_MAX_THREADS:
if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) {
ret = -EINVAL;
@@ -2620,30 +2768,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
break;
case BINDER_SET_CONTEXT_MGR:
- if (binder_context_mgr_node != NULL) {
- pr_err("BINDER_SET_CONTEXT_MGR already set\n");
- ret = -EBUSY;
- goto err;
- }
- if (uid_valid(binder_context_mgr_uid)) {
- if (!uid_eq(binder_context_mgr_uid, current->cred->euid)) {
- pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n",
- from_kuid(&init_user_ns, current->cred->euid),
- from_kuid(&init_user_ns, binder_context_mgr_uid));
- ret = -EPERM;
- goto err;
- }
- } else
- binder_context_mgr_uid = current->cred->euid;
- binder_context_mgr_node = binder_new_node(proc, NULL, NULL);
- if (binder_context_mgr_node == NULL) {
- ret = -ENOMEM;
+ ret = binder_ioctl_set_ctx_mgr(filp);
+ if (ret)
goto err;
- }
- binder_context_mgr_node->local_weak_refs++;
- binder_context_mgr_node->local_strong_refs++;
- binder_context_mgr_node->has_strong_ref = 1;
- binder_context_mgr_node->has_weak_ref = 1;
break;
case BINDER_THREAD_EXIT:
binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
@@ -2651,16 +2778,20 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
binder_free_thread(proc, thread);
thread = NULL;
break;
- case BINDER_VERSION:
+ case BINDER_VERSION: {
+ struct binder_version __user *ver = ubuf;
+
if (size != sizeof(struct binder_version)) {
ret = -EINVAL;
goto err;
}
- if (put_user(BINDER_CURRENT_PROTOCOL_VERSION, &((struct binder_version *)ubuf)->protocol_version)) {
+ if (put_user(BINDER_CURRENT_PROTOCOL_VERSION,
+ &ver->protocol_version)) {
ret = -EINVAL;
goto err;
}
break;
+ }
default:
ret = -EINVAL;
goto err;
@@ -2681,6 +2812,7 @@ err_unlocked:
static void binder_vma_open(struct vm_area_struct *vma)
{
struct binder_proc *proc = vma->vm_private_data;
+
binder_debug(BINDER_DEBUG_OPEN_CLOSE,
"%d open vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
proc->pid, vma->vm_start, vma->vm_end,
@@ -2691,6 +2823,7 @@ static void binder_vma_open(struct vm_area_struct *vma)
static void binder_vma_close(struct vm_area_struct *vma)
{
struct binder_proc *proc = vma->vm_private_data;
+
binder_debug(BINDER_DEBUG_OPEN_CLOSE,
"%d close vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
proc->pid, vma->vm_start, vma->vm_end,
@@ -2701,9 +2834,15 @@ static void binder_vma_close(struct vm_area_struct *vma)
binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES);
}
+static int binder_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
static struct vm_operations_struct binder_vm_ops = {
.open = binder_vma_open,
.close = binder_vma_close,
+ .fault = binder_vm_fault,
};
static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -2833,6 +2972,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
if (binder_debugfs_dir_entry_proc) {
char strbuf[11];
+
snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
proc->debugfs_entry = debugfs_create_file(strbuf, S_IRUGO,
binder_debugfs_dir_entry_proc, proc, &binder_proc_fops);
@@ -2854,8 +2994,10 @@ static void binder_deferred_flush(struct binder_proc *proc)
{
struct rb_node *n;
int wake_count = 0;
+
for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {
struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node);
+
thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN;
if (thread->looper & BINDER_LOOPER_STATE_WAITING) {
wake_up_interruptible(&thread->wait);
@@ -2872,6 +3014,7 @@ static void binder_deferred_flush(struct binder_proc *proc)
static int binder_release(struct inode *nodp, struct file *filp)
{
struct binder_proc *proc = filp->private_data;
+
debugfs_remove(proc->debugfs_entry);
binder_defer_work(proc, BINDER_DEFERRED_RELEASE);
@@ -3033,6 +3176,7 @@ static void binder_deferred_func(struct work_struct *work)
struct files_struct *files;
int defer;
+
do {
binder_lock(__func__);
mutex_lock(&binder_deferred_lock);
@@ -3130,8 +3274,9 @@ static void print_binder_work(struct seq_file *m, const char *prefix,
break;
case BINDER_WORK_NODE:
node = container_of(w, struct binder_node, work);
- seq_printf(m, "%snode work %d: u%p c%p\n",
- prefix, node->debug_id, node->ptr, node->cookie);
+ seq_printf(m, "%snode work %d: u%016llx c%016llx\n",
+ prefix, node->debug_id,
+ (u64)node->ptr, (u64)node->cookie);
break;
case BINDER_WORK_DEAD_BINDER:
seq_printf(m, "%shas dead binder\n", prefix);
@@ -3191,8 +3336,8 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node)
hlist_for_each_entry(ref, &node->refs, node_entry)
count++;
- seq_printf(m, " node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d",
- node->debug_id, node->ptr, node->cookie,
+ seq_printf(m, " node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d",
+ node->debug_id, (u64)node->ptr, (u64)node->cookie,
node->has_strong_ref, node->has_weak_ref,
node->local_strong_refs, node->local_weak_refs,
node->internal_strong_refs, count);
@@ -3494,6 +3639,7 @@ static const struct file_operations binder_fops = {
.owner = THIS_MODULE,
.poll = binder_poll,
.unlocked_ioctl = binder_ioctl,
+ .compat_ioctl = binder_ioctl,
.mmap = binder_mmap,
.open = binder_open,
.flush = binder_flush,
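
Reusing binder_ioctl as .compat_ioctl works only because the ABI was made 64-bit clean above: on a 64-bit kernel every user-visible field is a fixed-width type, so 32-bit callers pass the identical layout. Roughly, the write/read descriptor from the uapi header looks like this (shown for orientation, not added by this hunk):

    /* Layout is the same for 32- and 64-bit userspace on a 64-bit kernel,
     * which is why no separate compat handler is needed. */
    struct binder_write_read {
            binder_size_t    write_size;
            binder_size_t    write_consumed;
            binder_uintptr_t write_buffer;
            binder_size_t    read_size;
            binder_size_t    read_consumed;
            binder_uintptr_t read_buffer;
    };
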
diff --git a/drivers/staging/android/binder_trace.h b/drivers/android/binder_trace.h
index 82a567c2af67..7f20f3dc8369 100644
--- a/drivers/staging/android/binder_trace.h
+++ b/drivers/staging/android/binder_trace.h
@@ -152,7 +152,7 @@ TRACE_EVENT(binder_transaction_node_to_ref,
TP_STRUCT__entry(
__field(int, debug_id)
__field(int, node_debug_id)
- __field(void __user *, node_ptr)
+ __field(binder_uintptr_t, node_ptr)
__field(int, ref_debug_id)
__field(uint32_t, ref_desc)
),
@@ -163,8 +163,9 @@ TRACE_EVENT(binder_transaction_node_to_ref,
__entry->ref_debug_id = ref->debug_id;
__entry->ref_desc = ref->desc;
),
- TP_printk("transaction=%d node=%d src_ptr=0x%p ==> dest_ref=%d dest_desc=%d",
- __entry->debug_id, __entry->node_debug_id, __entry->node_ptr,
+ TP_printk("transaction=%d node=%d src_ptr=0x%016llx ==> dest_ref=%d dest_desc=%d",
+ __entry->debug_id, __entry->node_debug_id,
+ (u64)__entry->node_ptr,
__entry->ref_debug_id, __entry->ref_desc)
);
@@ -177,7 +178,7 @@ TRACE_EVENT(binder_transaction_ref_to_node,
__field(int, ref_debug_id)
__field(uint32_t, ref_desc)
__field(int, node_debug_id)
- __field(void __user *, node_ptr)
+ __field(binder_uintptr_t, node_ptr)
),
TP_fast_assign(
__entry->debug_id = t->debug_id;
@@ -186,9 +187,10 @@ TRACE_EVENT(binder_transaction_ref_to_node,
__entry->node_debug_id = ref->node->debug_id;
__entry->node_ptr = ref->node->ptr;
),
- TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%p",
+ TP_printk("transaction=%d node=%d src_ref=%d src_desc=%d ==> dest_ptr=0x%016llx",
__entry->debug_id, __entry->node_debug_id,
- __entry->ref_debug_id, __entry->ref_desc, __entry->node_ptr)
+ __entry->ref_debug_id, __entry->ref_desc,
+ (u64)__entry->node_ptr)
);
TRACE_EVENT(binder_transaction_ref_to_ref,
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5d93bb519753..67d2a8693af0 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -25,3 +25,5 @@ obj-$(CONFIG_PINCTRL) += pinctrl.o
ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
+obj-$(CONFIG_KDS) += kds/
+obj-$(CONFIG_UMP) += ump/
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 607efe6b7dc8..7acaf9a57a4f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -319,6 +319,7 @@ int __cpuinit register_cpu(struct cpu *cpu, int num)
cpu->dev.id = num;
cpu->dev.bus = &cpu_subsys;
cpu->dev.release = cpu_device_release;
+ cpu->dev.of_node = of_get_cpu_node(num, NULL);
#ifdef CONFIG_HAVE_CPU_AUTOPROBE
cpu->dev.bus->uevent = cpu_uevent;
#endif
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index dd302ea3b599..27c2c3a5647a 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -228,9 +228,19 @@ EXPORT_SYMBOL(dmam_release_declared_memory);
int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t handle, size_t size)
{
- struct page *page = virt_to_page(cpu_addr);
+ struct page *page;
int ret;
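+	/*
+	 * cpu_addr is not always a linear-map address (it may come from a
+	 * remapped or atomic-pool allocation), so derive the page from the
+	 * DMA handle whenever virt_to_page() cannot be used.
+	 */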
+	if (virt_addr_valid(cpu_addr)) {
+		page = virt_to_page(cpu_addr);
+	} else {
+#ifdef dma_to_phys
+ page = pfn_to_page(dma_to_phys(dev, handle) >> PAGE_SHIFT);
+#else
+ BUG();
+#endif
+ }
+
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (unlikely(ret))
return ret;
diff --git a/drivers/base/dma_buf_lock/src/Kbuild b/drivers/base/dma_buf_lock/src/Kbuild
new file mode 100755
index 000000000000..68dad07ad607
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-m := dma_buf_lock.o
+endif
diff --git a/drivers/base/dma_buf_lock/src/Makefile b/drivers/base/dma_buf_lock/src/Makefile
new file mode 100755
index 000000000000..cf76132e6dde
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all: dma_buf_lock
+
+dma_buf_lock:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include"
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/drivers/base/dma_buf_lock/src/dma_buf_lock.c
new file mode 100755
index 000000000000..9a0b35b05db5
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/dma_buf_lock.c
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <asm/uaccess.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/dma-buf.h>
+#include <linux/kds.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+
+#include "dma_buf_lock.h"
+
+/* Maximum number of buffers that a single handle can address */
+#define DMA_BUF_LOCK_BUF_MAX 32
+
+#define DMA_BUF_LOCK_DEBUG 1
+
+static dev_t dma_buf_lock_dev;
+static struct cdev dma_buf_lock_cdev;
+static struct class *dma_buf_lock_class;
+static char dma_buf_lock_dev_name[] = "dma_buf_lock";
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+static struct file_operations dma_buf_lock_fops =
+{
+ .owner = THIS_MODULE,
+#ifdef HAVE_UNLOCKED_IOCTL
+ .unlocked_ioctl = dma_buf_lock_ioctl,
+#else
+ .ioctl = dma_buf_lock_ioctl,
+#endif
+ .compat_ioctl = dma_buf_lock_ioctl,
+};
+
+typedef struct dma_buf_lock_resource
+{
+ int *list_of_dma_buf_fds; /* List of buffers copied from userspace */
+ atomic_t locked; /* Status of lock */
+ struct dma_buf **dma_bufs;
+ struct kds_resource **kds_resources; /* List of KDS resources associated with buffers */
+ struct kds_resource_set *resource_set;
+ unsigned long exclusive; /* Exclusive access bitmap */
+ wait_queue_head_t wait;
+ struct kds_callback cb;
+ struct kref refcount;
+ struct list_head link;
+ int count;
+} dma_buf_lock_resource;
+
+static LIST_HEAD(dma_buf_lock_resource_list);
+static DEFINE_MUTEX(dma_buf_lock_mutex);
+
+static inline int is_dma_buf_lock_file(struct file *);
+static void dma_buf_lock_dounlock(struct kref *ref);
+
+static int dma_buf_lock_handle_release(struct inode *inode, struct file *file)
+{
+ dma_buf_lock_resource *resource;
+
+ if (!is_dma_buf_lock_file(file))
+ return -EINVAL;
+
+ resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_handle_release\n");
+#endif
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ return 0;
+}
+
+static void dma_buf_lock_kds_callback(void *param1, void *param2)
+{
+ dma_buf_lock_resource *resource = param1;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_kds_callback\n");
+#endif
+ atomic_set(&resource->locked, 1);
+
+ wake_up(&resource->wait);
+}
+
+static unsigned int dma_buf_lock_handle_poll(struct file *file,
+ struct poll_table_struct *wait)
+{
+ dma_buf_lock_resource *resource;
+ unsigned int ret = 0;
+
+ if (!is_dma_buf_lock_file(file))
+ return POLLERR;
+
+ resource = file->private_data;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_handle_poll\n");
+#endif
+ if (1 == atomic_read(&resource->locked))
+ {
+ /* Resources have been locked */
+ ret = POLLIN | POLLRDNORM;
+ if (resource->exclusive)
+ {
+ ret |= POLLOUT | POLLWRNORM;
+ }
+ }
+ else
+ {
+ if (!poll_does_not_wait(wait))
+ {
+ poll_wait(file, &resource->wait, wait);
+ }
+ }
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_handle_poll : return %i\n", ret);
+#endif
+ return ret;
+}
+
+static const struct file_operations dma_buf_lock_handle_fops = {
+ .release = dma_buf_lock_handle_release,
+ .poll = dma_buf_lock_handle_poll,
+};
+
+/*
+ * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock
+ */
+static inline int is_dma_buf_lock_file(struct file *file)
+{
+ return file->f_op == &dma_buf_lock_handle_fops;
+}
+
+
+
+/*
+ * Start requested lock.
+ *
+ * Allocates required memory, copies dma_buf_fd list from userspace,
+ * acquires related KDS resources, and starts the lock.
+ */
+static int dma_buf_lock_dolock(dma_buf_lock_k_request *request)
+{
+ dma_buf_lock_resource *resource;
+ int size;
+ int fd;
+ int i;
+ int ret;
+
+ if (NULL == request->list_of_dma_buf_fds)
+ {
+ return -EINVAL;
+ }
+ if (request->count <= 0)
+ {
+ return -EINVAL;
+ }
+ if (request->count > DMA_BUF_LOCK_BUF_MAX)
+ {
+ return -EINVAL;
+ }
+ if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE &&
+ request->exclusive != DMA_BUF_LOCK_EXCLUSIVE)
+ {
+ return -EINVAL;
+ }
+
+ resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL);
+ if (NULL == resource)
+ {
+ return -ENOMEM;
+ }
+
+ atomic_set(&resource->locked, 0);
+ kref_init(&resource->refcount);
+ INIT_LIST_HEAD(&resource->link);
+ resource->count = request->count;
+
+ /* Allocate space to store dma_buf_fds received from user space */
+ size = request->count * sizeof(int);
+ resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL);
+
+ if (NULL == resource->list_of_dma_buf_fds)
+ {
+ kfree(resource);
+ return -ENOMEM;
+ }
+
+ /* Allocate space to store dma_buf pointers associated with dma_buf_fds */
+ size = sizeof(struct dma_buf *) * request->count;
+ resource->dma_bufs = kmalloc(size, GFP_KERNEL);
+
+ if (NULL == resource->dma_bufs)
+ {
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource);
+ return -ENOMEM;
+ }
+ /* Allocate space to store kds_resources associated with dma_buf_fds */
+ size = sizeof(struct kds_resource *) * request->count;
+ resource->kds_resources = kmalloc(size, GFP_KERNEL);
+
+ if (NULL == resource->kds_resources)
+ {
+ kfree(resource->dma_bufs);
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource);
+ return -ENOMEM;
+ }
+
+ /* Copy requested list of dma_buf_fds from user space */
+ size = request->count * sizeof(int);
+ if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size))
+ {
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource->dma_bufs);
+ kfree(resource->kds_resources);
+ kfree(resource);
+		return -EFAULT;
+ }
+#if DMA_BUF_LOCK_DEBUG
+ for (i = 0; i < request->count; i++)
+ {
+ printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]);
+ }
+#endif
+
+ /* Add resource to global list */
+ mutex_lock(&dma_buf_lock_mutex);
+
+ list_add(&resource->link, &dma_buf_lock_resource_list);
+
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ for (i = 0; i < request->count; i++)
+ {
+ /* Convert fd into dma_buf structure */
+ resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]);
+
+		if (IS_ERR(resource->dma_bufs[i]))
+ {
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ return -EINVAL;
+ }
+
+		/* Get the kds_resource associated with the dma_buf */
+ resource->kds_resources[i] = get_dma_buf_kds_resource(resource->dma_bufs[i]);
+
+ if (NULL == resource->kds_resources[i])
+ {
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ return -EINVAL;
+ }
+#if DMA_BUF_LOCK_DEBUG
+		printk("dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p kds_resource %p\n",
+		       resource->list_of_dma_buf_fds[i],
+		       resource->dma_bufs[i], resource->kds_resources[i]);
+#endif
+ }
+
+ kds_callback_init(&resource->cb, 1, dma_buf_lock_kds_callback);
+ init_waitqueue_head(&resource->wait);
+
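+	/* Take an extra reference for the handle fd created below; the initial
+	 * reference from kref_init() is dropped once the lock request has been
+	 * issued (or on any failure path). */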
+ kref_get(&resource->refcount);
+
+ /* Create file descriptor associated with lock request */
+ fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops,
+ (void *)resource, 0);
+ if (fd < 0)
+ {
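+		/* No fd was installed: drop both the fd reference taken above and
+		 * the initial reference, which frees the resource set. */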
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ return fd;
+ }
+
+ resource->exclusive = request->exclusive;
+
+ /* Start locking process */
+ ret = kds_async_waitall(&resource->resource_set,
+ &resource->cb, resource, NULL,
+ request->count, &resource->exclusive,
+ resource->kds_resources);
+
+ if (IS_ERR_VALUE(ret))
+ {
+ put_unused_fd(fd);
+
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ return ret;
+ }
+
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_dolock : complete\n");
+#endif
+ mutex_lock(&dma_buf_lock_mutex);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+
+ return fd;
+}
+
+static void dma_buf_lock_dounlock(struct kref *ref)
+{
+ int i;
+ dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount);
+
+ atomic_set(&resource->locked, 0);
+
+ kds_callback_term(&resource->cb);
+
+ kds_resource_set_release(&resource->resource_set);
+
+ list_del(&resource->link);
+
+ for (i = 0; i < resource->count; i++)
+ {
+ dma_buf_put(resource->dma_bufs[i]);
+ }
+
+ kfree(resource->kds_resources);
+ kfree(resource->dma_bufs);
+ kfree(resource->list_of_dma_buf_fds);
+ kfree(resource);
+}
+
+static int __init dma_buf_lock_init(void)
+{
+ int err;
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_init\n");
+#endif
+ err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name);
+
+ if (0 == err)
+ {
+ cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops);
+
+ err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1);
+
+ if (0 == err)
+ {
+ dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name);
+ if (IS_ERR(dma_buf_lock_class))
+ {
+ err = PTR_ERR(dma_buf_lock_class);
+ }
+ else
+ {
+ struct device *mdev;
+ mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name);
+ if (!IS_ERR(mdev))
+ {
+ return 0;
+ }
+
+ err = PTR_ERR(mdev);
+ class_destroy(dma_buf_lock_class);
+ }
+ cdev_del(&dma_buf_lock_cdev);
+ }
+
+ unregister_chrdev_region(dma_buf_lock_dev, 1);
+ }
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_init failed\n");
+#endif
+ return err;
+}
+
+static void __exit dma_buf_lock_exit(void)
+{
+#if DMA_BUF_LOCK_DEBUG
+ printk("dma_buf_lock_exit\n");
+#endif
+
+ /* Unlock all outstanding references */
+ while (1)
+ {
+ mutex_lock(&dma_buf_lock_mutex);
+ if (list_empty(&dma_buf_lock_resource_list))
+ {
+ mutex_unlock(&dma_buf_lock_mutex);
+ break;
+ }
+ else
+ {
+ dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next,
+ dma_buf_lock_resource, link);
+ kref_put(&resource->refcount, dma_buf_lock_dounlock);
+ mutex_unlock(&dma_buf_lock_mutex);
+ }
+ }
+
+ device_destroy(dma_buf_lock_class, dma_buf_lock_dev);
+
+ class_destroy(dma_buf_lock_class);
+
+ cdev_del(&dma_buf_lock_cdev);
+
+ unregister_chrdev_region(dma_buf_lock_dev, 1);
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+ dma_buf_lock_k_request request;
+ int size = _IOC_SIZE(cmd);
+
+ if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC)
+ {
+ return -ENOTTY;
+	}
+ if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR))
+ {
+ return -ENOTTY;
+ }
+
+ switch (cmd)
+ {
+ case DMA_BUF_LOCK_FUNC_LOCK_ASYNC:
+ if (size != sizeof(dma_buf_lock_k_request))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&request, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+#if DMA_BUF_LOCK_DEBUG
+ printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count);
+#endif
+ return dma_buf_lock_dolock(&request);
+ }
+
+ return -ENOTTY;
+}
+
+module_init(dma_buf_lock_init);
+module_exit(dma_buf_lock_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/drivers/base/dma_buf_lock/src/dma_buf_lock.h
new file mode 100755
index 000000000000..5d248e8ccaed
--- /dev/null
+++ b/drivers/base/dma_buf_lock/src/dma_buf_lock.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _DMA_BUF_LOCK_H
+#define _DMA_BUF_LOCK_H
+
+typedef enum dma_buf_lock_exclusive
+{
+ DMA_BUF_LOCK_NONEXCLUSIVE = 0,
+ DMA_BUF_LOCK_EXCLUSIVE = -1
+} dma_buf_lock_exclusive;
+
+typedef struct dma_buf_lock_k_request
+{
+ int count;
+ int *list_of_dma_buf_fds;
+ int timeout;
+ dma_buf_lock_exclusive exclusive;
+} dma_buf_lock_k_request;
+
+#define DMA_BUF_LOCK_IOC_MAGIC '~'
+
+#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request)
+
+#define DMA_BUF_LOCK_IOC_MINNR 11
+#define DMA_BUF_LOCK_IOC_MAXNR 11
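+
+/*
+ * Illustrative userspace sketch (not part of the ABI documentation); it
+ * assumes udev creates the device node as /dev/dma_buf_lock:
+ *
+ *   int fds[2] = { buf_fd0, buf_fd1 };    // dma_buf fds obtained elsewhere
+ *   dma_buf_lock_k_request req = {
+ *           .count = 2,
+ *           .list_of_dma_buf_fds = fds,
+ *           .timeout = 0,
+ *           .exclusive = DMA_BUF_LOCK_EXCLUSIVE,
+ *   };
+ *   int dev_fd  = open("/dev/dma_buf_lock", O_RDWR);
+ *   int lock_fd = ioctl(dev_fd, DMA_BUF_LOCK_FUNC_LOCK_ASYNC, &req);
+ *   struct pollfd p = { .fd = lock_fd, .events = POLLIN };
+ *   poll(&p, 1, -1);      // POLLIN is set once all buffers are locked
+ *   close(lock_fd);       // dropping the handle releases the lock
+ */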
+
+#endif /* _DMA_BUF_LOCK_H */
diff --git a/drivers/base/dma_buf_test_exporter/Kbuild b/drivers/base/dma_buf_test_exporter/Kbuild
new file mode 100755
index 000000000000..56b9f86d2d52
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+ifneq ($(CONFIG_DMA_SHARED_BUFFER),)
+obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o
+endif
diff --git a/drivers/base/dma_buf_test_exporter/Kconfig b/drivers/base/dma_buf_test_exporter/Kconfig
new file mode 100755
index 000000000000..974f0c23dbd2
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+config DMA_SHARED_BUFFER_TEST_EXPORTER
+ tristate "Test exporter for the dma-buf framework"
+ depends on DMA_SHARED_BUFFER
+ help
+	  This option enables a test exporter that can be used to help test dma-buf importers.
diff --git a/drivers/base/dma_buf_test_exporter/Makefile b/drivers/base/dma_buf_test_exporter/Makefile
new file mode 100755
index 000000000000..06e3d5c121a5
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/Makefile
@@ -0,0 +1,30 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
new file mode 100755
index 000000000000..6427e864426c
--- /dev/null
+++ b/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c
@@ -0,0 +1,617 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/dma-buf-test-exporter.h>
+#include <linux/dma-buf.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/atomic.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+#endif
+
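+/*
+ * Simple dma-buf exporter used to exercise importers.  Each allocation is
+ * backed either by individually allocated pages or by one physically
+ * contiguous write-combined region, and attach/map/mmap failures can be
+ * injected at runtime through the DMA_BUF_TE_SET_FAILING ioctl.
+ */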
+struct dma_buf_te_alloc {
+ /* the real alloc */
+ int nr_pages;
+ struct page **pages;
+
+ /* the debug usage tracking */
+ int nr_attached_devices;
+ int nr_device_mappings;
+ int nr_cpu_mappings;
+
+ /* failure simulation */
+ int fail_attach;
+ int fail_map;
+ int fail_mmap;
+
+ bool contiguous;
+ dma_addr_t contig_dma_addr;
+ void *contig_cpu_addr;
+};
+
+static struct miscdevice te_device;
+
+static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment)
+{
+ struct dma_buf_te_alloc *alloc;
+ alloc = buf->priv;
+
+ if (alloc->fail_attach)
+ return -EFAULT;
+
+ /* dma_buf is externally locked during call */
+ alloc->nr_attached_devices++;
+ return 0;
+}
+
+static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment)
+{
+ struct dma_buf_te_alloc *alloc;
+ alloc = buf->priv;
+ /* dma_buf is externally locked during call */
+
+ alloc->nr_attached_devices--;
+}
+
+static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction)
+{
+ struct sg_table *sg;
+ struct scatterlist *iter;
+ struct dma_buf_te_alloc *alloc;
+ int i;
+ int ret;
+
+ alloc = attachment->dmabuf->priv;
+
+ if (alloc->fail_map)
+ return ERR_PTR(-ENOMEM);
+
+#ifndef ARCH_HAS_SG_CHAIN
+ /* if the ARCH can't chain we can't have allocs larger than a single sg can hold */
+ if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC)
+ return ERR_PTR(-EINVAL);
+#endif
+
+ sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!sg)
+ return ERR_PTR(-ENOMEM);
+
+ /* from here we access the allocation object, so lock the dmabuf pointing to it */
+ mutex_lock(&attachment->dmabuf->lock);
+
+ if (alloc->contiguous)
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ else
+ ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL);
+ if (ret) {
+ mutex_unlock(&attachment->dmabuf->lock);
+ kfree(sg);
+ return ERR_PTR(ret);
+ }
+
+ if (alloc->contiguous) {
+ sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE;
+ sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0);
+ sg_dma_address(sg->sgl) = alloc->contig_dma_addr;
+ } else {
+ for_each_sg(sg->sgl, iter, alloc->nr_pages, i)
+ sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0);
+ }
+
+ if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) {
+ mutex_unlock(&attachment->dmabuf->lock);
+ sg_free_table(sg);
+ kfree(sg);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ alloc->nr_device_mappings++;
+ mutex_unlock(&attachment->dmabuf->lock);
+ return sg;
+}
+
+static void dma_buf_te_unmap(struct dma_buf_attachment *attachment,
+ struct sg_table *sg, enum dma_data_direction direction)
+{
+ struct dma_buf_te_alloc *alloc;
+
+ alloc = attachment->dmabuf->priv;
+
+ dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction);
+ sg_free_table(sg);
+ kfree(sg);
+
+ mutex_lock(&attachment->dmabuf->lock);
+ alloc->nr_device_mappings--;
+ mutex_unlock(&attachment->dmabuf->lock);
+}
+
+static void dma_buf_te_release(struct dma_buf *buf)
+{
+ int i;
+ struct dma_buf_te_alloc *alloc;
+ alloc = buf->priv;
+ /* no need for locking */
+
+ dev_info(te_device.this_device, "%s", __func__);
+
+ if (alloc->contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+ dma_free_writecombine(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+ } else {
+ for (i = 0; i < alloc->nr_pages; i++)
+ __free_page(alloc->pages[i]);
+ }
+ kfree(alloc->pages);
+ kfree(alloc);
+}
+
+
+static void dma_buf_te_mmap_open(struct vm_area_struct *vma)
+{
+ struct dma_buf *dma_buf;
+ struct dma_buf_te_alloc *alloc;
+ dma_buf = vma->vm_private_data;
+ alloc = dma_buf->priv;
+
+ mutex_lock(&dma_buf->lock);
+ alloc->nr_cpu_mappings++;
+ mutex_unlock(&dma_buf->lock);
+}
+
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+ struct dma_buf *dma_buf;
+ struct dma_buf_te_alloc *alloc;
+ dma_buf = vma->vm_private_data;
+ alloc = dma_buf->priv;
+
+ BUG_ON(alloc->nr_cpu_mappings <= 0);
+ mutex_lock(&dma_buf->lock);
+ alloc->nr_cpu_mappings--;
+ mutex_unlock(&dma_buf->lock);
+}
+
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct dma_buf_te_alloc *alloc;
+ struct dma_buf *dmabuf;
+ struct page *pageptr;
+
+ dmabuf = vma->vm_private_data;
+ alloc = dmabuf->priv;
+
+	if (vmf->pgoff >= alloc->nr_pages)
+ return VM_FAULT_SIGBUS;
+
+ pageptr = alloc->pages[vmf->pgoff];
+
+ BUG_ON(!pageptr);
+
+ get_page(pageptr);
+ vmf->page = pageptr;
+
+ return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+ .open = dma_buf_te_mmap_open,
+ .close = dma_buf_te_mmap_close,
+ .fault = dma_buf_te_mmap_fault
+};
+
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+ struct dma_buf_te_alloc *alloc;
+ alloc = dmabuf->priv;
+
+ if (alloc->fail_mmap)
+ return -ENOMEM;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND;
+#endif
+ vma->vm_ops = &dma_buf_te_vm_ops;
+ vma->vm_private_data = dmabuf;
+
+ /* we fault in the pages on access */
+
+ /* call open to do the ref-counting */
+ dma_buf_te_vm_ops.open(vma);
+
+ return 0;
+}
+
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+ /* IGNORE */
+ return NULL;
+}
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long page_num)
+{
+ /* IGNORE */
+ return NULL;
+}
+
+static struct dma_buf_ops dma_buf_te_ops = {
+ /* real handlers */
+ .attach = dma_buf_te_attach,
+ .detach = dma_buf_te_detach,
+ .map_dma_buf = dma_buf_te_map,
+ .unmap_dma_buf = dma_buf_te_unmap,
+ .release = dma_buf_te_release,
+ .mmap = dma_buf_te_mmap,
+
+ /* nop handlers for mandatory functions we ignore */
+ .kmap_atomic = dma_buf_te_kmap_atomic,
+ .kmap = dma_buf_te_kmap
+};
+
+static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf)
+{
+ struct dma_buf_te_ioctl_version v;
+
+ if (copy_from_user(&v, buf, sizeof(v)))
+ return -EFAULT;
+
+ if (v.op != DMA_BUF_TE_ENQ)
+ return -EFAULT;
+
+ v.op = DMA_BUF_TE_ACK;
+ v.major = DMA_BUF_TE_VER_MAJOR;
+ v.minor = DMA_BUF_TE_VER_MINOR;
+
+ if (copy_to_user(buf, &v, sizeof(v)))
+ return -EFAULT;
+ else
+ return 0;
+}
+
+static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous)
+{
+ struct dma_buf_te_ioctl_alloc alloc_req;
+ struct dma_buf_te_alloc *alloc;
+ struct dma_buf *dma_buf;
+ int i = 0;
+ int fd;
+
+ if (copy_from_user(&alloc_req, buf, sizeof(alloc_req))) {
+ dev_err(te_device.this_device, "%s: couldn't get user data", __func__);
+ goto no_input;
+ }
+
+ if (!alloc_req.size) {
+ dev_err(te_device.this_device, "%s: no size specified", __func__);
+ goto zero_size;
+ }
+
+ alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL);
+ if (NULL == alloc) {
+ dev_err(te_device.this_device, "%s: couldn't alloc object", __func__);
+ goto no_alloc_object;
+ }
+
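+	/* the requested size is given in pages, not bytes */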
+ alloc->nr_pages = alloc_req.size;
+ alloc->contiguous = contiguous;
+
+ alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL);
+ if (!alloc->pages) {
+ dev_err(te_device.this_device,
+ "%s: couldn't alloc %d page structures", __func__,
+ alloc->nr_pages);
+ goto free_alloc_object;
+ }
+
+ if (contiguous) {
+ dma_addr_t dma_aux;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ &alloc->contig_dma_addr, GFP_KERNEL, &attrs);
+#else
+ alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ &alloc->contig_dma_addr, GFP_KERNEL);
+#endif
+ if (!alloc->contig_cpu_addr) {
+ dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %d pages", __func__, alloc->nr_pages);
+ goto free_page_struct;
+ }
+ dma_aux = alloc->contig_dma_addr;
+ for (i = 0; i < alloc->nr_pages; i++) {
+ alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+ dma_aux += PAGE_SIZE;
+ }
+ } else {
+ for (i = 0; i < alloc->nr_pages; i++) {
+ alloc->pages[i] = alloc_page(GFP_KERNEL);
+ if (NULL == alloc->pages[i]) {
+ dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+ goto no_page;
+ }
+ }
+ }
+
+ /* alloc ready, let's export it */
+ dma_buf = dma_buf_export(alloc, &dma_buf_te_ops, alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR);
+
+ if (IS_ERR_OR_NULL(dma_buf)) {
+ dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+ goto no_export;
+ }
+
+ /* get fd for buf */
+ fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+ if (fd < 0) {
+ dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+ goto no_fd;
+ }
+
+ return fd;
+
+no_fd:
+ dma_buf_put(dma_buf);
+no_export:
+	/* 'i' still holds the number of successfully allocated pages */
+no_page:
+ if (contiguous) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#else
+ dma_free_writecombine(te_device.this_device,
+ alloc->nr_pages * PAGE_SIZE,
+ alloc->contig_cpu_addr, alloc->contig_dma_addr);
+#endif
+ } else {
+ while (i-- > 0)
+ __free_page(alloc->pages[i]);
+ }
+free_page_struct:
+ kfree(alloc->pages);
+free_alloc_object:
+ kfree(alloc);
+no_alloc_object:
+zero_size:
+no_input:
+ return -EFAULT;
+}
+
+static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg)
+{
+ struct dma_buf_te_ioctl_status status;
+ struct dma_buf *dmabuf;
+ struct dma_buf_te_alloc *alloc;
+ int res = -EINVAL;
+
+ if (copy_from_user(&status, arg, sizeof(status)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(status.fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return -EINVAL;
+
+ /* verify it's one of ours */
+ if (dmabuf->ops != &dma_buf_te_ops)
+ goto err_have_dmabuf;
+
+ /* ours, get the current status */
+ alloc = dmabuf->priv;
+
+ /* lock while reading status to take a snapshot */
+ mutex_lock(&dmabuf->lock);
+ status.attached_devices = alloc->nr_attached_devices;
+ status.device_mappings = alloc->nr_device_mappings;
+ status.cpu_mappings = alloc->nr_cpu_mappings;
+ mutex_unlock(&dmabuf->lock);
+
+ if (copy_to_user(arg, &status, sizeof(status)))
+ goto err_have_dmabuf;
+
+ /* All OK */
+ res = 0;
+
+err_have_dmabuf:
+ dma_buf_put(dmabuf);
+ return res;
+}
+
+static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg)
+{
+ struct dma_buf *dmabuf;
+ struct dma_buf_te_ioctl_set_failing f;
+ struct dma_buf_te_alloc *alloc;
+ int res = -EINVAL;
+
+ if (copy_from_user(&f, arg, sizeof(f)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(f.fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return -EINVAL;
+
+ /* verify it's one of ours */
+ if (dmabuf->ops != &dma_buf_te_ops)
+ goto err_have_dmabuf;
+
+ /* ours, set the fail modes */
+ alloc = dmabuf->priv;
+ /* lock to set the fail modes atomically */
+ mutex_lock(&dmabuf->lock);
+ alloc->fail_attach = f.fail_attach;
+ alloc->fail_map = f.fail_map;
+ alloc->fail_mmap = f.fail_mmap;
+ mutex_unlock(&dmabuf->lock);
+
+ /* success */
+ res = 0;
+
+err_have_dmabuf:
+ dma_buf_put(dmabuf);
+ return res;
+}
+
+static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value)
+{
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sgt;
+ struct scatterlist *sg;
+ unsigned int count;
+ unsigned int offset = 0;
+ int ret = 0;
+ int i;
+
+ attachment = dma_buf_attach(dma_buf, te_device.this_device);
+ if (IS_ERR_OR_NULL(attachment))
+ return -EBUSY;
+
+ sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+ if (IS_ERR_OR_NULL(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto no_import;
+ }
+
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ ret = dma_buf_begin_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+ if (ret)
+ goto no_cpu_access;
+ for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+ void *addr;
+
+ addr = dma_buf_kmap(dma_buf, i >> PAGE_SHIFT);
+ if (!addr) {
+ /* dma_buf_kmap is unimplemented in exynos and returns NULL */
+ dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+ ret = -EPERM;
+ goto no_cpu_access;
+ }
+ memset(addr, value, PAGE_SIZE);
+ dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr);
+ }
+ dma_buf_end_cpu_access(dma_buf, offset, sg_dma_len(sg), DMA_BIDIRECTIONAL);
+ offset += sg_dma_len(sg);
+ }
+
+no_cpu_access:
+ dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+ dma_buf_detach(dma_buf, attachment);
+ return ret;
+}
+
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+ struct dma_buf *dmabuf;
+ struct dma_buf_te_ioctl_fill f;
+ int ret;
+
+ if (copy_from_user(&f, arg, sizeof(f)))
+ return -EFAULT;
+
+ dmabuf = dma_buf_get(f.fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return -EINVAL;
+
+ ret = dma_te_buf_fill(dmabuf, f.value);
+ dma_buf_put(dmabuf);
+
+ return ret;
+}
+
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case DMA_BUF_TE_VERSION:
+ return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+ case DMA_BUF_TE_ALLOC:
+ return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false);
+ case DMA_BUF_TE_ALLOC_CONT:
+ return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true);
+ case DMA_BUF_TE_QUERY:
+ return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg);
+ case DMA_BUF_TE_SET_FAILING:
+ return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg);
+ case DMA_BUF_TE_FILL:
+ return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations dma_buf_te_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = dma_buf_te_ioctl,
+ .compat_ioctl = dma_buf_te_ioctl,
+};
+
+static int __init dma_buf_te_init(void)
+{
+ int res;
+ te_device.minor = MISC_DYNAMIC_MINOR;
+ te_device.name = "dma_buf_te";
+ te_device.fops = &dma_buf_te_fops;
+
+ res = misc_register(&te_device);
+ if (res) {
+		printk(KERN_WARNING "Misc device registration failed for 'dma_buf_te'\n");
+ return res;
+ }
+ te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32);
+
+ dev_info(te_device.this_device, "dma_buf_te ready\n");
+ return 0;
+}
+
+static void __exit dma_buf_te_exit(void)
+{
+ misc_deregister(&te_device);
+}
+
+module_init(dma_buf_te_init);
+module_exit(dma_buf_te_exit);
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 00a565676583..9cf8dc7448e2 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -27,6 +27,7 @@
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
+#include <linux/reboot.h>
#include <generated/utsrelease.h>
@@ -130,6 +131,7 @@ struct firmware_buf {
struct page **pages;
int nr_pages;
int page_array_size;
+ struct list_head pending_list;
#endif
char fw_id[];
};
@@ -171,6 +173,9 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name,
strcpy(buf->fw_id, fw_name);
buf->fwc = fwc;
init_completion(&buf->completion);
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+ INIT_LIST_HEAD(&buf->pending_list);
+#endif
pr_debug("%s: fw-%s buf=%p\n", __func__, fw_name, buf);
@@ -446,10 +451,8 @@ static struct firmware_priv *to_firmware_priv(struct device *dev)
return container_of(dev, struct firmware_priv, dev);
}
-static void fw_load_abort(struct firmware_priv *fw_priv)
+static void __fw_load_abort(struct firmware_buf *buf)
{
- struct firmware_buf *buf = fw_priv->buf;
-
/*
* There is a small window in which user can write to 'loading'
* between loading done and disappearance of 'loading'
@@ -457,8 +460,16 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
if (test_bit(FW_STATUS_DONE, &buf->status))
return;
+ list_del_init(&buf->pending_list);
set_bit(FW_STATUS_ABORT, &buf->status);
complete_all(&buf->completion);
+}
+
+static void fw_load_abort(struct firmware_priv *fw_priv)
+{
+ struct firmware_buf *buf = fw_priv->buf;
+
+ __fw_load_abort(buf);
/* avoid user action after loading abort */
fw_priv->buf = NULL;
@@ -467,6 +478,25 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
#define is_fw_load_aborted(buf) \
test_bit(FW_STATUS_ABORT, &(buf)->status)
+static LIST_HEAD(pending_fw_head);
+
+/* reboot notifier to avoid a deadlock with usermode_lock */
+static int fw_shutdown_notify(struct notifier_block *unused1,
+ unsigned long unused2, void *unused3)
+{
+ mutex_lock(&fw_lock);
+ while (!list_empty(&pending_fw_head))
+ __fw_load_abort(list_first_entry(&pending_fw_head,
+ struct firmware_buf,
+ pending_list));
+ mutex_unlock(&fw_lock);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block fw_shutdown_nb = {
+ .notifier_call = fw_shutdown_notify,
+};
+
static ssize_t firmware_timeout_show(struct class *class,
struct class_attribute *attr,
char *buf)
@@ -619,6 +649,7 @@ static ssize_t firmware_loading_store(struct device *dev,
* is completed.
* */
fw_map_pages_buf(fw_buf);
+ list_del_init(&fw_buf->pending_list);
complete_all(&fw_buf->completion);
break;
}
@@ -853,8 +884,15 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent,
goto err_del_dev;
}
+ mutex_lock(&fw_lock);
+ list_add(&buf->pending_list, &pending_fw_head);
+ mutex_unlock(&fw_lock);
+
retval = device_create_file(f_dev, &dev_attr_loading);
if (retval) {
+ mutex_lock(&fw_lock);
+ list_del_init(&buf->pending_list);
+ mutex_unlock(&fw_lock);
dev_err(f_dev, "%s: device_create_file failed\n", __func__);
goto err_del_bin_attr;
}
@@ -1529,6 +1567,7 @@ static int __init firmware_class_init(void)
{
fw_cache_init();
#ifdef CONFIG_FW_LOADER_USER_HELPER
+ register_reboot_notifier(&fw_shutdown_nb);
return class_register(&firmware_class);
#else
return 0;
@@ -1542,6 +1581,7 @@ static void __exit firmware_class_exit(void)
unregister_pm_notifier(&fw_cache.pm_notify);
#endif
#ifdef CONFIG_FW_LOADER_USER_HELPER
+ unregister_reboot_notifier(&fw_shutdown_nb);
class_unregister(&firmware_class);
#endif
}
diff --git a/drivers/base/kds/Kbuild b/drivers/base/kds/Kbuild
new file mode 100755
index 000000000000..a88acd8fdce8
--- /dev/null
+++ b/drivers/base/kds/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_KDS) += kds.o
+obj-$(CONFIG_KDS_TEST) += kds_test.o
diff --git a/drivers/base/kds/Kconfig b/drivers/base/kds/Kconfig
new file mode 100755
index 000000000000..5f96165f67d5
--- /dev/null
+++ b/drivers/base/kds/Kconfig
@@ -0,0 +1,20 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+config KDS
+ tristate "Kernel dependency system"
+ help
+ This option enables the generic kernel dependency system
diff --git a/drivers/base/kds/Makefile b/drivers/base/kds/Makefile
new file mode 100755
index 000000000000..364d1515c1ae
--- /dev/null
+++ b/drivers/base/kds/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+CONFIG_KDS_TEST ?= n
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all: kds
+
+kds:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS=m
+
+kds_test:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_KDS_TEST=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/base/kds/kds.c b/drivers/base/kds/kds.c
new file mode 100755
index 000000000000..56f61f7bddc9
--- /dev/null
+++ b/drivers/base/kds/kds.c
@@ -0,0 +1,545 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/kds.h>
+
+#include <asm/atomic.h>
+
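+/*
+ * Each kds_resource keeps a FIFO of waiters (kds_link entries).  A waiter
+ * set (kds_resource_set) becomes "triggered" once every resource it asked
+ * for has been granted to it; its callback then runs either directly or
+ * from a workqueue, depending on how the kds_callback was initialised.
+ */
+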
+#define KDS_LINK_TRIGGERED (1u << 0)
+#define KDS_LINK_EXCLUSIVE (1u << 1)
+
+#define KDS_INVALID (void *)-2
+#define KDS_RESOURCE (void *)-1
+
+struct kds_resource_set
+{
+ unsigned long num_resources;
+ unsigned long pending;
+ struct kds_callback *cb;
+ void *callback_parameter;
+ void *callback_extra_parameter;
+ struct list_head callback_link;
+ struct work_struct callback_work;
+ atomic_t cb_queued;
+
+	/* This is only initialised when kds_waitall() is called. */
+ wait_queue_head_t wake;
+
+ struct kds_link resources[0];
+};
+
+static DEFINE_SPINLOCK(kds_lock);
+
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb)
+{
+ int ret = 0;
+
+ cb->direct = direct;
+ cb->user_cb = user_cb;
+
+ if (!direct)
+ {
+ cb->wq = alloc_workqueue("kds", WQ_UNBOUND | WQ_HIGHPRI, WQ_UNBOUND_MAX_ACTIVE);
+ if (!cb->wq)
+ ret = -ENOMEM;
+ }
+ else
+ {
+ cb->wq = NULL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(kds_callback_init);
+
+void kds_callback_term(struct kds_callback *cb)
+{
+ if (!cb->direct)
+ {
+ BUG_ON(!cb->wq);
+ destroy_workqueue(cb->wq);
+ }
+ else
+ {
+ BUG_ON(cb->wq);
+ }
+}
+
+EXPORT_SYMBOL(kds_callback_term);
+
+static void kds_do_user_callback(struct kds_resource_set *rset)
+{
+ rset->cb->user_cb(rset->callback_parameter, rset->callback_extra_parameter);
+}
+
+static void kds_queued_callback(struct work_struct *work)
+{
+ struct kds_resource_set *rset;
+ rset = container_of(work, struct kds_resource_set, callback_work);
+
+ atomic_dec(&rset->cb_queued);
+
+ kds_do_user_callback(rset);
+}
+
+static void kds_callback_perform(struct kds_resource_set *rset)
+{
+ if (rset->cb->direct)
+ kds_do_user_callback(rset);
+ else
+ {
+ int result;
+
+ atomic_inc(&rset->cb_queued);
+
+ result = queue_work(rset->cb->wq, &rset->callback_work);
+ /* if we got a 0 return it means we've triggered the same rset twice! */
+ WARN_ON(!result);
+ }
+}
+
+void kds_resource_init(struct kds_resource * const res)
+{
+ BUG_ON(!res);
+ INIT_LIST_HEAD(&res->waiters.link);
+ res->waiters.parent = KDS_RESOURCE;
+}
+EXPORT_SYMBOL(kds_resource_init);
+
+int kds_resource_term(struct kds_resource *res)
+{
+ unsigned long lflags;
+ BUG_ON(!res);
+ spin_lock_irqsave(&kds_lock, lflags);
+ if (!list_empty(&res->waiters.link))
+ {
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ printk(KERN_ERR "ERROR: KDS resource is still in use\n");
+ return -EBUSY;
+ }
+ res->waiters.parent = KDS_INVALID;
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ return 0;
+}
+EXPORT_SYMBOL(kds_resource_term);
+
+int kds_async_waitall(
+ struct kds_resource_set ** const pprset,
+ struct kds_callback *cb,
+ void *callback_parameter,
+ void *callback_extra_parameter,
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list)
+{
+ struct kds_resource_set *rset = NULL;
+ unsigned long lflags;
+ int i;
+ int triggered;
+ int err = -EFAULT;
+
+ BUG_ON(!pprset);
+ BUG_ON(!resource_list);
+ BUG_ON(!cb);
+
+	WARN_ONCE(number_resources > 10, "Waiting on a high number of resources may increase latency, see documentation.");
+
+ rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+ if (!rset)
+ {
+ return -ENOMEM;
+ }
+
+ rset->num_resources = number_resources;
+ rset->pending = number_resources;
+ rset->cb = cb;
+ rset->callback_parameter = callback_parameter;
+ rset->callback_extra_parameter = callback_extra_parameter;
+ INIT_LIST_HEAD(&rset->callback_link);
+ INIT_WORK(&rset->callback_work, kds_queued_callback);
+ atomic_set(&rset->cb_queued, 0);
+
+ for (i = 0; i < number_resources; i++)
+ {
+ INIT_LIST_HEAD(&rset->resources[i].link);
+ rset->resources[i].parent = rset;
+ }
+
+ spin_lock_irqsave(&kds_lock, lflags);
+
+ for (i = 0; i < number_resources; i++)
+ {
+ unsigned long link_state = 0;
+
+ if (test_bit(i, exclusive_access_bitmap))
+ {
+ link_state |= KDS_LINK_EXCLUSIVE;
+ }
+
+ /* no-one else waiting? */
+ if (list_empty(&resource_list[i]->waiters.link))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+ /* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+ else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+ (((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+ rset->resources[i].state = link_state;
+
+ /* avoid double wait (hang) */
+ if (!list_empty(&resource_list[i]->waiters.link))
+ {
+ /* adding same rset again? */
+ if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+ {
+ goto roll_back;
+ }
+ }
+ list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+ }
+
+ triggered = (rset->pending == 0);
+
+ /* set the pointer before the callback is called so it sees it */
+ *pprset = rset;
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+
+ if (triggered)
+ {
+ /* all resources obtained, trigger callback */
+ kds_callback_perform(rset);
+ }
+
+ return 0;
+
+roll_back:
+ /* roll back */
+ while (i-- > 0)
+ {
+ list_del(&rset->resources[i].link);
+ }
+ err = -EINVAL;
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ kfree(rset);
+ return err;
+}
+EXPORT_SYMBOL(kds_async_waitall);
+
+static void wake_up_sync_call(void *callback_parameter, void *callback_extra_parameter)
+{
+ wait_queue_head_t *wait = (wait_queue_head_t *)callback_parameter;
+ wake_up(wait);
+}
+
+static struct kds_callback sync_cb = {
+	.user_cb = wake_up_sync_call,
+	.direct = 1,
+	.wq = NULL,
+};
+
+struct kds_resource_set *kds_waitall(
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list,
+ unsigned long jiffies_timeout)
+{
+ struct kds_resource_set *rset;
+ unsigned long lflags;
+ int i;
+ int triggered = 0;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+ rset = kmalloc(sizeof(*rset) + number_resources * sizeof(struct kds_link), GFP_KERNEL);
+ if (!rset)
+ return rset;
+
+ rset->num_resources = number_resources;
+ rset->pending = number_resources;
+ init_waitqueue_head(&rset->wake);
+ INIT_LIST_HEAD(&rset->callback_link);
+ INIT_WORK(&rset->callback_work, kds_queued_callback);
+ atomic_set(&rset->cb_queued, 0);
+
+ spin_lock_irqsave(&kds_lock, lflags);
+
+ for (i = 0; i < number_resources; i++)
+ {
+ unsigned long link_state = 0;
+
+ if (test_bit(i, exclusive_access_bitmap))
+ {
+ link_state |= KDS_LINK_EXCLUSIVE;
+ }
+
+ if (list_empty(&resource_list[i]->waiters.link))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+ /* Adding a non-exclusive and the current tail is a triggered non-exclusive? */
+ else if (((link_state & KDS_LINK_EXCLUSIVE) == 0) &&
+ (((list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->state & (KDS_LINK_EXCLUSIVE | KDS_LINK_TRIGGERED)) == KDS_LINK_TRIGGERED)))
+ {
+ link_state |= KDS_LINK_TRIGGERED;
+ rset->pending--;
+ }
+
+ INIT_LIST_HEAD(&rset->resources[i].link);
+ rset->resources[i].parent = rset;
+ rset->resources[i].state = link_state;
+
+ /* avoid double wait (hang) */
+ if (!list_empty(&resource_list[i]->waiters.link))
+ {
+ /* adding same rset again? */
+ if (list_entry(resource_list[i]->waiters.link.prev, struct kds_link, link)->parent == rset)
+ {
+ goto roll_back;
+ }
+ }
+
+ list_add_tail(&rset->resources[i].link, &resource_list[i]->waiters.link);
+ }
+
+ if (rset->pending == 0)
+ triggered = 1;
+ else
+ {
+ rset->cb = &sync_cb;
+ rset->callback_parameter = &rset->wake;
+ rset->callback_extra_parameter = NULL;
+ }
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+
+ if (!triggered)
+ {
+ long wait_res = 0;
+ long timeout = (jiffies_timeout == KDS_WAIT_BLOCKING) ?
+ MAX_SCHEDULE_TIMEOUT : jiffies_timeout;
+
+ if (timeout)
+ {
+ wait_res = wait_event_interruptible_timeout(rset->wake,
+ rset->pending == 0, timeout);
+ }
+
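+		/* Interrupted or timed out: undo the wait.  Note that a timeout
+		 * (wait_res == 0) yields ERR_PTR(0), i.e. NULL, for the caller. */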
+ if ((wait_res == -ERESTARTSYS) || (wait_res == 0))
+ {
+ /* use \a kds_resource_set_release to roll back */
+ kds_resource_set_release(&rset);
+ return ERR_PTR(wait_res);
+ }
+ }
+ return rset;
+
+roll_back:
+ /* roll back */
+ while (i-- > 0)
+ {
+ list_del(&rset->resources[i].link);
+ }
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+ kfree(rset);
+ return ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL(kds_waitall);
+
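+/*
+ * Unlink every waiter belonging to @rset and, where it sat at the head of a
+ * resource's queue (or held an exclusive grant), trigger the next eligible
+ * waiters.  Newly completed sets are collected on a local list and their
+ * callbacks are invoked only after the spinlock has been dropped.
+ */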
+static void __kds_resource_set_release_common(struct kds_resource_set *rset)
+{
+ struct list_head triggered = LIST_HEAD_INIT(triggered);
+ struct kds_resource_set *it;
+ unsigned long lflags;
+ int i;
+
+ spin_lock_irqsave(&kds_lock, lflags);
+
+ for (i = 0; i < rset->num_resources; i++)
+ {
+ struct kds_resource *resource;
+ struct kds_link *it = NULL;
+
+ /* fetch the previous entry on the linked list */
+ it = list_entry(rset->resources[i].link.prev, struct kds_link, link);
+ /* unlink ourself */
+ list_del(&rset->resources[i].link);
+
+ /* any waiters? */
+ if (list_empty(&it->link))
+ continue;
+
+ /* were we the head of the list? (head if prev is a resource) */
+ if (it->parent != KDS_RESOURCE)
+ {
+ if ((it->state & KDS_LINK_TRIGGERED) && !(it->state & KDS_LINK_EXCLUSIVE))
+ {
+ /*
+ * previous was triggered and not exclusive, so we
+ * trigger non-exclusive until end-of-list or first
+ * exclusive
+ */
+
+ struct kds_link *it_waiting = it;
+
+ list_for_each_entry(it, &it_waiting->link, link)
+ {
+ /* exclusive found, stop triggering */
+ if (it->state & KDS_LINK_EXCLUSIVE)
+ break;
+
+ it->state |= KDS_LINK_TRIGGERED;
+ /* a parent to update? */
+ if (it->parent != KDS_RESOURCE)
+ {
+ if (0 == --it->parent->pending)
+ {
+ /* new owner now triggered, track for callback later */
+ list_add(&it->parent->callback_link, &triggered);
+ }
+ }
+ }
+ }
+ continue;
+ }
+
+ /* we were the head, find the kds_resource */
+ resource = container_of(it, struct kds_resource, waiters);
+
+ /* we know there is someone waiting from the any-waiters test above */
+
+ /* find the head of the waiting list */
+ it = list_first_entry(&resource->waiters.link, struct kds_link, link);
+
+ /* new exclusive owner? */
+ if (it->state & KDS_LINK_EXCLUSIVE)
+ {
+ /* link now triggered */
+ it->state |= KDS_LINK_TRIGGERED;
+ /* a parent to update? */
+ if (0 == --it->parent->pending)
+ {
+ /* new owner now triggered, track for callback later */
+ list_add(&it->parent->callback_link, &triggered);
+ }
+ }
+ /* exclusive releasing ? */
+ else if (rset->resources[i].state & KDS_LINK_EXCLUSIVE)
+ {
+ /* trigger non-exclusive until end-of-list or first exclusive */
+ list_for_each_entry(it, &resource->waiters.link, link)
+ {
+ /* exclusive found, stop triggering */
+ if (it->state & KDS_LINK_EXCLUSIVE)
+ break;
+
+ it->state |= KDS_LINK_TRIGGERED;
+ /* a parent to update? */
+ if (0 == --it->parent->pending)
+ {
+ /* new owner now triggered, track for callback later */
+ list_add(&it->parent->callback_link, &triggered);
+ }
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&kds_lock, lflags);
+
+ while (!list_empty(&triggered))
+ {
+ it = list_first_entry(&triggered, struct kds_resource_set, callback_link);
+ list_del(&it->callback_link);
+ kds_callback_perform(it);
+ }
+}
+
+void kds_resource_set_release(struct kds_resource_set **pprset)
+{
+ struct kds_resource_set *rset;
+ int queued;
+
+ rset = cmpxchg(pprset, *pprset, NULL);
+
+ if (!rset)
+ {
+		/* caught a race between a cancellation
+		 * and a completion, nothing to do */
+ return;
+ }
+
+ __kds_resource_set_release_common(rset);
+
+ /*
+ * Caller is responsible for guaranteeing that callback work is not
+ * pending (i.e. its running or completed) prior to calling release.
+ */
+ queued = atomic_read(&rset->cb_queued);
+ BUG_ON(queued);
+
+ /* free the resource set */
+ kfree(rset);
+}
+EXPORT_SYMBOL(kds_resource_set_release);
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset)
+{
+ struct kds_resource_set *rset;
+
+ rset = cmpxchg(pprset, *pprset, NULL);
+ if (!rset)
+ {
+		/* caught a race between a cancellation
+		 * and a completion, nothing to do */
+ return;
+ }
+
+ __kds_resource_set_release_common(rset);
+
+ /*
+	 * In the case of a kds async wait cancellation, ensure the deferred
+	 * callback does not get scheduled if a trigger that releases the wait
+	 * fired at the same time.
+ */
+ cancel_work_sync(&rset->callback_work);
+
+ /* free the resource set */
+ kfree(rset);
+}
+EXPORT_SYMBOL(kds_resource_set_release_sync);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
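
A minimal caller-side sketch of the two release paths above. The wrapper names and the way the resource set was originally obtained (e.g. from a kds_async_waitall() request) are illustrative assumptions, not part of this file:

#include <linux/kds.h>

/* Hypothetical completion path: the callback has already run (or none was
 * registered), so the plain release is enough. */
static void example_put_resources(struct kds_resource_set **prset)
{
	kds_resource_set_release(prset);
}

/* Hypothetical cancellation path: a trigger may be racing with us, so wait
 * for any queued callback work to finish before the set is freed. */
static void example_cancel_wait(struct kds_resource_set **prset)
{
	kds_resource_set_release_sync(prset);
}
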
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 5a9b6569dd74..5131ad8ca17f 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,6 +29,9 @@
#include <linux/async.h>
#include <linux/suspend.h>
#include <linux/cpuidle.h>
+#include <linux/timer.h>
+#include <linux/wakeup_reason.h>
+
#include "../base.h"
#include "power.h"
@@ -54,6 +57,12 @@ struct suspend_stats suspend_stats;
static DEFINE_MUTEX(dpm_list_mtx);
static pm_message_t pm_transition;
+struct dpm_watchdog {
+ struct device *dev;
+ struct task_struct *tsk;
+ struct timer_list timer;
+};
+
static int async_error;
/**
@@ -384,6 +393,56 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
return error;
}
+/**
+ * dpm_wd_handler - Driver suspend / resume watchdog handler.
+ * @data: Watchdog struct, cast to unsigned long by the timer core.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover, so BUG() out for
+ * a crash-dump.
+ */
+static void dpm_wd_handler(unsigned long data)
+{
+ struct dpm_watchdog *wd = (void *)data;
+ struct device *dev = wd->dev;
+ struct task_struct *tsk = wd->tsk;
+
+ dev_emerg(dev, "**** DPM device timeout ****\n");
+ show_stack(tsk, NULL);
+
+ BUG();
+}
+
+/**
+ * dpm_wd_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev)
+{
+ struct timer_list *timer = &wd->timer;
+
+ wd->dev = dev;
+ wd->tsk = get_current();
+
+ init_timer_on_stack(timer);
+ timer->expires = jiffies + HZ * 12;
+ timer->function = dpm_wd_handler;
+ timer->data = (unsigned long)wd;
+ add_timer(timer);
+}
+
+/**
+ * dpm_wd_clear - Disable pm watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_wd_clear(struct dpm_watchdog *wd)
+{
+ struct timer_list *timer = &wd->timer;
+
+ del_timer_sync(timer);
+ destroy_timer_on_stack(timer);
+}
+
/*------------------------- Resume routines -------------------------*/
/**
@@ -570,6 +629,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+ struct dpm_watchdog wd;
TRACE_DEVICE(dev);
TRACE_RESUME(0);
@@ -585,6 +645,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
* a resumed device, even if the device hasn't been completed yet.
*/
dev->power.is_prepared = false;
+ dpm_wd_set(&wd, dev);
if (!dev->power.is_suspended)
goto Unlock;
@@ -636,6 +697,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
Unlock:
device_unlock(dev);
+ dpm_wd_clear(&wd);
Complete:
complete_all(&dev->power.completion);
@@ -877,6 +939,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
static int dpm_suspend_noirq(pm_message_t state)
{
ktime_t starttime = ktime_get();
+ char suspend_abort[MAX_SUSPEND_ABORT_LEN];
int error = 0;
cpuidle_pause();
@@ -904,6 +967,9 @@ static int dpm_suspend_noirq(pm_message_t state)
put_device(dev);
if (pm_wakeup_pending()) {
+ pm_get_active_wakeup_sources(suspend_abort,
+ MAX_SUSPEND_ABORT_LEN);
+ log_suspend_abort_reason(suspend_abort);
error = -EBUSY;
break;
}
@@ -962,6 +1028,7 @@ static int device_suspend_late(struct device *dev, pm_message_t state)
static int dpm_suspend_late(pm_message_t state)
{
ktime_t starttime = ktime_get();
+ char suspend_abort[MAX_SUSPEND_ABORT_LEN];
int error = 0;
mutex_lock(&dpm_list_mtx);
@@ -987,6 +1054,9 @@ static int dpm_suspend_late(pm_message_t state)
put_device(dev);
if (pm_wakeup_pending()) {
+ pm_get_active_wakeup_sources(suspend_abort,
+ MAX_SUSPEND_ABORT_LEN);
+ log_suspend_abort_reason(suspend_abort);
error = -EBUSY;
break;
}
@@ -1053,6 +1123,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+ struct dpm_watchdog wd;
+ char suspend_abort[MAX_SUSPEND_ABORT_LEN];
dpm_wait_for_children(dev, async);
@@ -1069,12 +1141,17 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
pm_wakeup_event(dev, 0);
if (pm_wakeup_pending()) {
+ pm_get_active_wakeup_sources(suspend_abort,
+ MAX_SUSPEND_ABORT_LEN);
+ log_suspend_abort_reason(suspend_abort);
async_error = -EBUSY;
goto Complete;
}
if (dev->power.syscore)
goto Complete;
+
+ dpm_wd_set(&wd, dev);
device_lock(dev);
@@ -1131,6 +1208,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
device_unlock(dev);
+ dpm_wd_clear(&wd);
+
Complete:
complete_all(&dev->power.completion);
if (error)
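
The dpm_wd_* helpers added above are meant to bracket a single device callback on the stack of the task doing the suspend or resume, exactly as the device_resume() and __device_suspend() hunks do. A condensed sketch of the pattern, as if placed in this file, with my_dev and my_resume_cb as placeholders:

static int example_watched_callback(struct device *my_dev, pm_callback_t my_resume_cb)
{
	struct dpm_watchdog wd;		/* must live on this task's stack */
	int error;

	dpm_wd_set(&wd, my_dev);	/* arms a 12 second (HZ * 12) timer */
	error = my_resume_cb(my_dev);	/* the callback being watched */
	dpm_wd_clear(&wd);		/* disarm before the stack frame goes away */

	return error;
}
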
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 79715e7fa43e..4b35cc08c3b6 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -14,6 +14,7 @@
#include <linux/suspend.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/types.h>
#include <trace/events/power.h>
#include "power.h"
@@ -659,6 +660,37 @@ void pm_wakeup_event(struct device *dev, unsigned int msec)
}
EXPORT_SYMBOL_GPL(pm_wakeup_event);
+void pm_get_active_wakeup_sources(char *pending_wakeup_source, size_t max)
+{
+ struct wakeup_source *ws, *last_active_ws = NULL;
+ int len = 0;
+ bool active = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+ if (ws->active) {
+ if (!active)
+ len += scnprintf(pending_wakeup_source, max,
+ "Pending Wakeup Sources: ");
+ len += scnprintf(pending_wakeup_source + len, max - len,
+ "%s ", ws->name);
+ active = true;
+ } else if (!active &&
+ (!last_active_ws ||
+ ktime_to_ns(ws->last_time) >
+ ktime_to_ns(last_active_ws->last_time))) {
+ last_active_ws = ws;
+ }
+ }
+ if (!active && last_active_ws) {
+ scnprintf(pending_wakeup_source, max,
+ "Last active Wakeup Source: %s",
+ last_active_ws->name);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pm_get_active_wakeup_sources);
+
static void print_active_wakeup_sources(void)
{
struct wakeup_source *ws;
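
The helper added above fills a caller-supplied buffer with either the list of currently active wakeup sources or, if none is active, the one that was active most recently. A sketch of the expected call pattern, mirroring the dpm_suspend_noirq()/dpm_suspend_late() hunks in main.c; the function name is a placeholder:

#include <linux/suspend.h>
#include <linux/wakeup_reason.h>

static int example_check_for_abort(void)
{
	char reason[MAX_SUSPEND_ABORT_LEN];

	if (!pm_wakeup_pending())
		return 0;

	/* fills either "Pending Wakeup Sources: <names>" or
	 * "Last active Wakeup Source: <name>" */
	pm_get_active_wakeup_sources(reason, MAX_SUSPEND_ABORT_LEN);
	log_suspend_abort_reason(reason);
	return -EBUSY;
}
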
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index e8d11b6630ee..0ab546558c4e 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -10,6 +10,7 @@
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/wakeup_reason.h>
static LIST_HEAD(syscore_ops_list);
static DEFINE_MUTEX(syscore_ops_lock);
@@ -73,6 +74,8 @@ int syscore_suspend(void)
return 0;
err_out:
+ log_suspend_abort_reason("System core suspend callback %pF failed",
+ ops->suspend);
pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend);
list_for_each_entry_continue(ops, &syscore_ops_list, node)
diff --git a/drivers/base/ump/Kbuild b/drivers/base/ump/Kbuild
new file mode 100755
index 000000000000..2bbdba27d76f
--- /dev/null
+++ b/drivers/base/ump/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += src/
+
diff --git a/drivers/base/ump/Kconfig b/drivers/base/ump/Kconfig
new file mode 100755
index 000000000000..f7451e67a5ee
--- /dev/null
+++ b/drivers/base/ump/Kconfig
@@ -0,0 +1,26 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+config UMP
+ tristate "Enable Unified Memory Provider (UMP) support"
+ default n
+ help
+ Enable this option to build support for the ARM UMP module.
+ UMP can be used by the Mali T6xx module to improve performance
+ by sharing memory between devices instead of copying data.
+
+ To compile this driver as a module, choose M here:
+ this will generate one module, called ump.
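
For reference, a defconfig fragment selecting the modular build would carry a single line:

CONFIG_UMP=m
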
diff --git a/drivers/base/ump/docs/Doxyfile b/drivers/base/ump/docs/Doxyfile
new file mode 100755
index 000000000000..fbec8eb40a02
--- /dev/null
+++ b/drivers/base/ump/docs/Doxyfile
@@ -0,0 +1,125 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT += ../../kernel/include/linux/ump-common.h ../../kernel/include/linux/ump.h
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH += ../../kernel/drivers/base/ump
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS +=
diff --git a/drivers/base/ump/example_kernel_api.c b/drivers/base/ump/example_kernel_api.c
new file mode 100755
index 000000000000..eb9b12930f8a
--- /dev/null
+++ b/drivers/base/ump/example_kernel_api.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Example routine to display information about a UMP allocation.
+ * The routine takes a secure_id which can come from a different kernel module
+ * or from a client application (i.e. via an ioctl).
+ * It creates a UMP handle from the secure id (which validates the secure id)
+ * and, if successful, dumps the physical memory information.
+ * It follows the API and pins the memory while "using" the physical memory.
+ * Finally it calls the release function to indicate it's finished with the handle.
+ *
+ * If the function can't look up the handle it fails with return value -1.
+ * If the lookup succeeds it returns 0.
+ */
+
+static int display_ump_memory_information(ump_secure_id secure_id)
+{
+ const ump_dd_physical_block_64 * ump_blocks = NULL;
+ ump_dd_handle ump_mem;
+ uint64_t nr_blocks;
+ int i;
+ ump_alloc_flags flags;
+
+ /* get a handle from the secure id */
+ ump_mem = ump_dd_from_secure_id(secure_id);
+
+ if (UMP_DD_INVALID_MEMORY_HANDLE == ump_mem)
+ {
+ /* invalid ID received */
+ return -1;
+ }
+
+ /* at this point we know we've added a reference to the ump allocation, so we must release it with ump_dd_release */
+
+ ump_dd_phys_blocks_get_64(ump_mem, &nr_blocks, &ump_blocks);
+ flags = ump_dd_allocation_flags_get(ump_mem);
+
+ printf("UMP allocation with secure ID %u consists of %zd physical block(s):\n", secure_id, nr_blocks);
+
+ for(i=0; i<nr_blocks; ++i)
+ {
+ printf("\tBlock %d: 0x%08zX size 0x%08zX\n", i, ump_blocks[i].addr, ump_blocks[i].size);
+ }
+
+ printf("and was allocated using the following bitflag combo: 0x%lX\n", flags);
+
+ ump_dd_release(ump_mem);
+
+ return 0;
+}
+
diff --git a/drivers/base/ump/example_user_api.c b/drivers/base/ump/example_user_api.c
new file mode 100755
index 000000000000..5ae4650fc4ea
--- /dev/null
+++ b/drivers/base/ump/example_user_api.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <ump/ump.h>
+#include <memory.h>
+#include <stdio.h>
+
+/*
+ * Example routine to exercise the user space UMP API.
+ * This routine initializes the UMP API and allocates some CPU+device X memory.
+ * No usage hints are given, so the driver will use the default cacheability policy.
+ * With the allocation it creates a duplicate handle and plays with the reference count.
+ * Then it simulates interacting with a device and contains pseudo-code for the device.
+ *
+ * If any error is detected, correct cleanup will be performed and -1 will be returned.
+ * If successful then 0 will be returned.
+ */
+
+static int test_ump_user_api(void)
+{
+ /* This is the size we try to allocate*/
+ const size_t alloc_size = 4096;
+
+ ump_handle h = UMP_INVALID_MEMORY_HANDLE;
+ ump_handle h_copy = UMP_INVALID_MEMORY_HANDLE;
+ ump_handle h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+ void * mapping = NULL;
+
+ ump_result ump_api_res;
+ int result = -1;
+
+ ump_secure_id id;
+
+ size_t size_returned;
+
+ ump_api_res = ump_open();
+ if (UMP_OK != ump_api_res)
+ {
+ /* failed to open an ump session */
+ /* early out */
+ return -1;
+ }
+
+ h = ump_allocate_64(alloc_size, UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | UMP_PROT_X_RD | UMP_PROT_X_WR);
+ /* the refcount is now 1 */
+ if (UMP_INVALID_MEMORY_HANDLE == h)
+ {
+ /* allocation failure */
+ goto cleanup;
+ }
+
+ /* this is how we could share this allocation with another process */
+
+ /* in process A: */
+ id = ump_secure_id_get(h);
+ /* still ref count 1 */
+ /* send the id to process B */
+
+ /* in process B: */
+ /* receive the id from A */
+ h_clone = ump_from_secure_id(id);
+ /* the ref count of the allocation is now 2 (one from each handle to it) */
+ /* do something ... */
+ /* release our clone */
+ ump_release(h_clone); /* safe to call even if ump_from_secure_id failed */
+ h_clone = UMP_INVALID_MEMORY_HANDLE;
+
+
+ /* a simple save-for-future-use logic inside the driver would just copy the handle (but add a ref manually too!) */
+ /*
+ * void assign_memory_to_job(h)
+ * {
+ */
+ h_copy = h;
+ ump_retain(h_copy); /* manual retain needed as we just assigned the handle, now 2 */
+ /*
+ * }
+ *
+ * void job_completed(void)
+ * {
+ */
+ ump_release(h_copy); /* normal handle release as if we got via an ump_allocate */
+ h_copy = UMP_INVALID_MEMORY_HANDLE;
+ /*
+ * }
+ */
+
+ /* we're now back at ref count 1, and only h is a valid handle */
+ /* enough handle duplication show-off, let's play with the contents instead */
+
+ mapping = ump_map(h, 0, alloc_size);
+ if (NULL == mapping)
+ {
+ /* mapping failure, either out of address space or some other error */
+ goto cleanup;
+ }
+
+ memset(mapping, 0, alloc_size);
+
+ /* let's pretend we're going to start some hw device on this buffer and read the result afterwards */
+ ump_cpu_msync_now(h, UMP_MSYNC_CLEAN, mapping, alloc_size);
+ /*
+ device cache invalidate
+
+ memory barrier
+
+ start device
+
+ memory barrier
+
+ wait for device
+
+ memory barrier
+
+ device cache clean
+
+ memory barrier
+ */
+ ump_cpu_msync_now(h, UMP_MSYNC_CLEAN_AND_INVALIDATE, mapping, alloc_size);
+
+ /* we could now peek at the result produced by the hw device, which is now accessible via our mapping */
+
+ /* unmap the buffer when we're done with it */
+ ump_unmap(h, mapping, alloc_size);
+
+ result = 0;
+
+cleanup:
+ ump_release(h);
+ h = UMP_INVALID_MEMORY_HANDLE;
+
+ ump_close();
+
+ return result;
+}
+
diff --git a/drivers/base/ump/src/Kbuild b/drivers/base/ump/src/Kbuild
new file mode 100755
index 000000000000..de6d30770d15
--- /dev/null
+++ b/drivers/base/ump/src/Kbuild
@@ -0,0 +1,50 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# Paths required for build
+UMP_PATH = $(src)/../..
+UMP_DEVICEDRV_PATH = $(src)/.
+
+# Set up defaults if not defined by the user
+MALI_UNIT_TEST ?= 0
+
+SRC :=\
+ common/ump_kernel_core.c \
+ common/ump_kernel_descriptor_mapping.c \
+ linux/ump_kernel_linux.c \
+ linux/ump_kernel_linux_mem.c
+
+UNIT_TEST_DEFINES=
+ifeq ($(MALI_UNIT_TEST), 1)
+ MALI_DEBUG ?= 1
+
+ UNIT_TEST_DEFINES = -DMALI_UNIT_TEST=1 \
+ -DMALI_DEBUG=$(MALI_DEBUG)
+endif
+
+# Use our defines when compiling
+ccflags-y += -I$(UMP_PATH) -I$(UMP_DEVICEDRV_PATH) $(UNIT_TEST_DEFINES)
+
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_UMP) += ump.o
+ifeq ($(CONFIG_ION),y)
+ccflags-y += -I$(srctree)/drivers/staging/android/ion -I$(srctree)/include/linux
+obj-$(CONFIG_UMP) += imports/ion/ump_kernel_import_ion.o
+endif
+
+# Tell the Linux build system to enable building of our .c files
+ump-y := $(SRC:.c=.o)
diff --git a/drivers/base/ump/src/Makefile b/drivers/base/ump/src/Makefile
new file mode 100755
index 000000000000..45428adbdf77
--- /dev/null
+++ b/drivers/base/ump/src/Makefile
@@ -0,0 +1,81 @@
+#
+# (C) COPYRIGHT 2008-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+ifneq ($(KBUILD_EXTMOD),)
+include $(KBUILD_EXTMOD)/Makefile.common
+else
+include ./Makefile.common
+endif
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+RELATIVE_ROOT=../../../../..
+ROOT = $(CURDIR)/$(RELATIVE_ROOT)
+
+EXTRA_CFLAGS=-I$(CURDIR)/../../../../include
+
+ifeq ($(MALI_UNIT_TEST),1)
+ EXTRA_CFLAGS += -DMALI_UNIT_TEST=$(MALI_UNIT_TEST)
+endif
+
+# Get any user defined KDIR-<names> or maybe even a hardcoded KDIR
+-include KDIR_CONFIGURATION
+
+# Define host system directory
+KDIR-$(shell uname -m):=/lib/modules/$(shell uname -r)/build
+
+CONFIG ?= $(ARCH)
+
+# default cpu to select
+CPU ?= $(shell uname -m)
+
+# look up KDIR based on CPU selection
+KDIR ?= $(KDIR-$(CPU))
+
+ifeq ($(KDIR),)
+$(error No KDIR found for platform $(CPU))
+endif
+
+# Validate selected config
+ifneq ($(shell [ -d arch-$(CONFIG) ] && [ -f arch-$(CONFIG)/config.h ] && echo "OK"), OK)
+$(warning Current directory is $(shell pwd))
+$(error No configuration found for config $(CONFIG). Check that arch-$(CONFIG)/config.h exists)
+else
+# Link arch to the selected arch-config directory
+$(shell [ -L arch ] && rm arch)
+$(shell ln -sf arch-$(CONFIG) arch)
+$(shell touch arch/config.h)
+endif
+
+EXTRA_SYMBOLS=
+
+ifeq ($(MALI_UNIT_TEST),1)
+ KBASE_PATH=$(ROOT)/kernel/drivers/gpu/arm/midgard
+ EXTRA_SYMBOLS+=$(KBASE_PATH)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+KDS_PATH=$(ROOT)/kernel/drivers/base/kds
+EXTRA_SYMBOLS+=$(KDS_PATH)/Module.symvers
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="$(EXTRA_CFLAGS) $(SCONS_CFLAGS)" CONFIG_UMP=m KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+kernelrelease:
+ $(MAKE) -C $(KDIR) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" kernelrelease
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
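
A sample out-of-tree invocation of this Makefile; the kernel path and toolchain prefix are placeholders, while CONFIG, ARCH and KDIR are the variables handled above (command-line variables are also passed down to the sub-make run in $(KDIR)):

make CONFIG=arm64 ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- KDIR=/path/to/prebuilt/kernel
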
diff --git a/drivers/base/ump/src/Makefile.common b/drivers/base/ump/src/Makefile.common
new file mode 100755
index 000000000000..f29a4c1cffa5
--- /dev/null
+++ b/drivers/base/ump/src/Makefile.common
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2008-2010, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+SRC = $(UMP_FILE_PREFIX)/common/ump_kernel_core.c \
+ $(UMP_FILE_PREFIX)/common/ump_kernel_descriptor_mapping.c
+
diff --git a/drivers/base/ump/src/arch-arm/config.h b/drivers/base/ump/src/arch-arm/config.h
new file mode 100755
index 000000000000..afb44d6cb745
--- /dev/null
+++ b/drivers/base/ump/src/arch-arm/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT 1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/drivers/base/ump/src/arch-arm64/config.h b/drivers/base/ump/src/arch-arm64/config.h
new file mode 100755
index 000000000000..afb44d6cb745
--- /dev/null
+++ b/drivers/base/ump/src/arch-arm64/config.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef __ARCH_CONFIG_H__
+#define __ARCH_CONFIG_H__
+
+#define ARCH_UMP_BACKEND_DEFAULT 1
+#define ARCH_UMP_MEMORY_ADDRESS_DEFAULT 0x00000000
+#define ARCH_UMP_MEMORY_SIZE_DEFAULT 32UL * 1024UL * 1024UL
+
+#endif /* __ARCH_CONFIG_H__ */
diff --git a/drivers/base/ump/src/common/ump_kernel_core.c b/drivers/base/ump/src/common/ump_kernel_core.c
new file mode 100755
index 000000000000..95bb2ddb0bd6
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_core.c
@@ -0,0 +1,756 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* module headers */
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+/* local headers */
+#include <common/ump_kernel_core.h>
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+#define UMP_FLAGS_RANGE ((UMP_PROT_SHAREABLE<<1) - 1u)
+
+static umpp_device device;
+
+ump_result umpp_core_constructor(void)
+{
+ mutex_init(&device.secure_id_map_lock);
+ device.secure_id_map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+ if (NULL != device.secure_id_map)
+ {
+ if (UMP_OK == umpp_device_initialize())
+ {
+ return UMP_OK;
+ }
+ umpp_descriptor_mapping_destroy(device.secure_id_map);
+ }
+ mutex_destroy(&device.secure_id_map_lock);
+
+ return UMP_ERROR;
+}
+
+void umpp_core_destructor(void)
+{
+ umpp_device_terminate();
+ umpp_descriptor_mapping_destroy(device.secure_id_map);
+ mutex_destroy(&device.secure_id_map_lock);
+}
+
+umpp_session *umpp_core_session_start(void)
+{
+ umpp_session * session;
+
+ session = kzalloc(sizeof(*session), GFP_KERNEL);
+ if (NULL != session)
+ {
+ mutex_init(&session->session_lock);
+
+ INIT_LIST_HEAD(&session->memory_usage);
+
+ /* try to create import client sessions; it is not a failure if they fail to initialize */
+ umpp_import_handlers_init(session);
+ }
+
+ return session;
+}
+
+void umpp_core_session_end(umpp_session *session)
+{
+ umpp_session_memory_usage * usage, *_usage;
+ UMP_ASSERT(session);
+
+ list_for_each_entry_safe(usage, _usage, &session->memory_usage, link)
+ {
+ printk(KERN_WARNING "UMP: Memory usage cleanup, releasing secure ID %d\n", ump_dd_secure_id_get(usage->mem));
+ ump_dd_release(usage->mem);
+ kfree(usage);
+
+ }
+
+ /* we should now not hold any imported memory objects,
+ * detach all import handlers */
+ umpp_import_handlers_term(session);
+
+ mutex_destroy(&session->session_lock);
+ kfree(session);
+}
+
+ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+ umpp_allocation * alloc;
+ int i;
+
+ UMP_ASSERT(size);
+
+ if (flags & (~UMP_FLAGS_RANGE))
+ {
+ printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD ) ) == 0 ||
+ ( flags & (UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+ {
+ printk(KERN_WARNING "UMP: allocation flags should have at least one read and one write permission bit set\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ /* check that the matching permission flags are set whenever hint flags are set */
+ for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+ {
+ if (flags & (UMP_HINT_DEVICE_RD<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+ }
+ if (flags & (UMP_HINT_DEVICE_WR<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+ }
+ }
+
+ alloc = kzalloc(sizeof(*alloc), GFP_KERNEL | __GFP_HARDWALL);
+
+ if (NULL == alloc)
+ goto out1;
+
+ alloc->flags = flags;
+ alloc->filter_func = filter_func;
+ alloc->final_release_func = final_release_func;
+ alloc->callback_data = callback_data;
+ alloc->size = size;
+
+ mutex_init(&alloc->map_list_lock);
+ INIT_LIST_HEAD(&alloc->map_list);
+ atomic_set(&alloc->refcount, 1);
+
+#ifdef CONFIG_KDS
+ kds_resource_init(&alloc->kds_res);
+#endif
+
+ if (!(alloc->flags & UMP_PROT_SHAREABLE))
+ {
+ alloc->owner = get_current()->pid;
+ }
+
+ if (0 != umpp_phys_commit(alloc))
+ {
+ goto out2;
+ }
+
+ /* all set up, allocate an ID for it */
+
+ mutex_lock(&device.secure_id_map_lock);
+ alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+ mutex_unlock(&device.secure_id_map_lock);
+
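+ /* an id of 0 is reserved as invalid, so 0 here means the descriptor mapping could not hand out an ID */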
+ if ((int)alloc->id == 0)
+ {
+ /* failed to allocate a secure_id */
+ goto out3;
+ }
+
+ return alloc;
+
+out3:
+ umpp_phys_free(alloc);
+out2:
+ kfree(alloc);
+out1:
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+uint64_t ump_dd_size_get_64(const ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ return alloc->size;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_size_get(ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+ UMP_ASSERT(alloc->size <= UMP_UINT32_MAX);
+
+ return (unsigned long)alloc->size;
+}
+
+ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ return alloc->id;
+}
+
+#ifdef CONFIG_KDS
+struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ return &alloc->kds_res;
+}
+#endif
+
+ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem)
+{
+ const umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+ alloc = (const umpp_allocation *)mem;
+
+ return alloc->flags;
+}
+
+ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id)
+{
+ umpp_allocation * alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+
+ mutex_lock(&device.secure_id_map_lock);
+
+ if (0 == umpp_descriptor_mapping_lookup(device.secure_id_map, secure_id, (void**)&alloc))
+ {
+ if (NULL != alloc->filter_func)
+ {
+ if (!alloc->filter_func(secure_id, alloc, alloc->callback_data))
+ {
+ alloc = UMP_DD_INVALID_MEMORY_HANDLE; /* the filter denied access */
+ }
+ }
+
+ /* check permission to access it */
+ if ((UMP_DD_INVALID_MEMORY_HANDLE != alloc) && !(alloc->flags & UMP_PROT_SHAREABLE))
+ {
+ if (alloc->owner != get_current()->pid)
+ {
+ alloc = UMP_DD_INVALID_MEMORY_HANDLE; /*no rights for the current process*/
+ }
+ }
+
+ if (UMP_DD_INVALID_MEMORY_HANDLE != alloc)
+ {
+ if( ump_dd_retain(alloc) != UMP_DD_SUCCESS)
+ {
+ alloc = UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+ }
+ }
+ mutex_unlock(&device.secure_id_map_lock);
+
+ return alloc;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id)
+{
+ return ump_dd_from_secure_id(secure_id);
+}
+
+int ump_dd_retain(ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ /* check for overflow */
+ while(1)
+ {
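+ /* lock-free increment: retry if another thread changed the count between the read and the cmpxchg */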
+ int refcnt = atomic_read(&alloc->refcount);
+ if (refcnt + 1 > 0)
+ {
+ if(atomic_cmpxchg(&alloc->refcount, refcnt, refcnt + 1) == refcnt)
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ return -EBUSY;
+ }
+ }
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_add(ump_dd_handle mem)
+{
+ ump_dd_retain(mem);
+}
+
+
+void ump_dd_release(ump_dd_handle mem)
+{
+ umpp_allocation * alloc;
+ uint32_t new_cnt;
+
+ UMP_ASSERT(mem);
+
+ alloc = (umpp_allocation*)mem;
+
+ /* secure the id for lookup while releasing */
+ mutex_lock(&device.secure_id_map_lock);
+
+ /* do the actual release */
+ new_cnt = atomic_sub_return(1, &alloc->refcount);
+ if (0 == new_cnt)
+ {
+ /* remove from the table as this was the last ref */
+ umpp_descriptor_mapping_remove(device.secure_id_map, alloc->id);
+ }
+
+ /* release the lock as early as possible */
+ mutex_unlock(&device.secure_id_map_lock);
+
+ if (0 != new_cnt)
+ {
+ /* exit if still have refs */
+ return;
+ }
+
+ UMP_ASSERT(list_empty(&alloc->map_list));
+
+#ifdef CONFIG_KDS
+ if (kds_resource_term(&alloc->kds_res))
+ {
+ printk(KERN_ERR "ump_dd_release: kds_resource_term failed, "
+ "unable to release UMP allocation\n");
+ return;
+ }
+#endif
+ /* cleanup */
+ if (NULL != alloc->final_release_func)
+ {
+ alloc->final_release_func(alloc, alloc->callback_data);
+ }
+
+ if (0 == (alloc->management_flags & UMP_MGMT_EXTERNAL))
+ {
+ umpp_phys_free(alloc);
+ }
+ else
+ {
+ kfree(alloc->block_array);
+ }
+
+ mutex_destroy(&alloc->map_list_lock);
+
+ kfree(alloc);
+}
+
+/*
+ * UMP v1 API
+ */
+void ump_dd_reference_release(ump_dd_handle mem)
+{
+ ump_dd_release(mem);
+}
+
+void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray)
+{
+ const umpp_allocation * alloc;
+ UMP_ASSERT(pCount);
+ UMP_ASSERT(pArray);
+ UMP_ASSERT(mem);
+ alloc = (const umpp_allocation *)mem;
+ *pCount = alloc->blocksCount;
+ *pArray = alloc->block_array;
+}
+
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks)
+{
+ const umpp_allocation * alloc;
+ unsigned long i;
+ UMP_ASSERT(mem);
+ UMP_ASSERT(blocks);
+ UMP_ASSERT(num_blocks);
+
+ alloc = (const umpp_allocation *)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+ if((uint64_t)num_blocks != alloc->blocksCount)
+ {
+ return UMP_DD_INVALID;
+ }
+
+ for( i = 0; i < num_blocks; i++)
+ {
+ UMP_ASSERT(alloc->block_array[i].addr <= UMP_UINT32_MAX);
+ UMP_ASSERT(alloc->block_array[i].size <= UMP_UINT32_MAX);
+
+ blocks[i].addr = (unsigned long)alloc->block_array[i].addr;
+ blocks[i].size = (unsigned long)alloc->block_array[i].size;
+ }
+
+ return UMP_DD_SUCCESS;
+}
+/*
+ * UMP v1 API
+ */
+ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block)
+{
+ const umpp_allocation * alloc;
+ UMP_ASSERT(mem);
+ UMP_ASSERT(block);
+ alloc = (const umpp_allocation *)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+
+ UMP_ASSERT(alloc->block_array[index].addr <= UMP_UINT32_MAX);
+ UMP_ASSERT(alloc->block_array[index].size <= UMP_UINT32_MAX);
+
+ block->addr = (unsigned long)alloc->block_array[index].addr;
+ block->size = (unsigned long)alloc->block_array[index].size;
+
+ return UMP_DD_SUCCESS;
+}
+
+/*
+ * UMP v1 API
+ */
+unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem)
+{
+ const umpp_allocation * alloc;
+ UMP_ASSERT(mem);
+ alloc = (const umpp_allocation *)mem;
+
+ UMP_ASSERT(alloc->flags & UMP_CONSTRAINT_32BIT_ADDRESSABLE);
+ UMP_ASSERT(alloc->blocksCount <= UMP_UINT32_MAX);
+
+ return (unsigned long)alloc->blocksCount;
+}
+
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void *uaddr, size_t size)
+{
+ umpp_cpu_mapping *map;
+
+ void *target_first = uaddr;
+ void *target_last = (void*)((uintptr_t)uaddr - 1 + size);
+
+ if (target_last < target_first) /* wrapped */
+ {
+ return NULL;
+ }
+
+ mutex_lock(&alloc->map_list_lock);
+ list_for_each_entry(map, &alloc->map_list, link)
+ {
+ if ( map->vaddr_start <= target_first &&
+ (void*)((uintptr_t)map->vaddr_start + (map->nr_pages << PAGE_SHIFT) - 1) >= target_last)
+ {
+ goto out;
+ }
+ }
+ map = NULL;
+out:
+ mutex_unlock(&alloc->map_list_lock);
+
+ return map;
+}
+
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map)
+{
+ UMP_ASSERT(alloc);
+ UMP_ASSERT(map);
+ mutex_lock(&alloc->map_list_lock);
+ list_add(&map->link, &alloc->map_list);
+ mutex_unlock(&alloc->map_list_lock);
+}
+
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target)
+{
+ umpp_cpu_mapping * map;
+
+ UMP_ASSERT(alloc);
+ UMP_ASSERT(target);
+
+ mutex_lock(&alloc->map_list_lock);
+ list_for_each_entry(map, &alloc->map_list, link)
+ {
+ if (map == target)
+ {
+ list_del(&target->link);
+ kfree(target);
+ mutex_unlock(&alloc->map_list_lock);
+ return;
+ }
+ }
+
+ /* not found, error */
+ UMP_ASSERT(0);
+}
+
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset)
+{
+ uint64_t i;
+
+ for (i = 0 ; i < alloc->blocksCount; i++)
+ {
+ if (offset < alloc->block_array[i].size)
+ {
+ /* found the block_array element containing this offset */
+ *block_index = i;
+ *block_internal_offset = offset;
+ return 0;
+ }
+ offset -= alloc->block_array[i].size;
+ }
+
+ return -ENXIO;
+}
+
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size)
+{
+ umpp_allocation * alloc;
+ void *vaddr;
+ umpp_cpu_mapping * mapping;
+ uint64_t virt_page_off; /* offset of given address from beginning of the virtual mapping */
+ uint64_t phys_page_off; /* offset of the virtual mapping from the beginning of the physical buffer */
+ uint64_t page_count; /* number of pages to sync */
+ uint64_t i;
+ uint64_t block_idx;
+ uint64_t block_offset;
+ uint64_t paddr;
+
+ UMP_ASSERT((UMP_MSYNC_CLEAN == op) || (UMP_MSYNC_CLEAN_AND_INVALIDATE == op));
+
+ alloc = (umpp_allocation*)mem;
+ vaddr = (void*)(uintptr_t)address;
+
+ if((alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+ {
+ /* mapping is not cached */
+ return;
+ }
+
+ mapping = umpp_dd_find_enclosing_mapping(alloc, vaddr, size);
+ if (NULL == mapping)
+ {
+ printk(KERN_WARNING "UMP: Illegal cache sync address %lx\n", (uintptr_t)vaddr);
+ return; /* invalid pointer or size causes out-of-bounds */
+ }
+
+ /* we already know that address + size doesn't wrap around as umpp_dd_find_enclosing_mapping didn't fail */
+ page_count = ((((((uintptr_t)address + size - 1) & PAGE_MASK) - ((uintptr_t)address & PAGE_MASK))) >> PAGE_SHIFT) + 1;
+ virt_page_off = (vaddr - mapping->vaddr_start) >> PAGE_SHIFT;
+ phys_page_off = mapping->page_off;
+
+ if (umpp_dd_find_start_block(alloc, (virt_page_off + phys_page_off) << PAGE_SHIFT, &block_idx, &block_offset))
+ {
+ /* should not fail as a valid mapping was found, so the phys mem must exist */
+ printk(KERN_WARNING "UMP: Unable to find physical start block with offset %llx\n", virt_page_off + phys_page_off);
+ UMP_ASSERT(0);
+ return;
+ }
+
+ paddr = alloc->block_array[block_idx].addr + block_offset + (((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1));
+
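+ /* walk the range one page at a time, moving to the next physical block whenever the current one is exhausted */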
+ for (i = 0; i < page_count; i++)
+ {
+ size_t offset = ((uintptr_t)vaddr) & ((1u << PAGE_SHIFT)-1);
+ size_t sz = min((size_t)PAGE_SIZE - offset, size);
+
+ /* check if we've overrun the current block; if so, move to the next block */
+ if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+ {
+ block_idx++;
+ UMP_ASSERT(block_idx < alloc->blocksCount);
+ paddr = alloc->block_array[block_idx].addr;
+ }
+
+ if (UMP_MSYNC_CLEAN == op)
+ {
+ ump_sync_to_memory(paddr, vaddr, sz);
+ }
+ else /* (UMP_MSYNC_CLEAN_AND_INVALIDATE == op) already validated on entry */
+ {
+ ump_sync_to_cpu(paddr, vaddr, sz);
+ }
+
+ /* advance to next page */
+ vaddr = (void*)((uintptr_t)vaddr + sz);
+ size -= sz;
+ paddr += sz;
+ }
+}
+
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data)
+{
+ uint64_t size = 0;
+ uint64_t i;
+ umpp_allocation * alloc;
+
+ UMP_ASSERT(blocks);
+ UMP_ASSERT(num_blocks);
+
+ for (i = 0; i < num_blocks; i++)
+ {
+ size += blocks[i].size;
+ }
+ UMP_ASSERT(size);
+
+ if (flags & (~UMP_FLAGS_RANGE))
+ {
+ printk(KERN_WARNING "UMP: allocation flags out of allowed bits range\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ if( ( flags & (UMP_PROT_CPU_RD | UMP_PROT_W_RD | UMP_PROT_X_RD | UMP_PROT_Y_RD | UMP_PROT_Z_RD
+ | UMP_PROT_CPU_WR | UMP_PROT_W_WR | UMP_PROT_X_WR | UMP_PROT_Y_WR | UMP_PROT_Z_WR )) == 0 )
+ {
+ printk(KERN_WARNING "UMP: allocation flags should have at least one read or write permission bit set\n");
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ /* check that the matching permission flags are set whenever hint flags are set */
+ for (i = UMP_DEVICE_CPU_SHIFT; i<=UMP_DEVICE_Z_SHIFT; i+=4)
+ {
+ if (flags & (UMP_HINT_DEVICE_RD<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_RD<<i));
+ }
+ if (flags & (UMP_HINT_DEVICE_WR<<i))
+ {
+ UMP_ASSERT(flags & (UMP_PROT_DEVICE_WR<<i));
+ }
+ }
+
+ alloc = kzalloc(sizeof(*alloc),__GFP_HARDWALL | GFP_KERNEL);
+
+ if (NULL == alloc)
+ {
+ goto out1;
+ }
+
+ alloc->block_array = kzalloc(sizeof(ump_dd_physical_block_64) * num_blocks,__GFP_HARDWALL | GFP_KERNEL);
+ if (NULL == alloc->block_array)
+ {
+ goto out2;
+ }
+
+ memcpy(alloc->block_array, blocks, sizeof(ump_dd_physical_block_64) * num_blocks);
+
+#ifdef CONFIG_KDS
+ kds_resource_init(&alloc->kds_res);
+#endif
+ alloc->size = size;
+ alloc->blocksCount = num_blocks;
+ alloc->flags = flags;
+ alloc->filter_func = filter_func;
+ alloc->final_release_func = final_release_func;
+ alloc->callback_data = callback_data;
+
+ if (!(alloc->flags & UMP_PROT_SHAREABLE))
+ {
+ alloc->owner = get_current()->pid;
+ }
+
+ mutex_init(&alloc->map_list_lock);
+ INIT_LIST_HEAD(&alloc->map_list);
+ atomic_set(&alloc->refcount, 1);
+
+ /* all set up, allocate an ID */
+
+ mutex_lock(&device.secure_id_map_lock);
+ alloc->id = umpp_descriptor_mapping_allocate(device.secure_id_map, (void*)alloc);
+ mutex_unlock(&device.secure_id_map_lock);
+
+ if ((int)alloc->id == 0)
+ {
+ /* failed to allocate a secure_id */
+ goto out3;
+ }
+
+ alloc->management_flags |= UMP_MGMT_EXTERNAL;
+
+ return alloc;
+
+out3:
+ kfree(alloc->block_array);
+out2:
+ kfree(alloc);
+out1:
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+
+/*
+ * UMP v1 API
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks)
+{
+ ump_dd_handle mem;
+ ump_dd_physical_block_64 *block_64_array;
+ ump_alloc_flags flags = UMP_V1_API_DEFAULT_ALLOCATION_FLAGS;
+ unsigned long i;
+
+ UMP_ASSERT(blocks);
+ UMP_ASSERT(num_blocks);
+
+ block_64_array = kzalloc(num_blocks * sizeof(*block_64_array), __GFP_HARDWALL | GFP_KERNEL);
+
+ if(block_64_array == NULL)
+ {
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ /* copy physical blocks */
+ for( i = 0; i < num_blocks; i++)
+ {
+ block_64_array[i].addr = blocks[i].addr;
+ block_64_array[i].size = blocks[i].size;
+ }
+
+ mem = ump_dd_create_from_phys_blocks_64(block_64_array, num_blocks, flags, NULL, NULL, NULL);
+
+ kfree(block_64_array);
+
+ return mem;
+
+}
diff --git a/drivers/base/ump/src/common/ump_kernel_core.h b/drivers/base/ump/src/common/ump_kernel_core.h
new file mode 100755
index 000000000000..2329c45f0ebe
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_core.h
@@ -0,0 +1,228 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_CORE_H_
+#define _UMP_KERNEL_CORE_H_
+
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/cred.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+#include <linux/ump-common.h>
+#include <ump/src/common/ump_kernel_descriptor_mapping.h>
+
+/* forward decl */
+struct umpp_session;
+
+/**
+ * UMP handle metadata.
+ * Tracks various data about a handle that is of no use to user space.
+ */
+typedef enum
+{
+ UMP_MGMT_EXTERNAL = (1ul << 0) /**< Handle created via the ump_dd_create_from_phys_blocks interface */
+ /* (1ul << 31) not to be used */
+} umpp_management_flags;
+
+/**
+ * Structure tracking the single global UMP device.
+ * Holds global data like the ID map
+ */
+typedef struct umpp_device
+{
+ struct mutex secure_id_map_lock; /**< Lock protecting access to the map */
+ umpp_descriptor_mapping * secure_id_map; /**< Map of all known secure IDs on the system */
+} umpp_device;
+
+/**
+ * Structure tracking a single CPU mapping of a UMP allocation.
+ * Tracks info about a mapping so we can verify cache maintenance
+ * operations and help in the unmap cleanup.
+ */
+typedef struct umpp_cpu_mapping
+{
+ struct list_head link; /**< link to list of mappings for an allocation */
+ void *vaddr_start; /**< CPU VA start of the mapping */
+ size_t nr_pages; /**< Size (in pages) of the mapping */
+ uint64_t page_off; /**< Offset (in pages) from start of the allocation where the mapping starts */
+ ump_dd_handle handle; /**< Which handle this mapping is linked to */
+ struct umpp_session * session; /**< Which session created the mapping */
+} umpp_cpu_mapping;
+
+/**
+ * Structure tracking a UMP allocation.
+ * Represents a memory allocation together with its ID.
+ * Tracks all needed meta-data about an allocation.
+ */
+typedef struct umpp_allocation
+{
+ ump_secure_id id; /**< Secure ID of the allocation */
+ atomic_t refcount; /**< Usage count */
+
+ ump_alloc_flags flags; /**< Flags for all supported devices */
+ uint32_t management_flags; /**< Management flags tracking */
+
+ pid_t owner; /**< The process ID owning the memory if not shareable */
+
+ ump_dd_security_filter filter_func; /**< Hook to verify use, called during retains from new clients */
+ ump_dd_final_release_callback final_release_func; /**< Hook called when the last reference is removed */
+ void* callback_data; /**< Additional data given to release hook */
+
+ uint64_t size; /**< Size (in bytes) of the allocation */
+ uint64_t blocksCount; /**< Number of physical blocks the allocation is built up of */
+ ump_dd_physical_block_64 * block_array; /**< Array, one entry per block, describing block start and length */
+
+ struct mutex map_list_lock; /**< Lock protecting the map_list */
+ struct list_head map_list; /**< Tracks all CPU VA mappings of this allocation */
+
+#ifdef CONFIG_KDS
+ struct kds_resource kds_res; /**< The KDS resource controlling access to this allocation */
+#endif
+
+ void * backendData; /**< Physical memory backend meta-data */
+} umpp_allocation;
+
+/**
+ * Structure tracking use of UMP memory by a session.
+ * Tracks the use of an allocation by a session so session termination can clean up any outstanding references.
+ * Also protects against non-matched release calls from user space.
+ */
+typedef struct umpp_session_memory_usage
+{
+ ump_secure_id id; /**< ID being used. For quick look-up */
+ ump_dd_handle mem; /**< Handle being used. */
+
+ /**
+ * Tracks how many times the process has retained this handle in the kernel.
+ * This should usually just be 1 (allocated or resolved) or 2 (mapped),
+ * but could be more if someone is playing with the low-level API.
+ */
+ atomic_t process_usage_count;
+
+ struct list_head link; /**< link to other usage trackers for a session */
+} umpp_session_memory_usage;
+
+/**
+ * Structure representing a session/client.
+ * Tracks the UMP allocations being used by this client.
+ */
+typedef struct umpp_session
+{
+ struct mutex session_lock; /**< Lock for memory usage manipulation */
+ struct list_head memory_usage; /**< list of memory currently being used by this session */
+ void* import_handler_data[UMPP_EXTERNAL_MEM_COUNT]; /**< Import modules per-session data pointer */
+} umpp_session;
+
+/**
+ * UMP core setup.
+ * Called by any OS specific startup function to initialize the common part.
+ * @return UMP_OK if core initialized correctly, any other value for errors
+ */
+ump_result umpp_core_constructor(void);
+
+/**
+ * UMP core teardown.
+ * Called by any OS specific unload function to clean up the common part.
+ */
+void umpp_core_destructor(void);
+
+/**
+ * UMP session start.
+ * Called by any OS specific session handler when a new session is detected
+ * @return Non-NULL if a matching core session could be set up. NULL on failure
+ */
+umpp_session *umpp_core_session_start(void);
+
+/**
+ * UMP session end.
+ * Called by any OS specific session handler when a session is ended/terminated.
+ * @param session The core session object returned by ump_core_session_start
+ */
+void umpp_core_session_end(umpp_session *session);
+
+/**
+ * Find a mapping object (if any) for this allocation.
+ * Called by any function needing to identify a mapping from a user virtual address.
+ * Verifies that the whole range lies within a single mapping object.
+ * @param alloc The UMP allocation to find a matching mapping object of
+ * @param uaddr User mapping address to find the mapping object for
+ * @param size Length of the mapping
+ * @return NULL on error (no match found), pointer to mapping object if match found
+ */
+umpp_cpu_mapping * umpp_dd_find_enclosing_mapping(umpp_allocation * alloc, void* uaddr, size_t size);
+
+/**
+ * Register a new mapping of an allocation.
+ * Called by functions creating a new mapping of an allocation, typically OS specific handlers.
+ * @param alloc The allocation object which has been mapped
+ * @param map Info about the mapping
+ */
+void umpp_dd_add_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * map);
+
+/**
+ * Remove and free mapping object from an allocation.
+ * @param alloc The allocation object to remove the mapping info from
+ * @param target The mapping object to remove
+ */
+void umpp_dd_remove_cpu_mapping(umpp_allocation * alloc, umpp_cpu_mapping * target);
+
+/**
+ * Helper to find the block in the block_array which holds a given byte offset.
+ * @param alloc The allocation object to find the block in
+ * @param offset Offset (in bytes) from allocation start to find the block of
+ * @param[out] block_index Pointer to the index of the block matching
+ * @param[out] block_internal_offset Offset within the returned block of the searched offset
+ * @return 0 if a matching block was found, any other value for error
+ */
+int umpp_dd_find_start_block(const umpp_allocation * alloc, uint64_t offset, uint64_t * const block_index, uint64_t * const block_internal_offset);
+
+/**
+ * Cache maintenance helper.
+ * Performs the requested cache operation on the given handle.
+ * @param mem Allocation handle
+ * @param op Cache maintenance operation to perform
+ * @param address User mapping at which to do the operation
+ * @param size Length (in bytes) of the range to do the operation on
+ */
+void umpp_dd_cpu_msync_now(ump_dd_handle mem, ump_cpu_msync_op op, void * address, size_t size);
+
+/**
+ * Import module session early init.
+ * Calls session_begin on all installed import modules.
+ * @param session The core session object to initialize the import handlers for
+ */
+void umpp_import_handlers_init(umpp_session * session);
+
+/**
+ * Import module session cleanup.
+ * Calls session_end on all import modules bound to the session.
+ * @param session The core session object to clean up the import handlers for
+ */
+void umpp_import_handlers_term(umpp_session * session);
+
+#endif /* _UMP_KERNEL_CORE_H_ */
+
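
A rough sketch of how an OS glue layer is expected to drive the core API declared above; module init/exit wiring and the file operations are omitted, and the function names and error-code mapping are placeholders (the real user is the Linux glue in ump_kernel_linux.c):

static int example_glue_init(void)
{
	if (umpp_core_constructor() != UMP_OK)
		return -ENOMEM;	/* placeholder mapping of the failure */
	return 0;
}

static umpp_session *example_client_open(void)
{
	/* one session per client; NULL means the open should fail */
	return umpp_core_session_start();
}

static void example_client_close(umpp_session *session)
{
	/* releases any allocations the client left behind */
	umpp_core_session_end(session);
}

static void example_glue_exit(void)
{
	umpp_core_destructor();
}
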
diff --git a/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
new file mode 100755
index 000000000000..c25f21af790a
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.c
@@ -0,0 +1,162 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+
+#include <common/ump_kernel_descriptor_mapping.h>
+#include <common/ump_kernel_priv.h>
+
+#define MALI_PAD_INT(x) (((x) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1))
+
+/**
+ * Allocate a descriptor table capable of holding 'count' mappings
+ * @param count Number of mappings in the table
+ * @return Pointer to a new table, NULL on error
+ */
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count);
+
+/**
+ * Free a descriptor table
+ * @param table The table to free
+ */
+static void descriptor_table_free(umpp_descriptor_table * table);
+
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries)
+{
+ umpp_descriptor_mapping * map = kzalloc(sizeof(umpp_descriptor_mapping), GFP_KERNEL);
+
+ init_entries = MALI_PAD_INT(init_entries);
+ max_entries = MALI_PAD_INT(max_entries);
+
+ if (NULL != map)
+ {
+ map->table = descriptor_table_alloc(init_entries);
+ if (NULL != map->table)
+ {
+ init_rwsem( &map->lock);
+ set_bit(0, map->table->usage);
+ map->max_nr_mappings_allowed = max_entries;
+ map->current_nr_mappings = init_entries;
+ return map;
+ }
+ kfree(map);
+ }
+ return NULL;
+}
+
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map)
+{
+ UMP_ASSERT(NULL != map);
+ descriptor_table_free(map->table);
+ kfree(map);
+}
+
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target)
+{
+ int descriptor = 0;
+ UMP_ASSERT(NULL != map);
+ down_write( &map->lock);
+ descriptor = find_first_zero_bit(map->table->usage, map->current_nr_mappings);
+ if (descriptor == map->current_nr_mappings)
+ {
+ /* no free descriptor, try to expand the table */
+ umpp_descriptor_table * new_table;
+ umpp_descriptor_table * old_table = map->table;
+ int nr_mappings_new = map->current_nr_mappings + BITS_PER_LONG;
+
+ if (map->current_nr_mappings >= map->max_nr_mappings_allowed)
+ {
+ descriptor = 0;
+ goto unlock_and_exit;
+ }
+
+ new_table = descriptor_table_alloc(nr_mappings_new);
+ if (NULL == new_table)
+ {
+ descriptor = 0;
+ goto unlock_and_exit;
+ }
+
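+ /*
+ * Copy the existing state into the grown table: the usage bitmap is
+ * current_nr_mappings / 8 bytes, written here as
+ * (sizeof(unsigned long) * current_nr_mappings) / BITS_PER_LONG.
+ */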
+ memcpy(new_table->usage, old_table->usage, (sizeof(unsigned long)*map->current_nr_mappings) / BITS_PER_LONG);
+ memcpy(new_table->mappings, old_table->mappings, map->current_nr_mappings * sizeof(void*));
+
+ map->table = new_table;
+ map->current_nr_mappings = nr_mappings_new;
+ descriptor_table_free(old_table);
+ }
+
+ /* we have found a valid descriptor, set the value and usage bit */
+ set_bit(descriptor, map->table->usage);
+ map->table->mappings[descriptor] = target;
+
+unlock_and_exit:
+ up_write(&map->lock);
+ return descriptor;
+}
+
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target)
+{
+ int result = -EINVAL;
+ UMP_ASSERT(map);
+ UMP_ASSERT(target);
+ down_read(&map->lock);
+ if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ {
+ *target = map->table->mappings[descriptor];
+ result = 0;
+ }
+ /* keep target untouched if the descriptor was not found */
+ up_read(&map->lock);
+ return result;
+}
+
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor)
+{
+ UMP_ASSERT(map);
+ down_write(&map->lock);
+ if ( (descriptor > 0) && (descriptor < map->current_nr_mappings) && test_bit(descriptor, map->table->usage) )
+ {
+ map->table->mappings[descriptor] = NULL;
+ clear_bit(descriptor, map->table->usage);
+ }
+ up_write(&map->lock);
+}
+
+static umpp_descriptor_table * descriptor_table_alloc(unsigned int count)
+{
+ umpp_descriptor_table * table;
+
+ table = kzalloc(sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG) + (sizeof(void*) * count), __GFP_HARDWALL | GFP_KERNEL );
+
+ if (NULL != table)
+ {
+ table->usage = (unsigned long*)((u8*)table + sizeof(umpp_descriptor_table));
+ table->mappings = (void**)((u8*)table + sizeof(umpp_descriptor_table) + ((sizeof(unsigned long) * count)/BITS_PER_LONG));
+ }
+
+ return table;
+}
+
+static void descriptor_table_free(umpp_descriptor_table * table)
+{
+ UMP_ASSERT(table);
+ kfree(table);
+}
+
diff --git a/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
new file mode 100755
index 000000000000..3260d52c5945
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_descriptor_mapping.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_kernel_descriptor_mapping.h
+ */
+
+#ifndef _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+#define _UMP_KERNEL_DESCRIPTOR_MAPPING_H_
+
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+/**
+ * The actual descriptor mapping table, never directly accessed by clients
+ */
+typedef struct umpp_descriptor_table
+{
+ /* keep as a unsigned long to rely on the OS's bitops support */
+ unsigned long * usage; /**< Pointer to bitpattern indicating if a descriptor is valid/used(1) or not(0) */
+ void** mappings; /**< Array of the pointers the descriptors map to */
+} umpp_descriptor_table;
+
+/**
+ * The descriptor mapping object
+ * Provides a separate namespace where we can map an integer to a pointer
+ */
+typedef struct umpp_descriptor_mapping
+{
+ struct rw_semaphore lock; /**< Lock protecting access to the mapping object */
+ unsigned int max_nr_mappings_allowed; /**< Max number of mappings to support in this namespace */
+ unsigned int current_nr_mappings; /**< Current number of possible mappings */
+ umpp_descriptor_table * table; /**< Pointer to the current mapping table */
+} umpp_descriptor_mapping;
+
+/**
+ * Create a descriptor mapping object.
+ * Create a descriptor mapping capable of holding init_entries growable to max_entries.
+ * ID 0 is reserved so the number of available entries will be max - 1.
+ * @param init_entries Number of entries to preallocate memory for
+ * @param max_entries Number of entries to max support
+ * @return Pointer to a descriptor mapping object, NULL on failure
+ */
+umpp_descriptor_mapping * umpp_descriptor_mapping_create(unsigned int init_entries, unsigned int max_entries);
+
+/**
+ * Destroy a descriptor mapping object
+ * @param[in] map The map to free
+ */
+void umpp_descriptor_mapping_destroy(umpp_descriptor_mapping * map);
+
+/**
+ * Allocate a new mapping entry (descriptor ID)
+ * Allocates a new entry in the map.
+ * @param[in] map The map to allocate a new entry in
+ * @param[in] target The value to map to
+ * @return The descriptor allocated, ID 0 on failure.
+ */
+unsigned int umpp_descriptor_mapping_allocate(umpp_descriptor_mapping * map, void * target);
+
+/**
+ * Get the value mapped to by a descriptor ID
+ * @param[in] map The map to lookup the descriptor id in
+ * @param[in] descriptor The descriptor ID to lookup
+ * @param[out] target Pointer to a pointer which will receive the stored value
+ *
+ * @return 0 on success lookup, -EINVAL on lookup failure.
+ */
+int umpp_descriptor_mapping_lookup(umpp_descriptor_mapping * map, unsigned int descriptor, void** const target);
+
+/**
+ * Free the descriptor ID
+ * For the descriptor to be reused it has to be freed
+ * @param[in] map The map to free the descriptor from
+ * @param descriptor The descriptor ID to free
+ */
+void umpp_descriptor_mapping_remove(umpp_descriptor_mapping * map, unsigned int descriptor);
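+
+/*
+ * Minimal usage sketch (hypothetical caller, not part of the driver).
+ * Descriptor 0 is reserved by create(), so a return value of 0 from
+ * umpp_descriptor_mapping_allocate() always signals failure:
+ *
+ *   umpp_descriptor_mapping *map;
+ *   unsigned int id;
+ *   void *found = NULL;
+ *
+ *   map = umpp_descriptor_mapping_create(UMP_EXPECTED_IDS, UMP_MAX_IDS);
+ *   if (!map)
+ *           return -ENOMEM;
+ *   id = umpp_descriptor_mapping_allocate(map, some_object);
+ *   if (!id)
+ *           return -ENOMEM;
+ *   if (!umpp_descriptor_mapping_lookup(map, id, &found))
+ *           do_something(found);          found now equals some_object
+ *   umpp_descriptor_mapping_remove(map, id);
+ *   umpp_descriptor_mapping_destroy(map);
+ *
+ * some_object and do_something() are placeholders for the caller's own data.
+ */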
+
+#endif /* _UMP_KERNEL_DESCRIPTOR_MAPPING_H_ */
diff --git a/drivers/base/ump/src/common/ump_kernel_priv.h b/drivers/base/ump/src/common/ump_kernel_priv.h
new file mode 100755
index 000000000000..bb9d8f125fe6
--- /dev/null
+++ b/drivers/base/ump/src/common/ump_kernel_priv.h
@@ -0,0 +1,80 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_PRIV_H_
+#define _UMP_KERNEL_PRIV_H_
+
+#ifdef __KERNEL__
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <asm/cacheflush.h>
+#endif
+
+
+#define UMP_EXPECTED_IDS 64
+#define UMP_MAX_IDS 32768
+
+#ifdef __KERNEL__
+#define UMP_ASSERT(expr) \
+ if (!(expr)) { \
+ printk(KERN_ERR "UMP: Assertion failed! %s,%s,%s,line=%d\n",\
+ #expr,__FILE__,__func__,__LINE__); \
+ BUG(); \
+ }
+
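+/*
+ * Cache maintenance helpers.  ump_sync_to_memory() cleans and invalidates the
+ * CPU caches after the CPU has written a buffer so that a device reading the
+ * memory observes the data; ump_sync_to_cpu() is the counterpart used before
+ * the CPU reads data produced by a device.  On ARM both currently perform a
+ * clean+invalidate of the inner and outer caches for the given range.
+ */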
+static inline void ump_sync_to_memory(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+ __cpuc_flush_dcache_area(vaddr, sz);
+ outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+ /*TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+ flush_cache_all();
+#elif defined(CONFIG_X86)
+ struct scatterlist scl = {0, };
+ sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+ paddr & (PAGE_SIZE -1 ));
+ dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_TO_DEVICE);
+ mb(); /* for outer_sync (if needed) */
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+
+static inline void ump_sync_to_cpu(uint64_t paddr, void* vaddr, size_t sz)
+{
+#ifdef CONFIG_ARM
+ __cpuc_flush_dcache_area(vaddr, sz);
+ outer_flush_range(paddr, paddr+sz);
+#elif defined(CONFIG_ARM64)
+ /* TODO (MID64-46): There's no other suitable cache flush function for ARM64 */
+ flush_cache_all();
+#elif defined(CONFIG_X86)
+ struct scatterlist scl = {0, };
+ sg_set_page(&scl, pfn_to_page(PFN_DOWN(paddr)), sz,
+ paddr & (PAGE_SIZE -1 ));
+ dma_sync_sg_for_cpu(NULL, &scl, 1, DMA_FROM_DEVICE);
+#else
+#error Implement cache maintenance for your architecture here
+#endif
+}
+#endif /* __KERNEL__*/
+#endif /* _UMP_KERNEL_PRIV_H_ */
+
diff --git a/drivers/base/ump/src/imports/ion/Makefile b/drivers/base/ump/src/imports/ion/Makefile
new file mode 100755
index 000000000000..ef74b273f7ad
--- /dev/null
+++ b/drivers/base/ump/src/imports/ion/Makefile
@@ -0,0 +1,53 @@
+#
+# (C) COPYRIGHT 2011, 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+# linux build system integration
+
+ifneq ($(KERNELRELEASE),)
+# Inside the kernel build system
+
+EXTRA_CFLAGS += -I$(KBUILD_EXTMOD) -I$(KBUILD_EXTMOD)/../../../../..
+KBUILD_EXTRA_SYMBOLS += "$(KBUILD_EXTMOD)/../../Module.symvers"
+
+SRC += ump_kernel_import_ion.c
+
+MODULE:=ump_ion_import.ko
+
+obj-m := $(MODULE:.ko=.o)
+$(MODULE:.ko=-y) := $(SRC:.c=.o)
+$(MODULE:.ko=-objs) := $(SRC:.c=.o)
+
+else
+# Outside the kernel build system
+#
+#
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
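+
+# Example out-of-tree invocation (illustrative path):
+#   make KDIR=/lib/modules/$(uname -r)/build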
+
+all:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR)
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
+endif
+
diff --git a/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c b/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
new file mode 100755
index 000000000000..56dd610ba1c9
--- /dev/null
+++ b/drivers/base/ump/src/imports/ion/ump_kernel_import_ion.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/dma-mapping.h>
+#include "ion.h"
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+struct ion_wrapping_info
+{
+ struct ion_client * ion_client;
+ struct ion_handle * ion_handle;
+ int num_phys_blocks;
+ struct scatterlist * sglist;
+};
+
+static struct ion_device * ion_device_get(void)
+{
+ /* < Customer to provide implementation >
+ * Return a pointer to the global ion_device on the system
+ */
+ return NULL;
+}
+
+static int import_ion_client_create(void** const custom_session_data)
+{
+ struct ion_client ** ion_client;
+
+ ion_client = (struct ion_client**)custom_session_data;
+
+ *ion_client = ion_client_create(ion_device_get(), "ump");
+
+ return PTR_RET(*ion_client);
+}
+
+
+static void import_ion_client_destroy(void* custom_session_data)
+{
+ struct ion_client * ion_client;
+
+ ion_client = (struct ion_client*)custom_session_data;
+ BUG_ON(!ion_client);
+
+ ion_client_destroy(ion_client);
+}
+
+
+static void import_ion_final_release_callback(const ump_dd_handle handle, void * info)
+{
+ struct ion_wrapping_info * ion_info;
+
+ BUG_ON(!info);
+
+ (void)handle;
+ ion_info = (struct ion_wrapping_info*)info;
+
+ dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+ ion_free(ion_info->ion_client, ion_info->ion_handle);
+ kfree(ion_info);
+ module_put(THIS_MODULE);
+}
+
+static ump_dd_handle import_ion_import(void * custom_session_data, void * pfd, ump_alloc_flags flags)
+{
+ int fd;
+ ump_dd_handle ump_handle;
+ struct scatterlist * sg;
+ int num_dma_blocks;
+ ump_dd_physical_block_64 * phys_blocks;
+ unsigned long i;
+ struct sg_table * sgt;
+
+ struct ion_wrapping_info * ion_info;
+
+ BUG_ON(!custom_session_data);
+ BUG_ON(!pfd);
+
+ ion_info = kzalloc(sizeof(*ion_info), GFP_KERNEL);
+ if (NULL == ion_info)
+ {
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+ }
+
+ ion_info->ion_client = (struct ion_client*)custom_session_data;
+
+ if (get_user(fd, (int*)pfd))
+ {
+ goto out;
+ }
+
+ ion_info->ion_handle = ion_import_dma_buf(ion_info->ion_client, fd);
+
+ if (IS_ERR_OR_NULL(ion_info->ion_handle))
+ {
+ goto out;
+ }
+
+ sgt = ion_sg_table(ion_info->ion_client, ion_info->ion_handle);
+ if (IS_ERR_OR_NULL(sgt))
+ {
+ goto ion_dma_map_failed;
+ }
+
+ ion_info->sglist = sgt->sgl;
+
+ sg = ion_info->sglist;
+ while (sg)
+ {
+ ion_info->num_phys_blocks++;
+ sg = sg_next(sg);
+ }
+
+ num_dma_blocks = dma_map_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+
+ if (0 == num_dma_blocks)
+ {
+ goto linux_dma_map_failed;
+ }
+
+ phys_blocks = vmalloc(num_dma_blocks * sizeof(*phys_blocks));
+ if (NULL == phys_blocks)
+ {
+ goto vmalloc_failed;
+ }
+
+ for_each_sg(ion_info->sglist, sg, num_dma_blocks, i)
+ {
+ phys_blocks[i].addr = sg_phys(sg);
+ phys_blocks[i].size = sg_dma_len(sg);
+ }
+
+ ump_handle = ump_dd_create_from_phys_blocks_64(phys_blocks, num_dma_blocks, flags, NULL, import_ion_final_release_callback, ion_info);
+
+ vfree(phys_blocks);
+
+ if (ump_handle != UMP_DD_INVALID_MEMORY_HANDLE)
+ {
+ /*
+ * As we have a final release callback installed
+ * we must keep the module locked until
+ * the callback has been triggered
+ */
+ __module_get(THIS_MODULE);
+ return ump_handle;
+ }
+
+ /* failed*/
+vmalloc_failed:
+ dma_unmap_sg(NULL, ion_info->sglist, ion_info->num_phys_blocks, DMA_BIDIRECTIONAL);
+linux_dma_map_failed:
+ion_dma_map_failed:
+ ion_free(ion_info->ion_client, ion_info->ion_handle);
+out:
+ kfree(ion_info);
+ return UMP_DD_INVALID_MEMORY_HANDLE;
+}
+
+struct ump_import_handler import_handler_ion =
+{
+ .linux_module = THIS_MODULE,
+ .session_begin = import_ion_client_create,
+ .session_end = import_ion_client_destroy,
+ .import = import_ion_import
+};
+
+static int __init import_ion_initialize_module(void)
+{
+ /* register with UMP */
+ return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION, &import_handler_ion);
+}
+
+static void __exit import_ion_cleanup_module(void)
+{
+ /* unregister import handler */
+ ump_import_module_unregister(UMP_EXTERNAL_MEM_TYPE_ION);
+}
+
+/* Setup init and exit functions for this module */
+module_init(import_ion_initialize_module);
+module_exit(import_ion_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION("1.0");
diff --git a/drivers/base/ump/src/linux/ump_kernel_linux.c b/drivers/base/ump/src/linux/ump_kernel_linux.c
new file mode 100755
index 000000000000..b4d02429999a
--- /dev/null
+++ b/drivers/base/ump/src/linux/ump_kernel_linux.c
@@ -0,0 +1,831 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump-ioctl.h>
+#include <linux/ump.h>
+
+#include <asm/uaccess.h> /* copy_*_user */
+#include <linux/compat.h>
+#include <linux/module.h> /* kernel module definitions */
+#include <linux/fs.h> /* file system operations */
+#include <linux/cdev.h> /* character device definitions */
+#include <linux/ioport.h> /* request_mem_region */
+#include <linux/device.h> /* class registration support */
+
+#include <common/ump_kernel_core.h>
+
+#include "ump_kernel_linux_mem.h"
+#include <ump_arch.h>
+
+
+struct ump_linux_device
+{
+ struct cdev cdev;
+ struct class * ump_class;
+};
+
+/* Name of the UMP device driver */
+static char ump_dev_name[] = "ump"; /* should be const, but the functions we call require non-const */
+
+/* Module parameter to control log level */
+int ump_debug_level = 2;
+module_param(ump_debug_level, int, S_IRUSR | S_IWUSR | S_IWGRP | S_IRGRP | S_IROTH); /* rw-rw-r-- */
+MODULE_PARM_DESC(ump_debug_level, "Higher number, more dmesg output");
+
+/* By default the module uses any available major, but it's possible to set it at load time to a specific number */
+int ump_major = 0;
+module_param(ump_major, int, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_major, "Device major number");
+
+#define UMP_REV_STRING "1.0"
+
+char * ump_revision = UMP_REV_STRING;
+module_param(ump_revision, charp, S_IRUGO); /* r--r--r-- */
+MODULE_PARM_DESC(ump_revision, "Revision info");
+
+static int umpp_linux_open(struct inode *inode, struct file *filp);
+static int umpp_linux_release(struct inode *inode, struct file *filp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+#endif
+
+/* This variable defines the file operations this UMP device driver offers */
+static struct file_operations ump_fops =
+{
+ .owner = THIS_MODULE,
+ .open = umpp_linux_open,
+ .release = umpp_linux_release,
+#ifdef HAVE_UNLOCKED_IOCTL
+ .unlocked_ioctl = umpp_linux_ioctl,
+#else
+ .ioctl = umpp_linux_ioctl,
+#endif
+ .compat_ioctl = umpp_linux_ioctl,
+ .mmap = umpp_linux_mmap
+};
+
+/* import module handling */
+DEFINE_MUTEX(import_list_lock);
+struct ump_import_handler * import_handlers[UMPP_EXTERNAL_MEM_COUNT];
+
+/* The global variable containing the global device data */
+static struct ump_linux_device ump_linux_device;
+
+#define DBG_MSG(level, ...) do { \
+if ((level) <= ump_debug_level)\
+{\
+printk(KERN_DEBUG "UMP<" #level ">:\n" __VA_ARGS__);\
+} \
+} while (0)
+
+#define MSG_ERR(...) do{ \
+printk(KERN_ERR "UMP: ERR: %s\n %s()%4d\n", __FILE__, __func__ , __LINE__) ; \
+printk(KERN_ERR __VA_ARGS__); \
+printk(KERN_ERR "\n"); \
+} while(0)
+
+#define MSG(...) do{ \
+printk(KERN_INFO "UMP: " __VA_ARGS__);\
+} while (0)
+
+/*
+ * This function is called by Linux to initialize this module.
+ * All we do is initialize the UMP device driver.
+ */
+static int __init umpp_linux_initialize_module(void)
+{
+ ump_result err;
+
+ err = umpp_core_constructor();
+ if (UMP_OK != err)
+ {
+ MSG_ERR("UMP device driver init failed\n");
+ return -ENOTTY;
+ }
+
+ MSG("UMP device driver %s loaded\n", UMP_REV_STRING);
+ return 0;
+}
+
+
+
+/*
+ * This function is called by Linux to unload/terminate/exit/cleanup this module.
+ * All we do is terminate the UMP device driver.
+ */
+static void __exit umpp_linux_cleanup_module(void)
+{
+ DBG_MSG(2, "Unloading UMP device driver\n");
+ umpp_core_destructor();
+ DBG_MSG(2, "Module unloaded\n");
+}
+
+
+
+/*
+ * Initialize the UMP device driver.
+ */
+ump_result umpp_device_initialize(void)
+{
+ int err;
+ dev_t dev = 0;
+
+ if (0 == ump_major)
+ {
+ /* auto select a major */
+ err = alloc_chrdev_region(&dev, 0, 1, ump_dev_name);
+ ump_major = MAJOR(dev);
+ }
+ else
+ {
+ /* use load time defined major number */
+ dev = MKDEV(ump_major, 0);
+ err = register_chrdev_region(dev, 1, ump_dev_name);
+ }
+
+ if (0 == err)
+ {
+ memset(&ump_linux_device, 0, sizeof(ump_linux_device));
+
+ /* initialize our char dev data */
+ cdev_init(&ump_linux_device.cdev, &ump_fops);
+ ump_linux_device.cdev.owner = THIS_MODULE;
+ ump_linux_device.cdev.ops = &ump_fops;
+
+ /* register char dev with the kernel */
+ err = cdev_add(&ump_linux_device.cdev, dev, 1/*count*/);
+ if (0 == err)
+ {
+
+ ump_linux_device.ump_class = class_create(THIS_MODULE, ump_dev_name);
+ if (IS_ERR(ump_linux_device.ump_class))
+ {
+ err = PTR_ERR(ump_linux_device.ump_class);
+ }
+ else
+ {
+ struct device * mdev;
+ mdev = device_create(ump_linux_device.ump_class, NULL, dev, NULL, ump_dev_name);
+ if (!IS_ERR(mdev))
+ {
+ return UMP_OK;
+ }
+
+ err = PTR_ERR(mdev);
+ class_destroy(ump_linux_device.ump_class);
+ }
+ cdev_del(&ump_linux_device.cdev);
+ }
+
+ unregister_chrdev_region(dev, 1);
+ }
+
+ return UMP_ERROR;
+}
+
+
+
+/*
+ * Terminate the UMP device driver
+ */
+void umpp_device_terminate(void)
+{
+ dev_t dev = MKDEV(ump_major, 0);
+
+ device_destroy(ump_linux_device.ump_class, dev);
+ class_destroy(ump_linux_device.ump_class);
+
+ /* unregister char device */
+ cdev_del(&ump_linux_device.cdev);
+
+ /* free major */
+ unregister_chrdev_region(dev, 1);
+}
+
+
+static int umpp_linux_open(struct inode *inode, struct file *filp)
+{
+ umpp_session *session;
+
+ session = umpp_core_session_start();
+ if (NULL == session)
+ {
+ return -EFAULT;
+ }
+
+ filp->private_data = session;
+
+ return 0;
+}
+
+static int umpp_linux_release(struct inode *inode, struct file *filp)
+{
+ umpp_session *session;
+
+ session = filp->private_data;
+
+ umpp_core_session_end(session);
+
+ filp->private_data = NULL;
+
+ return 0;
+}
+
+/**************************/
+/*ioctl specific functions*/
+/**************************/
+static int do_ump_dd_allocate(umpp_session * session, ump_k_allocate * params)
+{
+ ump_dd_handle new_allocation;
+ new_allocation = ump_dd_allocate_64(params->size, params->alloc_flags, NULL, NULL, NULL);
+
+ if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+ {
+ umpp_session_memory_usage * tracker;
+
+ tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+ if (NULL != tracker)
+ {
+ /* update the return struct with the new ID */
+ params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+ tracker->mem = new_allocation;
+ tracker->id = params->secure_id;
+ atomic_set(&tracker->process_usage_count, 1);
+
+ /* link it into the session in-use list */
+ mutex_lock(&session->session_lock);
+ list_add(&tracker->link, &session->memory_usage);
+ mutex_unlock(&session->session_lock);
+
+ return 0;
+ }
+ ump_dd_release(new_allocation);
+ }
+
+ printk(KERN_WARNING "UMP: Allocation FAILED\n");
+ return -ENOMEM;
+}
+
+static int do_ump_dd_retain(umpp_session * session, ump_k_retain * params)
+{
+ umpp_session_memory_usage * it;
+
+ mutex_lock(&session->session_lock);
+
+ /* try to find it on the session usage list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found to already be in use */
+ /* check for overflow */
+ while(1)
+ {
+ int refcnt = atomic_read(&it->process_usage_count);
+ if (refcnt + 1 > 0)
+ {
+ /* add a process local ref */
+ if(atomic_cmpxchg(&it->process_usage_count, refcnt, refcnt + 1) == refcnt)
+ {
+ mutex_unlock(&session->session_lock);
+ return 0;
+ }
+ }
+ else
+ {
+ /* maximum usage cap reached */
+ mutex_unlock(&session->session_lock);
+ return -EBUSY;
+ }
+ }
+ }
+ }
+ /* try to look it up globally */
+
+ it = kmalloc(sizeof(*it), GFP_KERNEL);
+
+ if (NULL != it)
+ {
+ it->mem = ump_dd_from_secure_id(params->secure_id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE != it->mem)
+ {
+ /* found, add it to the session usage list */
+ it->id = params->secure_id;
+ atomic_set(&it->process_usage_count, 1);
+ list_add(&it->link, &session->memory_usage);
+ }
+ else
+ {
+ /* not found */
+ kfree(it);
+ it = NULL;
+ }
+ }
+
+ mutex_unlock(&session->session_lock);
+
+ return (NULL != it) ? 0 : -ENODEV;
+}
+
+
+static int do_ump_dd_release(umpp_session * session, ump_k_release * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only do a release if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, a valid call */
+ result = 0;
+
+ if (0 == atomic_sub_return(1, &it->process_usage_count))
+ {
+ /* last ref in this process: remove it from the usage list and drop the underlying ref */
+ list_del(&it->link);
+ ump_dd_release(it->mem);
+ kfree(it);
+ }
+
+ break;
+ }
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+static int do_ump_dd_sizequery(umpp_session * session, ump_k_sizequery * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only valid if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, a valid call */
+ params->size = ump_dd_size_get_64(it->mem);
+ result = 0;
+ break;
+ }
+
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+static int do_ump_dd_allocation_flags_get(umpp_session * session, ump_k_allocation_flags * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only valid if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, a valid call */
+ params->alloc_flags = ump_dd_allocation_flags_get(it->mem);
+ result = 0;
+ break;
+ }
+
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+static int do_ump_dd_msync_now(umpp_session * session, ump_k_msync * params)
+{
+ umpp_session_memory_usage * it;
+ int result = -ENODEV;
+
+ mutex_lock(&session->session_lock);
+
+ /* only valid if found on the session list */
+ list_for_each_entry(it, &session->memory_usage, link)
+ {
+ if (it->id == params->secure_id)
+ {
+ /* found, do the cache op */
+#ifdef CONFIG_COMPAT
+ if (is_compat_task())
+ {
+ umpp_dd_cpu_msync_now(it->mem, params->cache_operation, compat_ptr(params->mapped_ptr.compat_value), params->size);
+ result = 0;
+ }
+ else
+ {
+#endif
+ umpp_dd_cpu_msync_now(it->mem, params->cache_operation, params->mapped_ptr.value, params->size);
+ result = 0;
+#ifdef CONFIG_COMPAT
+ }
+#endif
+ break;
+ }
+ }
+ mutex_unlock(&session->session_lock);
+
+ return result;
+}
+
+
+void umpp_import_handlers_init(umpp_session * session)
+{
+ int i;
+ mutex_lock(&import_list_lock);
+ for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+ {
+ if (import_handlers[i])
+ {
+ import_handlers[i]->session_begin(&session->import_handler_data[i]);
+ /* It is OK if session_begin returned an error.
+ * We won't do any import calls if so */
+ }
+ }
+ mutex_unlock(&import_list_lock);
+}
+
+void umpp_import_handlers_term(umpp_session * session)
+{
+ int i;
+ mutex_lock(&import_list_lock);
+ for ( i = 1; i < UMPP_EXTERNAL_MEM_COUNT; i++ )
+ {
+ /* only call if session_begin succeeded */
+ if (session->import_handler_data[i] != NULL)
+ {
+ /* if session_begin succeeded the handler
+ * should not have unregistered with us */
+ BUG_ON(!import_handlers[i]);
+ import_handlers[i]->session_end(session->import_handler_data[i]);
+ session->import_handler_data[i] = NULL;
+ }
+ }
+ mutex_unlock(&import_list_lock);
+}
+
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler)
+{
+ int res = -EEXIST;
+
+ /* validate input */
+ BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+ BUG_ON(!handler);
+ BUG_ON(!handler->linux_module);
+ BUG_ON(!handler->session_begin);
+ BUG_ON(!handler->session_end);
+ BUG_ON(!handler->import);
+
+ mutex_lock(&import_list_lock);
+
+ if (!import_handlers[type])
+ {
+ import_handlers[type] = handler;
+ res = 0;
+ }
+
+ mutex_unlock(&import_list_lock);
+
+ return res;
+}
+
+void ump_import_module_unregister(enum ump_external_memory_type type)
+{
+ BUG_ON(type == 0 || type >= UMPP_EXTERNAL_MEM_COUNT);
+
+ mutex_lock(&import_list_lock);
+ /* it is an error to call this unless ump_import_module_register() succeeded */
+ BUG_ON(!import_handlers[type]);
+ import_handlers[type] = NULL;
+ mutex_unlock(&import_list_lock);
+}
+
+static struct ump_import_handler * import_handler_get(unsigned int type_id)
+{
+ enum ump_external_memory_type type;
+ struct ump_import_handler * handler;
+
+ /* validate and convert input */
+ /* handle bad data here, not just BUG_ON */
+ if (type_id == 0 || type_id >= UMPP_EXTERNAL_MEM_COUNT)
+ return NULL;
+
+ type = (enum ump_external_memory_type)type_id;
+
+ /* find the handler */
+ mutex_lock(&import_list_lock);
+
+ handler = import_handlers[type];
+
+ if (handler)
+ {
+ if (!try_module_get(handler->linux_module))
+ {
+ handler = NULL;
+ }
+ }
+
+ mutex_unlock(&import_list_lock);
+
+ return handler;
+}
+
+static void import_handler_put(struct ump_import_handler * handler)
+{
+ module_put(handler->linux_module);
+}
+
+static int do_ump_dd_import(umpp_session * session, ump_k_import * params)
+{
+ ump_dd_handle new_allocation = UMP_DD_INVALID_MEMORY_HANDLE;
+ struct ump_import_handler * handler;
+
+ handler = import_handler_get(params->type);
+
+ if (handler)
+ {
+ /* try late binding if not already bound */
+ if (!session->import_handler_data[params->type])
+ {
+ handler->session_begin(&session->import_handler_data[params->type]);
+ }
+
+ /* do we have a bound session? */
+ if (session->import_handler_data[params->type])
+ {
+ new_allocation = handler->import( session->import_handler_data[params->type],
+ params->phandle.value,
+ params->alloc_flags);
+ }
+
+ /* done with the handler */
+ import_handler_put(handler);
+ }
+
+ /* did the import succeed? */
+ if (UMP_DD_INVALID_MEMORY_HANDLE != new_allocation)
+ {
+ umpp_session_memory_usage * tracker;
+
+ tracker = kmalloc(sizeof(*tracker), GFP_KERNEL | __GFP_HARDWALL);
+ if (NULL != tracker)
+ {
+ /* update the return struct with the new ID */
+ params->secure_id = ump_dd_secure_id_get(new_allocation);
+
+ tracker->mem = new_allocation;
+ tracker->id = params->secure_id;
+ atomic_set(&tracker->process_usage_count, 1);
+
+ /* link it into the session in-use list */
+ mutex_lock(&session->session_lock);
+ list_add(&tracker->link, &session->memory_usage);
+ mutex_unlock(&session->session_lock);
+
+ return 0;
+ }
+ ump_dd_release(new_allocation);
+ }
+
+ return -ENOMEM;
+
+}
+
+#ifdef HAVE_UNLOCKED_IOCTL
+static long umpp_linux_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+#else
+static int umpp_linux_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+#endif
+{
+ int ret;
+ uint64_t msg[(UMP_CALL_MAX_SIZE+7)>>3]; /* 64-bit aligned buffer large enough for the largest ioctl argument */
+ uint32_t size = _IOC_SIZE(cmd);
+ struct umpp_session *session = filp->private_data;
+
+#ifndef HAVE_UNLOCKED_IOCTL
+ (void)inode; /* unused arg */
+#endif
+
+ /*
+ * extract the type and number bitfields, and don't decode
+ * wrong cmds: return ENOTTY (inappropriate ioctl) before access_ok()
+ */
+ if (_IOC_TYPE(cmd) != UMP_IOC_MAGIC)
+ {
+ return -ENOTTY;
+ }
+ if (_IOC_NR(cmd) > UMP_IOC_MAXNR)
+ {
+ return -ENOTTY;
+ }
+
+ switch(cmd)
+ {
+ case UMP_FUNC_ALLOCATE:
+ if (size != sizeof(ump_k_allocate))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_allocate(session, (ump_k_allocate *)&msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_SIZEQUERY:
+ if (size != sizeof(ump_k_sizequery))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_sizequery(session,(ump_k_sizequery*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_MSYNC:
+ if (size != sizeof(ump_k_msync))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_msync_now(session,(ump_k_msync*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_IMPORT:
+ if (size != sizeof(ump_k_import))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user*)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_import(session, (ump_k_import*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ /* used only by v1 API */
+ case UMP_FUNC_ALLOCATION_FLAGS_GET:
+ if (size != sizeof(ump_k_allocation_flags))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_allocation_flags_get(session,(ump_k_allocation_flags*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ if (copy_to_user((void *)arg, &msg, size))
+ {
+ return -EFAULT;
+ }
+ return 0;
+ case UMP_FUNC_RETAIN:
+ if (size != sizeof(ump_k_retain))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_retain(session,(ump_k_retain*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ return 0;
+ case UMP_FUNC_RELEASE:
+ if (size != sizeof(ump_k_release))
+ {
+ return -ENOTTY;
+ }
+ if (copy_from_user(&msg, (void __user *)arg, size))
+ {
+ return -EFAULT;
+ }
+ ret = do_ump_dd_release(session,(ump_k_release*) &msg);
+ if (ret)
+ {
+ return ret;
+ }
+ return 0;
+ default:
+ /* not ours */
+ return -ENOTTY;
+ }
+ /* not reached: every case above returns */
+ return -ENOTTY;
+}
+
+
+/* Export UMP kernel space API functions */
+EXPORT_SYMBOL(ump_dd_allocate_64);
+EXPORT_SYMBOL(ump_dd_allocation_flags_get);
+EXPORT_SYMBOL(ump_dd_secure_id_get);
+EXPORT_SYMBOL(ump_dd_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get_64);
+EXPORT_SYMBOL(ump_dd_size_get_64);
+EXPORT_SYMBOL(ump_dd_retain);
+EXPORT_SYMBOL(ump_dd_release);
+EXPORT_SYMBOL(ump_dd_create_from_phys_blocks_64);
+#ifdef CONFIG_KDS
+EXPORT_SYMBOL(ump_dd_kds_resource_get);
+#endif
+
+/* import API */
+EXPORT_SYMBOL(ump_import_module_register);
+EXPORT_SYMBOL(ump_import_module_unregister);
+
+
+
+/* V1 API */
+EXPORT_SYMBOL(ump_dd_handle_create_from_secure_id);
+EXPORT_SYMBOL(ump_dd_phys_block_count_get);
+EXPORT_SYMBOL(ump_dd_phys_block_get);
+EXPORT_SYMBOL(ump_dd_phys_blocks_get);
+EXPORT_SYMBOL(ump_dd_size_get);
+EXPORT_SYMBOL(ump_dd_reference_add);
+EXPORT_SYMBOL(ump_dd_reference_release);
+EXPORT_SYMBOL(ump_dd_handle_create_from_phys_blocks);
+
+
+/* Setup init and exit functions for this module */
+module_init(umpp_linux_initialize_module);
+module_exit(umpp_linux_cleanup_module);
+
+/* And some module information */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ARM Ltd.");
+MODULE_VERSION(UMP_REV_STRING);
diff --git a/drivers/base/ump/src/linux/ump_kernel_linux_mem.c b/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
new file mode 100755
index 000000000000..e575c9573c90
--- /dev/null
+++ b/drivers/base/ump/src/linux/ump_kernel_linux_mem.c
@@ -0,0 +1,250 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ump.h>
+#include <linux/ump-ioctl.h>
+
+#include <linux/version.h>
+#include <linux/module.h> /* kernel module definitions */
+#include <linux/fs.h> /* file system operations */
+#include <linux/cdev.h> /* character device definitions */
+#include <linux/ioport.h> /* request_mem_region */
+#include <linux/mm.h> /* memory manager definitions */
+#include <linux/pfn.h>
+#include <linux/highmem.h> /*kmap*/
+
+#include <linux/compat.h> /* is_compat_task */
+
+#include <common/ump_kernel_core.h>
+#include <ump_arch.h>
+#include <common/ump_kernel_priv.h>
+
+static void umpp_vm_close(struct vm_area_struct *vma)
+{
+ umpp_cpu_mapping * mapping;
+ umpp_session * session;
+ ump_dd_handle handle;
+
+ mapping = (umpp_cpu_mapping*)vma->vm_private_data;
+ UMP_ASSERT(mapping);
+
+ session = mapping->session;
+ handle = mapping->handle;
+
+ umpp_dd_remove_cpu_mapping(mapping->handle, mapping); /* will free the mapping object */
+ ump_dd_release(handle);
+}
+
+
+static const struct vm_operations_struct umpp_vm_ops = {
+ .close = umpp_vm_close
+};
+
+int umpp_phys_commit(umpp_allocation * alloc)
+{
+ uint64_t i;
+
+ /* round up to a page boundary */
+ alloc->size = (alloc->size + PAGE_SIZE - 1) & ~((uint64_t)PAGE_SIZE-1) ;
+ /* calculate number of pages */
+ alloc->blocksCount = alloc->size >> PAGE_SHIFT;
+
+ if( (sizeof(ump_dd_physical_block_64) * alloc->blocksCount) > ((size_t)-1))
+ {
+ printk(KERN_WARNING "UMP: umpp_phys_commit - trying to allocate more than possible\n");
+ return -ENOMEM;
+ }
+
+ alloc->block_array = kmalloc(sizeof(ump_dd_physical_block_64) * alloc->blocksCount, __GFP_HARDWALL | GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ if (NULL == alloc->block_array)
+ {
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < alloc->blocksCount; i++)
+ {
+ void * mp;
+ struct page * page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOWARN | __GFP_COLD);
+ if (NULL == page)
+ {
+ break;
+ }
+
+ alloc->block_array[i].addr = page_to_pfn(page) << PAGE_SHIFT;
+ alloc->block_array[i].size = PAGE_SIZE;
+
+ mp = kmap(page);
+ if (NULL == mp)
+ {
+ __free_page(page);
+ break;
+ }
+
+ memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+ ump_sync_to_memory(PFN_PHYS(page_to_pfn(page)), mp, PAGE_SIZE);
+ kunmap(page);
+ }
+
+ if (i == alloc->blocksCount)
+ {
+ return 0;
+ }
+ else
+ {
+ uint64_t j;
+ for (j = 0; j < i; j++)
+ {
+ struct page * page;
+ page = pfn_to_page(alloc->block_array[j].addr >> PAGE_SHIFT);
+ __free_page(page);
+ }
+
+ kfree(alloc->block_array);
+
+ return -ENOMEM;
+ }
+}
+
+void umpp_phys_free(umpp_allocation * alloc)
+{
+ uint64_t i;
+
+ for (i = 0; i < alloc->blocksCount; i++)
+ {
+ __free_page(pfn_to_page(alloc->block_array[i].addr >> PAGE_SHIFT));
+ }
+
+ kfree(alloc->block_array);
+}
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma)
+{
+ ump_secure_id id;
+ ump_dd_handle h;
+ size_t offset;
+ int err = -EINVAL;
+ size_t length = vma->vm_end - vma->vm_start;
+
+ umpp_cpu_mapping * map = NULL;
+ umpp_session *session = filp->private_data;
+
+ if ( 0 == length )
+ {
+ return -EINVAL;
+ }
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (NULL == map)
+ {
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* unpack our arg */
+#if defined CONFIG_64BIT && CONFIG_64BIT
+ if (is_compat_task())
+ {
+#endif
+ id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_32;
+ offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_32;
+#if defined CONFIG_64BIT && CONFIG_64BIT
+ }
+ else
+ {
+ id = vma->vm_pgoff >> UMP_LINUX_OFFSET_BITS_64;
+ offset = vma->vm_pgoff & UMP_LINUX_OFFSET_MASK_64;
+ }
+#endif
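+ /*
+ * Illustrative only: user space encodes both the secure ID and a page
+ * offset within the allocation into the single mmap() offset argument,
+ * roughly
+ *
+ *   off = ((off_t)secure_id << (UMP_LINUX_OFFSET_BITS_64 + PAGE_SHIFT))
+ *       | (page_offset << PAGE_SHIFT);
+ *   mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ump_fd, off);
+ *
+ * The kernel sees off >> PAGE_SHIFT in vma->vm_pgoff and unpacks it
+ * above; 32-bit and compat callers use the narrower
+ * UMP_LINUX_OFFSET_BITS_32 split.  The constants come from the UMP
+ * ioctl headers (not shown here).
+ */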
+
+ h = ump_dd_from_secure_id(id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE != h)
+ {
+ uint64_t i;
+ uint64_t block_idx;
+ uint64_t block_offset;
+ uint64_t paddr;
+ umpp_allocation * alloc;
+ uint64_t last_byte;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO | VM_MIXEDMAP | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO | VM_MIXEDMAP;
+#endif
+ vma->vm_ops = &umpp_vm_ops;
+ vma->vm_private_data = map;
+
+ alloc = (umpp_allocation*)h;
+
+ if( (alloc->flags & UMP_CONSTRAINT_UNCACHED) != 0)
+ {
+ /* cache disabled flag set, disable caching for cpu mappings */
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ last_byte = length + (offset << PAGE_SHIFT) - 1;
+ if (last_byte >= alloc->size || last_byte < (offset << PAGE_SHIFT))
+ {
+ goto err_out;
+ }
+
+ if (umpp_dd_find_start_block(alloc, offset << PAGE_SHIFT, &block_idx, &block_offset))
+ {
+ goto err_out;
+ }
+
+ paddr = alloc->block_array[block_idx].addr + block_offset;
+
+ for (i = 0; i < (length >> PAGE_SHIFT); i++)
+ {
+ /* check if we've overrun the current block, if so move to the next block */
+ if (paddr >= (alloc->block_array[block_idx].addr + alloc->block_array[block_idx].size))
+ {
+ block_idx++;
+ UMP_ASSERT(block_idx < alloc->blocksCount);
+ paddr = alloc->block_array[block_idx].addr;
+ }
+
+ err = vm_insert_mixed(vma, vma->vm_start + (i << PAGE_SHIFT), paddr >> PAGE_SHIFT);
+ paddr += PAGE_SIZE;
+ }
+
+ map->vaddr_start = (void*)vma->vm_start;
+ map->nr_pages = length >> PAGE_SHIFT;
+ map->page_off = offset;
+ map->handle = h;
+ map->session = session;
+
+ umpp_dd_add_cpu_mapping(h, map);
+
+ return 0;
+
+ err_out:
+
+ ump_dd_release(h);
+ }
+
+ kfree(map);
+
+out:
+
+ return err;
+}
+
diff --git a/drivers/base/ump/src/linux/ump_kernel_linux_mem.h b/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
new file mode 100755
index 000000000000..40dcd30bd3be
--- /dev/null
+++ b/drivers/base/ump/src/linux/ump_kernel_linux_mem.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_KERNEL_LINUX_MEM_H_
+#define _UMP_KERNEL_LINUX_MEM_H_
+
+
+int umpp_linux_mmap(struct file * filp, struct vm_area_struct * vma);
+
+#endif /* _UMP_KERNEL_LINUX_MEM_H_ */
diff --git a/drivers/base/ump/src/ump_arch.h b/drivers/base/ump/src/ump_arch.h
new file mode 100755
index 000000000000..bbbadabef11a
--- /dev/null
+++ b/drivers/base/ump/src/ump_arch.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_ARCH_H_
+#define _UMP_ARCH_H_
+
+#include <common/ump_kernel_core.h>
+
+/**
+ * Device specific setup.
+ * Called by the UMP core code to do host OS/device specific setup.
+ * Typical use case is device node creation for talking to user space.
+ * @return UMP_OK on success, any other value on failure
+ */
+extern ump_result umpp_device_initialize(void);
+
+/**
+ * Device specific teardown.
+ * Undo anything done by umpp_device_initialize().
+ */
+extern void umpp_device_terminate(void);
+
+extern int umpp_phys_commit(umpp_allocation * alloc);
+extern void umpp_phys_free(umpp_allocation * alloc);
+
+#endif /* _UMP_ARCH_H_ */
diff --git a/drivers/base/ump/ump_ref_drv.h b/drivers/base/ump/ump_ref_drv.h
new file mode 100755
index 000000000000..60018326597a
--- /dev/null
+++ b/drivers/base/ump/ump_ref_drv.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump_ref_drv.h
+ *
+ * This file contains the link to the user space part of the UMP API, for use by the MALI 400 gralloc.
+ *
+ */
+
+#ifndef _UMP_REF_DRV_H_
+#define _UMP_REF_DRV_H_
+
+#include <ump/ump.h>
+
+
+#endif /* _UMP_REF_DRV_H_ */
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 3bb6fa3930be..6fcb9b01e57c 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -6,6 +6,19 @@ menu "Character devices"
source "drivers/tty/Kconfig"
+config DEVMEM
+ bool "Memory device driver"
+ default y
+ help
+ The memory driver provides two character devices, mem and kmem, which
+ provide access to the system's memory. The mem device is a view of
+ physical memory, and each byte in the device corresponds to the
+ matching physical address. The kmem device is the same as mem, but
+ the addresses correspond to the kernel's virtual address space rather
+ than physical memory. These devices are standard parts of a Linux
+ system and most users should say Y here. You might say N if you are
+ very security conscious or memory is tight.
+
config DEVKMEM
bool "/dev/kmem virtual device support"
default y
@@ -584,6 +597,10 @@ config DEVPORT
depends on ISA || PCI
default y
+config DCC_TTY
+ tristate "DCC tty driver"
+ depends on ARM
+
source "drivers/s390/char/Kconfig"
config MSM_SMD_PKT
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 7ff1d0d208a7..e0047ed1e74c 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_PCMCIA) += pcmcia/
obj-$(CONFIG_HANGCHECK_TIMER) += hangcheck-timer.o
obj-$(CONFIG_TCG_TPM) += tpm/
+obj-$(CONFIG_DCC_TTY) += dcc_tty.o
obj-$(CONFIG_PS3_FLASH) += ps3flash.o
obj-$(CONFIG_JS_RTC) += js-rtc.o
diff --git a/drivers/char/dcc_tty.c b/drivers/char/dcc_tty.c
new file mode 100644
index 000000000000..0a62d410286f
--- /dev/null
+++ b/drivers/char/dcc_tty.c
@@ -0,0 +1,326 @@
+/* drivers/char/dcc_tty.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/hrtimer.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+
+MODULE_DESCRIPTION("DCC TTY Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+
+DEFINE_SPINLOCK(g_dcc_tty_lock);
+static struct hrtimer g_dcc_timer;
+static char g_dcc_buffer[16];
+static int g_dcc_buffer_head;
+static int g_dcc_buffer_count;
+static unsigned g_dcc_write_delay_usecs = 1;
+static struct tty_driver *g_dcc_tty_driver;
+static struct tty_struct *g_dcc_tty;
+static int g_dcc_tty_open_count;
+
+static void dcc_poll_locked(void)
+{
+ char ch;
+ int rch;
+ int written;
+
+ while (g_dcc_buffer_count) {
+ ch = g_dcc_buffer[g_dcc_buffer_head];
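+ /*
+ * Try to push one character out through the CP14 DCC transmit
+ * register.  The first mrc reads the debug status register with
+ * r15 as the destination, which copies its top bits into the CPSR
+ * flags; the conditional mcrcc/movcc then only execute when the
+ * transmitter is free, and 'written' records whether the character
+ * actually went out.
+ */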
+ asm(
+ "mrc 14, 0, r15, c0, c1, 0\n"
+ "mcrcc 14, 0, %1, c0, c5, 0\n"
+ "movcc %0, #1\n"
+ "movcs %0, #0\n"
+ : "=r" (written)
+ : "r" (ch)
+ );
+ if (written) {
+ if (ch == '\n')
+ g_dcc_buffer[g_dcc_buffer_head] = '\r';
+ else {
+ g_dcc_buffer_head = (g_dcc_buffer_head + 1) % ARRAY_SIZE(g_dcc_buffer);
+ g_dcc_buffer_count--;
+ if (g_dcc_tty)
+ tty_wakeup(g_dcc_tty);
+ }
+ g_dcc_write_delay_usecs = 1;
+ } else {
+ if (g_dcc_write_delay_usecs > 0x100)
+ break;
+ g_dcc_write_delay_usecs <<= 1;
+ udelay(g_dcc_write_delay_usecs);
+ }
+ }
+
+ if (g_dcc_tty && !test_bit(TTY_THROTTLED, &g_dcc_tty->flags)) {
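+ /*
+ * Poll the DCC receive side: bit 30 of the status register is set
+ * when a character is waiting.  rch stays -1 when there is nothing
+ * to read, otherwise it holds the received character, which is
+ * pushed into the tty flip buffer below.
+ */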
+ asm(
+ "mrc 14, 0, %0, c0, c1, 0\n"
+ "tst %0, #(1 << 30)\n"
+ "moveq %0, #-1\n"
+ "mrcne 14, 0, %0, c0, c5, 0\n"
+ : "=r" (rch)
+ );
+ if (rch >= 0) {
+ ch = rch;
+ tty_insert_flip_string(g_dcc_tty->port, &ch, 1);
+ tty_flip_buffer_push(g_dcc_tty->port);
+ }
+ }
+
+
+ if (g_dcc_buffer_count)
+ hrtimer_start(&g_dcc_timer, ktime_set(0, g_dcc_write_delay_usecs * NSEC_PER_USEC), HRTIMER_MODE_REL);
+ else
+ hrtimer_start(&g_dcc_timer, ktime_set(0, 20 * NSEC_PER_MSEC), HRTIMER_MODE_REL);
+}
+
+static int dcc_tty_open(struct tty_struct * tty, struct file * filp)
+{
+ int ret;
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&g_dcc_tty_lock, irq_flags);
+ if (g_dcc_tty == NULL || g_dcc_tty == tty) {
+ g_dcc_tty = tty;
+ g_dcc_tty_open_count++;
+ ret = 0;
+ } else
+ ret = -EBUSY;
+ spin_unlock_irqrestore(&g_dcc_tty_lock, irq_flags);
+
+ printk("dcc_tty_open, tty %p, f_flags %x, returned %d\n", tty, filp->f_flags, ret);
+
+ return ret;
+}
+
+static void dcc_tty_close(struct tty_struct * tty, struct file * filp)
+{
+ printk("dcc_tty_close, tty %p, f_flags %x\n", tty, filp->f_flags);
+ if (g_dcc_tty == tty) {
+ if (--g_dcc_tty_open_count == 0)
+ g_dcc_tty = NULL;
+ }
+}
+
+static int dcc_write(const unsigned char *buf_start, int count)
+{
+ const unsigned char *buf = buf_start;
+ unsigned long irq_flags;
+ int copy_len;
+ int space_left;
+ int tail;
+
+ if (count < 1)
+ return 0;
+
+ spin_lock_irqsave(&g_dcc_tty_lock, irq_flags);
+ do {
+ tail = (g_dcc_buffer_head + g_dcc_buffer_count) % ARRAY_SIZE(g_dcc_buffer);
+ copy_len = ARRAY_SIZE(g_dcc_buffer) - tail;
+ space_left = ARRAY_SIZE(g_dcc_buffer) - g_dcc_buffer_count;
+ if (copy_len > space_left)
+ copy_len = space_left;
+ if (copy_len > count)
+ copy_len = count;
+ memcpy(&g_dcc_buffer[tail], buf, copy_len);
+ g_dcc_buffer_count += copy_len;
+ buf += copy_len;
+ count -= copy_len;
+ if (copy_len < count && copy_len < space_left) {
+ space_left -= copy_len;
+ copy_len = count;
+ if (copy_len > space_left) {
+ copy_len = space_left;
+ }
+ memcpy(g_dcc_buffer, buf, copy_len);
+ buf += copy_len;
+ count -= copy_len;
+ g_dcc_buffer_count += copy_len;
+ }
+ dcc_poll_locked();
+ space_left = ARRAY_SIZE(g_dcc_buffer) - g_dcc_buffer_count;
+ } while(count && space_left);
+ spin_unlock_irqrestore(&g_dcc_tty_lock, irq_flags);
+ return buf - buf_start;
+}
+
+static int dcc_tty_write(struct tty_struct * tty, const unsigned char *buf, int count)
+{
+ int ret;
+ /* printk("dcc_tty_write %p, %d\n", buf, count); */
+ ret = dcc_write(buf, count);
+ if (ret != count)
+ printk("dcc_tty_write %p, %d, returned %d\n", buf, count, ret);
+ return ret;
+}
+
+static int dcc_tty_write_room(struct tty_struct *tty)
+{
+ int space_left;
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&g_dcc_tty_lock, irq_flags);
+ space_left = ARRAY_SIZE(g_dcc_buffer) - g_dcc_buffer_count;
+ spin_unlock_irqrestore(&g_dcc_tty_lock, irq_flags);
+ return space_left;
+}
+
+static int dcc_tty_chars_in_buffer(struct tty_struct *tty)
+{
+ int ret;
+ asm(
+ "mrc 14, 0, %0, c0, c1, 0\n"
+ "mov %0, %0, LSR #30\n"
+ "and %0, %0, #1\n"
+ : "=r" (ret)
+ );
+ return ret;
+}
+
+static void dcc_tty_unthrottle(struct tty_struct * tty)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&g_dcc_tty_lock, irq_flags);
+ dcc_poll_locked();
+ spin_unlock_irqrestore(&g_dcc_tty_lock, irq_flags);
+}
+
+static enum hrtimer_restart dcc_tty_timer_func(struct hrtimer *timer)
+{
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&g_dcc_tty_lock, irq_flags);
+ dcc_poll_locked();
+ spin_unlock_irqrestore(&g_dcc_tty_lock, irq_flags);
+ return HRTIMER_NORESTART;
+}
+
+void dcc_console_write(struct console *co, const char *b, unsigned count)
+{
+#if 1
+ dcc_write(b, count);
+#else
+ /* blocking printk */
+ while (count > 0) {
+ int written;
+ written = dcc_write(b, count);
+ if (written) {
+ b += written;
+ count -= written;
+ }
+ }
+#endif
+}
+
+static struct tty_driver *dcc_console_device(struct console *c, int *index)
+{
+ *index = 0;
+ return g_dcc_tty_driver;
+}
+
+static int __init dcc_console_setup(struct console *co, char *options)
+{
+ if (co->index != 0)
+ return -ENODEV;
+ return 0;
+}
+
+
+static struct console dcc_console =
+{
+ .name = "ttyDCC",
+ .write = dcc_console_write,
+ .device = dcc_console_device,
+ .setup = dcc_console_setup,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
+
+static struct tty_operations dcc_tty_ops = {
+ .open = dcc_tty_open,
+ .close = dcc_tty_close,
+ .write = dcc_tty_write,
+ .write_room = dcc_tty_write_room,
+ .chars_in_buffer = dcc_tty_chars_in_buffer,
+ .unthrottle = dcc_tty_unthrottle,
+};
+
+static int __init dcc_tty_init(void)
+{
+ int ret;
+
+ hrtimer_init(&g_dcc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ g_dcc_timer.function = dcc_tty_timer_func;
+
+ g_dcc_tty_driver = alloc_tty_driver(1);
+ if (!g_dcc_tty_driver) {
+ printk(KERN_ERR "dcc_tty_probe: alloc_tty_driver failed\n");
+ ret = -ENOMEM;
+ goto err_alloc_tty_driver_failed;
+ }
+ g_dcc_tty_driver->owner = THIS_MODULE;
+ g_dcc_tty_driver->driver_name = "dcc";
+ g_dcc_tty_driver->name = "ttyDCC";
+ g_dcc_tty_driver->major = 0; /* auto assign */
+ g_dcc_tty_driver->minor_start = 0;
+ g_dcc_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
+ g_dcc_tty_driver->subtype = SERIAL_TYPE_NORMAL;
+ g_dcc_tty_driver->init_termios = tty_std_termios;
+ g_dcc_tty_driver->flags = TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
+ tty_set_operations(g_dcc_tty_driver, &dcc_tty_ops);
+ ret = tty_register_driver(g_dcc_tty_driver);
+ if (ret) {
+ printk(KERN_ERR "dcc_tty_probe: tty_register_driver failed, %d\n", ret);
+ goto err_tty_register_driver_failed;
+ }
+ tty_register_device(g_dcc_tty_driver, 0, NULL);
+
+ register_console(&dcc_console);
+ hrtimer_start(&g_dcc_timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+
+ return 0;
+
+err_tty_register_driver_failed:
+ put_tty_driver(g_dcc_tty_driver);
+ g_dcc_tty_driver = NULL;
+err_alloc_tty_driver_failed:
+ return ret;
+}
+
+static void __exit dcc_tty_exit(void)
+{
+ int ret;
+
+ tty_unregister_device(g_dcc_tty_driver, 0);
+ ret = tty_unregister_driver(g_dcc_tty_driver);
+ if (ret < 0) {
+ printk(KERN_ERR "dcc_tty_remove: tty_unregister_driver failed, %d\n", ret);
+ } else {
+ put_tty_driver(g_dcc_tty_driver);
+ }
+ g_dcc_tty_driver = NULL;
+}
+
+module_init(dcc_tty_init);
+module_exit(dcc_tty_exit);
+
+
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 1ccbe9482faa..38d3069b7f0a 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -60,6 +60,7 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
}
#endif
+#if defined(CONFIG_DEVMEM) || defined(CONFIG_DEVKMEM)
#ifdef CONFIG_STRICT_DEVMEM
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
@@ -85,7 +86,9 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
return 1;
}
#endif
+#endif
+#ifdef CONFIG_DEVMEM
void __weak unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
}
@@ -212,6 +215,9 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
*ppos += written;
return written;
}
+#endif /* CONFIG_DEVMEM */
+
+#if defined(CONFIG_DEVMEM) || defined(CONFIG_DEVKMEM)
int __weak phys_mem_access_prot_allowed(struct file *file,
unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
@@ -333,6 +339,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
}
return 0;
}
+#endif /* CONFIG_DEVMEM */
#ifdef CONFIG_DEVKMEM
static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
@@ -727,6 +734,8 @@ static loff_t null_lseek(struct file *file, loff_t offset, int orig)
return file->f_pos = 0;
}
+#if defined(CONFIG_DEVMEM) || defined(CONFIG_DEVKMEM) || defined(CONFIG_DEVPORT)
+
/*
* The memory devices use the full 32/64 bits of the offset, and so we cannot
* check against negative addresses: they are ok. The return value is weird,
@@ -760,10 +769,14 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
return ret;
}
+#endif
+
+#if defined(CONFIG_DEVMEM) || defined(CONFIG_DEVKMEM) || defined(CONFIG_DEVPORT)
static int open_port(struct inode *inode, struct file *filp)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
+#endif
#define zero_lseek null_lseek
#define full_lseek null_lseek
@@ -774,6 +787,7 @@ static int open_port(struct inode *inode, struct file *filp)
#define open_kmem open_mem
#define open_oldmem open_mem
+#ifdef CONFIG_DEVMEM
static const struct file_operations mem_fops = {
.llseek = memory_lseek,
.read = read_mem,
@@ -782,6 +796,7 @@ static const struct file_operations mem_fops = {
.open = open_mem,
.get_unmapped_area = get_unmapped_area_mem,
};
+#endif
#ifdef CONFIG_DEVKMEM
static const struct file_operations kmem_fops = {
@@ -851,7 +866,9 @@ static const struct memdev {
const struct file_operations *fops;
struct backing_dev_info *dev_info;
} devlist[] = {
+#ifdef CONFIG_DEVMEM
[1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
+#endif
#ifdef CONFIG_DEVKMEM
[2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
#endif
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index d0d9b2124752..fb42892dff7f 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -55,6 +55,16 @@ config COMMON_CLK_MAX77686
---help---
This driver supports Maxim 77686 crystal oscillator clock.
+config COMMON_CLK_SCPI
+ tristate "Clock driver controlled via SCPI interface"
+ depends on ARM_SCPI_PROTOCOL
+ ---help---
+ This driver provides support for clocks that are controlled
+ by firmware that implements the SCPI interface.
+
+ This driver uses the SCPI Message Protocol to interact with the
+ firmware providing all the clock controls.
+
config COMMON_CLK_SI5351
tristate "Clock driver for SiLabs 5351A/B/C"
depends on I2C
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 137d3e730f86..30c12a8222c1 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARCH_MXS) += mxs/
obj-$(CONFIG_ARCH_SOCFPGA) += socfpga/
obj-$(CONFIG_PLAT_SPEAR) += spear/
obj-$(CONFIG_ARCH_U300) += clk-u300.o
+obj-$(CONFIG_COMMON_CLK_SCPI) += clk-scpi.o
obj-$(CONFIG_COMMON_CLK_VERSATILE) += versatile/
obj-$(CONFIG_ARCH_PRIMA2) += clk-prima2.o
obj-$(CONFIG_PLAT_ORION) += mvebu/
diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
new file mode 100644
index 000000000000..2ced0ceff36b
--- /dev/null
+++ b/drivers/clk/clk-scpi.c
@@ -0,0 +1,357 @@
+/*
+ * System Control and Power Interface (SCPI) Protocol based clock driver
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/scpi_protocol.h>
+
+struct scpi_clk {
+ u32 id;
+ const char *name;
+ struct clk_hw hw;
+ struct scpi_dvfs_info *info;
+ unsigned long rate_min;
+ unsigned long rate_max;
+};
+
+#define to_scpi_clk(clk) container_of(clk, struct scpi_clk, hw)
+
+static struct scpi_ops *scpi_ops;
+static struct platform_device *cpufreq_dev;
+
+static unsigned long scpi_clk_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct scpi_clk *clk = to_scpi_clk(hw);
+
+ return scpi_ops->clk_get_val(clk->id);
+}
+
+static long scpi_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long *parent_rate)
+{
+ struct scpi_clk *clk = to_scpi_clk(hw);
+
+ if (clk->rate_min && rate < clk->rate_min)
+ rate = clk->rate_min;
+ if (clk->rate_max && rate > clk->rate_max)
+ rate = clk->rate_max;
+
+ return rate;
+}
+
+static int scpi_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long parent_rate)
+{
+ struct scpi_clk *clk = to_scpi_clk(hw);
+
+ return scpi_ops->clk_set_val(clk->id, rate);
+}
+
+static struct clk_ops scpi_clk_ops = {
+ .recalc_rate = scpi_clk_recalc_rate,
+ .round_rate = scpi_clk_round_rate,
+ .set_rate = scpi_clk_set_rate,
+};
+
+/* find closest match to given frequency in OPP table */
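+/*
+ * Illustrative example (hypothetical OPP values): with OPPs of 600, 800
+ * and 1000 MHz, a request for 900 MHz rounds up to 1000 MHz (the lowest
+ * OPP at or above the request), while a request for 1200 MHz falls back
+ * to 1000 MHz (the highest OPP available).
+ */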
+static int __scpi_dvfs_round_rate(struct scpi_clk *clk, unsigned long rate)
+{
+ int idx;
+ u32 fmin = 0, fmax = ~0, ftmp;
+ struct scpi_opp *opp = clk->info->opps;
+
+ for (idx = 0; idx < clk->info->count; idx++, opp++) {
+ ftmp = opp->freq;
+ if (ftmp >= (u32)rate) {
+ if (ftmp <= fmax)
+ fmax = ftmp;
+ } else {
+ if (ftmp >= fmin)
+ fmin = ftmp;
+ }
+ }
+ if (fmax != ~0)
+ return fmax;
+ return fmin;
+}
+
+static unsigned long scpi_dvfs_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct scpi_clk *clk = to_scpi_clk(hw);
+ int idx = scpi_ops->dvfs_get_idx(clk->id);
+ struct scpi_opp *opp;
+
+ if (idx < 0)
+ return 0;
+
+ opp = clk->info->opps + idx;
+ return opp->freq;
+}
+
+static long scpi_dvfs_round_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long *parent_rate)
+{
+ struct scpi_clk *clk = to_scpi_clk(hw);
+
+ return __scpi_dvfs_round_rate(clk, rate);
+}
+
+static int __scpi_find_dvfs_index(struct scpi_clk *clk, unsigned long rate)
+{
+ int idx, max_opp = clk->info->count;
+ struct scpi_opp *opp = clk->info->opps;
+
+ for (idx = 0; idx < max_opp; idx++, opp++)
+ if (opp->freq == rate)
+ break;
+ return (idx == max_opp) ? -EINVAL : idx;
+}
+
+static int scpi_dvfs_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long parent_rate)
+{
+ struct scpi_clk *clk = to_scpi_clk(hw);
+ int ret = __scpi_find_dvfs_index(clk, rate);
+
+ if (ret < 0)
+ return ret;
+ return scpi_ops->dvfs_set_idx(clk->id, (u8)ret);
+}
+
+static struct clk_ops scpi_dvfs_ops = {
+ .recalc_rate = scpi_dvfs_recalc_rate,
+ .round_rate = scpi_dvfs_round_rate,
+ .set_rate = scpi_dvfs_set_rate,
+};
+
+static struct clk *
+scpi_dvfs_ops_init(struct device *dev, struct device_node *np,
+ struct scpi_clk *sclk)
+{
+ struct clk_init_data init;
+ struct scpi_dvfs_info *info;
+
+ init.name = sclk->name;
+ init.flags = CLK_IS_ROOT;
+ init.num_parents = 0;
+ init.ops = &scpi_dvfs_ops;
+ sclk->hw.init = &init;
+
+ info = scpi_ops->dvfs_get_info(sclk->id);
+ if (IS_ERR(info))
+ return (struct clk *)info;
+
+ sclk->info = info;
+
+ return devm_clk_register(dev, &sclk->hw);
+}
+
+static struct clk *
+scpi_clk_ops_init(struct device *dev, struct device_node *np,
+ struct scpi_clk *sclk)
+{
+ struct clk_init_data init;
+ int ret;
+
+ init.name = sclk->name;
+ init.flags = CLK_IS_ROOT;
+ init.num_parents = 0;
+ init.ops = &scpi_clk_ops;
+ sclk->hw.init = &init;
+
+ ret = scpi_ops->clk_get_range(sclk->id, &sclk->rate_min,
+ &sclk->rate_max);
+ if (!sclk->rate_max)
+ ret = -EINVAL;
+ if (ret)
+ return ERR_PTR(ret);
+
+ return devm_clk_register(dev, &sclk->hw);
+}
+
+static const struct of_device_id scpi_clk_match[] = {
+ { .compatible = "arm,scpi-dvfs", .data = scpi_dvfs_ops_init, },
+ { .compatible = "arm,scpi-clk", .data = scpi_clk_ops_init, },
+ {}
+};
+
+static int scpi_clk_probe(struct platform_device *pdev)
+{
+ struct clk **clks;
+ int idx, count;
+ struct clk_onecell_data *clk_data;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct clk *(*clk_ops_init)(struct device *, struct device_node *,
+ struct scpi_clk *);
+
+ if (!of_device_is_available(np))
+ return -ENODEV;
+
+ clk_ops_init = of_match_node(scpi_clk_match, np)->data;
+ if (!clk_ops_init)
+ return -ENODEV;
+
+ count = of_property_count_strings(np, "clock-output-names");
+ if (count < 0) {
+ dev_err(dev, "%s: invalid clock output count\n", np->name);
+ return -EINVAL;
+ }
+
+ clk_data = devm_kmalloc(dev, sizeof(*clk_data), GFP_KERNEL);
+ if (!clk_data) {
+ dev_err(dev, "failed to allocate clock provider data\n");
+ return -ENOMEM;
+ }
+
+ clks = devm_kmalloc(dev, count * sizeof(*clks), GFP_KERNEL);
+ if (!clks) {
+ dev_err(dev, "failed to allocate clock providers\n");
+ return -ENOMEM;
+ }
+
+ for (idx = 0; idx < count; idx++) {
+ struct scpi_clk *sclk;
+ u32 val;
+
+ sclk = devm_kzalloc(dev, sizeof(*sclk), GFP_KERNEL);
+ if (!sclk) {
+ dev_err(dev, "failed to allocate scpi clocks\n");
+ return -ENOMEM;
+ }
+
+ if (of_property_read_string_index(np, "clock-output-names",
+ idx, &sclk->name)) {
+ dev_err(dev, "invalid clock name @ %s\n", np->name);
+ return -EINVAL;
+ }
+
+ if (of_property_read_u32_index(np, "clock-indices",
+ idx, &val)) {
+ dev_err(dev, "invalid clock index @ %s\n", np->name);
+ return -EINVAL;
+ }
+
+ sclk->id = val;
+
+ clks[idx] = clk_ops_init(dev, np, sclk);
+ if (IS_ERR(clks[idx])) {
+ dev_err(dev, "failed to register clock '%s'\n",
+ sclk->name);
+ }
+
+ dev_dbg(dev, "Registered clock '%s'\n", sclk->name);
+ }
+
+ clk_data->clks = clks;
+ clk_data->clk_num = idx;
+ of_clk_add_provider(np, of_clk_src_onecell_get, clk_data);
+
+ platform_set_drvdata(pdev, clk_data);
+
+ if (clk_ops_init == scpi_dvfs_ops_init) {
+ /* Add a virtual cpufreq device that depends on the SCPI clock */
+ cpufreq_dev = platform_device_register_simple("scpi-cpufreq",
+ -1, NULL, 0);
+ if (IS_ERR(cpufreq_dev)) {
+ pr_warn("unable to register cpufreq device\n");
+ cpufreq_dev = NULL;
+ }
+ }
+ return 0;
+}
+
+static int scpi_clk_remove(struct platform_device *pdev)
+{
+ if (cpufreq_dev) {
+ platform_device_unregister(cpufreq_dev);
+ cpufreq_dev = NULL;
+ }
+
+ of_clk_del_provider(pdev->dev.of_node);
+ platform_set_drvdata(pdev, NULL);
+ return 0;
+}
+
+static struct platform_driver scpi_clk_driver = {
+ .driver = {
+ .name = "scpi_clk",
+ .of_match_table = scpi_clk_match,
+ },
+ .probe = scpi_clk_probe,
+ .remove = scpi_clk_remove,
+};
+
+static int scpi_clocks_probe(struct platform_device *pdev)
+{
+ scpi_ops = get_scpi_ops();
+ if (!scpi_ops)
+ return -ENXIO;
+
+ return of_platform_populate(pdev->dev.of_node, scpi_clk_match,
+ NULL, &pdev->dev);
+}
+
+static int scpi_clocks_remove(struct platform_device *pdev)
+{
+ of_platform_depopulate(&pdev->dev);
+ scpi_ops = NULL;
+ return 0;
+}
+
+static const struct of_device_id scpi_clocks_ids[] = {
+ { .compatible = "arm,scpi-clocks", },
+ {}
+};
+
+static struct platform_driver scpi_clocks_driver = {
+ .driver = {
+ .name = "scpi_clocks",
+ .of_match_table = scpi_clocks_ids,
+ },
+ .probe = scpi_clocks_probe,
+ .remove = scpi_clocks_remove,
+};
+
+static int __init scpi_clocks_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&scpi_clk_driver);
+ if (ret)
+ return ret;
+
+ return platform_driver_register(&scpi_clocks_driver);
+}
+module_init(scpi_clocks_init);
+
+static void __exit scpi_clocks_exit(void)
+{
+ platform_driver_unregister(&scpi_clk_driver);
+ platform_driver_unregister(&scpi_clocks_driver);
+}
+module_exit(scpi_clocks_exit);
+
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+MODULE_DESCRIPTION("ARM SCPI clock driver");
+MODULE_LICENSE("GPL");
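For orientation only, and not part of the patch itself: a minimal, hypothetical consumer sketch of how a driver would pick up one of these firmware-managed clocks through the standard clk API once the provider above is registered. The probe function, device and target rate are made up for illustration.

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int example_consumer_probe(struct platform_device *pdev)
{
	/* Resolved via the consumer's "clocks" phandle to the SCPI provider. */
	struct clk *clk = devm_clk_get(&pdev->dev, NULL);
	long rate;

	if (IS_ERR(clk))
		return PTR_ERR(clk); /* often -EPROBE_DEFER until scpi_clk_probe() has run */

	/* clk_round_rate()/clk_set_rate() end up in scpi_clk_ops or scpi_dvfs_ops above. */
	rate = clk_round_rate(clk, 800000000);
	if (rate < 0)
		return rate;

	return clk_set_rate(clk, rate);
}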
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index fccac4a41386..ffe379292f0e 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2069,7 +2069,7 @@ EXPORT_SYMBOL_GPL(of_clk_del_provider);
struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec)
{
struct of_clk_provider *provider;
- struct clk *clk = ERR_PTR(-ENOENT);
+ struct clk *clk = ERR_PTR(-EPROBE_DEFER);
/* Check if we have such a provider in our array */
mutex_lock(&of_clk_lock);
diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c
index 442a31363873..cc6a6d19ba4b 100644
--- a/drivers/clk/clkdev.c
+++ b/drivers/clk/clkdev.c
@@ -73,8 +73,9 @@ struct clk *of_clk_get_by_name(struct device_node *np, const char *name)
if (!IS_ERR(clk))
break;
else if (name && index >= 0) {
- pr_err("ERROR: could not get clock %s:%s(%i)\n",
- np->full_name, name ? name : "", index);
+ if (PTR_ERR(clk) != -EPROBE_DEFER)
+ pr_err("ERROR: could not get clock %s:%s(%i)\n",
+ np->full_name, name ? name : "", index);
return clk;
}
@@ -159,6 +160,8 @@ struct clk *clk_get(struct device *dev, const char *con_id)
clk = of_clk_get_by_name(dev->of_node, con_id);
if (!IS_ERR(clk) && __clk_get(clk))
return clk;
+ if (PTR_ERR(clk) == -EPROBE_DEFER)
+ return clk;
}
return clk_get_sys(dev_id, con_id);
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index a535c7bf8574..a076a6e4487b 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -100,6 +100,9 @@ void __init vexpress_osc_of_setup(struct device_node *node)
struct clk *clk;
u32 range[2];
+ if (!of_device_is_available(node))
+ return;
+
osc = kzalloc(sizeof(*osc), GFP_KERNEL);
if (!osc)
return;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 67bbbd8ae507..297538fef10a 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -673,14 +673,16 @@ static void __init arch_timer_mem_init(struct device_node *np)
u32 cnttidr;
arch_timers_present |= ARCH_MEM_TIMER;
- cntctlbase = of_iomap(np, 0);
- if (!cntctlbase) {
- pr_err("arch_timer: Can't find CNTCTLBase\n");
- return;
- }
+ if (of_property_read_u32(np, "arm,cnttidr", &cnttidr)) {
+ cntctlbase = of_iomap(np, 0);
+ if (!cntctlbase) {
+ pr_err("arch_timer: Can't find CNTCTLBase\n");
+ return;
+ }
- cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
- iounmap(cntctlbase);
+ cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
+ iounmap(cntctlbase);
+ }
/*
* Try to find a virtual capable frame. Otherwise fall back to a
diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c
index 37f5325bec95..b9ddd9e3a2f5 100644
--- a/drivers/clocksource/clksrc-of.c
+++ b/drivers/clocksource/clksrc-of.c
@@ -30,6 +30,9 @@ void __init clocksource_of_init(void)
clocksource_of_init_fn init_func;
for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
+ if (!of_device_is_available(np))
+ continue;
+
init_func = match->data;
init_func(np);
}
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index a9c1324843eb..9e1f7d9b52b5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -102,6 +102,16 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
Be aware that not all cpufreq drivers support the conservative
governor. If unsure have a look at the help section of the
driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+ bool "interactive"
+ select CPU_FREQ_GOV_INTERACTIVE
+ help
+ Use the CPUFreq governor 'interactive' as default. This allows
+ you to get a full dynamic cpu frequency capable system by simply
+ loading your cpufreq low-level hardware driver, using the
+ 'interactive' governor for latency-sensitive workloads.
+
endchoice
config CPU_FREQ_GOV_PERFORMANCE
@@ -160,6 +170,24 @@ config CPU_FREQ_GOV_ONDEMAND
If in doubt, say N.
+config CPU_FREQ_GOV_INTERACTIVE
+ tristate "'interactive' cpufreq policy governor"
+ default n
+ help
+ 'interactive' - This driver adds a dynamic cpufreq policy governor
+ designed for latency-sensitive workloads.
+
+ This governor attempts to reduce the latency of clock
+ increases so that the system is more responsive to
+ interactive workloads.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_interactive.
+
+ For details, take a look at linux/Documentation/cpu-freq.
+
+ If in doubt, say N.
+
config CPU_FREQ_GOV_CONSERVATIVE
tristate "'conservative' cpufreq governor"
depends on CPU_FREQ
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 22727792350b..06c82cf6edab 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -24,6 +24,16 @@ config ARM_VEXPRESS_BL_CPUFREQ
This enables the CPUfreq driver for ARM Vexpress big.LITTLE platform.
If in doubt, say N.
+config ARM_SCPI_CPUFREQ
+ tristate "SCPI based CPUfreq driver"
+ depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL
+ help
+ This adds the CPUfreq driver support for ARM big.LITTLE platforms
+ using the SCPI interface for CPU power management.
+
+ This driver works only if the firmware supporting CPU DVFS adheres
+ to the SCPI protocol.
+
config ARM_EXYNOS_CPUFREQ
bool "SAMSUNG EXYNOS SoCs"
depends on ARCH_EXYNOS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 505c62bceb9d..517fe85726c9 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
+obj-$(CONFIG_CPU_FREQ_GOV_INTERACTIVE) += cpufreq_interactive.o
obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o
# CPUfreq cross-arch helpers
@@ -73,6 +74,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o
obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o
obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o
obj-$(CONFIG_ARCH_TEGRA) += tegra-cpufreq.o
+obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o
##################################################################################
# PowerPC platform drivers
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index eaee6e222207..b95b27f76db0 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -165,6 +165,8 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
mutex_unlock(&cluster_lock[old_cluster]);
}
+ if (bL_cpufreq_get_rate(cpu) != new_rate)
+ return -EIO;
return 0;
}
@@ -330,7 +332,6 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
- char name[14] = "cpu-cluster.X";
int ret;
if (atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -350,8 +351,12 @@ static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
goto atomic_dec;
}
- name[12] = cluster + '0';
- clk[cluster] = clk_get_sys(name, NULL);
+ clk[cluster] = clk_get(cpu_dev, NULL);
+ if (IS_ERR(clk[cluster])) {
+ char name[14] = "cpu-cluster.X";
+ name[12] = cluster + '0';
+ clk[cluster] = clk_get_sys(name, NULL);
+ }
if (!IS_ERR(clk[cluster])) {
dev_dbg(cpu_dev, "%s: clk: %p & freq table: %p, cluster: %d\n",
__func__, clk[cluster], freq_table[cluster],
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 66f6cf5064ec..016294db2a79 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -17,7 +17,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <asm/cputime.h>
#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
@@ -25,6 +27,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
+#include <linux/tick.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/cpu.h>
@@ -133,6 +136,51 @@ bool have_governor_per_policy(void)
{
return cpufreq_driver->have_governor_per_policy;
}
+EXPORT_SYMBOL_GPL(have_governor_per_policy);
+
+struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
+{
+ if (have_governor_per_policy())
+ return &policy->kobj;
+ else
+ return cpufreq_global_kobject;
+}
+EXPORT_SYMBOL_GPL(get_governor_parent_kobj);
+
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
+{
+ u64 idle_time;
+ u64 cur_wall_time;
+ u64 busy_time;
+
+ cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+ busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+ busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+ busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+ busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+ busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+
+ idle_time = cur_wall_time - busy_time;
+ if (wall)
+ *wall = cputime_to_usecs(cur_wall_time);
+
+ return cputime_to_usecs(idle_time);
+}
+
+u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
+{
+ u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);
+
+ if (idle_time == -1ULL)
+ return get_cpu_idle_time_jiffy(cpu, wall);
+ else if (!io_busy)
+ idle_time += get_cpu_iowait_time_us(cpu, wall);
+
+ return idle_time;
+}
+EXPORT_SYMBOL_GPL(get_cpu_idle_time);
static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
{
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 27b0e2a295ea..ece1df8eac85 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -23,20 +23,11 @@
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/tick.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include "cpufreq_governor.h"
-static struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
-{
- if (have_governor_per_policy())
- return &policy->kobj;
- else
- return cpufreq_global_kobject;
-}
-
static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
{
if (have_governor_per_policy())
@@ -45,41 +36,6 @@ static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data)
return dbs_data->cdata->attr_group_gov_sys;
}
-static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
-{
- u64 idle_time;
- u64 cur_wall_time;
- u64 busy_time;
-
- cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-
- busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
- busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
-
- idle_time = cur_wall_time - busy_time;
- if (wall)
- *wall = cputime_to_usecs(cur_wall_time);
-
- return cputime_to_usecs(idle_time);
-}
-
-u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
-{
- u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);
-
- if (idle_time == -1ULL)
- return get_cpu_idle_time_jiffy(cpu, wall);
- else if (!io_busy)
- idle_time += get_cpu_iowait_time_us(cpu, wall);
-
- return idle_time;
-}
-EXPORT_SYMBOL_GPL(get_cpu_idle_time);
-
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
{
struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
@@ -396,6 +352,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
mutex_lock(&dbs_data->mutex);
mutex_destroy(&cpu_cdbs->timer_mutex);
+ cpu_cdbs->cur_policy = NULL;
mutex_unlock(&dbs_data->mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 4a9058aeb57e..c8028ce7554e 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -255,7 +255,6 @@ static ssize_t show_sampling_rate_min_gov_pol \
return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \
}
-u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
bool need_load_eval(struct cpu_dbs_common_info *cdbs,
unsigned int sampling_rate);
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
new file mode 100644
index 000000000000..cb477d6a21d6
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -0,0 +1,1375 @@
+/*
+ * drivers/cpufreq/cpufreq_interactive.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ *
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/tick.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include "cpufreq_governor.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/cpufreq_interactive.h>
+
+struct cpufreq_interactive_cpuinfo {
+ struct timer_list cpu_timer;
+ struct timer_list cpu_slack_timer;
+ spinlock_t load_lock; /* protects the next 4 fields */
+ u64 time_in_idle;
+ u64 time_in_idle_timestamp;
+ u64 cputime_speedadj;
+ u64 cputime_speedadj_timestamp;
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *freq_table;
+ spinlock_t target_freq_lock; /* protects target_freq */
+ unsigned int target_freq;
+ unsigned int floor_freq;
+ unsigned int max_freq;
+ u64 floor_validate_time;
+ u64 hispeed_validate_time;
+ struct rw_semaphore enable_sem;
+ int governor_enabled;
+};
+
+static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);
+
+/* realtime thread handles frequency scaling */
+static struct task_struct *speedchange_task;
+static cpumask_t speedchange_cpumask;
+static spinlock_t speedchange_cpumask_lock;
+static struct mutex gov_lock;
+
+/* Target load. Lower values result in higher CPU speeds. */
+#define DEFAULT_TARGET_LOAD 90
+static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};
+
+#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
+#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
+static unsigned int default_above_hispeed_delay[] = {
+ DEFAULT_ABOVE_HISPEED_DELAY };
+
+struct cpufreq_interactive_tunables {
+ int usage_count;
+ /* Hi speed to bump to from lo speed when the load bursts (default: policy max) */
+ unsigned int hispeed_freq;
+ /* Go to hi speed when CPU load at or above this value. */
+#define DEFAULT_GO_HISPEED_LOAD 99
+ unsigned long go_hispeed_load;
+ /* Target load. Lower values result in higher CPU speeds. */
+ spinlock_t target_loads_lock;
+ unsigned int *target_loads;
+ int ntarget_loads;
+ /*
+ * The minimum amount of time to spend at a frequency before we can ramp
+ * down.
+ */
+#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
+ unsigned long min_sample_time;
+ /*
+ * The sample rate of the timer used to increase frequency
+ */
+ unsigned long timer_rate;
+ /*
+ * Wait this long before raising speed above hispeed, by default a
+ * single timer interval.
+ */
+ spinlock_t above_hispeed_delay_lock;
+ unsigned int *above_hispeed_delay;
+ int nabove_hispeed_delay;
+ /* Non-zero means indefinite speed boost active */
+ int boost_val;
+ /* Duration of a boost pulse in usecs */
+ int boostpulse_duration_val;
+ /* End time of boost pulse in ktime converted to usecs */
+ u64 boostpulse_endtime;
+ bool boosted;
+ /*
+ * Max additional time to wait in idle, beyond timer_rate, at speeds
+ * above minimum before wakeup to reduce speed, or -1 if unnecessary.
+ */
+#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
+ int timer_slack_val;
+ bool io_is_busy;
+};
+
+/* For cases where we have a single governor instance for the whole system */
+static struct cpufreq_interactive_tunables *common_tunables;
+
+static struct attribute_group *get_sysfs_attr(void);
+
+static void cpufreq_interactive_timer_resched(
+ struct cpufreq_interactive_cpuinfo *pcpu)
+{
+ struct cpufreq_interactive_tunables *tunables =
+ pcpu->policy->governor_data;
+ unsigned long expires;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcpu->load_lock, flags);
+ pcpu->time_in_idle =
+ get_cpu_idle_time(smp_processor_id(),
+ &pcpu->time_in_idle_timestamp,
+ tunables->io_is_busy);
+ pcpu->cputime_speedadj = 0;
+ pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
+ expires = jiffies + usecs_to_jiffies(tunables->timer_rate);
+ mod_timer_pinned(&pcpu->cpu_timer, expires);
+
+ if (tunables->timer_slack_val >= 0 &&
+ pcpu->target_freq > pcpu->policy->min) {
+ expires += usecs_to_jiffies(tunables->timer_slack_val);
+ mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
+ }
+
+ spin_unlock_irqrestore(&pcpu->load_lock, flags);
+}
+
+/* The caller must hold the enable_sem write semaphore to avoid any timer race.
+ * The cpu_timer and cpu_slack_timer must be deactivated when calling this
+ * function.
+ */
+static void cpufreq_interactive_timer_start(
+ struct cpufreq_interactive_tunables *tunables, int cpu)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
+ unsigned long expires = jiffies +
+ usecs_to_jiffies(tunables->timer_rate);
+ unsigned long flags;
+
+ pcpu->cpu_timer.expires = expires;
+ add_timer_on(&pcpu->cpu_timer, cpu);
+ if (tunables->timer_slack_val >= 0 &&
+ pcpu->target_freq > pcpu->policy->min) {
+ expires += usecs_to_jiffies(tunables->timer_slack_val);
+ pcpu->cpu_slack_timer.expires = expires;
+ add_timer_on(&pcpu->cpu_slack_timer, cpu);
+ }
+
+ spin_lock_irqsave(&pcpu->load_lock, flags);
+ pcpu->time_in_idle =
+ get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp,
+ tunables->io_is_busy);
+ pcpu->cputime_speedadj = 0;
+ pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
+ spin_unlock_irqrestore(&pcpu->load_lock, flags);
+}
+
+static unsigned int freq_to_above_hispeed_delay(
+ struct cpufreq_interactive_tunables *tunables,
+ unsigned int freq)
+{
+ int i;
+ unsigned int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+
+ for (i = 0; i < tunables->nabove_hispeed_delay - 1 &&
+ freq >= tunables->above_hispeed_delay[i+1]; i += 2)
+ ;
+
+ ret = tunables->above_hispeed_delay[i];
+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+ return ret;
+}
+
+static unsigned int freq_to_targetload(
+ struct cpufreq_interactive_tunables *tunables, unsigned int freq)
+{
+ int i;
+ unsigned int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->target_loads_lock, flags);
+
+ for (i = 0; i < tunables->ntarget_loads - 1 &&
+ freq >= tunables->target_loads[i+1]; i += 2)
+ ;
+
+ ret = tunables->target_loads[i];
+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+ return ret;
+}
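+/*
+ * Illustrative example (hypothetical values): writing
+ * "85 1000000:90 1700000:99" to the target_loads attribute is tokenized
+ * into {85, 1000000, 90, 1700000, 99}, so freq_to_targetload() returns
+ * 85 below 1 GHz, 90 from 1 GHz up to (but not including) 1.7 GHz, and
+ * 99 at or above 1.7 GHz.
+ */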
+
+/*
+ * If increasing frequencies never map to a lower target load then
+ * choose_freq() will find the minimum frequency that does not exceed its
+ * target load given the current load.
+ */
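+/*
+ * Worked example (hypothetical numbers): at 1 GHz with 45% load,
+ * loadadjfreq = 45 * 1000000; with a target load of 90 the loop asks
+ * for the lowest table frequency >= 45000000 / 90 = 500000 kHz, i.e.
+ * the slowest speed whose projected load stays at or below 90%.
+ */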
+static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu,
+ unsigned int loadadjfreq)
+{
+ unsigned int freq = pcpu->policy->cur;
+ unsigned int prevfreq, freqmin, freqmax;
+ unsigned int tl;
+ int index;
+
+ freqmin = 0;
+ freqmax = UINT_MAX;
+
+ do {
+ prevfreq = freq;
+ tl = freq_to_targetload(pcpu->policy->governor_data, freq);
+
+ /*
+ * Find the lowest frequency where the computed load is less
+ * than or equal to the target load.
+ */
+
+ if (cpufreq_frequency_table_target(
+ pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
+ CPUFREQ_RELATION_L, &index))
+ break;
+ freq = pcpu->freq_table[index].frequency;
+
+ if (freq > prevfreq) {
+ /* The previous frequency is too low. */
+ freqmin = prevfreq;
+
+ if (freq >= freqmax) {
+ /*
+ * Find the highest frequency that is less
+ * than freqmax.
+ */
+ if (cpufreq_frequency_table_target(
+ pcpu->policy, pcpu->freq_table,
+ freqmax - 1, CPUFREQ_RELATION_H,
+ &index))
+ break;
+ freq = pcpu->freq_table[index].frequency;
+
+ if (freq == freqmin) {
+ /*
+ * The first frequency below freqmax
+ * has already been found to be too
+ * low. freqmax is the lowest speed
+ * we found that is fast enough.
+ */
+ freq = freqmax;
+ break;
+ }
+ }
+ } else if (freq < prevfreq) {
+ /* The previous frequency is high enough. */
+ freqmax = prevfreq;
+
+ if (freq <= freqmin) {
+ /*
+ * Find the lowest frequency that is higher
+ * than freqmin.
+ */
+ if (cpufreq_frequency_table_target(
+ pcpu->policy, pcpu->freq_table,
+ freqmin + 1, CPUFREQ_RELATION_L,
+ &index))
+ break;
+ freq = pcpu->freq_table[index].frequency;
+
+ /*
+ * If freqmax is the first frequency above
+ * freqmin then we have already found that
+ * this speed is fast enough.
+ */
+ if (freq == freqmax)
+ break;
+ }
+ }
+
+ /* If same frequency chosen as previous then done. */
+ } while (freq != prevfreq);
+
+ return freq;
+}
+
+static u64 update_load(int cpu)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
+ struct cpufreq_interactive_tunables *tunables =
+ pcpu->policy->governor_data;
+ u64 now;
+ u64 now_idle;
+ u64 delta_idle;
+ u64 delta_time;
+ u64 active_time;
+
+ now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
+ delta_idle = (now_idle - pcpu->time_in_idle);
+ delta_time = (now - pcpu->time_in_idle_timestamp);
+
+ if (delta_time <= delta_idle)
+ active_time = 0;
+ else
+ active_time = delta_time - delta_idle;
+
+ pcpu->cputime_speedadj += active_time * pcpu->policy->cur;
+
+ pcpu->time_in_idle = now_idle;
+ pcpu->time_in_idle_timestamp = now;
+ return now;
+}
+
+static void cpufreq_interactive_timer(unsigned long data)
+{
+ u64 now;
+ unsigned int delta_time;
+ u64 cputime_speedadj;
+ int cpu_load;
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, data);
+ struct cpufreq_interactive_tunables *tunables =
+ pcpu->policy->governor_data;
+ unsigned int new_freq;
+ unsigned int loadadjfreq;
+ unsigned int index;
+ unsigned long flags;
+
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return;
+ if (!pcpu->governor_enabled)
+ goto exit;
+
+ spin_lock_irqsave(&pcpu->load_lock, flags);
+ now = update_load(data);
+ delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
+ cputime_speedadj = pcpu->cputime_speedadj;
+ spin_unlock_irqrestore(&pcpu->load_lock, flags);
+
+ if (WARN_ON_ONCE(!delta_time))
+ goto rearm;
+
+ spin_lock_irqsave(&pcpu->target_freq_lock, flags);
+ do_div(cputime_speedadj, delta_time);
+ loadadjfreq = (unsigned int)cputime_speedadj * 100;
+ cpu_load = loadadjfreq / pcpu->policy->cur;
+ tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime;
+
+ if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) {
+ if (pcpu->target_freq < tunables->hispeed_freq) {
+ new_freq = tunables->hispeed_freq;
+ } else {
+ new_freq = choose_freq(pcpu, loadadjfreq);
+
+ if (new_freq < tunables->hispeed_freq)
+ new_freq = tunables->hispeed_freq;
+ }
+ } else {
+ new_freq = choose_freq(pcpu, loadadjfreq);
+ if (new_freq > tunables->hispeed_freq &&
+ pcpu->target_freq < tunables->hispeed_freq)
+ new_freq = tunables->hispeed_freq;
+ }
+
+ if (pcpu->target_freq >= tunables->hispeed_freq &&
+ new_freq > pcpu->target_freq &&
+ now - pcpu->hispeed_validate_time <
+ freq_to_above_hispeed_delay(tunables, pcpu->target_freq)) {
+ trace_cpufreq_interactive_notyet(
+ data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+
+ pcpu->hispeed_validate_time = now;
+
+ if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
+ new_freq, CPUFREQ_RELATION_L,
+ &index)) {
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+
+ new_freq = pcpu->freq_table[index].frequency;
+
+ /*
+ * Do not scale below floor_freq unless we have been at or above the
+ * floor frequency for the minimum sample time since last validated.
+ */
+ if (new_freq < pcpu->floor_freq) {
+ if (now - pcpu->floor_validate_time <
+ tunables->min_sample_time) {
+ trace_cpufreq_interactive_notyet(
+ data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm;
+ }
+ }
+
+ /*
+ * Update the timestamp for checking whether speed has been held at
+ * or above the selected frequency for a minimum of min_sample_time,
+ * if not boosted to hispeed_freq. If boosted to hispeed_freq then we
+ * allow the speed to drop as soon as the boostpulse duration expires
+ * (or the indefinite boost is turned off).
+ */
+
+ if (!tunables->boosted || new_freq > tunables->hispeed_freq) {
+ pcpu->floor_freq = new_freq;
+ pcpu->floor_validate_time = now;
+ }
+
+ if (pcpu->target_freq == new_freq &&
+ pcpu->target_freq <= pcpu->policy->cur) {
+ trace_cpufreq_interactive_already(
+ data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ goto rearm_if_notmax;
+ }
+
+ trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
+ pcpu->policy->cur, new_freq);
+
+ pcpu->target_freq = new_freq;
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+ cpumask_set_cpu(data, &speedchange_cpumask);
+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+ wake_up_process(speedchange_task);
+
+rearm_if_notmax:
+ /*
+ * Already at max speed with no need to change it; wait until the
+ * next idle to re-evaluate, no timer needed.
+ */
+ if (pcpu->target_freq == pcpu->policy->max)
+ goto exit;
+
+rearm:
+ if (!timer_pending(&pcpu->cpu_timer))
+ cpufreq_interactive_timer_resched(pcpu);
+
+exit:
+ up_read(&pcpu->enable_sem);
+ return;
+}
+
+static void cpufreq_interactive_idle_start(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+ int pending;
+
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return;
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ return;
+ }
+
+ pending = timer_pending(&pcpu->cpu_timer);
+
+ if (pcpu->target_freq != pcpu->policy->min) {
+ /*
+ * Entering idle while not at lowest speed. On some
+ * platforms this can hold the other CPU(s) at that speed
+ * even though the CPU is idle. Set a timer to re-evaluate
+ * speed so this idle CPU doesn't hold the other CPUs above
+ * min indefinitely. This should probably be a quirk of
+ * the CPUFreq driver.
+ */
+ if (!pending)
+ cpufreq_interactive_timer_resched(pcpu);
+ }
+
+ up_read(&pcpu->enable_sem);
+}
+
+static void cpufreq_interactive_idle_end(void)
+{
+ struct cpufreq_interactive_cpuinfo *pcpu =
+ &per_cpu(cpuinfo, smp_processor_id());
+
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return;
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ return;
+ }
+
+ /* Arm the timer for 1-2 ticks later if not already. */
+ if (!timer_pending(&pcpu->cpu_timer)) {
+ cpufreq_interactive_timer_resched(pcpu);
+ } else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) {
+ del_timer(&pcpu->cpu_timer);
+ del_timer(&pcpu->cpu_slack_timer);
+ cpufreq_interactive_timer(smp_processor_id());
+ }
+
+ up_read(&pcpu->enable_sem);
+}
+
+static int cpufreq_interactive_speedchange_task(void *data)
+{
+ unsigned int cpu;
+ cpumask_t tmp_mask;
+ unsigned long flags;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+
+ if (cpumask_empty(&speedchange_cpumask)) {
+ spin_unlock_irqrestore(&speedchange_cpumask_lock,
+ flags);
+ schedule();
+
+ if (kthread_should_stop())
+ break;
+
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags);
+ }
+
+ set_current_state(TASK_RUNNING);
+ tmp_mask = speedchange_cpumask;
+ cpumask_clear(&speedchange_cpumask);
+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
+
+ for_each_cpu(cpu, &tmp_mask) {
+ unsigned int j;
+ unsigned int max_freq = 0;
+
+ pcpu = &per_cpu(cpuinfo, cpu);
+ if (!down_read_trylock(&pcpu->enable_sem))
+ continue;
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ continue;
+ }
+
+ for_each_cpu(j, pcpu->policy->cpus) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, j);
+
+ if (pjcpu->target_freq > max_freq)
+ max_freq = pjcpu->target_freq;
+ }
+
+ if (max_freq != pcpu->policy->cur)
+ __cpufreq_driver_target(pcpu->policy,
+ max_freq,
+ CPUFREQ_RELATION_H);
+ trace_cpufreq_interactive_setspeed(cpu,
+ pcpu->target_freq,
+ pcpu->policy->cur);
+
+ up_read(&pcpu->enable_sem);
+ }
+ }
+
+ return 0;
+}
+
+static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunables)
+{
+ int i;
+ int anyboost = 0;
+ unsigned long flags[2];
+ struct cpufreq_interactive_cpuinfo *pcpu;
+
+ tunables->boosted = true;
+
+ spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]);
+
+ for_each_online_cpu(i) {
+ pcpu = &per_cpu(cpuinfo, i);
+ if (tunables != pcpu->policy->governor_data)
+ continue;
+
+ spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]);
+ if (pcpu->target_freq < tunables->hispeed_freq) {
+ pcpu->target_freq = tunables->hispeed_freq;
+ cpumask_set_cpu(i, &speedchange_cpumask);
+ pcpu->hispeed_validate_time =
+ ktime_to_us(ktime_get());
+ anyboost = 1;
+ }
+
+ /*
+ * Set floor freq and (re)start timer for when last
+ * validated.
+ */
+
+ pcpu->floor_freq = tunables->hispeed_freq;
+ pcpu->floor_validate_time = ktime_to_us(ktime_get());
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]);
+ }
+
+ spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]);
+
+ if (anyboost)
+ wake_up_process(speedchange_task);
+}
+
+static int cpufreq_interactive_notifier(
+ struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ int cpu;
+ unsigned long flags;
+
+ if (val == CPUFREQ_POSTCHANGE) {
+ pcpu = &per_cpu(cpuinfo, freq->cpu);
+ if (!down_read_trylock(&pcpu->enable_sem))
+ return 0;
+ if (!pcpu->governor_enabled) {
+ up_read(&pcpu->enable_sem);
+ return 0;
+ }
+
+ for_each_cpu(cpu, pcpu->policy->cpus) {
+ struct cpufreq_interactive_cpuinfo *pjcpu =
+ &per_cpu(cpuinfo, cpu);
+ if (cpu != freq->cpu) {
+ if (!down_read_trylock(&pjcpu->enable_sem))
+ continue;
+ if (!pjcpu->governor_enabled) {
+ up_read(&pjcpu->enable_sem);
+ continue;
+ }
+ }
+ spin_lock_irqsave(&pjcpu->load_lock, flags);
+ update_load(cpu);
+ spin_unlock_irqrestore(&pjcpu->load_lock, flags);
+ if (cpu != freq->cpu)
+ up_read(&pjcpu->enable_sem);
+ }
+
+ up_read(&pcpu->enable_sem);
+ }
+ return 0;
+}
+
+static struct notifier_block cpufreq_notifier_block = {
+ .notifier_call = cpufreq_interactive_notifier,
+};
+
+static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
+{
+ const char *cp;
+ int i;
+ int ntokens = 1;
+ unsigned int *tokenized_data;
+ int err = -EINVAL;
+
+ cp = buf;
+ while ((cp = strpbrk(cp + 1, " :")))
+ ntokens++;
+
+ if (!(ntokens & 0x1))
+ goto err;
+
+ tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
+ if (!tokenized_data) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ cp = buf;
+ i = 0;
+ while (i < ntokens) {
+ if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
+ goto err_kfree;
+
+ cp = strpbrk(cp, " :");
+ if (!cp)
+ break;
+ cp++;
+ }
+
+ if (i != ntokens)
+ goto err_kfree;
+
+ *num_tokens = ntokens;
+ return tokenized_data;
+
+err_kfree:
+ kfree(tokenized_data);
+err:
+ return ERR_PTR(err);
+}
+
+static ssize_t show_target_loads(
+ struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ int i;
+ ssize_t ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->target_loads_lock, flags);
+
+ for (i = 0; i < tunables->ntarget_loads; i++)
+ ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i],
+ i & 0x1 ? ":" : " ");
+
+ sprintf(buf + ret - 1, "\n");
+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+ return ret;
+}
+
+static ssize_t store_target_loads(
+ struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ntokens;
+ unsigned int *new_target_loads = NULL;
+ unsigned long flags;
+
+ new_target_loads = get_tokenized_data(buf, &ntokens);
+ if (IS_ERR(new_target_loads))
+ return PTR_RET(new_target_loads);
+
+ spin_lock_irqsave(&tunables->target_loads_lock, flags);
+ if (tunables->target_loads != default_target_loads)
+ kfree(tunables->target_loads);
+ tunables->target_loads = new_target_loads;
+ tunables->ntarget_loads = ntokens;
+ spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
+ return count;
+}
+
+static ssize_t show_above_hispeed_delay(
+ struct cpufreq_interactive_tunables *tunables, char *buf)
+{
+ int i;
+ ssize_t ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+
+ for (i = 0; i < tunables->nabove_hispeed_delay; i++)
+ ret += sprintf(buf + ret, "%u%s",
+ tunables->above_hispeed_delay[i],
+ i & 0x1 ? ":" : " ");
+
+ sprintf(buf + ret - 1, "\n");
+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+ return ret;
+}
+
+static ssize_t store_above_hispeed_delay(
+ struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ntokens;
+ unsigned int *new_above_hispeed_delay = NULL;
+ unsigned long flags;
+
+ new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
+ if (IS_ERR(new_above_hispeed_delay))
+ return PTR_RET(new_above_hispeed_delay);
+
+ spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
+ if (tunables->above_hispeed_delay != default_above_hispeed_delay)
+ kfree(tunables->above_hispeed_delay);
+ tunables->above_hispeed_delay = new_above_hispeed_delay;
+ tunables->nabove_hispeed_delay = ntokens;
+ spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
+ return count;
+
+}
+
+static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", tunables->hispeed_freq);
+}
+
+static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ long unsigned int val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->hispeed_freq = val;
+ return count;
+}
+
+static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables
+ *tunables, char *buf)
+{
+ return sprintf(buf, "%lu\n", tunables->go_hispeed_load);
+}
+
+static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables
+ *tunables, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->go_hispeed_load = val;
+ return count;
+}
+
+static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables
+ *tunables, char *buf)
+{
+ return sprintf(buf, "%lu\n", tunables->min_sample_time);
+}
+
+static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables
+ *tunables, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->min_sample_time = val;
+ return count;
+}
+
+static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", tunables->timer_rate);
+}
+
+static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = strict_strtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->timer_rate = val;
+ return count;
+}
+
+static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", tunables->timer_slack_val);
+}
+
+static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ long val;
+
+ ret = kstrtol(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->timer_slack_val = val;
+ return count;
+}
+
+static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", tunables->boost_val);
+}
+
+static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->boost_val = val;
+
+ if (tunables->boost_val) {
+ trace_cpufreq_interactive_boost("on");
+ if (!tunables->boosted)
+ cpufreq_interactive_boost(tunables);
+ } else {
+ tunables->boostpulse_endtime = ktime_to_us(ktime_get());
+ trace_cpufreq_interactive_unboost("off");
+ }
+
+ return count;
+}
+
+static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->boostpulse_endtime = ktime_to_us(ktime_get()) +
+ tunables->boostpulse_duration_val;
+ trace_cpufreq_interactive_boost("pulse");
+ if (!tunables->boosted)
+ cpufreq_interactive_boost(tunables);
+ return count;
+}
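+/*
+ * Usage sketch (path assumes the system-wide sysfs instance): writing any
+ * value to .../cpufreq/interactive/boostpulse bumps every CPU governed by
+ * these tunables to hispeed_freq for boostpulse_duration microseconds,
+ * after which normal load-based scaling resumes.
+ */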
+
+static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables
+ *tunables, char *buf)
+{
+ return sprintf(buf, "%d\n", tunables->boostpulse_duration_val);
+}
+
+static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables
+ *tunables, const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+
+ tunables->boostpulse_duration_val = val;
+ return count;
+}
+
+static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", tunables->io_is_busy);
+}
+
+static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables,
+ const char *buf, size_t count)
+{
+ int ret;
+ unsigned long val;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret < 0)
+ return ret;
+ tunables->io_is_busy = val;
+ return count;
+}
+
+/*
+ * Create show/store routines
+ * - sys: One governor instance for complete SYSTEM
+ * - pol: One governor instance per struct cpufreq_policy
+ */
+#define show_gov_pol_sys(file_name) \
+static ssize_t show_##file_name##_gov_sys \
+(struct kobject *kobj, struct attribute *attr, char *buf) \
+{ \
+ return show_##file_name(common_tunables, buf); \
+} \
+ \
+static ssize_t show_##file_name##_gov_pol \
+(struct cpufreq_policy *policy, char *buf) \
+{ \
+ return show_##file_name(policy->governor_data, buf); \
+}
+
+#define store_gov_pol_sys(file_name) \
+static ssize_t store_##file_name##_gov_sys \
+(struct kobject *kobj, struct attribute *attr, const char *buf, \
+ size_t count) \
+{ \
+ return store_##file_name(common_tunables, buf, count); \
+} \
+ \
+static ssize_t store_##file_name##_gov_pol \
+(struct cpufreq_policy *policy, const char *buf, size_t count) \
+{ \
+ return store_##file_name(policy->governor_data, buf, count); \
+}
+
+#define show_store_gov_pol_sys(file_name) \
+show_gov_pol_sys(file_name); \
+store_gov_pol_sys(file_name)
+
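+/*
+ * For example, show_store_gov_pol_sys(timer_rate) below generates
+ * show_timer_rate_gov_sys()/store_timer_rate_gov_sys(), which operate on
+ * common_tunables, and show_timer_rate_gov_pol()/store_timer_rate_gov_pol(),
+ * which operate on policy->governor_data.
+ */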
+show_store_gov_pol_sys(target_loads);
+show_store_gov_pol_sys(above_hispeed_delay);
+show_store_gov_pol_sys(hispeed_freq);
+show_store_gov_pol_sys(go_hispeed_load);
+show_store_gov_pol_sys(min_sample_time);
+show_store_gov_pol_sys(timer_rate);
+show_store_gov_pol_sys(timer_slack);
+show_store_gov_pol_sys(boost);
+store_gov_pol_sys(boostpulse);
+show_store_gov_pol_sys(boostpulse_duration);
+show_store_gov_pol_sys(io_is_busy);
+
+#define gov_sys_attr_rw(_name) \
+static struct global_attr _name##_gov_sys = \
+__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys)
+
+#define gov_pol_attr_rw(_name) \
+static struct freq_attr _name##_gov_pol = \
+__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
+
+#define gov_sys_pol_attr_rw(_name) \
+ gov_sys_attr_rw(_name); \
+ gov_pol_attr_rw(_name)
+
+gov_sys_pol_attr_rw(target_loads);
+gov_sys_pol_attr_rw(above_hispeed_delay);
+gov_sys_pol_attr_rw(hispeed_freq);
+gov_sys_pol_attr_rw(go_hispeed_load);
+gov_sys_pol_attr_rw(min_sample_time);
+gov_sys_pol_attr_rw(timer_rate);
+gov_sys_pol_attr_rw(timer_slack);
+gov_sys_pol_attr_rw(boost);
+gov_sys_pol_attr_rw(boostpulse_duration);
+gov_sys_pol_attr_rw(io_is_busy);
+
+static struct global_attr boostpulse_gov_sys =
+ __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys);
+
+static struct freq_attr boostpulse_gov_pol =
+ __ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol);
+
+/* One Governor instance for entire system */
+static struct attribute *interactive_attributes_gov_sys[] = {
+ &target_loads_gov_sys.attr,
+ &above_hispeed_delay_gov_sys.attr,
+ &hispeed_freq_gov_sys.attr,
+ &go_hispeed_load_gov_sys.attr,
+ &min_sample_time_gov_sys.attr,
+ &timer_rate_gov_sys.attr,
+ &timer_slack_gov_sys.attr,
+ &boost_gov_sys.attr,
+ &boostpulse_gov_sys.attr,
+ &boostpulse_duration_gov_sys.attr,
+ &io_is_busy_gov_sys.attr,
+ NULL,
+};
+
+static struct attribute_group interactive_attr_group_gov_sys = {
+ .attrs = interactive_attributes_gov_sys,
+ .name = "interactive",
+};
+
+/* Per policy governor instance */
+static struct attribute *interactive_attributes_gov_pol[] = {
+ &target_loads_gov_pol.attr,
+ &above_hispeed_delay_gov_pol.attr,
+ &hispeed_freq_gov_pol.attr,
+ &go_hispeed_load_gov_pol.attr,
+ &min_sample_time_gov_pol.attr,
+ &timer_rate_gov_pol.attr,
+ &timer_slack_gov_pol.attr,
+ &boost_gov_pol.attr,
+ &boostpulse_gov_pol.attr,
+ &boostpulse_duration_gov_pol.attr,
+ &io_is_busy_gov_pol.attr,
+ NULL,
+};
+
+static struct attribute_group interactive_attr_group_gov_pol = {
+ .attrs = interactive_attributes_gov_pol,
+ .name = "interactive",
+};
+
+static struct attribute_group *get_sysfs_attr(void)
+{
+ if (have_governor_per_policy())
+ return &interactive_attr_group_gov_pol;
+ else
+ return &interactive_attr_group_gov_sys;
+}
+
+static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ switch (val) {
+ case IDLE_START:
+ cpufreq_interactive_idle_start();
+ break;
+ case IDLE_END:
+ cpufreq_interactive_idle_end();
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block cpufreq_interactive_idle_nb = {
+ .notifier_call = cpufreq_interactive_idle_notifier,
+};
+
+static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int rc;
+ unsigned int j;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct cpufreq_frequency_table *freq_table;
+ struct cpufreq_interactive_tunables *tunables;
+ unsigned long flags;
+
+ if (have_governor_per_policy())
+ tunables = policy->governor_data;
+ else
+ tunables = common_tunables;
+
+ WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT));
+
+ switch (event) {
+ case CPUFREQ_GOV_POLICY_INIT:
+ if (have_governor_per_policy()) {
+ WARN_ON(tunables);
+ } else if (tunables) {
+ tunables->usage_count++;
+ policy->governor_data = tunables;
+ return 0;
+ }
+
+ tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
+ if (!tunables) {
+ pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
+ return -ENOMEM;
+ }
+
+ tunables->usage_count = 1;
+ tunables->above_hispeed_delay = default_above_hispeed_delay;
+ tunables->nabove_hispeed_delay =
+ ARRAY_SIZE(default_above_hispeed_delay);
+ tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
+ tunables->target_loads = default_target_loads;
+ tunables->ntarget_loads = ARRAY_SIZE(default_target_loads);
+ tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
+ tunables->timer_rate = DEFAULT_TIMER_RATE;
+ tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME;
+ tunables->timer_slack_val = DEFAULT_TIMER_SLACK;
+
+ spin_lock_init(&tunables->target_loads_lock);
+ spin_lock_init(&tunables->above_hispeed_delay_lock);
+
+ policy->governor_data = tunables;
+ if (!have_governor_per_policy())
+ common_tunables = tunables;
+
+ rc = sysfs_create_group(get_governor_parent_kobj(policy),
+ get_sysfs_attr());
+ if (rc) {
+ kfree(tunables);
+ policy->governor_data = NULL;
+ if (!have_governor_per_policy())
+ common_tunables = NULL;
+ return rc;
+ }
+
+ if (!policy->governor->initialized) {
+ idle_notifier_register(&cpufreq_interactive_idle_nb);
+ cpufreq_register_notifier(&cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
+ break;
+
+ case CPUFREQ_GOV_POLICY_EXIT:
+ if (!--tunables->usage_count) {
+ if (policy->governor->initialized == 1) {
+ cpufreq_unregister_notifier(&cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ idle_notifier_unregister(&cpufreq_interactive_idle_nb);
+ }
+
+ sysfs_remove_group(get_governor_parent_kobj(policy),
+ get_sysfs_attr());
+ kfree(tunables);
+ common_tunables = NULL;
+ }
+
+ policy->governor_data = NULL;
+ break;
+
+ case CPUFREQ_GOV_START:
+ mutex_lock(&gov_lock);
+
+ freq_table = cpufreq_frequency_get_table(policy->cpu);
+ if (!tunables->hispeed_freq)
+ tunables->hispeed_freq = policy->max;
+
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ pcpu->policy = policy;
+ pcpu->target_freq = policy->cur;
+ pcpu->freq_table = freq_table;
+ pcpu->floor_freq = pcpu->target_freq;
+ pcpu->floor_validate_time =
+ ktime_to_us(ktime_get());
+ pcpu->hispeed_validate_time =
+ pcpu->floor_validate_time;
+ pcpu->max_freq = policy->max;
+ down_write(&pcpu->enable_sem);
+ del_timer_sync(&pcpu->cpu_timer);
+ del_timer_sync(&pcpu->cpu_slack_timer);
+ cpufreq_interactive_timer_start(tunables, j);
+ pcpu->governor_enabled = 1;
+ up_write(&pcpu->enable_sem);
+ }
+
+ mutex_unlock(&gov_lock);
+ break;
+
+ case CPUFREQ_GOV_STOP:
+ mutex_lock(&gov_lock);
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+ down_write(&pcpu->enable_sem);
+ pcpu->governor_enabled = 0;
+ del_timer_sync(&pcpu->cpu_timer);
+ del_timer_sync(&pcpu->cpu_slack_timer);
+ up_write(&pcpu->enable_sem);
+ }
+
+ mutex_unlock(&gov_lock);
+ break;
+
+ case CPUFREQ_GOV_LIMITS:
+ if (policy->max < policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->max, CPUFREQ_RELATION_H);
+ else if (policy->min > policy->cur)
+ __cpufreq_driver_target(policy,
+ policy->min, CPUFREQ_RELATION_L);
+ for_each_cpu(j, policy->cpus) {
+ pcpu = &per_cpu(cpuinfo, j);
+
+ down_read(&pcpu->enable_sem);
+ if (pcpu->governor_enabled == 0) {
+ up_read(&pcpu->enable_sem);
+ continue;
+ }
+
+ spin_lock_irqsave(&pcpu->target_freq_lock, flags);
+ if (policy->max < pcpu->target_freq)
+ pcpu->target_freq = policy->max;
+ else if (policy->min > pcpu->target_freq)
+ pcpu->target_freq = policy->min;
+
+ spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
+ up_read(&pcpu->enable_sem);
+
+ /* Reschedule the timers only if policy->max is raised.
+ * Delete the timers first, otherwise the timer callback may
+ * return without re-arming the timer when it fails to
+ * acquire the semaphore. That race could leave the timer
+ * stopped unexpectedly.
+ */
+
+ if (policy->max > pcpu->max_freq) {
+ down_write(&pcpu->enable_sem);
+ del_timer_sync(&pcpu->cpu_timer);
+ del_timer_sync(&pcpu->cpu_slack_timer);
+ cpufreq_interactive_timer_start(tunables, j);
+ up_write(&pcpu->enable_sem);
+ }
+
+ pcpu->max_freq = policy->max;
+ }
+ break;
+ }
+ return 0;
+}
+
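+/*
+ * The governor struct stays global when "interactive" is the default
+ * governor so the cpufreq core can reference it at boot; otherwise it is
+ * kept static to this file.
+ */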
+#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+static
+#endif
+struct cpufreq_governor cpufreq_gov_interactive = {
+ .name = "interactive",
+ .governor = cpufreq_governor_interactive,
+ .max_transition_latency = 10000000,
+ .owner = THIS_MODULE,
+};
+
+static void cpufreq_interactive_nop_timer(unsigned long data)
+{
+}
+
+static int __init cpufreq_interactive_init(void)
+{
+ unsigned int i;
+ struct cpufreq_interactive_cpuinfo *pcpu;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ /* Initialize per-cpu timers */
+ for_each_possible_cpu(i) {
+ pcpu = &per_cpu(cpuinfo, i);
+ init_timer_deferrable(&pcpu->cpu_timer);
+ pcpu->cpu_timer.function = cpufreq_interactive_timer;
+ pcpu->cpu_timer.data = i;
+ init_timer(&pcpu->cpu_slack_timer);
+ pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;
+ spin_lock_init(&pcpu->load_lock);
+ spin_lock_init(&pcpu->target_freq_lock);
+ init_rwsem(&pcpu->enable_sem);
+ }
+
+ spin_lock_init(&speedchange_cpumask_lock);
+ mutex_init(&gov_lock);
+ speedchange_task =
+ kthread_create(cpufreq_interactive_speedchange_task, NULL,
+ "cfinteractive");
+ if (IS_ERR(speedchange_task))
+ return PTR_ERR(speedchange_task);
+
+ sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
+ get_task_struct(speedchange_task);
+
+ /* NB: wake up so the thread does not look hung to the freezer */
+ wake_up_process(speedchange_task);
+
+ return cpufreq_register_governor(&cpufreq_gov_interactive);
+}
+
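+/*
+ * As the default governor it must be registered before cpufreq drivers
+ * probe, hence the earlier fs_initcall(); otherwise a regular
+ * module_init() is sufficient.
+ */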
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
+fs_initcall(cpufreq_interactive_init);
+#else
+module_init(cpufreq_interactive_init);
+#endif
+
+static void __exit cpufreq_interactive_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_interactive);
+ kthread_stop(speedchange_task);
+ put_task_struct(speedchange_task);
+}
+
+module_exit(cpufreq_interactive_exit);
+
+MODULE_AUTHOR("Mike Chan <mike@android.com>");
+MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
+ "Latency sensitive workloads");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 9823037f194b..690af18f55a9 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -20,6 +20,8 @@
#include <linux/kobject.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
+#include <linux/sort.h>
+#include <linux/err.h>
#include <asm/cputime.h>
#ifdef CONFIG_BL_SWITCHER
#include <asm/bL_switcher.h>
@@ -41,6 +43,20 @@ struct cpufreq_stats {
#endif
};
+struct all_cpufreq_stats {
+ unsigned int state_num;
+ cputime64_t *time_in_state;
+ unsigned int *freq_table;
+};
+
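+/* Sorted union of every frequency seen in any CPU's frequency table */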
+struct all_freq_table {
+ unsigned int *freq_table;
+ unsigned int table_size;
+};
+
+static struct all_freq_table *all_freq_table;
+
+static DEFINE_PER_CPU(struct all_cpufreq_stats *, all_cpufreq_stats);
static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table);
struct cpufreq_stats_attribute {
@@ -51,14 +67,24 @@ struct cpufreq_stats_attribute {
static int cpufreq_stats_update(unsigned int cpu)
{
struct cpufreq_stats *stat;
+ struct all_cpufreq_stats *all_stat;
unsigned long long cur_time;
cur_time = get_jiffies_64();
spin_lock(&cpufreq_stats_lock);
stat = per_cpu(cpufreq_stats_table, cpu);
- if (stat->time_in_state)
+ all_stat = per_cpu(all_cpufreq_stats, cpu);
+ if (!stat) {
+ spin_unlock(&cpufreq_stats_lock);
+ return 0;
+ }
+ if (stat->time_in_state) {
stat->time_in_state[stat->last_index] +=
cur_time - stat->last_time;
+ if (all_stat)
+ all_stat->time_in_state[stat->last_index] +=
+ cur_time - stat->last_time;
+ }
stat->last_time = cur_time;
spin_unlock(&cpufreq_stats_lock);
return 0;
@@ -89,6 +115,62 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
return len;
}
+static int get_index_all_cpufreq_stat(struct all_cpufreq_stats *all_stat,
+ unsigned int freq)
+{
+ int i;
+ if (!all_stat)
+ return -1;
+ for (i = 0; i < all_stat->state_num; i++) {
+ if (all_stat->freq_table[i] == freq)
+ return i;
+ }
+ return -1;
+}
+
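+/*
+ * Print one row per known frequency and one column per CPU, giving the
+ * cumulative time (in clock ticks) spent at that frequency; "N/A" marks
+ * frequencies not present in a CPU's own table.
+ */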
+static ssize_t show_all_time_in_state(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t len = 0;
+ unsigned int i, cpu, freq;
+ int index;
+ struct all_cpufreq_stats *all_stat;
+ struct cpufreq_policy *policy;
+
+ len += scnprintf(buf + len, PAGE_SIZE - len, "freq\t\t");
+ for_each_possible_cpu(cpu) {
+ len += scnprintf(buf + len, PAGE_SIZE - len, "cpu%d\t\t", cpu);
+ if (cpu_online(cpu))
+ cpufreq_stats_update(cpu);
+ }
+
+ if (!all_freq_table)
+ goto out;
+ for (i = 0; i < all_freq_table->table_size; i++) {
+ freq = all_freq_table->freq_table[i];
+ len += scnprintf(buf + len, PAGE_SIZE - len, "\n%u\t\t", freq);
+ for_each_possible_cpu(cpu) {
+ policy = cpufreq_cpu_get(cpu);
+ if (policy == NULL)
+ continue;
+ all_stat = per_cpu(all_cpufreq_stats, policy->cpu);
+ index = get_index_all_cpufreq_stat(all_stat, freq);
+ if (index != -1) {
+ len += scnprintf(buf + len, PAGE_SIZE - len,
+ "%llu\t\t", (unsigned long long)
+ cputime64_to_clock_t(all_stat->time_in_state[index]));
+ } else {
+ len += scnprintf(buf + len, PAGE_SIZE - len,
+ "N/A\t\t");
+ }
+ cpufreq_cpu_put(policy);
+ }
+ }
+
+out:
+ len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+ return len;
+}
+
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
{
@@ -152,6 +234,9 @@ static struct attribute_group stats_attr_group = {
.name = "stats"
};
+static struct kobj_attribute _attr_all_time_in_state = __ATTR(all_time_in_state,
+ 0444, show_all_time_in_state, NULL);
+
static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
{
int index;
@@ -198,6 +283,29 @@ put_ref:
cpufreq_cpu_put(policy);
}
+static void cpufreq_allstats_free(void)
+{
+ int cpu;
+ struct all_cpufreq_stats *all_stat;
+
+ sysfs_remove_file(cpufreq_global_kobject,
+ &_attr_all_time_in_state.attr);
+
+ for_each_possible_cpu(cpu) {
+ all_stat = per_cpu(all_cpufreq_stats, cpu);
+ if (!all_stat)
+ continue;
+ kfree(all_stat->time_in_state);
+ kfree(all_stat);
+ per_cpu(all_cpufreq_stats, cpu) = NULL;
+ }
+ if (all_freq_table) {
+ kfree(all_freq_table->freq_table);
+ kfree(all_freq_table);
+ all_freq_table = NULL;
+ }
+}
+
static int cpufreq_stats_create_table(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table)
{
@@ -284,6 +392,106 @@ static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy)
stat->cpu = policy->cpu;
}
+static int compare_for_sort(const void *lhs_ptr, const void *rhs_ptr)
+{
+ unsigned int lhs = *(const unsigned int *)(lhs_ptr);
+ unsigned int rhs = *(const unsigned int *)(rhs_ptr);
+ if (lhs < rhs)
+ return -1;
+ if (lhs > rhs)
+ return 1;
+ return 0;
+}
+
+static bool check_all_freq_table(unsigned int freq)
+{
+ int i;
+ for (i = 0; i < all_freq_table->table_size; i++) {
+ if (freq == all_freq_table->freq_table[i])
+ return true;
+ }
+ return false;
+}
+
+static void create_all_freq_table(void)
+{
+ all_freq_table = kzalloc(sizeof(struct all_freq_table),
+ GFP_KERNEL);
+ if (!all_freq_table)
+ pr_warn("could not allocate memory for all_freq_table\n");
+ return;
+}
+
+static void add_all_freq_table(unsigned int freq)
+{
+ unsigned int size;
+ unsigned int *new_table;
+
+ size = sizeof(unsigned int) * (all_freq_table->table_size + 1);
+ /* krealloc returns NULL on failure, not an ERR_PTR value, and the
+ * previous buffer must be preserved if the reallocation fails.
+ */
+ new_table = krealloc(all_freq_table->freq_table, size, GFP_ATOMIC);
+ if (!new_table) {
+ pr_warn("Could not reallocate memory for freq_table\n");
+ return;
+ }
+ all_freq_table->freq_table = new_table;
+ all_freq_table->freq_table[all_freq_table->table_size++] = freq;
+}
+
+static void cpufreq_allstats_create(unsigned int cpu)
+{
+ int i, j = 0;
+ unsigned int alloc_size, count = 0;
+ struct cpufreq_frequency_table *table = cpufreq_frequency_get_table(cpu);
+ struct all_cpufreq_stats *all_stat;
+ bool sort_needed = false;
+
+ if (!table)
+ return;
+
+ for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID)
+ continue;
+ count++;
+ }
+
+ all_stat = kzalloc(sizeof(struct all_cpufreq_stats),
+ GFP_KERNEL);
+ if (!all_stat) {
+ pr_warn("Cannot allocate memory for cpufreq stats\n");
+ return;
+ }
+
+ /* Allocate memory for freq table per cpu as well as clock ticks per freq */
+ alloc_size = count * sizeof(int) + count * sizeof(cputime64_t);
+ all_stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
+ if (!all_stat->time_in_state) {
+ pr_warn("Cannot allocate memory for cpufreq time_in_state\n");
+ kfree(all_stat);
+ all_stat = NULL;
+ return;
+ }
+ all_stat->freq_table = (unsigned int *)
+ (all_stat->time_in_state + count);
+
+ spin_lock(&cpufreq_stats_lock);
+ for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+ unsigned int freq = table[i].frequency;
+ if (freq == CPUFREQ_ENTRY_INVALID)
+ continue;
+ all_stat->freq_table[j++] = freq;
+ if (all_freq_table && !check_all_freq_table(freq)) {
+ add_all_freq_table(freq);
+ sort_needed = true;
+ }
+ }
+ if (sort_needed)
+ sort(all_freq_table->freq_table, all_freq_table->table_size,
+ sizeof(unsigned int), &compare_for_sort, NULL);
+ all_stat->state_num = j;
+ per_cpu(all_cpufreq_stats, cpu) = all_stat;
+ spin_unlock(&cpufreq_stats_lock);
+}
+
static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
unsigned long val, void *data)
{
@@ -302,6 +510,10 @@ static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
table = cpufreq_frequency_get_table(cpu);
if (!table)
return 0;
+
+ if (!per_cpu(all_cpufreq_stats, cpu))
+ cpufreq_allstats_create(cpu);
+
ret = cpufreq_stats_create_table(policy, table);
if (ret)
return ret;
@@ -344,6 +556,30 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
return 0;
}
+static int cpufreq_stats_create_table_cpu(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+ struct cpufreq_frequency_table *table;
+ int ret = -ENODEV;
+
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return -ENODEV;
+
+ table = cpufreq_frequency_get_table(cpu);
+ if (!table)
+ goto out;
+
+ if (!per_cpu(all_cpufreq_stats, cpu))
+ cpufreq_allstats_create(cpu);
+
+ ret = cpufreq_stats_create_table(policy, table);
+
+out:
+ cpufreq_cpu_put(policy);
+ return ret;
+}
+
static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
@@ -363,6 +599,10 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
case CPU_DEAD_FROZEN:
cpufreq_stats_free_table(cpu);
break;
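+	/* Re-create the stats table for a CPU that failed to go offline */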
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ cpufreq_stats_create_table_cpu(cpu);
+ break;
}
return NOTIFY_OK;
}
@@ -407,6 +647,12 @@ static int cpufreq_stats_setup(void)
return ret;
}
+ create_all_freq_table();
+ ret = sysfs_create_file(cpufreq_global_kobject,
+ &_attr_all_time_in_state.attr);
+ if (ret)
+ pr_warn("Error creating sysfs file for cpufreq stats\n");
+
return 0;
}
@@ -423,6 +669,7 @@ static void cpufreq_stats_cleanup(void)
cpufreq_stats_free_table(cpu);
cpufreq_stats_free_sysfs(cpu);
}
+ cpufreq_allstats_free();
}
#ifdef CONFIG_BL_SWITCHER
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
new file mode 100644
index 000000000000..6311feb52a07
--- /dev/null
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -0,0 +1,103 @@
+/*
+ * SCPI CPUFreq Interface driver
+ *
+ * It provides the necessary ops to the arm_big_little cpufreq driver.
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ * Sudeep Holla <sudeep.holla@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/opp.h>
+#include <linux/scpi_protocol.h>
+#include <linux/types.h>
+
+#include "arm_big_little.h"
+
+static struct scpi_ops *scpi_ops;
+
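+/*
+ * Populate the CPU's OPP table from the DVFS information the SCP firmware
+ * reports for its power domain; an already-populated table is left as is.
+ */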
+static int scpi_init_opp_table(struct device *cpu_dev)
+{
+ u8 domain = topology_physical_package_id(cpu_dev->id);
+ struct scpi_dvfs_info *info;
+ struct scpi_opp *opp;
+ int idx, ret = 0;
+
+ if ((opp_get_opp_count(cpu_dev)) > 0)
+ return 0;
+ info = scpi_ops->dvfs_get_info(domain);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ opp = info->opps;
+ if (!opp)
+ return -EIO;
+
+ for (idx = 0; idx < info->count; idx++, opp++) {
+ ret = opp_add(cpu_dev, opp->freq, opp->m_volt * 1000);
+ if (ret) {
+ dev_warn(cpu_dev, "failed to add opp %uHz %umV\n",
+ opp->freq, opp->m_volt);
+ return ret;
+ }
+ }
+ return ret;
+}
+
+static int scpi_get_transition_latency(struct device *cpu_dev)
+{
+ u8 domain = topology_physical_package_id(cpu_dev->id);
+ struct scpi_dvfs_info *info;
+
+ info = scpi_ops->dvfs_get_info(domain);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ return info->latency;
+}
+
+static struct cpufreq_arm_bL_ops scpi_cpufreq_ops = {
+ .name = "scpi",
+ .get_transition_latency = scpi_get_transition_latency,
+ .init_opp_table = scpi_init_opp_table,
+};
+
+static int scpi_cpufreq_probe(struct platform_device *pdev)
+{
+ scpi_ops = get_scpi_ops();
+ if (!scpi_ops)
+ return -EIO;
+
+ return bL_cpufreq_register(&scpi_cpufreq_ops);
+}
+
+static int scpi_cpufreq_remove(struct platform_device *pdev)
+{
+ bL_cpufreq_unregister(&scpi_cpufreq_ops);
+ return 0;
+}
+
+static struct platform_driver scpi_cpufreq_platdrv = {
+ .driver = {
+ .name = "scpi-cpufreq",
+ .owner = THIS_MODULE,
+ },
+ .probe = scpi_cpufreq_probe,
+ .remove = scpi_cpufreq_remove,
+};
+module_platform_driver(scpi_cpufreq_platdrv);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index bc580b67a652..efe23dd391ed 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -126,7 +126,7 @@ struct menu_device {
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-static int get_loadavg(void)
+static int __maybe_unused get_loadavg(void)
{
unsigned long this = this_cpu_load();
@@ -173,7 +173,12 @@ static inline int performance_multiplier(void)
/* for higher loadavg, we are more reluctant */
- mult += 2 * get_loadavg();
+ /*
+ * This doesn't work as intended: it is almost always 0, but can
+ * sometimes, depending on workload, spike very high into the
+ * hundreds even when the average cpu load is under 10%.
+ */
+ /* mult += 2 * get_loadavg(); */
/* for IO wait tasks (per cpu!) we add 5x each */
mult += 10 * nr_iowait_cpu(smp_processor_id());
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 3b367973a802..cc87f303c3be 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -684,6 +684,48 @@ err_out:
}
EXPORT_SYMBOL(devfreq_remove_governor);
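+/*
+ * Kernel-side counterparts of the min_freq/max_freq sysfs stores: validate
+ * the requested limit against the opposite bound and re-evaluate the device
+ * frequency under the devfreq lock.
+ */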
+int devfreq_qos_set_max(struct devfreq *df, unsigned long value)
+{
+ unsigned long min;
+ int ret = 0;
+
+ mutex_lock(&df->lock);
+ min = df->min_freq;
+ if (value && min && value < min) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ df->max_freq = value;
+ update_devfreq(df);
+unlock:
+ mutex_unlock(&df->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(devfreq_qos_set_max);
+
+int devfreq_qos_set_min(struct devfreq *df, unsigned long value)
+{
+ unsigned long max;
+ int ret = 0;
+
+ mutex_lock(&df->lock);
+ max = df->max_freq;
+ if (value && max && value > max) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ df->min_freq = value;
+ update_devfreq(df);
+unlock:
+ mutex_unlock(&df->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(devfreq_qos_set_min);
+
static ssize_t show_governor(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -809,24 +851,15 @@ static ssize_t store_min_freq(struct device *dev, struct device_attribute *attr,
struct devfreq *df = to_devfreq(dev);
unsigned long value;
int ret;
- unsigned long max;
ret = sscanf(buf, "%lu", &value);
if (ret != 1)
return -EINVAL;
- mutex_lock(&df->lock);
- max = df->max_freq;
- if (value && max && value > max) {
- ret = -EINVAL;
- goto unlock;
- }
+ ret = devfreq_qos_set_min(df, value);
+ if (!ret)
+ ret = count;
- df->min_freq = value;
- update_devfreq(df);
- ret = count;
-unlock:
- mutex_unlock(&df->lock);
return ret;
}
@@ -842,24 +875,15 @@ static ssize_t store_max_freq(struct device *dev, struct device_attribute *attr,
struct devfreq *df = to_devfreq(dev);
unsigned long value;
int ret;
- unsigned long min;
ret = sscanf(buf, "%lu", &value);
if (ret != 1)
return -EINVAL;
- mutex_lock(&df->lock);
- min = df->min_freq;
- if (value && min && value < min) {
- ret = -EINVAL;
- goto unlock;
- }
+ ret = devfreq_qos_set_max(df, value);
+ if (!ret)
+ ret = count;
- df->max_freq = value;
- update_devfreq(df);
- ret = count;
-unlock:
- mutex_unlock(&df->lock);
return ret;
}
@@ -1023,6 +1047,7 @@ struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq,
return opp;
}
+EXPORT_SYMBOL(devfreq_recommended_opp);
/**
* devfreq_register_opp_notifier() - Helper function to get devfreq notified
@@ -1046,6 +1071,7 @@ int devfreq_register_opp_notifier(struct device *dev, struct devfreq *devfreq)
return ret;
}
+EXPORT_SYMBOL(devfreq_register_opp_notifier);
/**
* devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq
@@ -1072,6 +1098,7 @@ int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq)
return ret;
}
+EXPORT_SYMBOL(devfreq_unregister_opp_notifier);
MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
MODULE_DESCRIPTION("devfreq class support");
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
index d14e2a02fee0..4f97237cb1ea 100644
--- a/drivers/gator/Makefile
+++ b/drivers/gator/Makefile
@@ -77,7 +77,7 @@ clean-files := gator_src_md5.h
silent_chk_events.h = :
$(obj)/gator_src_md5.h: FORCE
@$($(quiet)chk_events.h)
- $(Q)cd $(srctree) && cd $(src) ; $(CONFIG_SHELL) -c "echo 'static char *gator_src_md5 = \"'\`ls *.c *.h mali/*.h | grep -Ev '^(gator_src_md5\.c|gator\.mod\.c)$$' | LC_ALL=C sort | xargs cat | md5sum | cut -b 1-32\`'\";'" > $(abspath $@)
+ $(Q)cd $(srctree) && cd $(src) ; $(CONFIG_SHELL) -c "echo 'static char *gator_src_md5 = \"'\`ls *.c *.h mali/*.h | grep -Ev '^(gator_src_md5\.h|gator\.mod\.c)$$' | LC_ALL=C sort | xargs cat | md5sum | cut -b 1-32\`'\";'" > $(abspath $@)
else
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
index 202eb41c485b..50b0180fa82a 100644
--- a/drivers/gator/gator.h
+++ b/drivers/gator/gator.h
@@ -77,8 +77,8 @@ int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
/******************************************************************************
* Tracepoints
******************************************************************************/
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-# error Kernels prior to 2.6.32 not supported
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0)
+# error Kernels prior to 3.4.0 not supported. DS-5 v5.21 and earlier supported 2.6.32 and later.
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
# define GATOR_DEFINE_PROBE(probe_name, proto) \
static void probe_##probe_name(PARAMS(proto))
@@ -103,10 +103,15 @@ int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
tracepoint_probe_unregister(gator_tracepoint_##probe_name, probe_##probe_name, NULL)
#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+#define setup_deferrable_timer_on_stack setup_timer
+#endif
+
/******************************************************************************
* Events
******************************************************************************/
struct gator_interface {
+ const char *const name;
/* Complementary function to init */
void (*shutdown)(void);
int (*create_files)(struct super_block *sb, struct dentry *root);
@@ -120,11 +125,12 @@ struct gator_interface {
/* called in process context but may not be running on core 'cpu' */
void (*offline_dispatch)(int cpu, bool migrate);
int (*read)(int **buffer, bool sched_switch);
- int (*read64)(long long **buffer);
+ int (*read64)(long long **buffer, bool sched_switch);
int (*read_proc)(long long **buffer, struct task_struct *);
struct list_head list;
};
+u64 gator_get_time(void);
int gator_events_install(struct gator_interface *interface);
int gator_events_get_key(void);
u32 gator_cpuid(void);
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
index 9bd4c8b98c9e..7636726b5c18 100644
--- a/drivers/gator/gator_cookies.c
+++ b/drivers/gator/gator_cookies.c
@@ -7,6 +7,29 @@
*
*/
+#include <linux/mount.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
+
+struct mount {
+ struct mount *mnt_parent;
+ struct dentry *mnt_mountpoint;
+ struct vfsmount mnt;
+};
+
+static inline struct mount *real_mount(struct vfsmount *mnt)
+{
+ return container_of(mnt, struct mount, mnt);
+}
+
+#define GET_MNT_ROOT(mount) (mount)->mnt.mnt_root
+
+#else
+
+#define GET_MNT_ROOT(mount) (mount)->mnt_root
+
+#endif
+
/* must be power of 2 */
#define COOKIEMAP_ENTRIES 1024
/* must be a power of 2 - 512/4 = 128 entries */
@@ -23,6 +46,7 @@ struct cookie_args {
};
static DEFINE_PER_CPU(char *, translate_text);
+static DEFINE_PER_CPU(char *, scratch);
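+/* Per-cpu scratch buffer used by gator_d_path() to assemble full paths */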
static DEFINE_PER_CPU(uint32_t, cookie_next_key);
static DEFINE_PER_CPU(uint64_t *, cookie_keys);
static DEFINE_PER_CPU(uint32_t *, cookie_values);
@@ -285,7 +309,7 @@ static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text,
return cookie;
/* On 64-bit android app_process can be app_process32 or app_process64 */
- if (strncmp(text, APP_PROCESS, sizeof(APP_PROCESS) - 1) == 0) {
+ if (strstr(text, APP_PROCESS) != NULL) {
if (!translate_app_process(&text, cpu, task, from_wq))
return UNRESOLVED_COOKIE;
}
@@ -305,6 +329,75 @@ static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text,
return cookie;
}
+/* Can't call d_path in interrupt context so create something similar */
+static const char *gator_d_path(const struct path *path, char *buf, int buflen)
+{
+ struct dentry *dentry = path->dentry;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
+ struct mount *mount = real_mount(path->mnt);
+#else
+ struct vfsmount *mount = path->mnt;
+#endif
+ int pos = buflen - 1;
+ int len;
+
+ buf[pos] = '\0';
+
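+ /* Build the path backwards from the dentry, prepending one component
+ * per iteration and crossing mount points until the root is reached.
+ */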
+ for (;;) {
+ if (dentry == NULL) {
+ pr_err("gator: dentry is null!\n");
+ break;
+ }
+ if (dentry->d_name.name[0] == '\0') {
+ pr_err("gator: path is empty string\n");
+ break;
+ }
+ if (dentry->d_name.name[0] == '/' && dentry->d_name.name[1] == '\0') {
+ /* Normal operation */
+ /* pr_err("gator: path is /\n"); */
+ break;
+ }
+
+ len = strlen(dentry->d_name.name);
+ if (pos < len) {
+ pr_err("gator: path is too long\n");
+ break;
+ }
+ pos -= len;
+ memcpy(buf + pos, dentry->d_name.name, len);
+
+ if (pos == 0) {
+ pr_err("gator: no room for slash\n");
+ /* Fall back to name only */
+ return path->dentry->d_name.name;
+ }
+ --pos;
+ buf[pos] = '/';
+
+ if (dentry->d_parent == GET_MNT_ROOT(mount)) {
+ /* pr_err("gator: filesystem is complete, moving to next '%s'\n", buf + pos); */
+ dentry = mount->mnt_mountpoint;
+ mount = mount->mnt_parent;
+ continue;
+ }
+ if (dentry == dentry->d_parent) {
+ pr_err("gator: parent is self\n");
+ break;
+ }
+ dentry = dentry->d_parent;
+ }
+
+ if (pos < 0) {
+ pr_err("gator: pos is somenow negative\n");
+ /* Fall back to name only */
+ return path->dentry->d_name.name;
+ }
+
+ return buf + pos;
+}
+
+#define d_path gator_d_path
+
static int get_exec_cookie(int cpu, struct task_struct *task)
{
struct mm_struct *mm = task->mm;
@@ -315,7 +408,7 @@ static int get_exec_cookie(int cpu, struct task_struct *task)
return NO_COOKIE;
if (task && task->mm && task->mm->exe_file) {
- text = task->mm->exe_file->f_path.dentry->d_name.name;
+ text = d_path(&task->mm->exe_file->f_path, per_cpu(scratch, get_physical_cpu()), PAGE_SIZE);
return get_cookie(cpu, task, text, false);
}
@@ -337,7 +430,7 @@ static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsig
continue;
if (vma->vm_file) {
- text = vma->vm_file->f_path.dentry->d_name.name;
+ text = d_path(&vma->vm_file->f_path, per_cpu(scratch, get_physical_cpu()), PAGE_SIZE);
cookie = get_cookie(cpu, task, text, false);
*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
} else {
@@ -394,6 +487,12 @@ static int cookies_initialize(void)
err = -ENOMEM;
goto cookie_setup_error;
}
+
+ per_cpu(scratch, cpu) = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!per_cpu(scratch, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
}
/* build CRC32 table */
@@ -414,7 +513,7 @@ static int cookies_initialize(void)
gator_crc32_table[i] = crc;
}
- setup_timer(&app_process_wake_up_timer, app_process_wake_up_handler, 0);
+ setup_deferrable_timer_on_stack(&app_process_wake_up_timer, app_process_wake_up_handler, 0);
cookie_setup_error:
return err;
@@ -438,6 +537,9 @@ static void cookies_release(void)
kfree(per_cpu(translate_text, cpu));
per_cpu(translate_text, cpu) = NULL;
+
+ kfree(per_cpu(scratch, cpu));
+ per_cpu(scratch, cpu) = NULL;
}
del_timer_sync(&app_process_wake_up_timer);
diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c
index 178397033e2c..62bd6eb94f9d 100644
--- a/drivers/gator/gator_events_armv6.c
+++ b/drivers/gator/gator_events_armv6.c
@@ -198,6 +198,7 @@ static int gator_events_armv6_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_armv6_interface = {
+ .name = "armv6",
.create_files = gator_events_armv6_create_files,
.stop = gator_events_armv6_stop,
.online = gator_events_armv6_online,
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
index e1f6a5fa9997..0db857c71a67 100644
--- a/drivers/gator/gator_events_armv7.c
+++ b/drivers/gator/gator_events_armv7.c
@@ -263,6 +263,7 @@ static int gator_events_armv7_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_armv7_interface = {
+ .name = "armv7",
.create_files = gator_events_armv7_create_files,
.stop = gator_events_armv7_stop,
.online = gator_events_armv7_online,
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
index b3467b133712..0bb4085db535 100644
--- a/drivers/gator/gator_events_block.c
+++ b/drivers/gator/gator_events_block.c
@@ -142,6 +142,7 @@ static int gator_events_block_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_block_interface = {
+ .name = "block",
.create_files = gator_events_block_create_files,
.start = gator_events_block_start,
.stop = gator_events_block_stop,
diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c
index 81b976a9b282..aa1ebbb290a5 100644
--- a/drivers/gator/gator_events_irq.c
+++ b/drivers/gator/gator_events_irq.c
@@ -144,6 +144,7 @@ static int gator_events_irq_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_irq_interface = {
+ .name = "irq",
.create_files = gator_events_irq_create_files,
.online = gator_events_irq_online,
.start = gator_events_irq_start,
diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c
index 063a06079c74..de3b383ad077 100644
--- a/drivers/gator/gator_events_l2c-310.c
+++ b/drivers/gator/gator_events_l2c-310.c
@@ -121,6 +121,7 @@ static int gator_events_l2c310_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_l2c310_interface = {
+ .name = "l2c-310",
.create_files = gator_events_l2c310_create_files,
.start = gator_events_l2c310_start,
.stop = gator_events_l2c310_stop,
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
index 423b4e08e8cc..fb3909a004d3 100644
--- a/drivers/gator/gator_events_mali_4xx.c
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -66,9 +66,7 @@ static unsigned long counter_event[NUMBER_OF_EVENTS];
static unsigned long counter_key[NUMBER_OF_EVENTS];
/* The data we have recorded */
-static u32 counter_data[NUMBER_OF_EVENTS];
-/* The address to sample (or 0 if samples are sent to us) */
-static u32 *counter_address[NUMBER_OF_EVENTS];
+static atomic_t counter_data[NUMBER_OF_EVENTS];
/* An array used to return the data we recorded
* as key,value pairs hence the *2
@@ -116,7 +114,7 @@ static inline int is_hw_counter(unsigned int event_id)
GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
{
if (is_hw_counter(event_id))
- counter_data[event_id] = value;
+ atomic_add(value, &counter_data[event_id]);
}
GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
@@ -126,7 +124,7 @@ GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surfac
/* Copy over the values for those counters which are enabled. */
for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
if (counter_enabled[i])
- counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
+ atomic_add(counters[i - FIRST_SW_COUNTER], &counter_data[i]);
}
}
@@ -375,22 +373,22 @@ static void mali_counter_initialize(void)
if (mali_get_counters)
pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
else
- pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined\n");
+ pr_debug("gator: mali _mali_profiling_get_counters symbol not defined\n");
mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
if (mali_get_l2_counters)
pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
else
- pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined\n");
+ pr_debug("gator: mali _mali_profiling_get_l2_counters symbol not defined\n");
if (!mali_get_counters && !mali_get_l2_counters) {
- pr_debug("gator: WARNING: no L2 counters available\n");
+ pr_err("gator: no L2 counters available\n");
n_l2_cores = 0;
}
/* Clear counters in the start */
for (i = 0; i < NUMBER_OF_EVENTS; i++) {
- counter_data[i] = 0;
+ atomic_set(&counter_data[i], 0);
prev_set[i] = false;
}
}
@@ -477,7 +475,6 @@ static void stop(void)
for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
counter_enabled[cnt] = 0;
counter_event[cnt] = 0;
- counter_address[cnt] = NULL;
}
mali_counter_deinitialize();
@@ -489,10 +486,12 @@ static void dump_counters(unsigned int from_counter, unsigned int to_counter, un
for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
if (counter_enabled[counter_id]) {
- counter_dump[(*len)++] = counter_key[counter_id];
- counter_dump[(*len)++] = counter_data[counter_id];
+ unsigned int value;
- counter_data[counter_id] = 0;
+ counter_dump[(*len)++] = counter_key[counter_id];
+ value = atomic_read(&counter_data[counter_id]);
+ atomic_sub(value, &counter_data[counter_id]);
+ counter_dump[(*len)++] = value;
}
}
}
@@ -564,17 +563,19 @@ static int read(int **buffer, bool sched_switch)
* Add in the voltage and frequency counters if enabled. Note
* that, since these are actually passed as events, the counter
* value should not be cleared.
+ *
+ * Intentionally use atomic_read as these are absolute counters
*/
cnt = COUNTER_FREQUENCY;
if (counter_enabled[cnt]) {
counter_dump[len++] = counter_key[cnt];
- counter_dump[len++] = counter_data[cnt];
+ counter_dump[len++] = atomic_read(&counter_data[cnt]);
}
cnt = COUNTER_VOLTAGE;
if (counter_enabled[cnt]) {
counter_dump[len++] = counter_key[cnt];
- counter_dump[len++] = counter_data[cnt];
+ counter_dump[len++] = atomic_read(&counter_data[cnt]);
}
}
#endif
@@ -586,6 +587,7 @@ static int read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_mali_interface = {
+ .name = "mali_4xx",
.create_files = create_files,
.start = start,
.stop = stop,
@@ -595,8 +597,9 @@ static struct gator_interface gator_events_mali_interface = {
extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)
{
#ifdef DVFS_REPORTED_BY_DDK
- counter_data[COUNTER_FREQUENCY] = frequency_mhz;
- counter_data[COUNTER_VOLTAGE] = voltage_mv;
+ /* Intentionally use atomic_set as these are absolute counters */
+ atomic_set(&counter_data[COUNTER_FREQUENCY], frequency_mhz);
+ atomic_set(&counter_data[COUNTER_VOLTAGE], voltage_mv);
#endif
}
@@ -612,8 +615,7 @@ int gator_events_mali_init(void)
counter_enabled[cnt] = 0;
counter_event[cnt] = 0;
counter_key[cnt] = gator_events_get_key();
- counter_address[cnt] = NULL;
- counter_data[cnt] = 0;
+ atomic_set(&counter_data[cnt], 0);
}
trace_registered = 0;
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
index 7741f2575542..60fece814c29 100644
--- a/drivers/gator/gator_events_mali_common.c
+++ b/drivers/gator/gator_events_mali_common.c
@@ -25,31 +25,31 @@ extern int gator_mali_create_file_system(const char *mali_name, const char *even
dir = gatorfs_mkdir(sb, root, buf);
if (dir == NULL) {
- pr_debug("gator: %s: error creating file system for: %s (%s)\n", mali_name, event_name, buf);
+ pr_err("gator: %s: error creating file system for: %s (%s)\n", mali_name, event_name, buf);
return -1;
}
err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled);
if (err != 0) {
- pr_debug("gator: %s: error calling gatorfs_create_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ pr_err("gator: %s: error calling gatorfs_create_ulong for: %s (%s)\n", mali_name, event_name, buf);
return -1;
}
err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key);
if (err != 0) {
- pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ pr_err("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
return -1;
}
if (counter->cores != -1) {
err = gatorfs_create_ro_ulong(sb, dir, "cores", &counter->cores);
if (err != 0) {
- pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ pr_err("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
return -1;
}
}
if (event != NULL) {
err = gatorfs_create_ulong(sb, dir, "event", event);
if (err != 0) {
- pr_debug("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
+ pr_err("gator: %s: error calling gatorfs_create_ro_ulong for: %s (%s)\n", mali_name, event_name, buf);
return -1;
}
}
diff --git a/drivers/gator/gator_events_mali_midgard.c b/drivers/gator/gator_events_mali_midgard.c
index 3b0963a8de21..5b84975f99e8 100644
--- a/drivers/gator/gator_events_mali_midgard.c
+++ b/drivers/gator/gator_events_mali_midgard.c
@@ -146,6 +146,10 @@ static unsigned int accumulators_data[NUMBER_OF_ACCUMULATORS];
/* Hold the previous timestamp, used to calculate the sample interval. */
static struct timespec prev_timestamp;
+static unsigned long long previous_shader_bitmask;
+static unsigned long long previous_tiler_bitmask;
+static unsigned long long previous_l2_bitmask;
+
/**
* Returns the timespan (in microseconds) between the two specified timestamps.
*
@@ -209,10 +213,6 @@ GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long
#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
#define BIT_AT(value, pos) ((value >> pos) & 1)
- static unsigned long long previous_shader_bitmask;
- static unsigned long long previous_tiler_bitmask;
- static unsigned long long previous_l2_bitmask;
-
switch (event_id) {
case SHADER_PRESENT_LO:
{
@@ -324,27 +324,27 @@ static int create_files(struct super_block *sb, struct dentry *root)
static int register_tracepoints(void)
{
if (GATOR_REGISTER_TRACE(mali_pm_status)) {
- pr_debug("gator: Mali-Midgard: mali_pm_status tracepoint failed to activate\n");
+ pr_err("gator: Mali-Midgard: mali_pm_status tracepoint failed to activate\n");
return 0;
}
if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) {
- pr_debug("gator: Mali-Midgard: mali_page_fault_insert_pages tracepoint failed to activate\n");
+ pr_err("gator: Mali-Midgard: mali_page_fault_insert_pages tracepoint failed to activate\n");
return 0;
}
if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) {
- pr_debug("gator: Mali-Midgard: mali_mmu_as_in_use tracepoint failed to activate\n");
+ pr_err("gator: Mali-Midgard: mali_mmu_as_in_use tracepoint failed to activate\n");
return 0;
}
if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) {
- pr_debug("gator: Mali-Midgard: mali_mmu_as_released tracepoint failed to activate\n");
+ pr_err("gator: Mali-Midgard: mali_mmu_as_released tracepoint failed to activate\n");
return 0;
}
if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) {
- pr_debug("gator: Mali-Midgard: mali_total_alloc_pages_change tracepoint failed to activate\n");
+ pr_err("gator: Mali-Midgard: mali_total_alloc_pages_change tracepoint failed to activate\n");
return 0;
}
@@ -363,6 +363,10 @@ static int start(void)
unsigned int cnt;
mali_profiling_control_type *mali_control;
+ previous_shader_bitmask = 0;
+ previous_tiler_bitmask = 0;
+ previous_l2_bitmask = 0;
+
/* Clean all data for the next capture */
for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; cnt++) {
timeline_event_starttime[cnt].tv_sec = timeline_event_starttime[cnt].tv_nsec = 0;
@@ -546,6 +550,7 @@ static int read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_mali_midgard_interface = {
+ .name = "mali_midgard",
.create_files = create_files,
.start = start,
.stop = stop,
diff --git a/drivers/gator/gator_events_mali_midgard_hw.c b/drivers/gator/gator_events_mali_midgard_hw.c
index 7e1eee30026d..db91c42c7d0c 100644
--- a/drivers/gator/gator_events_mali_midgard_hw.c
+++ b/drivers/gator/gator_events_mali_midgard_hw.c
@@ -433,7 +433,8 @@ static unsigned int num_hardware_counters_enabled;
static struct mali_counter *counters;
/* An array used to return the data we recorded as key,value pairs */
-static int *counter_dump;
+static long long *counter_dump;
+static uint64_t last_read_time;
extern struct mali_counter mali_activity[3];
@@ -517,31 +518,6 @@ static void clean_symbols(void)
#endif
}
-/**
- * Determines whether a read should take place
- * @param current_time The current time, obtained from getnstimeofday()
- * @param prev_time_s The number of seconds at the previous read attempt.
- * @param next_read_time_ns The time (in ns) when the next read should be allowed.
- *
- * Note that this function has been separated out here to allow it to be tested.
- */
-static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns)
-{
- /* If the current ns count rolls over a second, roll the next read time too. */
- if (current_time->tv_sec != *prev_time_s)
- *next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC;
-
- /* Abort the read if the next read time has not arrived. */
- if (current_time->tv_nsec < *next_read_time_ns)
- return 0;
-
- /* Set the next read some fixed time after this one, and update the read timestamp. */
- *next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC;
-
- *prev_time_s = current_time->tv_sec;
- return 1;
-}
-
static int start(void)
{
#if MALI_DDK_GATOR_API_VERSION != 3
@@ -552,6 +528,8 @@ static int start(void)
#endif
int cnt;
+ last_read_time = 0;
+
#if MALI_DDK_GATOR_API_VERSION == 3
/* Setup HW counters */
num_hardware_counters_enabled = 0;
@@ -620,14 +598,14 @@ static int start(void)
/* If we already got a context, fail */
if (kbcontext) {
- pr_debug("gator: Mali-Midgard: error context already present\n");
+ pr_err("gator: Mali-Midgard: error context already present\n");
goto out;
}
/* kbcontext will only be valid after all the Mali symbols are loaded successfully */
kbcontext = kbase_create_context_symbol(kbdevice);
if (!kbcontext) {
- pr_debug("gator: Mali-Midgard: error creating kbase context\n");
+ pr_err("gator: Mali-Midgard: error creating kbase context\n");
goto out;
}
@@ -650,7 +628,7 @@ static int start(void)
kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096, &kernel_dump_buffer_handle);
#endif
if (!kernel_dump_buffer) {
- pr_debug("gator: Mali-Midgard: error trying to allocate va\n");
+ pr_err("gator: Mali-Midgard: error trying to allocate va\n");
goto destroy_context;
}
@@ -665,7 +643,7 @@ static int start(void)
/* Use kbase API to enable hardware counters and provide dump buffer */
err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup);
if (err != MALI_ERROR_NONE) {
- pr_debug("gator: Mali-Midgard: can't setup hardware counters\n");
+ pr_err("gator: Mali-Midgard: can't setup hardware counters\n");
goto free_buffer;
}
pr_debug("gator: Mali-Midgard: hardware counters enabled\n");
@@ -813,68 +791,80 @@ static int read_counter(const int cnt, const int len, const struct mali_counter
return 2;
}
-static int read(int **buffer, bool sched_switch)
+static int read(long long **buffer, bool sched_switch)
{
int cnt;
int len = 0;
uint32_t success;
- struct timespec current_time;
- static u32 prev_time_s;
- static s32 next_read_time_ns;
+ uint64_t curr_time;
if (!on_primary_core() || sched_switch)
return 0;
- getnstimeofday(&current_time);
+ /*
+ * Report the HW counters
+ * Only process hardware counters if at least one of the hardware counters is enabled.
+ */
+ if (num_hardware_counters_enabled <= 0)
+ return 0;
+
+ curr_time = gator_get_time();
/*
* Discard reads unless a respectable time has passed. This
* reduces the load on the GPU without sacrificing accuracy on
* the Streamline display.
*/
- if (!is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns))
+ if (curr_time - last_read_time < READ_INTERVAL_NSEC)
return 0;
- /*
- * Report the HW counters
- * Only process hardware counters if at least one of the hardware counters is enabled.
- */
- if (num_hardware_counters_enabled > 0) {
#if MALI_DDK_GATOR_API_VERSION == 3
- if (!handles)
- return -1;
+ if (!handles)
+ return -1;
- /* Mali symbols can be called safely since a kbcontext is valid */
- if (kbase_gator_instr_hwcnt_dump_complete_symbol(handles, &success)) {
+ /* Mali symbols can be called safely since a kbcontext is valid */
+ if (kbase_gator_instr_hwcnt_dump_complete_symbol(handles, &success)) {
#else
- if (!kbcontext)
- return -1;
+ if (!kbcontext)
+ return -1;
- /* Mali symbols can be called safely since a kbcontext is valid */
- if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success)) {
+ /* Mali symbols can be called safely since a kbcontext is valid */
+ if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success)) {
#endif
- kbase_device_busy = false;
+ kbase_device_busy = false;
+
+ /*
+ * If last_read_time is zero, then this result is from a previous
+ * capture or in error.
+ */
+ if (success && last_read_time > 0) {
+ /* Backdate these events to when they were requested */
+ counter_dump[len++] = 0;
+ counter_dump[len++] = last_read_time;
- if (success) {
- /* Cycle through hardware counters and accumulate totals */
- for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
- const struct mali_counter *counter = &counters[cnt];
+ /* Cycle through hardware counters and accumulate totals */
+ for (cnt = 0; cnt < number_of_hardware_counters; cnt++) {
+ const struct mali_counter *counter = &counters[cnt];
- if (counter->enabled)
- len += read_counter(cnt, len, counter);
- }
+ if (counter->enabled)
+ len += read_counter(cnt, len, counter);
}
+
+ /* Restore the timestamp */
+ counter_dump[len++] = 0;
+ counter_dump[len++] = curr_time;
}
+ }
- if (!kbase_device_busy) {
- kbase_device_busy = true;
+ if (!kbase_device_busy) {
+ kbase_device_busy = true;
+ last_read_time = curr_time;
#if MALI_DDK_GATOR_API_VERSION == 3
- kbase_gator_instr_hwcnt_dump_irq_symbol(handles);
+ kbase_gator_instr_hwcnt_dump_irq_symbol(handles);
#else
- kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
+ kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
#endif
- }
}
/* Update the buffer */
@@ -920,7 +910,7 @@ static void shutdown(void)
hardware_counter_names = NULL;
if (kbase_gator_hwcnt_term_names_symbol != NULL) {
kbase_gator_hwcnt_term_names_symbol();
- pr_err("Released symbols\n");
+ pr_debug("gator: Released symbols\n");
}
SYMBOL_CLEANUP(kbase_gator_hwcnt_term_names);
@@ -928,11 +918,12 @@ static void shutdown(void)
}
static struct gator_interface gator_events_mali_midgard_interface = {
+ .name = "mali_midgard_hw",
.shutdown = shutdown,
.create_files = create_files,
.start = start,
.stop = stop,
- .read = read
+ .read64 = read
};
int gator_events_mali_midgard_hw_init(void)
@@ -969,7 +960,7 @@ int gator_events_mali_midgard_hw_init(void)
#endif
counters = kmalloc(sizeof(*counters)*number_of_hardware_counters, GFP_KERNEL);
- counter_dump = kmalloc(sizeof(*counter_dump)*number_of_hardware_counters*2, GFP_KERNEL);
+ counter_dump = kmalloc(sizeof(*counter_dump) * (number_of_hardware_counters * 2 + 4), GFP_KERNEL);
gator_mali_initialise_counters(mali_activity, ARRAY_SIZE(mali_activity));
gator_mali_initialise_counters(counters, number_of_hardware_counters);
diff --git a/drivers/gator/gator_events_mali_midgard_hw_test.c b/drivers/gator/gator_events_mali_midgard_hw_test.c
index 87c569cabf53..9fd44bc00e0a 100644
--- a/drivers/gator/gator_events_mali_midgard_hw_test.c
+++ b/drivers/gator/gator_events_mali_midgard_hw_test.c
@@ -10,46 +10,3 @@
/**
* Test functions for mali_t600_hw code.
*/
-
-static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns);
-
-static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int expected_result, s32 expected_next_ns)
-{
- struct timespec current_time;
- u32 prev_time_s = prev_s;
- s32 next_read_time_ns = next_ns;
-
- current_time.tv_sec = s;
- current_time.tv_nsec = ns;
-
- if (is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns) != expected_result) {
- pr_err("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result);
- return 0;
- }
-
- if (next_read_time_ns != expected_next_ns) {
- pr_err("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns);
- return 0;
- }
-
- return 1;
-}
-
-static void test_all_is_read_scheduled(void)
-{
- const int HIGHEST_NS = 999999999;
- int n_tests_passed = 0;
-
- pr_err("gator: running tests on %s\n", __FILE__);
-
- n_tests_passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC); /* Null time */
- n_tests_passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000); /* Initial values */
-
- n_tests_passed += test_is_read_scheduled(100, HIGHEST_NS, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500);
- n_tests_passed += test_is_read_scheduled(101, 0001, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500 - NSEC_PER_SEC);
- n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500 - NSEC_PER_SEC, 1, 600 + READ_INTERVAL_NSEC);
-
- n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500, 1, 600 + READ_INTERVAL_NSEC);
-
- pr_err("gator: %d tests passed\n", n_tests_passed);
-}
diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c
index 985b312ab92b..42a6947752fe 100644
--- a/drivers/gator/gator_events_meminfo.c
+++ b/drivers/gator/gator_events_meminfo.c
@@ -80,7 +80,7 @@ static void notify(void)
static unsigned int mem_event;
static void wq_sched_handler(struct work_struct *wsptr);
-DECLARE_WORK(work, wq_sched_handler);
+static DECLARE_WORK(work, wq_sched_handler);
static struct timer_list meminfo_wake_up_timer;
static void meminfo_wake_up_handler(unsigned long unused_data);
@@ -187,7 +187,7 @@ static int gator_events_meminfo_start(void)
if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo")))
goto kthread_run_exit;
#else
- setup_timer(&meminfo_wake_up_timer, meminfo_wake_up_handler, 0);
+ setup_deferrable_timer_on_stack(&meminfo_wake_up_timer, meminfo_wake_up_handler, 0);
#endif
return 0;
@@ -313,7 +313,7 @@ static void meminfo_wake_up_handler(unsigned long unused_data)
#endif
-static int gator_events_meminfo_read(long long **buffer)
+static int gator_events_meminfo_read(long long **buffer, bool sched_switch)
{
#if !USE_THREAD
static unsigned int last_mem_event;
@@ -446,6 +446,7 @@ static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct
}
static struct gator_interface gator_events_meminfo_interface = {
+ .name = "meminfo",
.create_files = gator_events_meminfo_create_files,
.start = gator_events_meminfo_start,
.stop = gator_events_meminfo_stop,
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
index 7b517611f8ec..30ca980c3dc9 100644
--- a/drivers/gator/gator_events_mmapped.c
+++ b/drivers/gator/gator_events_mmapped.c
@@ -189,6 +189,7 @@ static int gator_events_mmapped_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_mmapped_interface = {
+ .name = "mmapped",
.create_files = gator_events_mmapped_create_files,
.start = gator_events_mmapped_start,
.stop = gator_events_mmapped_stop,
diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c
index 1e36731479d2..053f719419a7 100644
--- a/drivers/gator/gator_events_net.c
+++ b/drivers/gator/gator_events_net.c
@@ -45,7 +45,7 @@ static void get_network_stats(struct work_struct *wsptr)
tx_total = tx;
}
-DECLARE_WORK(wq_get_stats, get_network_stats);
+static DECLARE_WORK(wq_get_stats, get_network_stats);
static void net_wake_up_handler(unsigned long unused_data)
{
@@ -98,11 +98,7 @@ static int gator_events_net_start(void)
get_network_stats(0);
netPrev[NETRX] = rx_total;
netPrev[NETTX] = tx_total;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
- setup_timer(&net_wake_up_timer, net_wake_up_handler, 0);
-#else
setup_deferrable_timer_on_stack(&net_wake_up_timer, net_wake_up_handler, 0);
-#endif
return 0;
}
@@ -154,6 +150,7 @@ static int gator_events_net_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_net_interface = {
+ .name = "net",
.create_files = gator_events_net_create_files,
.start = gator_events_net_start,
.stop = gator_events_net_stop,
diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c
index f6b4f18ad11a..e62492bbd536 100644
--- a/drivers/gator/gator_events_perf_pmu.c
+++ b/drivers/gator/gator_events_perf_pmu.c
@@ -136,6 +136,7 @@ static int gator_events_perf_pmu_online(int **buffer, bool migrate)
static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event)
{
perf_overflow_handler_t handler;
+ struct perf_event *pevent;
event->zero = true;
@@ -148,22 +149,22 @@ static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const at
handler = dummy_handler;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
- event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler);
+ pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler);
#else
- event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0);
+ pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0);
#endif
- if (IS_ERR(event->pevent)) {
- pr_debug("gator: unable to online a counter on cpu %d\n", cpu);
- event->pevent = NULL;
+ if (IS_ERR(pevent)) {
+ pr_err("gator: unable to online a counter on cpu %d\n", cpu);
return;
}
- if (event->pevent->state != PERF_EVENT_STATE_ACTIVE) {
- pr_debug("gator: inactive counter on cpu %d\n", cpu);
- perf_event_release_kernel(event->pevent);
- event->pevent = NULL;
+ if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+ pr_err("gator: inactive counter on cpu %d\n", cpu);
+ perf_event_release_kernel(pevent);
return;
}
+
+ event->pevent = pevent;
}
static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate)
@@ -369,6 +370,7 @@ static int gator_events_perf_pmu_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_perf_pmu_interface = {
+ .name = "perf_pmu",
.create_files = gator_events_perf_pmu_create_files,
.start = gator_events_perf_pmu_start,
.stop = gator_events_perf_pmu_stop,
@@ -463,10 +465,10 @@ static void gator_events_perf_pmu_cci_400_init(const int type)
cci_name = "CCI_400";
break;
case 1:
- cci_name = "CCI_400-r1";
+ cci_name = "CCI_400_r1";
break;
default:
- pr_debug("gator: unrecognized cci-400 revision\n");
+ pr_err("gator: unrecognized cci-400 revision\n");
return;
}
@@ -536,14 +538,19 @@ int gator_events_perf_pmu_init(void)
}
if (pe->pmu != NULL && type == pe->pmu->type) {
- if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400-r1", pe->pmu->name) == 0) {
+ pr_notice("gator: perf pmu: %s\n", pe->pmu->name);
+ if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0 || strcmp("CCI_400_r1", pe->pmu->name) == 0) {
+ pr_notice("gator: Adding CCI-400 counters with type %i\n", type);
gator_events_perf_pmu_cci_400_init(type);
} else if (strcmp("CCI_500", pe->pmu->name) == 0) {
+ pr_notice("gator: Adding CCI-500 counters with type %i\n", type);
gator_events_perf_pmu_uncore_init("CCI_500", type, CCI_500, false);
} else if (strcmp("ccn", pe->pmu->name) == 0) {
+ pr_notice("gator: Adding CCN counters with type %i\n", type);
gator_events_perf_pmu_uncore_init("ARM_CCN_5XX", type, CCN_5XX, true);
} else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) {
found_cpu = true;
+ pr_notice("gator: Adding cpu counters for %s with type %i\n", gator_cpu->pmnc_name, type);
gator_events_perf_pmu_cpu_init(gator_cpu, type);
}
/* Initialize gator_attrs for dynamic PMUs here */
@@ -562,6 +569,7 @@ int gator_events_perf_pmu_init(void)
return -1;
}
}
+ pr_notice("gator: Adding cpu counters (based on cpuid) for %s\n", gator_cpu->pmnc_name);
gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW);
}
diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c
index 463d83496073..d3524157ab4b 100644
--- a/drivers/gator/gator_events_sched.c
+++ b/drivers/gator/gator_events_sched.c
@@ -97,6 +97,7 @@ static int gator_events_sched_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_sched_interface = {
+ .name = "sched",
.create_files = gator_events_sched_create_files,
.start = gator_events_sched_start,
.stop = gator_events_sched_stop,
diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c
index b51dcd39a8e1..efb5e7a0b873 100644
--- a/drivers/gator/gator_events_scorpion.c
+++ b/drivers/gator/gator_events_scorpion.c
@@ -635,6 +635,7 @@ static int gator_events_scorpion_read(int **buffer, bool sched_switch)
}
static struct gator_interface gator_events_scorpion_interface = {
+ .name = "scorpion",
.create_files = gator_events_scorpion_create_files,
.stop = gator_events_scorpion_stop,
.online = gator_events_scorpion_online,
diff --git a/drivers/gator/gator_fs.c b/drivers/gator/gator_fs.c
index d8fb357b9eda..205da8b34534 100644
--- a/drivers/gator/gator_fs.c
+++ b/drivers/gator/gator_fs.c
@@ -18,7 +18,7 @@
#define gatorfs_MAGIC 0x24051020
#define TMPBUFSIZE 50
-DEFINE_SPINLOCK(gatorfs_lock);
+static DEFINE_SPINLOCK(gatorfs_lock);
static struct inode *gatorfs_get_inode(struct super_block *sb, int mode)
{
diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c
index 36961f85b62a..8b86edebfed8 100644
--- a/drivers/gator/gator_hrtimer_gator.c
+++ b/drivers/gator/gator_hrtimer_gator.c
@@ -7,10 +7,10 @@
*
*/
-void (*callback)(void);
-DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
-DEFINE_PER_CPU(ktime_t, hrtimer_expire);
-DEFINE_PER_CPU(int, hrtimer_is_active);
+static void (*callback)(void);
+static DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
+static DEFINE_PER_CPU(ktime_t, hrtimer_expire);
+static DEFINE_PER_CPU(int, hrtimer_is_active);
static ktime_t profiling_interval;
static void gator_hrtimer_online(void);
static void gator_hrtimer_offline(void);
diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c
index affa1dc312e4..2d55afea4b9d 100644
--- a/drivers/gator/gator_main.c
+++ b/drivers/gator/gator_main.c
@@ -8,7 +8,7 @@
*/
/* This version must match the gator daemon version */
-#define PROTOCOL_VERSION 21
+#define PROTOCOL_VERSION 22
static unsigned long gator_protocol_version = PROTOCOL_VERSION;
#include <linux/slab.h>
@@ -180,7 +180,6 @@ static DEFINE_PER_CPU(bool, in_scheduler_context);
/******************************************************************************
* Prototypes
******************************************************************************/
-static u64 gator_get_time(void);
static void gator_emit_perf_time(u64 time);
static void gator_op_create_files(struct super_block *sb, struct dentry *root);
@@ -378,21 +377,21 @@ static const struct gator_cpu gator_cpus[] = {
{
.cpuid = CORTEX_A53,
.core_name = "Cortex-A53",
- .pmnc_name = "ARM_Cortex-A53",
+ .pmnc_name = "ARMv8_Cortex_A53",
.dt_name = "arm,cortex-a53",
.pmnc_counters = 6,
},
{
.cpuid = CORTEX_A57,
.core_name = "Cortex-A57",
- .pmnc_name = "ARM_Cortex-A57",
+ .pmnc_name = "ARMv8_Cortex_A57",
.dt_name = "arm,cortex-a57",
.pmnc_counters = 6,
},
{
.cpuid = CORTEX_A72,
.core_name = "Cortex-A72",
- .pmnc_name = "ARM_Cortex-A72",
+ .pmnc_name = "ARMv8_Cortex_A72",
.dt_name = "arm,cortex-a72",
.pmnc_counters = 6,
},
@@ -551,6 +550,8 @@ static void gator_timer_offline(void *migrate)
list_for_each_entry(gi, &gator_events, list) {
if (gi->offline) {
len = gi->offline(&buffer, migrate);
+ if (len < 0)
+ pr_err("gator: offline failed for %s\n", gi->name);
marshal_event(len, buffer);
}
}
@@ -639,6 +640,8 @@ static void gator_timer_online(void *migrate)
list_for_each_entry(gi, &gator_events, list) {
if (gi->online) {
len = gi->online(&buffer, migrate);
+ if (len < 0)
+ pr_err("gator: online failed for %s\n", gi->name);
marshal_event(len, buffer);
}
}
@@ -699,7 +702,7 @@ static int gator_timer_start(unsigned long sample_rate)
return 0;
}
-static u64 gator_get_time(void)
+u64 gator_get_time(void)
{
struct timespec ts;
u64 timestamp;
@@ -1366,6 +1369,7 @@ static void gator_op_create_files(struct super_block *sb, struct dentry *root)
struct dentry *dir;
struct gator_interface *gi;
int cpu;
+ int err;
/* reinitialize default values */
gator_cpu_cores = 0;
@@ -1393,8 +1397,12 @@ static void gator_op_create_files(struct super_block *sb, struct dentry *root)
/* Linux Events */
dir = gatorfs_mkdir(sb, root, "events");
list_for_each_entry(gi, &gator_events, list)
- if (gi->create_files)
- gi->create_files(sb, dir);
+ if (gi->create_files) {
+ err = gi->create_files(sb, dir);
+ if (err != 0) {
+ pr_err("gator: create_files failed for %s\n", gi->name);
+ }
+ }
/* Sched Events */
sched_trace_create_files(sb, dir);
@@ -1473,7 +1481,7 @@ static int __init gator_module_init(void)
return -1;
}
- setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
+ setup_deferrable_timer_on_stack(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
/* Initialize the list of cpuids */
memset(gator_cpuids, -1, sizeof(gator_cpuids));
diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c
index f5b81843d1c4..cb75a3494519 100644
--- a/drivers/gator/gator_marshaling.c
+++ b/drivers/gator/gator_marshaling.c
@@ -27,6 +27,7 @@ static void marshal_summary(long long timestamp, long long uptime, long long mon
{
unsigned long flags;
int cpu = 0;
+ char buf[32];
local_irq_save(flags);
gator_buffer_write_packed_int(cpu, SUMMARY_BUF, MESSAGE_SUMMARY);
@@ -36,6 +37,9 @@ static void marshal_summary(long long timestamp, long long uptime, long long mon
gator_buffer_write_packed_int64(cpu, SUMMARY_BUF, monotonic_delta);
gator_buffer_write_string(cpu, SUMMARY_BUF, "uname");
gator_buffer_write_string(cpu, SUMMARY_BUF, uname);
+ gator_buffer_write_string(cpu, SUMMARY_BUF, "PAGESIZE");
+ snprintf(buf, sizeof(buf), "%lu", PAGE_SIZE);
+ gator_buffer_write_string(cpu, SUMMARY_BUF, buf);
#if GATOR_IKS_SUPPORT
gator_buffer_write_string(cpu, SUMMARY_BUF, "iks");
gator_buffer_write_string(cpu, SUMMARY_BUF, "");
diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c
index d9b82ee1857f..b35ccd58f9d6 100644
--- a/drivers/gator/gator_trace_gpu.c
+++ b/drivers/gator/gator_trace_gpu.c
@@ -132,7 +132,7 @@ static void mali_activity_stop(int core, int key)
gator_marshal_activity_switch(core, key, last_activity, last_pid);
}
-void mali_activity_clear(struct mali_counter mali_activity[], size_t mali_activity_size)
+static void mali_activity_clear(struct mali_counter mali_activity[], size_t mali_activity_size)
{
int activity;
int cores;
diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c
index ad7c39e14a17..700dd2ecdf08 100644
--- a/drivers/gator/gator_trace_sched.c
+++ b/drivers/gator/gator_trace_sched.c
@@ -104,13 +104,19 @@ static void collect_counters(u64 time, struct task_struct *task, bool sched_swit
list_for_each_entry(gi, &gator_events, list) {
if (gi->read) {
len = gi->read(&buffer, sched_switch);
+ if (len < 0)
+ pr_err("gator: read failed for %s\n", gi->name);
marshal_event(len, buffer);
} else if (gi->read64) {
- len = gi->read64(&buffer64);
+ len = gi->read64(&buffer64, sched_switch);
+ if (len < 0)
+ pr_err("gator: read64 failed for %s\n", gi->name);
marshal_event64(len, buffer64);
}
if (gi->read_proc && task != NULL) {
len = gi->read_proc(&buffer64, task);
+ if (len < 0)
+ pr_err("gator: read_proc failed for %s\n", gi->name);
marshal_event64(len, buffer64);
}
}
diff --git a/drivers/gator/mali_midgard.mk b/drivers/gator/mali_midgard.mk
index b0076c22da6b..bd0ac77d3b1d 100644
--- a/drivers/gator/mali_midgard.mk
+++ b/drivers/gator/mali_midgard.mk
@@ -36,5 +36,6 @@ EXTRA_CFLAGS += -I$(KBASE_DIR)/ \
-I$(DDK_DIR)/include \
-I$(KBASE_DIR)/osk/src/linux/include \
-I$(KBASE_DIR)/platform_dummy \
- -I$(KBASE_DIR)/src
+ -I$(KBASE_DIR)/src \
+ -Idrivers/staging/android \
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index c3768fafff45..9002122c52ae 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -136,7 +136,7 @@ static struct gpio_desc *gpio_to_desc(unsigned gpio)
*/
static int desc_to_gpio(const struct gpio_desc *desc)
{
- return desc->chip->base + gpio_chip_hwgpio(desc);
+ return desc - &gpio_desc[0];
}
@@ -1242,15 +1242,14 @@ int gpiochip_add(struct gpio_chip *chip)
}
}
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
#ifdef CONFIG_PINCTRL
INIT_LIST_HEAD(&chip->pin_ranges);
#endif
of_gpiochip_add(chip);
-unlock:
- spin_unlock_irqrestore(&gpio_lock, flags);
-
if (status)
goto fail;
@@ -1263,6 +1262,9 @@ unlock:
chip->label ? : "generic");
return 0;
+
+unlock:
+ spin_unlock_irqrestore(&gpio_lock, flags);
fail:
/* failures here can mean systems won't boot... */
pr_err("gpiochip_add: gpios %d..%d (%s) failed to register\n",
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index d8a22c2a579d..3fccf591ebc4 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -1,2 +1,3 @@
obj-y += drm/ vga/
obj-$(CONFIG_TEGRA_HOST1X) += host1x/
+obj-y += arm/
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild
new file mode 100755
index 000000000000..19c7e9a2659d
--- /dev/null
+++ b/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
new file mode 100755
index 000000000000..f6f84538f908
--- /dev/null
+++ b/drivers/gpu/arm/Kconfig
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
+
+source "drivers/base/ump/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
new file mode 100755
index 000000000000..6bc3d9091751
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,261 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r5p0-06rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+ -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+ -DMALI_COVERAGE=$(MALI_COVERAGE) \
+ -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+ -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+ mali_kbase_device.c \
+ mali_kbase_cache_policy.c \
+ mali_kbase_mem.c \
+ mali_kbase_mmu.c \
+ mali_kbase_jd.c \
+ mali_kbase_jd_debugfs.c \
+ mali_kbase_jm.c \
+ mali_kbase_cpuprops.c \
+ mali_kbase_gpuprops.c \
+ mali_kbase_js.c \
+ mali_kbase_js_affinity.c \
+ mali_kbase_js_ctx_attr.c \
+ mali_kbase_event.c \
+ mali_kbase_context.c \
+ mali_kbase_pm.c \
+ mali_kbase_pm_driver.c \
+ mali_kbase_pm_metrics.c \
+ mali_kbase_pm_ca.c \
+ mali_kbase_pm_ca_fixed.c \
+ mali_kbase_pm_always_on.c \
+ mali_kbase_pm_coarse_demand.c \
+ mali_kbase_pm_demand.c \
+ mali_kbase_pm_policy.c \
+ mali_kbase_config.c \
+ mali_kbase_security.c \
+ mali_kbase_instr.c \
+ mali_kbase_softjobs.c \
+ mali_kbase_10969_workaround.c \
+ mali_kbase_hw.c \
+ mali_kbase_utility.c \
+ mali_kbase_debug.c \
+ mali_kbase_trace_timeline.c \
+ mali_kbase_gpu_memory_debugfs.c \
+ mali_kbase_mem_linux.c \
+ mali_kbase_core_linux.c \
+ mali_kbase_sync.c \
+ mali_kbase_sync_user.c \
+ mali_kbase_replay.c \
+ mali_kbase_mem_profile_debugfs.c \
+ mali_kbase_mmu_hw_direct.c \
+ mali_kbase_disjoint_events.c \
+ mali_kbase_gator_api.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+SRC += \
+ mali_kbase_pm_ca_random.c \
+ mali_kbase_pm_demand_always_powered.c \
+ mali_kbase_pm_fast_start.c
+endif
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ifeq ($(CONFIG_MACH_MANTA),y)
+ SRC += mali_kbase_mem_alloc_carveout.c
+else
+ SRC += mali_kbase_mem_alloc.c
+endif
+
+# ensure GPL version of malisw gets pulled in
+ccflags-y += -I$(KBASE_PATH)
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+ # Dummy model
+ SRC += mali_kbase_model_dummy.c
+ SRC += mali_kbase_model_linux.c
+ # HW error simulation
+ SRC += mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(MALI_MOCK_TEST),1)
+ # Test functionality
+ SRC += tests/internal/src/mock/mali_kbase_pm_driver_mock.c
+endif
+
+# in-tree/out-of-tree logic needs to be slightly different to determine if a file is present
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+MALI_METRICS_PATH = $(srctree)/drivers/gpu/arm/midgard
+else
+# out-of-tree
+MALI_METRICS_PATH = $(KBUILD_EXTMOD)
+endif
+
+# Use vsync metrics example using PL111 driver, if available
+ifeq ($(wildcard $(MALI_METRICS_PATH)/mali_kbase_pm_metrics_linux.c),)
+ SRC += mali_kbase_pm_metrics_dummy.c
+else
+ SRC += mali_kbase_pm_metrics_linux.c
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+ SRC += mali_kbase_platform_fake.c
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+ SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+ platform/vexpress/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+ SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/rtsm_ve
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+ SRC += platform/juno/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/juno
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+ SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+ ccflags-y += -I$(src)/platform/juno_soc
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+ SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ),y)
+ SRC += platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c \
+ platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_virtex7_40mhz
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+ SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+ platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_GOLDFISH),y)
+ SRC += platform/goldfish/mali_kbase_config_goldfish.c
+ ccflags-y += -I$(src)/platform/goldfish
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_PBX),y)
+ SRC += platform/pbx/mali_kbase_config_pbx.c
+ ccflags-y += -I$(src)/platform/pbx
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_PANDA),y)
+ SRC += platform/panda/mali_kbase_config_panda.c
+ ccflags-y += -I$(src)/platform/panda
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+ # remove begin and end quotes from the Kconfig string type
+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+ MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ ifeq ($(CONFIG_MALI_MIDGARD),m)
+ include $(src)/platform/$(platform_name)/Kbuild
+ else ifeq ($(CONFIG_MALI_MIDGARD),y)
+ obj-$(CONFIG_MALI_MIDGARD) += platform/
+ endif
+ endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+mali_kbase-$(CONFIG_MALI_DEVFREQ) += mali_kbase_devfreq.o
+
+ifneq ($(wildcard $(src)/internal/Kbuild),)
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+include $(src)/internal/Kbuild
+mali_kbase-y += $(INTERNAL:.c=.o)
+endif
+endif
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
new file mode 100755
index 000000000000..f0d92918caed
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,185 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+ tristate "Mali Midgard series support"
+ default n
+ help
+ Enable this option to build support for an ARM Mali Midgard GPU.
+
+ To compile this driver as a module, choose M here:
+ this will generate a single module, called mali_kbase.
+
+config MALI_GATOR_SUPPORT
+ bool "Streamline Debug support"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+ You will need the Gator device driver already loaded before loading this driver when enabling
+ Streamline debug support.
+
+config MALI_MIDGARD_DVFS
+ bool "Enable DVFS"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Choose this option to enable DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_RT_PM
+ bool "Enable Runtime power management"
+ depends on MALI_MIDGARD
+ depends on PM_RUNTIME
+ default n
+ help
+ Choose this option to enable runtime power management in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+ bool "Enable kbase tracing"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Enables tracing in kbase. The trace log is available through
+ the "mali_trace" debugfs file when CONFIG_DEBUG_FS is enabled.
+
+config MALI_MIDGARD_DEBUG_SYS
+ bool "Enable sysfs for the Mali Midgard DDK"
+ depends on MALI_MIDGARD && SYSFS
+ default n
+ help
+ Enables sysfs for the Mali Midgard DDK, allowing the DDK to be set and monitored via sysfs.
+
+config MALI_DEVFREQ
+ bool "devfreq support for Mali"
+ depends on MALI_MIDGARD && PM_DEVFREQ
+ help
+ Support devfreq for Mali.
+
+ Using the devfreq framework and, by default, the simpleondemand
+ governor, the frequency of Mali will be dynamically selected from the
+ available OPPs.
+
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+ depends on MALI_MIDGARD
+ bool "Enable Expert Settings"
+ default n
+ help
+ Enabling this option and modifying the default settings may produce a driver with performance or
+ other limitations.
+
+config MALI_DEBUG_SHADER_SPLIT_FS
+ bool "Allow mapping of shader cores via sysfs"
+ depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT
+ default n
+ help
+ Select this option to provide a sysfs entry for runtime configuration of shader
+ core affinity masks.
+
+config MALI_PLATFORM_FAKE
+ bool "Enable fake platform device support"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ When you start to work with the Mali Midgard series device driver, the platform-specific code of
+ the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+ supports creating the platform device outside of the Linux platform-specific code.
+ Enable this option if you would like to use a platform device configuration from within the device driver.
+
+choice
+ prompt "Platform configuration"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default MALI_PLATFORM_VEXPRESS
+ help
+ Select the SoC platform that contains a Mali Midgard GPU.
+
+config MALI_PLATFORM_VEXPRESS
+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+ bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+ bool "Versatile Express w/Virtex7 @ 40MHz"
+config MALI_PLATFORM_GOLDFISH
+ depends on ARCH_GOLDFISH
+ bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+ depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+ bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+ bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+ depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+ string "Third party platform name"
+ help
+ Enter the name of a third party platform that is supported. The third-party configuration
+ file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+ specified here.
+
+config MALI_DEBUG
+ bool "Debug build"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Select this option for increased checking and reporting of errors.
+
+config MALI_NO_MALI
+ bool "No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ This can be used to test the driver in a simulated environment
+ whereby the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+config MALI_ERROR_INJECT
+ bool "Error injection"
+ depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+ default n
+ help
+ Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+ bool "Timeline tracing"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+ bool "Enable system event tracing support"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Choose this option to enable system trace events for each
+ kbase event. This is typically used for debugging but has
+ minimal overhead when not in use. Enable only if you know what
+ you are doing.
+
+config MALI_GPU_TRACEPOINTS
+ bool "Enable GPU tracepoints"
+ depends on MALI_MIDGARD && ANDROID
+ select GPU_TRACEPOINTS
+ help
+ Enables GPU tracepoints using Android trace event definitions.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/drivers/gpu/arm/midgard/MALI_SOFTWARE_VERSION b/drivers/gpu/arm/midgard/MALI_SOFTWARE_VERSION
new file mode 100644
index 000000000000..3c1af6a88789
--- /dev/null
+++ b/drivers/gpu/arm/midgard/MALI_SOFTWARE_VERSION
@@ -0,0 +1 @@
+MALI_SOFTWARE_VERSION=r5p0-06rel0
diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile
new file mode 100755
index 000000000000..9820be2b4446
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,36 @@
+#
+# (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+ $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+ $(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100755
index 000000000000..2bef9c25eaeb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100755
index 000000000000..e2662c2dfaeb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that act
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/malisw/ ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100755
index 000000000000..a70b54947088
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+ rankdir=LR;
+ size="12,8";
+ compound=true;
+
+ node [ shape = box ];
+
+ subgraph cluster_policy_queues {
+ low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+ queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+ rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+ label = "Policy's Queue(s)";
+ }
+
+ call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+ {
+ rank=same;
+ ordering=out;
+ call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+ call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+ call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+ }
+
+ subgraph cluster_runpool {
+
+ as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+ as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+ as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+ as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+ label = "Policy's Run Pool";
+ }
+
+ {
+ rank=same;
+ call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+ sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+ }
+
+ {
+ rank=same;
+ ordering=out;
+ sstop [ shape=ellipse label="SS-Timer expires" ]
+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+ irq [ label="IRQ" shape=ellipse ];
+
+ job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+ }
+
+ hstop [ shape=ellipse label="HS-Timer expires" ]
+
+ /*
+ * Edges
+ */
+
+ call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+ low_queue:qr -> call_dequeue:w;
+ rt_queue:qr -> call_dequeue:w;
+
+ call_dequeue -> as1 [lhead=cluster_runpool];
+
+ as1->call_jdequeue [ltail=cluster_runpool];
+ call_jdequeue->jobslots:0;
+ call_jdequeue->sstop_dotfixup [ arrowhead=none];
+ sstop_dotfixup->sstop [label="Spawn SS-Timer"];
+ sstop->jobslots [label="SoftStop"];
+ sstop->hstop [label="Spawn HS-Timer"];
+ hstop->jobslots:ne [label="HardStop"];
+
+
+ as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+ call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+ call_ctxfinish->call_ctxdone [constraint=false];
+
+ call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+ {
+ jobslots->irq [constraint=false];
+
+ irq->job_finish [constraint=false];
+ }
+
+ irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100755
index 000000000000..bd5e21b074ca
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+ rankdir=LR
+ size="6,6"
+ compound=true;
+
+ node [ shape = box ];
+
+ call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+ policy_queue [ label="Policy's Queue" ];
+
+ {
+ rank=same;
+ runpool [ label="Policy's Run Pool" ];
+
+ ctx_finish [ label="ctx finished" ];
+ }
+
+ {
+ rank=same;
+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+ job_finish [ label="Job finished" ];
+ }
+
+
+
+ /*
+ * Edges
+ */
+
+ call_enqueue -> policy_queue;
+
+ policy_queue->runpool [label="dequeue ctx" weight=0.1];
+ runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+ runpool->ctx_finish [ style=dotted ];
+
+ runpool->jobslots [label="dequeue job" weight=0.1];
+ jobslots->runpool [label="requeue job" weight=0.1];
+
+ jobslots->job_finish [ style=dotted ];
+}
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig.h b/drivers/gpu/arm/midgard/mali_base_hwconfig.h
new file mode 100755
index 000000000000..963d6329eceb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig.h
@@ -0,0 +1,31 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Software workarounds configuration for hardware issues.
+ */
+
+#ifndef _BASE_HWCONFIG_H_
+#define _BASE_HWCONFIG_H_
+
+#include <malisw/mali_malisw.h>
+
+#include "mali_base_hwconfig_issues.h"
+#include "mali_base_hwconfig_features.h"
+
+#endif /* _BASE_HWCONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100755
index 000000000000..da582a596b5b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,117 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update user/midgard/mali_base_hwconfig_{issues,features}.h and
+ * re-run hwconfig_header_generator instead. This tool is available in
+ * progs_install directory for host builds. More information is available in
+ * base/tools/hwconfig_header_generator/README */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+ BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100755
index 000000000000..d0c4bffcb38f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,698 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update user/midgard/mali_base_hwconfig_{issues,features}.h and
+ * re-run hwconfig_header_generator instead. This tool is available in
+ * progs_install directory for host builds. More information is available in
+ * base/tools/hwconfig_header_generator/README */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6398,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7144,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8073,
+ BASE_HW_ISSUE_8186,
+ BASE_HW_ISSUE_8215,
+ BASE_HW_ISSUE_8245,
+ BASE_HW_ISSUE_8250,
+ BASE_HW_ISSUE_8260,
+ BASE_HW_ISSUE_8280,
+ BASE_HW_ISSUE_8316,
+ BASE_HW_ISSUE_8381,
+ BASE_HW_ISSUE_8394,
+ BASE_HW_ISSUE_8401,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8443,
+ BASE_HW_ISSUE_8456,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8634,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8791,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8833,
+ BASE_HW_ISSUE_8879,
+ BASE_HW_ISSUE_8896,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_8986,
+ BASE_HW_ISSUE_8987,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_9566,
+ BASE_HW_ISSUE_9630,
+ BASE_HW_ISSUE_10127,
+ BASE_HW_ISSUE_10327,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10817,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_10984,
+ BASE_HW_ISSUE_10995,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_2121,
+ BASE_HW_ISSUE_T76X_2315,
+ BASE_HW_ISSUE_T76X_2686,
+ BASE_HW_ISSUE_T76X_2712,
+ BASE_HW_ISSUE_T76X_2772,
+ BASE_HW_ISSUE_T76X_2906,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3285,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3759,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6398,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7144,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8073,
+ BASE_HW_ISSUE_8186,
+ BASE_HW_ISSUE_8215,
+ BASE_HW_ISSUE_8245,
+ BASE_HW_ISSUE_8250,
+ BASE_HW_ISSUE_8260,
+ BASE_HW_ISSUE_8280,
+ BASE_HW_ISSUE_8316,
+ BASE_HW_ISSUE_8381,
+ BASE_HW_ISSUE_8394,
+ BASE_HW_ISSUE_8401,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8443,
+ BASE_HW_ISSUE_8456,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8634,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8791,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8833,
+ BASE_HW_ISSUE_8896,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_8986,
+ BASE_HW_ISSUE_8987,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_9566,
+ BASE_HW_ISSUE_9630,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_10984,
+ BASE_HW_ISSUE_10995,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10127,
+ BASE_HW_ISSUE_10327,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10817,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0_beta[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_2121,
+ BASE_HW_ISSUE_T76X_2315,
+ BASE_HW_ISSUE_T76X_2686,
+ BASE_HW_ISSUE_T76X_2712,
+ BASE_HW_ISSUE_T76X_2772,
+ BASE_HW_ISSUE_T76X_2906,
+ BASE_HW_ISSUE_T76X_3285,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3759,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+#if defined(MALI_INCLUDE_TFRX)
+static const enum base_hw_issue base_hw_issues_tFRx_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* defined(MALI_INCLUDE_TFRX) */
+
+#if defined(MALI_INCLUDE_TFRX)
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* defined(MALI_INCLUDE_TFRX) */
+
+#if defined(MALI_INCLUDE_TFRX)
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* defined(MALI_INCLUDE_TFRX) */
+
+#if defined(MALI_INCLUDE_TFRX)
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* defined(MALI_INCLUDE_TFRX) */
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10649,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100755
index 000000000000..02a255239cdd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1569 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+typedef mali_addr64 base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT 256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2 5
+#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
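As an illustrative sketch only (not part of the patch; the helper names and the 'sem' array are hypothetical), the BASEP_JD_SEM_* macros above index a bitmap of BASE_JD_ATOM_COUNT atoms, one bit per atom:

static inline void example_sem_set(u32 *sem, unsigned int atom_nr)
{
	/* word index is atom_nr / 32, bit within the word is atom_nr % 32 */
	sem[BASEP_JD_SEM_WORD_NR(atom_nr)] |= BASEP_JD_SEM_MASK_IN_WORD(atom_nr);
}

static inline int example_sem_is_set(const u32 *sem, unsigned int atom_nr)
{
	return (sem[BASEP_JD_SEM_WORD_NR(atom_nr)] &
		BASEP_JD_SEM_MASK_IN_WORD(atom_nr)) != 0;
}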
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+ void __user *value; /**< client should store their pointers here */
+ u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+ u64 sizer; /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD,
+ * which would define a @a write-only region on the CPU side that is
+ * nevertheless heavily read by the CPU).
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_TOTAL_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+ BASE_MEM_PROT_CPU_RD = (1U << 0), /**< Read access CPU side */
+ BASE_MEM_PROT_CPU_WR = (1U << 1), /**< Write access CPU side */
+ BASE_MEM_PROT_GPU_RD = (1U << 2), /**< Read access GPU side */
+ BASE_MEM_PROT_GPU_WR = (1U << 3), /**< Write access GPU side */
+ BASE_MEM_PROT_GPU_EX = (1U << 4), /**< Execute allowed on the GPU
+ side */
+
+ /* Note that the HINT flags are obsolete now. If you want the memory
+ * to be cached on the CPU please use the BASE_MEM_CACHED_CPU flag
+ */
+ BASE_MEM_HINT_CPU_RD = (1U << 5), /**< Heavily read CPU side
+ - OBSOLETE */
+ BASE_MEM_HINT_CPU_WR = (1U << 6), /**< Heavily written CPU side
+ - OBSOLETE */
+ BASE_MEM_HINT_GPU_RD = (1U << 7), /**< Heavily read GPU side
+ - OBSOLETE */
+ BASE_MEM_HINT_GPU_WR = (1U << 8), /**< Heavily written GPU side
+ - OBSOLETE */
+
+ BASE_MEM_GROW_ON_GPF = (1U << 9), /**< Grow backing store on GPU
+ Page Fault */
+
+ BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+ shareable */
+ BASE_MEM_COHERENT_LOCAL = (1U << 11), /**< Page coherence Inner
+ shareable */
+ BASE_MEM_CACHED_CPU = (1U << 12), /**< Should be cached on the
+ CPU */
+
+/* IN/OUT */
+ BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+ and the CPU */
+/* OUT */
+ BASE_MEM_NEED_MMAP = (1U << 14) /**< Must call mmap to acquire a GPU
+ address for the alloc */
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_INPUT_BITS 14
+#define BASE_MEM_FLAGS_NR_OUTPUT_BITS 1
+#define BASE_MEM_FLAGS_NR_TOTAL_BITS ((BASE_MEM_FLAGS_NR_INPUT_BITS) + (BASE_MEM_FLAGS_NR_OUTPUT_BITS))
+#define BASE_MEM_FLAGS_NR_BITS 15
+
+#if BASE_MEM_FLAGS_NR_TOTAL_BITS > BASE_MEM_FLAGS_NR_BITS
+#error "Too many flag bits, will require change in cmem"
+#endif
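For illustration (assumed user-side usage, not taken from the patch; the helper name is hypothetical), allocation flags are ORed together and can be checked against the flag-bit counts above:

static const base_mem_alloc_flags example_flags =
	BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF;

static inline int example_mem_flags_valid(base_mem_alloc_flags flags)
{
	/* all bits at or above BASE_MEM_FLAGS_NR_TOTAL_BITS must be clear */
	return (flags >> BASE_MEM_FLAGS_NR_TOTAL_BITS) == 0;
}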
+
+/**
+ * @brief Memory types supported by @a base_mem_import
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+ BASE_MEM_IMPORT_TYPE_INVALID = 0,
+ /** UMP import. Handle type is ump_secure_id. */
+ BASE_MEM_IMPORT_TYPE_UMP = 1,
+ /** UMM import. Handle type is a file descriptor (int) */
+ BASE_MEM_IMPORT_TYPE_UMM = 2
+} base_mem_import_type;
+
+/* legacy API wrappers */
+#define base_tmem_import_type base_mem_import_type
+#define BASE_TMEM_IMPORT_TYPE_INVALID BASE_MEM_IMPORT_TYPE_INVALID
+#define BASE_TMEM_IMPORT_TYPE_UMP BASE_MEM_IMPORT_TYPE_UMP
+#define BASE_TMEM_IMPORT_TYPE_UMM BASE_MEM_IMPORT_TYPE_UMM
+
+/**
+ * @brief Invalid memory handle type.
+ * Return value from functions returning @a base_mem_handle on error.
+ */
+#define BASE_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* handles up to 64<<PAGE_SHIFT are reserved for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+ BASE_MEM_COOKIE_BASE)
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+ BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */
+ BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */
+ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */
+ BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */
+ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Deferred Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief A basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+ struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent an imported memory object.
+ * A simple opaque handle to imported memory; it can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+ struct {
+ mali_addr64 handle;
+ } basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+ struct {
+ int fd;
+ } basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+ struct {
+ int fd;
+ int stream_fd;
+ } basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as a callback
+ * function pointer or data needed to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+ u64 blob[2]; /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+ base_mem_handle handle;
+ u64 offset;
+ u64 length;
+};
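A hedged example (not from the patch) of filling in the structure for the write-alloc special page; the count of four pages is arbitrary:

static const struct base_mem_aliasing_info example_write_alloc_alias = {
	.handle = BASE_MEM_WRITE_ALLOC_PAGES_HANDLE,
	.offset = 0,	/* ignored for the special handle */
	.length = 4	/* number of times the special page is needed */
};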
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by placing it before/after 'core_req' in the structure it should be possible to add it without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering-only dependency,
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly. Failing to specify the
+ * correct settings can (and will) cause early job termination. Multiple
+ * values can be ORed together to specify multiple requirements.
+ * A special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies and doesn't execute anything on the hardware.
+ */
+typedef u16 base_jd_core_req;
+
+/* Requirements that come from the HW */
+#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */
+#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS (1U << 1)
+#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */
+#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */
+#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC (1U << 13)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP (1U << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON (1U << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced, no syncsets can be bundled with the atom;
+ * they should instead be part of NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resources to use;
+ * the second pre_dep object can be used to create other dependencies.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB (1U << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions:
+ *
+ * - No external resources. Any required external resources will be held by the
+ * replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ *
+ * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag,
+ * allowing specific jobs to be marked as 'Only Compute' instead of the entire context
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+
+/**
+* These requirement bits are currently unused in base_jd_core_req (currently a u16)
+*/
+
+#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5)
+#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15)
+
+/**
+* Mask of all the currently unused requirement bits in base_jd_core_req.
+*/
+
+#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \
+ BASEP_JD_REQ_RESERVED_BIT15)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+ BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER))
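For illustration only (the helper names are hypothetical, not part of the patch), the type mask above lets code classify an atom while ignoring the flag bits:

static INLINE int example_is_dep_only_atom(base_jd_core_req core_req)
{
	/* dependency-only atoms have no type bits set once flags are masked */
	return (core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP;
}

static INLINE int example_is_soft_job(base_jd_core_req core_req)
{
	return (core_req & BASE_JD_REQ_SOFT_JOB) != 0;
}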
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+ /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+ KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+ /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+ /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+ /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+ /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+ KBASE_ATOM_COREREF_STATE_READY
+};
+
+enum kbase_jd_atom_state {
+ /** Atom is not used */
+ KBASE_JD_ATOM_STATE_UNUSED,
+ /** Atom is queued in JD */
+ KBASE_JD_ATOM_STATE_QUEUED,
+ /** Atom has been given to JS (is runnable/running) */
+ KBASE_JD_ATOM_STATE_IN_JS,
+ /** Atom has been completed, but not yet handed back to userspace */
+ KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+ base_atom_id atom_id; /**< An atom number */
+ base_jd_dep_type dependency_type; /**< Dependency type */
+};
+
+typedef struct base_jd_atom_v2 {
+ mali_addr64 jc; /**< job-chain GPU address */
+ struct base_jd_udata udata; /**< user data */
+ kbase_pointer extres_list; /**< list of external resources */
+ u16 nr_extres; /**< nr of external resources */
+ base_jd_core_req core_req; /**< core requirements */
+ const struct base_dependency pre_dep[2]; /**< pre-dependencies; the SETTER function must be used to assign this field,
+ in order to reduce the possibility of improper assignment of a dependency field */
+ base_atom_id atom_number; /**< unique number to identify the atom */
+ s8 prio; /**< priority - smaller is higher priority */
+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+ u8 padding[5];
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+ mali_addr64 jc; /**< job-chain GPU address */
+ struct base_jd_udata udata; /**< user data */
+ kbase_pointer extres_list; /**< list of external resources */
+ u16 nr_extres; /**< nr of external resources */
+ base_jd_core_req core_req; /**< core requirements */
+ base_atom_id pre_dep[2]; /**< pre-dependencies */
+ base_atom_id atom_number; /**< unique number to identify the atom */
+ s8 prio; /**< priority - smaller is higher priority */
+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+ u8 padding[7];
+};
+#endif
+
+typedef enum base_external_resource_access {
+ BASE_EXT_RES_ACCESS_SHARED,
+ BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+ u64 ext_resource;
+} base_external_resource;
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep The kbase jd atom dependency to be initialized.
+ * @param id The atom_id to be assigned.
+ * @param dep_type The dep_type to be assigned.
+ *
+ */
+static INLINE void base_jd_atom_dep_set(const struct base_dependency* const_dep, base_atom_id id, base_jd_dep_type dep_type)
+{
+ struct base_dependency* dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+ /* make sure we don't set disallowed combinations of atom_id/dependency_type */
+ LOCAL_ASSERT( ( id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+ (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID) );
+
+ dep = REINTERPRET_CAST(struct base_dependency*)const_dep;
+
+ dep->atom_id = id;
+ dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep The kbase jd atom dependency to be written.
+ * @param[in] from The dependency to make a copy from.
+ *
+ */
+static INLINE void base_jd_atom_dep_copy(const struct base_dependency* const_dep, const struct base_dependency* from)
+{
+ LOCAL_ASSERT(const_dep != NULL);
+
+ base_jd_atom_dep_set(const_dep, from->atom_id, from->dependency_type);
+}
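A usage sketch (assumed, not part of the patch): populating the two pre-dependencies of a v2 atom with the setter rather than writing the const fields directly; the atom IDs 5 and 9 are arbitrary:

static INLINE void example_setup_pre_deps(struct base_jd_atom_v2 *atom)
{
	/* data dependency on atom 5, ordering-only dependency on atom 9 */
	base_jd_atom_dep_set(&atom->pre_dep[0], 5, BASE_JD_DEP_TYPE_DATA);
	base_jd_atom_dep_set(&atom->pre_dep[1], 9, BASE_JD_DEP_TYPE_ORDER);
}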
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with an uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static INLINE void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+ LOCAL_ASSERT(atom);
+ LOCAL_ASSERT(fence);
+ LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+ LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+ atom->jc = (uintptr_t) fence;
+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with an uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static INLINE void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+ LOCAL_ASSERT(atom);
+ LOCAL_ASSERT(fence);
+ LOCAL_ASSERT(fence->basep.fd >= 0);
+ atom->jc = (uintptr_t) fence;
+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
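An assumed usage sketch (not part of the patch; the wrapper names are hypothetical) showing the intended call pattern — a trigger atom signals a fence set up with base_fence_init(), while a wait atom uses a fence carrying a valid fd, e.g. after base_fence_import():

static INLINE void example_setup_trigger(struct base_jd_atom_v2 *atom,
					 struct base_fence *fence)
{
	/* fence is assumed to be freshly set up with base_fence_init() */
	base_jd_fence_trigger_setup_v2(atom, fence);
}

static INLINE void example_setup_wait(struct base_jd_atom_v2 *atom,
				      struct base_fence *fence)
{
	/* fence is assumed to carry a valid fd, e.g. after base_fence_import() */
	base_jd_fence_wait_setup_v2(atom, fence);
}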
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res The resource object to initialize
+ * @param handle The handle to the imported memory object
+ * @param access The type of access requested
+ */
+static INLINE void base_external_resource_init(struct base_external_resource * res, struct base_import_handle handle, base_external_resource_access access)
+{
+ mali_addr64 address;
+ address = handle.basep.handle;
+
+ LOCAL_ASSERT(res != NULL);
+ LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+ LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+ res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
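Illustrative only (not part of the patch): binding an imported buffer to an atom with exclusive access; the import handle is assumed to come from a prior memory import call:

static INLINE void example_bind_imported_buffer(struct base_external_resource *res,
						struct base_import_handle handle)
{
	base_external_resource_init(res, handle, BASE_EXT_RES_ACCESS_EXCLUSIVE);
	/* res is then referenced from the atom's extres_list, with
	 * BASE_JD_REQ_EXTERNAL_RESOURCES set in core_req */
}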
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+ BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+ BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event indicates success (SW events only) */
+ BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+ BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+ BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */
+ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0 - subtype
+ * @li 12:11 - type
+ * @li 13 - SW success (only valid if the SW bit is set)
+ * @li 14 - SW event (HW event if not set)
+ * @li 15 - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+ /* HW defined exceptions */
+
+ /** Start of HW Non-fault status codes
+ *
+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+ * because the job was hard-stopped
+ */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+ /* non-fatal exceptions */
+ BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+ BASE_JD_EVENT_DONE = 0x01,
+ BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+ BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */
+ BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+ /** End of HW Non-fault status codes
+ *
+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+ * because the job was hard-stopped
+ */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+ /** Start of HW fault and SW Error status codes */
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+ /* job exceptions */
+ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+ BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+ BASE_JD_EVENT_STATE_FAULT = 0x5A,
+ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+ BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+ /* GPU exceptions */
+ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+ /* MMU exceptions */
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+ BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+ /* SW defined exceptions */
+ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+ BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+ BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+ BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+ BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+ BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+ /** End of HW fault and SW Error status codes */
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ /** Start of SW Success status codes */
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+ /** End of SW Success status codes */
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ /** Start of Kernel-only status codes. Such codes are never returned to user-space */
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+ /** End of Kernel-only status codes. */
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. These can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (i.e. all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+ base_jd_event_code event_code; /**< event code */
+ base_atom_id atom_number; /**< the atom number that has completed */
+ struct base_jd_udata udata; /**< user data */
+} base_jd_event_v2;
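A hedged sketch (helper names are hypothetical, not part of the patch) of how user-side code might classify a returned event using the masks defined above:

static INLINE int example_event_is_sw_success(base_jd_event_code code)
{
	return (code & BASE_JD_SW_EVENT) && (code & BASE_JD_SW_EVENT_SUCCESS);
}

static INLINE u32 example_event_type(base_jd_event_code code)
{
	/* one of BASE_JD_SW_EVENT_JOB, _BAG, _INFO or _RESERVED */
	return code & BASE_JD_SW_EVENT_TYPE_MASK;
}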
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+ u64 system_time;
+ u64 cycle_counter;
+ u64 sec;
+ u32 usec;
+ u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
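As an illustrative compile-time check (not in the patch), the padded size can be verified against the 64-byte Cortex-A15 cache line mentioned above:

/* 8 + 8 + 8 + 4 + 36 == 64 bytes; a negative array size triggers a build error */
typedef char example_dump_struct_fills_cache_line
	[(sizeof(struct base_dump_cpu_gpu_counters) == 64) ? 1 : -1];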
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#ifdef __KERNEL__
+/*
+ * The following typedefs should be removed when a midg types header is added.
+ * See MIDCOM-1657 for details.
+ */
+typedef u32 midg_product_id;
+typedef u32 midg_cache_features;
+typedef u32 midg_tiler_features;
+typedef u32 midg_mem_features;
+typedef u32 midg_mmu_features;
+typedef u32 midg_js_features;
+typedef u32 midg_as_present;
+typedef u32 midg_js_present;
+
+#define MIDG_MAX_JOB_SLOTS 16
+
+#else
+#include <midg/mali_midg.h>
+#endif
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref midg_gpuprops_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ * - Part of a critical inner-loop
+ * - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref midg_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU registers themselves.
+ *
+ * @usecase The raw properties in @ref midg_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ * - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ * so they won't accidentally pick another file with 'mali_t600' in its name
+ * - When the build doesn't work, the System Integrator may think the DDK
+ * doesn't work, and attempt to fix it themselves:
+ * - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ * you.
+ * - For a \#include mechanism, checks must still be made elsewhere, which the
+ * System Integrator may try working around by setting \#defines (such as
+ * VA_BITS) themselves in their plat_config.h. In the worst case, they may
+ * set the prevention-mechanism \#define of
+ * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ * - In this case, they would believe they are on the right track, because
+ * the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+ digraph plat_config_mechanism {
+ rankdir=BT
+ size="6,6"
+
+ "mali_base.h";
+ "midg/midg.h";
+
+ node [ shape=box ];
+ {
+ rank = same; ordering = out;
+
+ "midg/midg_gpu_props.h";
+ "base/midg_gpus/mali_t600.h";
+ "base/midg_gpus/other_midg_gpu.h";
+ }
+ { rank = same; "plat/plat_config.h"; }
+ {
+ rank = same;
+ "midg/midg.h" [ shape=box ];
+ gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+ select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+ }
+ node [ shape=box ];
+ { rank = same; "plat/plat_config.h"; }
+ { rank = same; "mali_base.h"; }
+
+ "mali_base.h" -> "midg/midg.h" -> "midg/midg_gpu_props.h";
+ "mali_base.h" -> "plat/plat_config.h" ;
+ "mali_base.h" -> select_gpu ;
+
+ "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+ gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+ select_gpu -> "base/midg_gpus/mali_t600.h" ;
+ select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+ }
+ @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided memory with the entire processed
+ * @ref base_gpu_props structure, because this information is required on both
+ * the user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT or L3_PRESENT masks, depending on whether the
+ * system is L2 or L2+L3 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2/L3_PRESENT masks (a LUT helper is already
+ * required for the above), or by the simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
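A rough sketch (assumed, with hypothetical helper names) of the one-time derivation described above: each set bit in the L2/L3 present mask yields one group, and the shader core count is a population count of a possibly non-contiguous mask:

static INLINE u32 example_popcount64(u64 mask)
{
	u32 n = 0;

	while (mask) {
		mask &= mask - 1;	/* clear the lowest set bit */
		n++;
	}
	return n;
}

static INLINE void example_count_groups_and_cores(u64 l2_present, u64 shader_present,
						  u32 *num_groups, u32 *num_cores)
{
	*num_groups = example_popcount64(l2_present);
	*num_cores = example_popcount64(shader_present);
}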
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+ /**
+ * Product specific value.
+ */
+ midg_product_id product_id;
+
+ /**
+ * Status of the GPU release.
+ * No defined values, but starts at 0 and increases by one for each release
+ * status (alpha, beta, EAC, etc.).
+ * 4 bit values (0-15).
+ */
+ u16 version_status;
+
+ /**
+ * Minor release number of the GPU. "P" part of an "RnPn" release number.
+ * 8 bit values (0-255).
+ */
+ u16 minor_revision;
+
+ /**
+ * Major release number of the GPU. "R" part of an "RnPn" release number.
+ * 4 bit values (0-15).
+ */
+ u16 major_revision;
+
+ u16 padding;
+
+ /**
+ * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+ * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ */
+ u32 gpu_speed_mhz;
+
+ /**
+ * @usecase GPU clock max/min speed is required for computing best/worst case
+ * in tasks such as job scheduling and irq_throttling. (It is not specified in the
+ * Midgard Architecture).
+ */
+ u32 gpu_freq_khz_max;
+ u32 gpu_freq_khz_min;
+
+ /**
+ * Size of the shader program counter, in bits.
+ */
+ u32 log2_program_counter_size;
+
+ /**
+ * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+ * bitpattern where a set bit indicates that the format is supported.
+ *
+ * Before using a texture format, it is recommended that the corresponding
+ * bit be checked.
+ */
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+ /**
+ * Theoretical maximum memory available to the GPU. It is unlikely that a
+ * client will be able to allocate all of this memory for their own
+ * purposes, but this at least provides an upper bound on the memory
+ * available to the GPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * client will not be expecting to allocate anywhere near this value.
+ */
+ u64 gpu_available_memory_size;
+};
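For illustration (the linear bit-to-format mapping is an assumption, not taken from the patch), checking a texture format bit before use, as recommended above:

static INLINE int example_texture_format_supported(
	const struct mali_base_gpu_core_props *props, unsigned int fmt)
{
	/* assumes each TEXTURE_FEATURES_x register covers 32 consecutive formats */
	if (fmt >= 32u * BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS)
		return 0;
	return (props->texture_features[fmt / 32] >> (fmt % 32)) & 1u;
}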
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+ u8 log2_line_size;
+ u8 log2_cache_size;
+ u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+ u8 padding[5];
+};
+
+struct mali_base_gpu_l3_cache_props {
+ u8 log2_line_size;
+ u8 log2_cache_size;
+ u8 padding[6];
+};
+
+struct mali_base_gpu_tiler_props {
+ u32 bin_size_bytes; /* Max is 4*2^15 */
+ u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+ u32 max_threads; /* Max. number of threads per core */
+ u32 max_workgroup_size; /* Max. number of threads per workgroup */
+ u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */
+ u16 max_registers; /* Total size [1..65535] of the register file available per core. */
+ u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+ u8 padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Job Scheduler policy. Therefore,
+ * the application should not further restrict the core mask itself, as it may
+ * result in an empty core mask. However, it can guarantee that there will be
+ * at least one core available for each core group exposed.
+ *
+ * @usecase Chains marked at certain user-side priorities (e.g. the Long-running
+ * (batch) priority ) can be prevented from running on entire core groups by the
+ * Kernel Chain Scheduler policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+ u64 core_mask; /**< Core restriction mask required for the group */
+ u16 num_cores; /**< Number of cores in the group */
+ u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+ u32 num_groups;
+
+ /**
+ * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+ *
+ * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+ * whether the core groups are coherent or not. Hence this member is needed
+ * to calculate how much memory is required for dumping.
+ *
+ * @note Do not use it to work out how many valid elements are in the
+ * group[] member. Use num_groups instead.
+ */
+ u32 num_core_groups;
+
+ /**
+ * Coherency features of the memory, accessed by @ref midg_mem_features
+ * methods
+ */
+ midg_mem_features coherency;
+
+ u32 padding;
+
+ /**
+ * Descriptors of coherent groups
+ */
+ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
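An assumed usage sketch (not part of the patch): walking the valid group descriptors; as noted above, num_groups bounds the loop, not num_core_groups or the array size:

static INLINE u64 example_union_of_group_masks(
	const struct mali_base_gpu_coherent_group_info *info)
{
	u64 all_cores = 0;
	u32 i;

	for (i = 0; i < info->num_groups; i++)
		all_cores |= info->group[i].core_mask;

	return all_cores;
}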
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref midg_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct midg_raw_gpu_props {
+ u64 shader_present;
+ u64 tiler_present;
+ u64 l2_present;
+ u64 l3_present;
+
+ midg_cache_features l2_features;
+ midg_cache_features l3_features;
+ midg_mem_features mem_features;
+ midg_mmu_features mmu_features;
+
+ midg_as_present as_present;
+
+ u32 js_present;
+ midg_js_features js_features[MIDG_MAX_JOB_SLOTS];
+ midg_tiler_features tiler_features;
+ u32 texture_features[3];
+
+ u32 gpu_id;
+
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+
+ u32 padding;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+ struct mali_base_gpu_core_props core_props;
+ struct mali_base_gpu_l2_cache_props l2_props;
+ struct mali_base_gpu_l3_cache_props l3_props;
+ struct mali_base_gpu_tiler_props tiler_props;
+ struct mali_base_gpu_thread_props thread_props;
+
+ /** This member is large, likely to be 128 bytes */
+ struct midg_raw_gpu_props raw_props;
+
+ /** This must be last member of the structure */
+ struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as @ref basep_context_private_flags, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+ /** No flags set */
+ BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+ /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+ BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+ /** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled. */
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1),
+
+ /** Base context flag indicating a 'hint' that this context uses Compute
+ * Jobs only.
+ *
+ * Specifically, this means that it only sends atoms that <b>do not</b>
+ * contain the following @ref base_jd_core_req :
+ * - BASE_JD_REQ_FS
+ * - BASE_JD_REQ_T
+ *
+ * Violation of these requirements will cause the Job-Chains to be rejected.
+ *
+ * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs
+ * of the following @ref midg_job_type (whilst it may work now, it may not
+ * work in future) :
+ * - @ref MIDG_JOB_VERTEX
+ * - @ref MIDG_JOB_GEOMETRY
+ *
+ * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE
+ * requirement in atoms.
+ */
+ BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+ ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+ (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+ ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
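
The two masks above are easiest to see in use. The following is an editorial sketch (the helper name is hypothetical and not part of this patch) of building a creation-flag word and reducing it to the subset that may reach the kernel:

static u32 example_kernel_flags(void)
{
	u32 flags = BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED |
		    BASE_CONTEXT_HINT_ONLY_COMPUTE;

	/* Anything outside BASE_CONTEXT_CREATE_ALLOWED_FLAGS would be a
	 * caller error; the flags share bit-space with the private flags
	 * below, so unknown bits must never be passed through. */
	if (flags & ~BASE_CONTEXT_CREATE_ALLOWED_FLAGS)
		return 0;

	/* Only this subset is forwarded to the kernel; user-side-only
	 * flags such as BASE_CONTEXT_CCTX_EMBEDDED are masked off. */
	return flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS;
}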
+
+/**
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+enum basep_context_private_flags {
+ /** Private flag tracking whether job descriptor dumping is disabled */
+ BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
+};
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C pre-processor macros relating to Platform
+ * Config are exposed here.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revision.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+ /**
+ * Pointer to the first entry in the base_jd_replay_jc list. These
+ * will be replayed in @b reverse order (so that extra ones can be added
+ * to the head in future soft jobs without affecting this soft job)
+ */
+ mali_addr64 tiler_jc_list;
+
+ /**
+ * Pointer to the fragment job chain.
+ */
+ mali_addr64 fragment_jc;
+
+ /**
+ * Pointer to the tiler heap free FBD field to be modified.
+ */
+ mali_addr64 tiler_heap_free;
+
+ /**
+ * Hierarchy mask for the replayed fragment jobs. May be zero.
+ */
+ u16 fragment_hierarchy_mask;
+
+ /**
+ * Hierarchy mask for the replayed tiler jobs. May be zero.
+ */
+ u16 tiler_hierarchy_mask;
+
+ /**
+ * Default weight to be used for hierarchy levels not in the original
+ * mask.
+ */
+ u32 hierarchy_default_weight;
+
+ /**
+ * Core requirements for the tiler job chain
+ */
+ base_jd_core_req tiler_core_req;
+
+ /**
+ * Core requirements for the fragment job chain
+ */
+ base_jd_core_req fragment_core_req;
+
+ u8 padding[4];
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ * be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+ /**
+ * Pointer to next entry in the list. A setting of NULL indicates the
+ * end of the list.
+ */
+ mali_addr64 next;
+
+ /**
+ * Pointer to the job chain.
+ */
+ mali_addr64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
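
To make the reverse-order replay concrete, here is an editorial sketch of linking two job chains. The function and its parameters are hypothetical; the caller is assumed to have mapped GPU memory for the two list entries (GPU address list_mem, CPU view entries) and to hold the GPU addresses of two tiler job chains:

static void example_build_replay_list(base_jd_replay_payload *payload,
				      base_jd_replay_jc *entries,
				      mali_addr64 list_mem,
				      mali_addr64 chain_a,
				      mali_addr64 chain_b)
{
	entries[0].next = 0;		/* end of list: replayed first */
	entries[0].jc   = chain_a;
	entries[1].next = list_mem;	/* GPU address of entries[0] */
	entries[1].jc   = chain_b;	/* list head: replayed last */

	/* The head is the second entry, so further chains can later be
	 * prepended without disturbing the existing ones. */
	payload->tiler_jc_list = list_mem + sizeof(base_jd_replay_jc);
}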
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif /* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100755
index 000000000000..01a837cfae8b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-process sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0 error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif /* _BASE_KERNEL_SYNC_H_ */
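
An editorial userspace sketch of the ioctl above; how the stream file descriptor itself is opened is outside the scope of this header and is assumed here:

#include <sys/ioctl.h>
#include <unistd.h>

static int example_insert_fence(int stream_fd)
{
	int fence_fd = ioctl(stream_fd, STREAM_IOC_FENCE_INSERT);

	if (fence_fd < 0)
		return fence_fd;	/* error code from the driver */

	/* ... wait on or trigger the fence via base soft-jobs ... */

	close(fence_fd);		/* fences must be closed when done */
	return 0;
}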
diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100755
index 000000000000..40942283824a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describes a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ * - type = ::BASE_SYNCSET_OP_MSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes
+ * - offset is ignored.
+ * @li a sync from Memory to CPU:
+ * - type = ::BASE_SYNCSET_OP_CSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes.
+ * - offset is ignored.
+ */
+typedef struct basep_syncset {
+ base_mem_handle mem_handle;
+ u64 user_addr;
+ u64 size;
+ u8 type;
+ u8 padding[7];
+} basep_syncset;
+
+#endif
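
As an editorial sketch, a syncset describing a CPU-to-memory sync of size bytes at addr within a mapped object could be filled in as follows (base_mem_handle and u64 are assumed to come from the base headers that include this one; submitting the syncset to the driver is not covered here):

static basep_syncset example_make_msync(base_mem_handle handle,
					u64 addr, u64 size)
{
	basep_syncset sset = {
		.mem_handle = handle,
		.user_addr  = addr,
		.size       = size,
		.type       = BASE_SYNCSET_OP_MSYNC,
		/* padding is implicitly zero-initialised */
	};

	return sset;
}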
diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100755
index 000000000000..ce02e8b8e2bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+#include <malisw/mali_stdtypes.h>
+
+mali_error kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100755
index 000000000000..fc995041d5bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,566 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <malisw/mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_security.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_gpuprops.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: the configuration attributes member of kbdev needs to have
+* been set up before calling kbase_device_init
+*/
+
+/*
+* API to acquire the device list semaphore and return a pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+mali_error kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+struct kbase_device *kbase_find_device(int minor); /* Only needed for gator integration */
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+/**
+ * Ensure that all IRQ handlers have completed execution
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+mali_error kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+mali_error kbase_instr_hwcnt_setup(struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup);
+mali_error kbase_instr_hwcnt_enable(struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup);
+mali_error kbase_instr_hwcnt_disable(struct kbase_context *kctx);
+mali_error kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+mali_error kbase_instr_hwcnt_dump(struct kbase_context *kctx);
+mali_error kbase_instr_hwcnt_dump_irq(struct kbase_context *kctx);
+mali_bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, mali_bool * const success);
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev);
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev);
+
+void kbasep_cache_clean_worker(struct work_struct *data);
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * The GPU has completed performance count sampling successfully.
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+mali_error kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+mali_error kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data,
+ int uk6_atom);
+#else
+mali_error kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data);
+#endif
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+ kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+mali_bool jd_done_nolock(struct kbase_jd_atom *katom);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+mali_bool jd_submit_atom(struct kbase_context *kctx,
+ const struct base_jd_atom_v2 *user_atom,
+ struct kbase_jd_atom *katom);
+
+mali_error kbase_job_slot_init(struct kbase_device *kbdev);
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+void kbase_job_slot_term(struct kbase_device *kbdev);
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+void kbase_job_zap_context(struct kbase_context *kctx);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+mali_error kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+mali_error kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, struct kbase_context *kctx);
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, struct kbase_context *kctx);
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Report a GPU fault.
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param kbdev The kbase device that the GPU fault occurred from.
+ * @param multiple Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS was also set
+ */
+void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple);
+
+/** Kill all jobs that are currently running from a context
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting context from the hardware.
+ *
+ * @param kctx The context to kill jobs from
+ */
+void kbase_job_kill_jobs_from_context(struct kbase_context *kctx);
+
+/**
+ * GPU interrupt handler
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be handled.
+ *
+ * @param kbdev The kbase device to handle an IRQ for
+ * @param val The value of the GPU IRQ status register which triggered the call
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+/**
+ * Prepare for resetting the GPU.
+ * This function just soft-stops all the slots to ensure that as many jobs as possible are saved.
+ *
+ * The function returns a boolean which should be interpreted as follows:
+ * - MALI_TRUE - Prepared for reset, kbase_reset_gpu should be called.
+ * - MALI_FALSE - Another thread is performing a reset, kbase_reset_gpu should not be called.
+ *
+ * @return See description
+ */
+mali_bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * Pre-locked version of @a kbase_prepare_to_reset_gpu.
+ *
+ * Identical to @a kbase_prepare_to_reset_gpu, except that the
+ * kbasep_js_device_data::runpool_irq::lock is externally locked.
+ *
+ * @see kbase_prepare_to_reset_gpu
+ */
+mali_bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/** Reset the GPU
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu iff it returns MALI_TRUE.
+ * It should never be called without a corresponding call to kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu returned MALI_FALSE),
+ * the caller should wait for kbdev->reset_waitq to be signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
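
Taken together, the two entry points above imply the following calling pattern (editorial sketch; the wait predicate is a placeholder, since the completion flag paired with kbdev->reset_waitq is not visible in this header):

mali_bool example_gpu_reset_completed(struct kbase_device *kbdev); /* placeholder */

static void example_request_gpu_reset(struct kbase_device *kbdev)
{
	if (kbase_prepare_to_reset_gpu(kbdev))
		kbase_reset_gpu(kbdev);

	/* Whether this thread or another one issued the reset, wait for
	 * it to complete before touching the GPU again. */
	wait_event(kbdev->reset_waitq, example_gpu_reset_completed(kbdev));
}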
+
+/**
+ * Pre-locked version of @a kbase_reset_gpu.
+ *
+ * Identical to @a kbase_reset_gpu, except that the
+ * kbasep_js_device_data::runpool_irq::lock is externally locked.
+ *
+ * @see kbase_reset_gpu
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * @param[in] exception_code exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or the system has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return MALI_FALSE if a suspend is not in progress
+ * @return !=MALI_FALSE otherwise
+ */
+static INLINE mali_bool kbase_pm_is_suspending(struct kbase_device *kbdev) {
+ return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static INLINE int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int result;
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+ result = katom - &kctx->jctx.atoms[0];
+ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+ return result;
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just incrementing
+ * the counter during disjoint events, we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event).
+ * Once the disjoint state has completed, @ref kbase_disjoint_state_down
+ * should be called.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
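
The state/event split described above boils down to the following pattern (editorial sketch; example_do_affected_work() is a placeholder for whatever submission or completion work happens while the state is held):

void example_do_affected_work(struct kbase_device *kbdev); /* placeholder */

static void example_disjoint_window(struct kbase_device *kbdev)
{
	kbase_disjoint_state_up(kbdev);	       /* also counts one event */

	example_do_affected_work(kbdev);
	/* Counted only while the disjoint state refcount is non-zero. */
	kbase_disjoint_event_potential(kbdev);

	kbase_disjoint_state_down(kbdev);      /* also counts one event */
}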
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+/**
+ * Platform-specific function to setup the OPPs for Mali
+ *
+ * Platforms can define this function if they need to set up the OPPs
+ * in a platform-specific way
+ */
+int setup_opps(void);
+
+#if KBASE_TRACE_ENABLE
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ 0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+ kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+ kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+ trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+ trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+ trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+ trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+ trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(jobslot);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(jobslot);\
+ CSTD_UNUSED(info_val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(refcount);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(info_val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(subcode);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100755
index 000000000000..4ee29098eba3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+ struct device *dev = katom->kctx->kbdev->dev;
+ u32 clamped = 0;
+ struct kbase_va_region *region;
+ phys_addr_t *page_array;
+ u64 page_index;
+ u32 offset = katom->jc & (~PAGE_MASK);
+ u32 *page_1 = NULL;
+ u32 *page_2 = NULL;
+ u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
+ void *dst = job_header;
+ u32 minX, minY, maxX, maxY;
+ u32 restartX, restartY;
+ struct page *p;
+ u32 copy_size;
+
+ dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+ if (!(katom->core_req & BASE_JD_REQ_FS))
+ return 0;
+
+ kbase_gpu_vm_lock(katom->kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+ katom->jc);
+ if (!region || (region->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ page_array = kbase_get_phy_pages(region);
+ if (!page_array)
+ goto out_unlock;
+
+ page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+ /* We need the first 10 words of the fragment shader job descriptor.
+ * We need to check that the offset + 10 words is less than the page
+ * size, otherwise we need to load the next page.
+ * copy_size will equal JOB_HEADER_SIZE when the whole descriptor is
+ * within the page, and will be smaller otherwise.
+ */
+ copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+ page_1 = kmap_atomic(p);
+
+ /* page_1 is a u32 pointer, offset is expressed in bytes */
+ page_1 += offset>>2;
+ dma_sync_single_for_cpu(katom->kctx->kbdev->dev,
+ kbase_dma_addr(p) + offset,
+ copy_size, DMA_BIDIRECTIONAL);
+ memcpy(dst, page_1, copy_size);
+
+ /* The data needed overflows the page boundary,
+ * so we need to map the subsequent page */
+ if (copy_size < JOB_HEADER_SIZE) {
+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+ page_2 = kmap_atomic(p);
+
+ dma_sync_single_for_cpu(katom->kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+ memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+ }
+
+ /* We managed to correctly map one or two pages (in case of overflow) */
+ /* Get Bounding Box data and restart index from fault address low word */
+ minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+ restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+ dev_warn(dev, "Before Clamping:\n"
+ "Jobstatus: %08x\n"
+ "restartIdx: %08x\n"
+ "Fault_addr_low: %08x\n"
+ "minCoordsX: %08x minCoordsY: %08x\n"
+ "maxCoordsX: %08x maxCoordsY: %08x\n",
+ job_header[JOB_DESC_STATUS_WORD],
+ job_header[JOB_DESC_RESTART_INDEX_WORD],
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+ minX, minY,
+ maxX, maxY);
+
+ /* Set the restart index to the one which generated the fault*/
+ job_header[JOB_DESC_RESTART_INDEX_WORD] =
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+ if (restartX < minX) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+ dev_warn(dev,
+ "Clamping restart X index to minimum. %08x clamped to %08x\n",
+ restartX, minX);
+ clamped = 1;
+ }
+ if (restartY < minY) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+ dev_warn(dev,
+ "Clamping restart Y index to minimum. %08x clamped to %08x\n",
+ restartY, minY);
+ clamped = 1;
+ }
+ if (restartX > maxX) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+ dev_warn(dev,
+ "Clamping restart X index to maximum. %08x clamped to %08x\n",
+ restartX, maxX);
+ clamped = 1;
+ }
+ if (restartY > maxY) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+ dev_warn(dev,
+ "Clamping restart Y index to maximum. %08x clamped to %08x\n",
+ restartY, maxY);
+ clamped = 1;
+ }
+
+ if (clamped) {
+ /* Reset the fault address low word
+ * and set the job status to STOPPED */
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+ job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+ dev_warn(dev, "After Clamping:\n"
+ "Jobstatus: %08x\n"
+ "restartIdx: %08x\n"
+ "Fault_addr_low: %08x\n"
+ "minCoordsX: %08x minCoordsY: %08x\n"
+ "maxCoordsX: %08x maxCoordsY: %08x\n",
+ job_header[JOB_DESC_STATUS_WORD],
+ job_header[JOB_DESC_RESTART_INDEX_WORD],
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+ minX, minY,
+ maxX, maxY);
+
+ /* Flush CPU cache to update memory for future GPU reads*/
+ memcpy(page_1, dst, copy_size);
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+ dma_sync_single_for_device(katom->kctx->kbdev->dev,
+ kbase_dma_addr(p) + offset,
+ copy_size, DMA_TO_DEVICE);
+
+ if (copy_size < JOB_HEADER_SIZE) {
+ memcpy(page_2, dst + copy_size,
+ JOB_HEADER_SIZE - copy_size);
+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+ dma_sync_single_for_device(katom->kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ JOB_HEADER_SIZE - copy_size,
+ DMA_TO_DEVICE);
+ }
+ }
+ if (copy_size < JOB_HEADER_SIZE)
+ kunmap_atomic(page_2);
+
+ kunmap_atomic(page_1);
+
+out_unlock:
+ kbase_gpu_vm_unlock(katom->kctx);
+ return clamped;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100755
index 000000000000..90bd027be30a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100755
index 000000000000..a1c3aa88307b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_cache_policy.c
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+ u32 cache_flags = 0;
+
+ CSTD_UNUSED(nr_pages);
+
+ if (flags & BASE_MEM_CACHED_CPU)
+ cache_flags |= KBASE_REG_CPU_CACHED;
+
+ return cache_flags;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100755
index 000000000000..70f8a4c8ada2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_cache_policy.h
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include <malisw/mali_malisw.h>
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+/**
+ * @brief Choose the cache policy for a specific region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific region.
+ * This function can be modified to customize the cache policy depending on the flags
+ * and size of the region.
+ *
+ * @param[in] flags flags describing attributes of the region
+ * @param[in] nr_pages total number of pages (backed or not) for the region
+ *
+ * @return a combination of KBASE_REG_CPU_CACHED and KBASE_REG_GPU_CACHED depending
+ * on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif /* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100755
index 000000000000..88315acf4263
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+/* Specifies how many attributes are permitted in the config (excluding terminating attribute).
+ * This is used in the validation function so we can detect whether the configuration is properly terminated. This value can be
+ * changed if we need to introduce more attributes or if many memory regions need to be defined */
+#define ATTRIBUTE_COUNT_MAX 32
+
+int kbasep_get_config_attribute_count(const struct kbase_attribute *attributes)
+{
+ int count = 1;
+
+ if (!attributes)
+ return -EINVAL;
+
+ while (attributes->id != KBASE_CONFIG_ATTR_END) {
+ attributes++;
+ count++;
+ }
+
+ return count;
+}
+
+const struct kbase_attribute *kbasep_get_next_attribute(const struct kbase_attribute *attributes, int attribute_id)
+{
+ KBASE_DEBUG_ASSERT(attributes != NULL);
+
+ while (attributes->id != KBASE_CONFIG_ATTR_END) {
+ if (attributes->id == attribute_id)
+ return attributes;
+
+ attributes++;
+ }
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_get_next_attribute)
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+ KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+ *clock_speed = 100;
+ return 0;
+}
+
+uintptr_t kbasep_get_config_value(struct kbase_device *kbdev, const struct kbase_attribute *attributes, int attribute_id)
+{
+ const struct kbase_attribute *attr;
+
+ KBASE_DEBUG_ASSERT(attributes != NULL);
+
+ attr = kbasep_get_next_attribute(attributes, attribute_id);
+ if (attr != NULL)
+ return attr->data;
+
+ /* default values */
+ switch (attribute_id) {
+ /* Begin scheduling defaults */
+ case KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS:
+ return DEFAULT_JS_SCHEDULING_TICK_NS;
+ case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS:
+ return DEFAULT_JS_SOFT_STOP_TICKS;
+ case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL:
+ return DEFAULT_JS_SOFT_STOP_TICKS_CL;
+ case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS:
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ return DEFAULT_JS_HARD_STOP_TICKS_SS_HW_ISSUE_8408;
+ else
+ return DEFAULT_JS_HARD_STOP_TICKS_SS;
+ case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL:
+ return DEFAULT_JS_HARD_STOP_TICKS_CL;
+ case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS:
+ return DEFAULT_JS_HARD_STOP_TICKS_NSS;
+ case KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS:
+ return DEFAULT_JS_CTX_TIMESLICE_NS;
+ case KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS:
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ return DEFAULT_JS_RESET_TICKS_SS_HW_ISSUE_8408;
+ else
+ return DEFAULT_JS_RESET_TICKS_SS;
+ case KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL:
+ return DEFAULT_JS_RESET_TICKS_CL;
+ case KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS:
+ return DEFAULT_JS_RESET_TICKS_NSS;
+ case KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS:
+ return DEFAULT_JS_RESET_TIMEOUT_MS;
+ /* End scheduling defaults */
+ case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS:
+ return 0;
+ case KBASE_CONFIG_ATTR_PLATFORM_FUNCS:
+ return 0;
+ case KBASE_CONFIG_ATTR_CPU_SPEED_FUNC:
+ return DEFAULT_CPU_SPEED_FUNC;
+ case KBASE_CONFIG_ATTR_GPU_SPEED_FUNC:
+ return 0;
+ case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ:
+ return DEFAULT_PM_DVFS_FREQ;
+ case KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS:
+ return DEFAULT_PM_GPU_POWEROFF_TICK_NS;
+ case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER:
+ return DEFAULT_PM_POWEROFF_TICK_SHADER;
+ case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU:
+ return DEFAULT_PM_POWEROFF_TICK_GPU;
+ case KBASE_CONFIG_ATTR_POWER_MODEL_CALLBACKS:
+ return 0;
+
+ default:
+ dev_err(kbdev->dev, "kbasep_get_config_value. Cannot get value of attribute with id=%d and no default value defined", attribute_id);
+ return 0;
+ }
+}
+
+KBASE_EXPORT_TEST_API(kbasep_get_config_value)
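
For illustration, a platform would supply an attribute list along the following lines (editorial sketch; the { .id, .data } layout of struct kbase_attribute is assumed from its use in this file, the definition itself living in mali_kbase_config.h):

static const struct kbase_attribute example_attributes[] = {
	{ .id = KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,  .data = 1 },
	{ .id = KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, .data = 3000 },
	{ .id = KBASE_CONFIG_ATTR_END,                 .data = 0 } /* terminator */
};

The driver would then resolve individual settings with kbasep_get_config_value(kbdev, example_attributes, KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS), falling back to the defaults above for any attribute not present in the list.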
+
+mali_bool kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs;
+
+ platform_funcs = (struct kbase_platform_funcs_conf *)kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PLATFORM_FUNCS);
+ if (platform_funcs) {
+ if (platform_funcs->platform_init_func)
+ return platform_funcs->platform_init_func(kbdev);
+ }
+ return MALI_TRUE;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs;
+
+ platform_funcs = (struct kbase_platform_funcs_conf *)kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PLATFORM_FUNCS);
+ if (platform_funcs) {
+ if (platform_funcs->platform_term_func)
+ platform_funcs->platform_term_func(kbdev);
+ }
+}
+
+static mali_bool kbasep_validate_pm_callback(const struct kbase_pm_callback_conf *callbacks, const struct kbase_device *kbdev)
+{
+ if (callbacks == NULL) {
+ /* Having no callbacks is valid */
+ return MALI_TRUE;
+ }
+
+ if ((callbacks->power_off_callback != NULL && callbacks->power_on_callback == NULL) || (callbacks->power_off_callback == NULL && callbacks->power_on_callback != NULL)) {
+ dev_warn(kbdev->dev, "Invalid power management callbacks: Only one of power_off_callback and power_on_callback was specified");
+ return MALI_FALSE;
+ }
+ return MALI_TRUE;
+}
+
+static mali_bool kbasep_validate_cpu_speed_func(kbase_cpuprops_clock_speed_function fcn)
+{
+ return fcn != NULL;
+}
+
+mali_bool kbasep_validate_configuration_attributes(struct kbase_device *kbdev, const struct kbase_attribute *attributes)
+{
+ int i;
+
+ KBASE_DEBUG_ASSERT(attributes);
+
+ for (i = 0; attributes[i].id != KBASE_CONFIG_ATTR_END; i++) {
+ if (i >= ATTRIBUTE_COUNT_MAX) {
+ dev_warn(kbdev->dev, "More than ATTRIBUTE_COUNT_MAX=%d configuration attributes defined. Is attribute list properly terminated?", ATTRIBUTE_COUNT_MAX);
+ return MALI_FALSE;
+ }
+
+ switch (attributes[i].id) {
+ /* Only non-zero unsigned 32-bit values accepted */
+ case KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS:
+#if CSTD_CPU_64BIT
+ if (attributes[i].data == 0u || (u64) attributes[i].data > (u64) U32_MAX) {
+#else
+ if (attributes[i].data == 0u) {
+#endif
+ dev_warn(kbdev->dev, "Invalid Job Scheduling Configuration attribute for " "KBASE_CONFIG_ATTR_JS_SCHEDULING_TICKS_NS: %d", (int)attributes[i].data);
+ return MALI_FALSE;
+ }
+ break;
+
+ /* All these Job Scheduling attributes are FALLTHROUGH: only unsigned 32-bit values accepted */
+ case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS:
+ case KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL:
+ case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS:
+ case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL:
+ case KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS:
+ case KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS:
+ case KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL:
+ case KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS:
+ case KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS:
+ case KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS:
+#if CSTD_CPU_64BIT
+ if ((u64) attributes[i].data > (u64) U32_MAX) {
+ dev_warn(kbdev->dev, "Job Scheduling Configuration attribute exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data);
+ return MALI_FALSE;
+ }
+#endif
+ break;
+
+ case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS:
+ if (MALI_FALSE == kbasep_validate_pm_callback((struct kbase_pm_callback_conf *)attributes[i].data, kbdev)) {
+ /* Warning message handled by kbasep_validate_pm_callback() */
+ return MALI_FALSE;
+ }
+ break;
+
+ case KBASE_CONFIG_ATTR_CPU_SPEED_FUNC:
+ if (MALI_FALSE == kbasep_validate_cpu_speed_func((kbase_cpuprops_clock_speed_function) attributes[i].data)) {
+ dev_warn(kbdev->dev, "Invalid function pointer in KBASE_CONFIG_ATTR_CPU_SPEED_FUNC");
+ return MALI_FALSE;
+ }
+ break;
+
+ case KBASE_CONFIG_ATTR_GPU_SPEED_FUNC:
+ if (0 == attributes[i].data) {
+ dev_warn(kbdev->dev, "Invalid function pointer in KBASE_CONFIG_ATTR_GPU_SPEED_FUNC");
+ return MALI_FALSE;
+ }
+ break;
+
+ case KBASE_CONFIG_ATTR_PLATFORM_FUNCS:
+ /* any value is allowed */
+ break;
+
+ case KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ:
+#if CSTD_CPU_64BIT
+ if ((u64) attributes[i].data > (u64) U32_MAX) {
+ dev_warn(kbdev->dev, "PM DVFS interval exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data);
+ return MALI_FALSE;
+ }
+#endif
+ break;
+
+ case KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS:
+#if CSTD_CPU_64BIT
+ if (attributes[i].data == 0u || (u64) attributes[i].data > (u64) U32_MAX) {
+#else
+ if (attributes[i].data == 0u) {
+#endif
+ dev_warn(kbdev->dev, "Invalid Power Manager Configuration attribute for " "KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS: %d", (int)attributes[i].data);
+ return MALI_FALSE;
+ }
+ break;
+
+ case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER:
+ case KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU:
+#if CSTD_CPU_64BIT
+ if ((u64) attributes[i].data > (u64) U32_MAX) {
+ dev_warn(kbdev->dev, "Power Manager Configuration attribute exceeds 32-bits: " "id==%d val==%d", attributes[i].id, (int)attributes[i].data);
+ return MALI_FALSE;
+ }
+#endif
+ break;
+
+ case KBASE_CONFIG_ATTR_POWER_MODEL_CALLBACKS:
+ if (0 == attributes[i].data) {
+ dev_warn(kbdev->dev, "Power model callbacks is specified but NULL: " "id==%d val==%d",
+ attributes[i].id, (int)attributes[i].data);
+ return MALI_FALSE;
+ }
+ break;
+
+ default:
+ dev_warn(kbdev->dev, "Invalid attribute found in configuration: %d", attributes[i].id);
+ return MALI_FALSE;
+ }
+ }
+
+ return MALI_TRUE;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100755
index 000000000000..3a1acf89ac7b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,722 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <malisw/mali_stdtypes.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#if !MALI_CUSTOMER_RELEASE
+/* This flag is set for internal builds so we can run tests without credentials. */
+#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1
+#else
+#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0
+#endif
+
+/* Enable power management API, note that KBASE_PM_EN==0 is not supported */
+#define KBASE_PM_EN 1
+/* Enable GPU reset API, note that KBASE_GPU_RESET_EN==0 is not supported */
+#define KBASE_GPU_RESET_EN 1
+/* Enable HW MMU backend, note that KBASE_MMU_HW_BACKEND==0 is not supported */
+#define KBASE_MMU_HW_BACKEND 1
+
+
+#include <linux/rbtree.h>
+
+
+/**
+ * Device wide configuration
+ */
+enum {
+ /**
+ * Invalid attribute ID (reserve 0).
+ *
+ * Attached value: Ignored
+ * Default value: NA
+ * */
+ KBASE_CONFIG_ATTR_INVALID,
+
+ /*** Begin Job Scheduling Configs ***/
+ /**
+ * Job Scheduler scheduling tick granularity. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the scheduler may decide to:
+ * -# soft stop a job (the job will be re-run later, and other jobs will
+ * be able to run on the GPU now). This effectively controls the
+ * 'timeslice' given to a job.
+ * -# hard stop a job (to kill a job if it has spent too long on the GPU
+ * and didn't soft-stop).
+ *
+ * The numbers of ticks for these events are controlled by:
+ * - @ref KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS
+ * - @ref KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS
+ * - @ref KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS
+ *
+ * A soft-stopped job will later be resumed, allowing it to use more GPU
+ * time <em>in total</em> than that defined by any of the above. However,
+ * the scheduling policy attempts to limit the amount of \em uninterrupted
+ * time spent on the GPU using the above values (that is, the 'timeslice'
+ * of a job)
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::scheduling_tick_ns.
+ * The value might be rounded down to lower precision. Must be non-zero
+ * after rounding.<br>
+ * Default value: @ref DEFAULT_JS_SCHEDULING_TICK_NS
+ *
+ * @note this value is allowed to be greater than
+ * @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS. This allows jobs to run on (much)
+ * longer than the job-timeslice, but once this happens, the context gets
+ * scheduled in (much) less frequently than others that stay within the
+ * ctx-timeslice.
+ */
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before non-CL jobs
+ * are soft-stopped.
+ *
+ * This defines the amount of time a job is allowed to stay on the GPU,
+ * before it is soft-stopped to allow other jobs to run.
+ *
+ * That is, this defines the 'timeslice' of the job. It is separate from the
+ * timeslice of the context that contains the job (see
+ * @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS).
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::soft_stop_ticks<br>
+ * Default value: @ref DEFAULT_JS_SOFT_STOP_TICKS
+ *
+ * @note a value of zero means "the quickest time to soft-stop a job",
+ * which is somewhere between instant and one tick later.
+ *
+ * @note this value is allowed to be greater than
+ * @ref KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS or
+ * @ref KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS. This effectively disables
+ * soft-stop, and just uses hard-stop instead. In this case, this value
+ * should be much greater than any of the hard stop values (to avoid
+ * soft-stop-after-hard-stop)
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before CL jobs
+ * are soft-stopped.
+ *
+ * This defines the amount of time a job is allowed to stay on the GPU,
+ * before it is soft-stopped to allow other jobs to run.
+ *
+ * That is, this defines the 'timeslice' of the job. It is separate
+ * from the timeslice of the context that contains the job (see
+ * @ref KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS).
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit
+ * kbasep_js_device_data::soft_stop_ticks_cl<br>
+ * Default value: @ref DEFAULT_JS_SOFT_STOP_TICKS_CL
+ *
+ * @note a value of zero means "the quickest time to soft-stop a job",
+ * which is somewhere between instant and one tick later.
+ *
+ * @note this value is allowed to be greater than
+ * @ref KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL. This effectively
+ * disables soft-stop, and just uses hard-stop instead. In this case,
+ * this value should be much greater than any of the hard stop values
+ * (to avoid soft-stop-after-hard-stop)
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before non-CL jobs
+ * are hard-stopped.
+ *
+ * This defines the amount of time a job is allowed to spend on the GPU before it
+ * is killed. Such jobs won't be resumed if killed.
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::hard_stop_ticks_ss<br>
+ * Default value: @ref DEFAULT_JS_HARD_STOP_TICKS_SS
+ *
+ * @note a value of zero means "the quickest time to hard-stop a job",
+ * which is somewhere between instant and one tick later.
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before CL jobs are hard-stopped.
+ *
+ * This defines the amount of time a job is allowed to spend on the GPU before it
+ * is killed. Such jobs won't be resumed if killed.
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::hard_stop_ticks_cl<br>
+ * Default value: @ref DEFAULT_JS_HARD_STOP_TICKS_CL
+ *
+ * @note a value of zero means "the quickest time to hard-stop a job",
+ * which is somewhere between instant and one tick later.
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before jobs are hard-stopped
+ * when dumping.
+ *
+ * This defines the amount of time a job is allowed to spend on the GPU before it
+ * is killed. Such jobs won't be resumed if killed.
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::hard_stop_ticks_nss<br>
+ * Default value: @ref DEFAULT_JS_HARD_STOP_TICKS_NSS
+ *
+ * @note a value of zero means "the quickest time to hard-stop a job",
+ * which is somewhere between instant and one tick later.
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+
+ /**
+ * Job Scheduler timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ *
+	 * This value affects the actual time defined by the following
+ * defaults:
+ * - @ref DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES
+ * - @ref DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::ctx_timeslice_ns.
+ * The value might be rounded down to lower precision.<br>
+ * Default value: @ref DEFAULT_JS_CTX_TIMESLICE_NS
+ *
+ * @note a value of zero models a "Round Robin" scheduling policy, and
+ * disables @ref DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES
+ * (initially causing LIFO scheduling) and
+ * @ref DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES (allowing
+ * not-run-often contexts to get scheduled in quickly, but to only use
+ * a single timeslice when they get scheduled in).
+ */
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before non-CL jobs
+ * cause the GPU to be reset.
+ *
+ * This defines the amount of time a job is allowed to spend on the GPU before it
+	 * is assumed that the GPU has hung and needs to be reset. This assumes that the job
+ * has been hard-stopped already and so the presence of a job that has remained on
+ * the GPU for so long indicates that the GPU has in some way hung.
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+	 * Attached value: unsigned 32-bit kbasep_js_device_data::gpu_reset_ticks_ss<br>
+ * Default value: @ref DEFAULT_JS_RESET_TICKS_SS
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before CL jobs
+ * cause the GPU to be reset.
+ *
+ * This defines the amount of time a job is allowed to spend on the GPU before it
+	 * is assumed that the GPU has hung and needs to be reset. This assumes that the job
+ * has been hard-stopped already and so the presence of a job that has remained on
+ * the GPU for so long indicates that the GPU has in some way hung.
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::gpu_reset_ticks_cl<br>
+ * Default value: @ref DEFAULT_JS_RESET_TICKS_CL
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+
+ /**
+ * Job Scheduler minimum number of scheduling ticks before jobs cause the GPU to be
+ * reset when dumping.
+ *
+ * This defines the amount of time a job is allowed to spend on the GPU before it
+	 * is assumed that the GPU has hung and needs to be reset. This assumes that the job
+ * has been hard-stopped already and so the presence of a job that has remained on
+ * the GPU for so long indicates that the GPU has in some way hung.
+ *
+ * This value is supported by the following scheduling policies:
+ * - The Completely Fair Share (CFS) policy
+ *
+ * Attached value: unsigned 32-bit kbasep_js_device_data::gpu_reset_ticks_nss<br>
+ * Default value: @ref DEFAULT_JS_RESET_TICKS_NSS
+ *
+ * @see KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+
+ /**
+ * Number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ *
+ * Attached value: number in milliseconds
+ * Default value: @ref DEFAULT_JS_RESET_TIMEOUT_MS
+ */
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ /*** End Job Scheduling Configs ***/
+
+ /** Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+
+ /**
+ * A pointer to a function that calculates the CPU clock
+ * speed of the platform in MHz - see
+ * @ref kbase_cpuprops_clock_speed_function for the function
+ * prototype.
+ *
+ * Attached value: A @ref kbase_cpuprops_clock_speed_function.
+ * Default Value: Pointer to @ref DEFAULT_CPU_SPEED_FUNC -
+ * returns a clock speed of 100 MHz.
+ */
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+
+ /**
+ * A pointer to a function that calculates the GPU clock
+ * speed of the platform in MHz - see
+ * @ref kbase_gpuprops_clock_speed_function for the function
+ * prototype.
+ *
+ * Attached value: A @ref kbase_gpuprops_clock_speed_function.
+ * Default Value: NULL (in which case the driver assumes maximum
+ * GPU frequency stored in gpu_freq_khz_max)
+ */
+ KBASE_CONFIG_ATTR_GPU_SPEED_FUNC,
+
+ /**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+ KBASE_CONFIG_ATTR_PLATFORM_FUNCS,
+
+ /**
+ * Rate at which dvfs data should be collected.
+ *
+ * Attached value: u32 value
+	 * Default value: 500 milliseconds
+ */
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ,
+
+ /**
+	 * Power Management poweroff tick granularity. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ *
+ * Attached value: number in nanoseconds
+	 * Default value: @ref DEFAULT_PM_GPU_POWEROFF_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS,
+
+ /**
+ * Power Manager number of ticks before shader cores are powered off
+ *
+ * Attached value: unsigned 32-bit kbasep_pm_device_data::poweroff_shader_ticks<br>
+ * Default value: @ref DEFAULT_PM_POWEROFF_TICK_SHADER
+ *
+ * @see KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER,
+
+ /**
+ * Power Manager number of ticks before GPU is powered off
+ *
+ * Attached value: unsigned 32-bit kbasep_pm_device_data::poweroff_gpu_ticks<br>
+ * Default value: @ref DEFAULT_PM_POWEROFF_TICK_GPU
+ *
+ * @see KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS
+ */
+ KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU,
+
+ /** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+ KBASE_CONFIG_ATTR_POWER_MODEL_CALLBACKS,
+
+ /**
+ * End of attribute list indicator.
+ * The configuration loader will stop processing any more elements
+ * when it encounters this attribute.
+ *
+ * Default value: NA
+ */
+ KBASE_CONFIG_ATTR_END = 0x1FFFUL
+};
+
+/*
+ * @brief specifies a single attribute
+ *
+ * An attribute is identified by the id field. Data is either an integer or a pointer to an attribute-specific structure.
+ */
+typedef struct kbase_attribute {
+ int id;
+ uintptr_t data;
+} kbase_attribute;
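+
+/*
+ * Illustrative sketch only (hypothetical, for documentation purposes): a
+ * platform would typically describe its configuration as an array of
+ * attributes, terminated by KBASE_CONFIG_ATTR_END. example_pm_callbacks is
+ * a hypothetical kbase_pm_callback_conf instance provided by the integrator.
+ *
+ * @code
+ * static struct kbase_pm_callback_conf example_pm_callbacks;
+ *
+ * static struct kbase_attribute example_attributes[] = {
+ *	{ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS, 2 },
+ *	{ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ *	  (uintptr_t)&example_pm_callbacks },
+ *	{ KBASE_CONFIG_ATTR_END, 0 }
+ * };
+ * @endcode
+ */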
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/*
+ * @brief Specifies the functions for platform specific initialization and termination
+ *
+ * By default no functions are required. No additional platform specific control is necessary.
+ */
+typedef struct kbase_platform_funcs_conf {
+ /**
+ * Function pointer for platform specific initialization or NULL if no initialization function is required.
+ * This function will be called \em before any other callbacks listed in the struct kbase_attribute struct (such as
+ * Power Management callbacks).
+ * The platform specific private pointer kbase_device::platform_context can be accessed (and possibly initialized) in here.
+ */
+ mali_bool(*platform_init_func) (struct kbase_device *kbdev);
+ /**
+ * Function pointer for platform specific termination or NULL if no termination function is required.
+ * This function will be called \em after any other callbacks listed in the struct kbase_attribute struct (such as
+ * Power Management callbacks).
+ * The platform specific private pointer kbase_device::platform_context can be accessed (and possibly terminated) in here.
+ */
+ void (*platform_term_func)(struct kbase_device *kbdev);
+
+} kbase_platform_funcs_conf;
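+
+/*
+ * Illustrative sketch only (hypothetical, for documentation purposes): a
+ * platform might wire up its own init/term hooks as below. The function
+ * names are placeholders, and MALI_TRUE is assumed here to signal a
+ * successful init.
+ *
+ * @code
+ * static mali_bool example_platform_init(struct kbase_device *kbdev)
+ * {
+ *	kbdev->platform_context = NULL;
+ *	return MALI_TRUE;
+ * }
+ *
+ * static void example_platform_term(struct kbase_device *kbdev)
+ * {
+ *	kbdev->platform_context = NULL;
+ * }
+ *
+ * static struct kbase_platform_funcs_conf example_platform_funcs = {
+ *	.platform_init_func = example_platform_init,
+ *	.platform_term_func = example_platform_term,
+ * };
+ * @endcode
+ */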
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+typedef struct kbase_pm_callback_conf {
+ /** Callback for when the GPU is idle and the power to it can be switched off.
+ *
+	 * The system integrator can decide to either do nothing, just switch off
+	 * the clocks to the GPU, or completely power down the GPU.
+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_off_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the GPU is about to become active and power must be supplied.
+ *
+ * This function must not return until the GPU is powered and clocked sufficiently for register access to
+ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback.
+ * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ *
+ * The return value of the first call to this function is ignored.
+ *
+ * @return 1 if the GPU state may have been lost, 0 otherwise.
+ */
+ int (*power_on_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the system is requesting a suspend and GPU power
+ * must be switched off.
+ *
+ * Note that if this callback is present, then this may be called
+ * without a preceding call to power_off_callback. Therefore this
+ * callback must be able to take any action that might otherwise happen
+ * in power_off_callback.
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed and modified in here. It is the platform \em
+ * callbacks responsibility to initialize and terminate this pointer if
+ * used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the system is resuming from a suspend and GPU
+ * power must be switched on.
+ *
+ * Note that if this callback is present, then this may be called
+ * without a following call to power_on_callback. Therefore this
+ * callback must be able to take any action that might otherwise happen
+ * in power_on_callback.
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed and modified in here. It is the platform \em
+ * callbacks responsibility to initialize and terminate this pointer if
+ * used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_resume_callback)(struct kbase_device *kbdev);
+
+ /** Callback for handling runtime power management initialization.
+ *
+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+ * will become active from calls made to the OS from within this function.
+ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ *
+	 * @return MALI_ERROR_NONE on success, else a mali_error error code.
+ */
+ mali_error(*power_runtime_init_callback) (struct kbase_device *kbdev);
+
+ /** Callback for handling runtime power management termination.
+ *
+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+ * should no longer be called by the OS on completion of this function.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ */
+ void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+ /** Callback for runtime power-off power management callback
+ *
+ * For linux this callback will be called by the kernel runtime_suspend callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ */
+ void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+ /** Callback for runtime power-on power management callback
+ *
+ * For linux this callback will be called by the kernel runtime_resume callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+ int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+
+} kbase_pm_callback_conf;
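+
+/*
+ * Illustrative sketch only (hypothetical, for documentation purposes): a
+ * minimal callback set that just powers the GPU up and down. The
+ * example_board_* helpers are placeholders for the platform's clock and
+ * regulator code; power_on_callback returns 1 on the assumption that GPU
+ * state is lost across a full power cycle. Callbacks not listed are left
+ * as NULL by the designated initializer.
+ *
+ * @code
+ * static void example_power_off(struct kbase_device *kbdev)
+ * {
+ *	example_board_gpu_power_down();
+ * }
+ *
+ * static int example_power_on(struct kbase_device *kbdev)
+ * {
+ *	example_board_gpu_power_up();
+ *	return 1;
+ * }
+ *
+ * static struct kbase_pm_callback_conf example_pm_callbacks = {
+ *	.power_off_callback = example_power_off,
+ *	.power_on_callback = example_power_on,
+ * };
+ * @endcode
+ */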
+
+/**
+ * @brief Default implementation of @ref KBASE_CONFIG_ATTR_CPU_SPEED_FUNC.
+ *
+ * This function sets clock_speed to 100, so it will be an underestimate for
+ * any real system.
+ *
+ * See @ref kbase_cpuprops_clock_speed_function for details on the parameters
+ * and return value.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * Type of the function pointer for KBASE_CONFIG_ATTR_CPU_SPEED_FUNC.
+ *
+ * @param clock_speed [out] Once called this will contain the current CPU clock speed in MHz.
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ *
+ * @return 0 on success, 1 on error.
+ */
+typedef int (*kbase_cpuprops_clock_speed_function) (u32 *clock_speed);
+
+/**
+ * Type of the function pointer for KBASE_CONFIG_ATTR_GPU_SPEED_FUNC.
+ *
+ * @param clock_speed [out] Once called this will contain the current GPU clock speed in MHz.
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ * @return 0 on success, 1 on error. When an error is returned the caller assumes maximum
+ * GPU speed stored in gpu_freq_khz_max.
+ */
+typedef int (*kbase_gpuprops_clock_speed_function) (u32 *clock_speed);
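+
+/*
+ * Illustrative sketch only (hypothetical, for documentation purposes): a
+ * trivial GPU clock speed callback for a board with a fixed GPU clock. The
+ * 533 MHz figure is an arbitrary example; a real implementation would query
+ * the platform's clock framework.
+ *
+ * @code
+ * static int example_gpu_speed_func(u32 *clock_speed)
+ * {
+ *	*clock_speed = 533;
+ *	return 0;
+ * }
+ * @endcode
+ */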
+
+#ifdef CONFIG_OF
+typedef struct kbase_platform_config {
+ const struct kbase_attribute *attributes;
+ u32 midgard_type;
+} kbase_platform_config;
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+typedef struct kbase_io_memory_region {
+ u64 start;
+ u64 end;
+} kbase_io_memory_region;
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+typedef struct kbase_io_resources {
+
+ u32 job_irq_number;
+ u32 mmu_irq_number;
+ u32 gpu_irq_number;
+ struct kbase_io_memory_region io_memory_region;
+} kbase_io_resources;
+
+typedef struct kbase_platform_config {
+ const struct kbase_attribute *attributes;
+ const struct kbase_io_resources *io_resources;
+ u32 midgard_type;
+} kbase_platform_config;
+
+#endif /* CONFIG_OF */
+
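+/*
+ * Illustrative sketch only (hypothetical, non-device-tree case above): the
+ * IRQ numbers and register window are placeholder values, other fields such
+ * as midgard_type are omitted for brevity, and example_attributes is the
+ * hypothetical attribute list sketched earlier in this header.
+ *
+ * @code
+ * static struct kbase_io_resources example_io_resources = {
+ *	.job_irq_number = 68,
+ *	.mmu_irq_number = 69,
+ *	.gpu_irq_number = 70,
+ *	.io_memory_region = {
+ *		.start = 0x2d000000,
+ *		.end = 0x2d000000 + (4096 * 4) - 1
+ *	},
+ * };
+ *
+ * static struct kbase_platform_config example_platform_config = {
+ *	.attributes = example_attributes,
+ *	.io_resources = &example_io_resources,
+ * };
+ * @endcode
+ */
+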
+/**
+ * @brief Return character string associated with the given midgard type.
+ *
+ * @param[in] midgard_type - ID of midgard type
+ *
+ * @return Pointer to NULL-terminated character array associated with the given midgard type
+ */
+const char *kbasep_midgard_type_to_string(u32 midgard_type);
+
+/**
+ * @brief Gets the next config attribute with the specified ID from the array of attributes.
+ *
+ * Function gets the next attribute with the specified attribute id within the specified array. If no such attribute is found,
+ * NULL is returned.
+ *
+ * @param[in] attributes Array of attributes in which lookup is performed
+ * @param[in] attribute_id ID of attribute
+ *
+ * @return Pointer to the first attribute matching id or NULL if none is found.
+ */
+const struct kbase_attribute *kbasep_get_next_attribute(const struct kbase_attribute *attributes, int attribute_id);
+
+/**
+ * @brief Gets the value of a single config attribute.
+ *
+ * Function gets the value of the attribute specified as a parameter. If no such attribute is found in the array of
+ * attributes, the default value is used.
+ *
+ * @param[in] kbdev Kbase device pointer
+ * @param[in] attributes Array of attributes in which lookup is performed
+ * @param[in] attribute_id ID of attribute
+ *
+ * @return Value of attribute with the given id
+ */
+uintptr_t kbasep_get_config_value(struct kbase_device *kbdev, const struct kbase_attribute *attributes, int attribute_id);
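+
+/*
+ * Illustrative usage sketch (hypothetical, for documentation purposes):
+ * reading the context timeslice from a platform's attribute list; the
+ * built-in default is used when the attribute is absent. 'config' is assumed
+ * to be a struct kbase_platform_config obtained from
+ * kbase_get_platform_config().
+ *
+ * @code
+ * u32 timeslice_ns = (u32)kbasep_get_config_value(kbdev,
+ *		config->attributes, KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS);
+ * @endcode
+ */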
+
+/**
+ * @brief Validates configuration attributes
+ *
+ * Function checks the validity of the given configuration attributes. It will fail on any attribute with an unknown id,
+ * an attribute with an invalid value, or an attribute list that is not correctly terminated.
+ *
+ * @param[in] kbdev Kbase device pointer
+ * @param[in] attributes Array of attributes to validate
+ *
+ * @return MALI_TRUE if no errors have been found in the config. MALI_FALSE otherwise.
+ */
+mali_bool kbasep_validate_configuration_attributes(struct kbase_device *kbdev, const struct kbase_attribute *attributes);
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * @brief Gets the count of attributes in array
+ *
+ * Function gets the count of attributes in the array. Note that the end of list indicator is also included.
+ *
+ * @param[in] attributes Array of attributes
+ *
+ * @return Number of attributes in the array including end of list indicator.
+ */
+int kbasep_get_config_attribute_count(const struct kbase_attribute *attributes);
+
+/**
+ * @brief Platform specific call to initialize hardware
+ *
+ * Function calls a platform defined routine if specified in the configuration attributes.
+ * The routine can initialize any hardware and context state that is required for the GPU block to function.
+ *
+ * @param[in] kbdev Kbase device pointer
+ *
+ * @return MALI_TRUE if no errors have been found in the config. MALI_FALSE otherwise.
+ */
+mali_bool kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Platform specific call to terminate hardware
+ *
+ * Function calls a platform defined routine if specified in the configuration attributes.
+ * The routine can destroy any platform specific context state and shut down any hardware functionality that is
+ * outside of the Power Management callbacks.
+ *
+ * @param[in] kbdev Kbase device pointer
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+ /** @} *//* end group kbase_config */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_CONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100755
index 000000000000..3a55594dd6c0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,288 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/*** Begin Scheduling defaults ***/
+
+/**
+ * Default scheduling tick granularity, in nanoseconds
+ */
+/* 50ms */
+#define DEFAULT_JS_SCHEDULING_TICK_NS 50000000u
+
+/**
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of
+ * a context)
+ */
+/* Between 0.1 and 0.15s before soft-stop */
+#define DEFAULT_JS_SOFT_STOP_TICKS 2
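+/*
+ * For reference: with the 50ms tick above, a setting of N ticks stops the
+ * job somewhere between N * 50ms and (N + 1) * 50ms after it starts,
+ * because the job may begin part-way through a tick. Hence the default of
+ * 2 gives the "between 0.1 and 0.15s" figure quoted here; the same
+ * arithmetic applies to the other tick-based values below.
+ */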
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+/* Between 0.05 and 0.1s before soft-stop */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL 1
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+/* 1.2s before hard-stop, for a certain GLES2 test at 128x128 (bound by
+ * combined vertex+tiler job)
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_HW_ISSUE_8408 24
+/* Between 0.2 and 0.25s before hard-stop */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS 4
+
+/**
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+/* Between 0.1 and 0.15s before hard-stop */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL 2
+
+/**
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+/* 60s @ 50ms tick */
+#define DEFAULT_JS_HARD_STOP_TICKS_NSS 1200
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset
+ * to clear a "stuck" job
+ */
+/* 1.8s before resetting GPU, for a certain GLES2 test at 128x128 (bound by
+ * combined vertex+tiler job)
+ */
+#define DEFAULT_JS_RESET_TICKS_SS_HW_ISSUE_8408 36
+/* 0.3-0.35s before GPU is reset */
+#define DEFAULT_JS_RESET_TICKS_SS 6
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset
+ * to clear a "stuck" CL job.
+ */
+/* 0.2-0.25s before GPU is reset */
+#define DEFAULT_JS_RESET_TICKS_CL 4
+
+/**
+ * Default minimum number of scheduling ticks before the GPU is reset
+ * to clear a "stuck" job during dumping.
+ */
+/* 60.1s @ 50ms tick */
+#define DEFAULT_JS_RESET_TICKS_NSS 1202
+
+/**
+ * Number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_JS_RESET_TIMEOUT_MS 3000
+
+/**
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+/* 0.05s - at 20fps a ctx does at least 1 frame before being scheduled out.
+ * At 40fps, 2 frames, etc
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS 50000000
+
+/**
+ * Default Job Scheduler initial runtime of a context for the CFS Policy,
+ * in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specifically, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
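+/*
+ * Worked example: with DEFAULT_JS_CTX_TIMESLICE_NS at 50000000ns (50ms),
+ * the two defaults above correspond to 1 * 50ms = 50ms and
+ * 2 * 50ms = 100ms respectively, relative to the least-run context.
+ */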
+
+/**
+ * Boolean indicating whether the driver is configured to be secure at
+ * a potential loss of performance.
+ *
+ * This currently affects only r0p0-15dev0 HW and earlier.
+ *
+ * On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+ * performance:
+ *
+ * - When this is set to MALI_TRUE, the driver remains fully secure,
+ *   but potentially loses performance compared with setting this to
+ *   MALI_FALSE.
+ * - When set to MALI_FALSE, the driver is open to certain security
+ *   attacks.
+ *
+ * From r0p0-00rel0 and onwards, there is no security loss by setting
+ * this to MALI_FALSE, and no performance loss by setting it to
+ * MALI_TRUE.
+ */
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE MALI_FALSE
+
+enum {
+ /**
+ * Use unrestricted Address ID width on the AXI bus.
+ */
+ KBASE_AID_32 = 0x0,
+
+ /**
+ * Restrict GPU to a half of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_16 = 0x3,
+
+ /**
+ * Restrict GPU to a quarter of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_8 = 0x2,
+
+ /**
+ * Restrict GPU to an eighth of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_4 = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ * KBASE_AID_16 - use 16 IDs (4 ID bits)
+ * KBASE_AID_8 - use 8 IDs (3 ID bits)
+ * KBASE_AID_4 - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ * KBASE_AID_16 - use 16 IDs (4 ID bits)
+ * KBASE_AID_8 - use 8 IDs (3 ID bits)
+ * KBASE_AID_4 - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for using alternative hardware counters.
+ */
+#define DEFAULT_ALTERNATIVE_HWC MALI_FALSE
+
+/*** End Scheduling defaults ***/
+
+/*** Begin Power Manager defaults */
+
+/* Milliseconds */
+#define DEFAULT_PM_DVFS_FREQ 500
+
+/**
+ * Default poweroff tick granularity, in nanoseconds
+ */
+/* 400us */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS 400000
+
+/**
+ * Default number of poweroff ticks before shader cores are powered off
+ */
+/* 400-800us */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER 2
+
+/**
+ * Default number of poweroff ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU 2 /* 400-800us */
+
+/*** End Power Manager defaults ***/
+
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/**
+ * Default value for KBASE_CONFIG_ATTR_CPU_SPEED_FUNC.
+ * Points to @ref kbase_cpuprops_get_default_clock_speed.
+ */
+#define DEFAULT_CPU_SPEED_FUNC \
+ ((uintptr_t)kbase_cpuprops_get_default_clock_speed)
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100755
index 000000000000..1d42445672b8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,280 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_context.c
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+#define MEMPOOL_PAGES 16384
+
+
+/**
+ * @brief Create a kernel base context.
+ *
+ * Allocate and init a kernel base context.
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+ struct kbase_context *kctx;
+ mali_error mali_err;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* zero-initialized, as a lot of code assumes it's zeroed out on create */
+ kctx = vzalloc(sizeof(*kctx));
+
+ if (!kctx)
+ goto out;
+
+ /* creating a context is considered a disjoint event */
+ kbase_disjoint_event(kbdev);
+
+ kctx->kbdev = kbdev;
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+ kctx->is_compat = is_compat;
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+ atomic_set(&kctx->setup_complete, 0);
+ atomic_set(&kctx->setup_in_progress, 0);
+ kctx->keep_gpu_powered = MALI_FALSE;
+ spin_lock_init(&kctx->mm_update_lock);
+ kctx->process_mm = NULL;
+ atomic_set(&kctx->nonmapped_pages, 0);
+
+ if (MALI_ERROR_NONE != kbase_mem_allocator_init(&kctx->osalloc,
+ MEMPOOL_PAGES,
+ kctx->kbdev))
+ goto free_kctx;
+
+ kctx->pgd_allocator = &kctx->osalloc;
+ atomic_set(&kctx->used_pages, 0);
+
+ if (kbase_jd_init(kctx))
+ goto free_allocator;
+
+ mali_err = kbasep_js_kctx_init(kctx);
+ if (MALI_ERROR_NONE != mali_err)
+ goto free_jd; /* safe to call kbasep_js_kctx_term in this case */
+
+ mali_err = kbase_event_init(kctx);
+ if (MALI_ERROR_NONE != mali_err)
+ goto free_jd;
+
+ mutex_init(&kctx->reg_lock);
+
+ INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+#ifdef CONFIG_KDS
+ INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+
+ mali_err = kbase_mmu_init(kctx);
+ if (MALI_ERROR_NONE != mali_err)
+ goto free_event;
+
+ kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+ if (!kctx->pgd)
+ goto free_mmu;
+
+ if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&kctx->osalloc, 1, &kctx->aliasing_sink_page))
+ goto no_sink_page;
+
+ kctx->tgid = current->tgid;
+ kctx->pid = current->pid;
+ init_waitqueue_head(&kctx->event_queue);
+
+ kctx->cookies = KBASE_COOKIE_MASK;
+
+ /* Make sure page 0 is not used... */
+ if (kbase_region_tracker_init(kctx))
+ goto no_region_tracker;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+ kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+ mali_err = kbasep_mem_profile_debugfs_add(kctx);
+ if (MALI_ERROR_NONE != mali_err)
+ goto no_region_tracker;
+
+ if (kbasep_jd_debugfs_ctx_add(kctx))
+ goto free_mem_profile;
+
+ return kctx;
+
+free_mem_profile:
+ kbasep_mem_profile_debugfs_remove(kctx);
+no_region_tracker:
+no_sink_page:
+ kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0);
+ kbase_mmu_free_pgd(kctx);
+free_mmu:
+ kbase_mmu_term(kctx);
+free_event:
+ kbase_event_cleanup(kctx);
+free_jd:
+	/* Safe to call this one even if it wasn't initialized (assuming kctx was sufficiently zeroed) */
+ kbasep_js_kctx_term(kctx);
+ kbase_jd_exit(kctx);
+free_allocator:
+ kbase_mem_allocator_term(&kctx->osalloc);
+free_kctx:
+ vfree(kctx);
+out:
+ return NULL;
+
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context)
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+ dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+ kbase_mem_phy_alloc_put(reg->alloc);
+ kfree(reg);
+}
+
+/**
+ * @brief Destroy a kernel base context.
+ *
+ * Destroy a kernel base context. Calls kbase_destroy_os_context() to
+ * free OS specific structures. Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ int pages;
+ unsigned long pending_regions_to_clean;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+ kbasep_jd_debugfs_ctx_remove(kctx);
+
+ kbasep_mem_profile_debugfs_remove(kctx);
+
+ /* Ensure the core is powered up for the destroy process */
+ /* A suspend won't happen here, because we're in a syscall from a userspace
+ * thread. */
+ kbase_pm_context_active(kbdev);
+
+ if (kbdev->hwcnt.kctx == kctx) {
+ /* disable the use of the hw counters if the app didn't use the API correctly or crashed */
+ KBASE_TRACE_ADD(kbdev, CORE_CTX_HWINSTR_TERM, kctx, NULL, 0u, 0u);
+ dev_warn(kbdev->dev, "The privileged process asking for instrumentation forgot to disable it " "before exiting. Will end instrumentation for them");
+ kbase_instr_hwcnt_disable(kctx);
+ }
+
+ kbase_jd_zap_context(kctx);
+ kbase_event_cleanup(kctx);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* MMU is disabled as part of scheduling out the context */
+ kbase_mmu_free_pgd(kctx);
+
+ /* drop the aliasing sink page now that it can't be mapped anymore */
+ kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0);
+
+ /* free pending region setups */
+ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+ while (pending_regions_to_clean) {
+ unsigned int cookie = __ffs(pending_regions_to_clean);
+ BUG_ON(!kctx->pending_regions[cookie]);
+
+ kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+ kctx->pending_regions[cookie] = NULL;
+ pending_regions_to_clean &= ~(1UL << cookie);
+ }
+
+ kbase_region_tracker_term(kctx);
+ kbase_gpu_vm_unlock(kctx);
+
+	/* Safe to call this one even if it wasn't initialized (assuming kctx was sufficiently zeroed) */
+ kbasep_js_kctx_term(kctx);
+
+ kbase_jd_exit(kctx);
+
+ kbase_pm_context_idle(kbdev);
+
+ kbase_mmu_term(kctx);
+
+ pages = atomic_read(&kctx->used_pages);
+ if (pages != 0)
+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+ if (kctx->keep_gpu_powered) {
+ atomic_dec(&kbdev->keep_gpu_powered_count);
+ kbase_pm_context_idle(kbdev);
+ }
+
+ kbase_mem_allocator_term(&kctx->osalloc);
+ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+ vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context)
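+
+/*
+ * Illustrative usage sketch (hypothetical, for documentation purposes):
+ * contexts are created and destroyed in matched pairs, typically from the
+ * device open/release paths; the error handling shown is a minimal
+ * assumption.
+ *
+ * @code
+ * struct kbase_context *kctx = kbase_create_context(kbdev, is_compat_task());
+ *
+ * if (!kctx)
+ *	return -ENOMEM;
+ * ...
+ * kbase_destroy_context(kctx);
+ * @endcode
+ */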
+
+/**
+ * Set creation flags on a context
+ */
+mali_error kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+ mali_error err = MALI_ERROR_NONE;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* Validate flags */
+ if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out;
+ }
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* Translate the flags */
+ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+ js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+
+ if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0)
+ js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE;
+
+ /* Latch the initial attributes into the Job Scheduler */
+ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100755
index 000000000000..aaeef908cff3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,3368 @@
+
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_core_linux.c
+ * Base kernel driver init.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include "mali_kbase_devfreq.h"
+#endif /* CONFIG_MALI_DEVFREQ */
+#include <mali_kbase_cpuprops.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/compat.h> /* is_compat_task */
+#include <linux/version.h>
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+
+/*
+ * This file is included since when we support device tree we don't
+ * use the platform fake code for registering the kbase config attributes.
+ */
+#ifdef CONFIG_OF
+#include <mali_kbase_config.h>
+#endif
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+/* GPU IRQ Tags */
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
+
+
+struct kbase_irq_table {
+ u32 tag;
+ irq_handler_t handler;
+};
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_DRV_NAME "mali"
+
+static const char kbase_drv_name[] = KBASE_DRV_NAME;
+
+static int kbase_dev_nr;
+
+static DEFINE_SEMAPHORE(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+KBASE_EXPORT_TEST_API(kbase_dev_list_lock)
+KBASE_EXPORT_TEST_API(kbase_dev_list)
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static INLINE void __compile_time_asserts(void)
+{
+ CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+ struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+ .release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+ struct kds_resource **kds_resources;
+ unsigned long *kds_access_bitmap;
+ int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+ struct kbasep_kds_resource_set_file_data *data;
+
+ data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+ if (NULL != data) {
+ if (NULL != data->lock)
+ kds_resource_set_release(&data->lock);
+
+ kfree(data);
+ }
+ return 0;
+}
+
+static mali_error kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
+{
+ struct base_external_resource *res = ext_res;
+ int res_id;
+
+ /* assume we have to wait for all */
+
+ KBASE_DEBUG_ASSERT(0 != num_elems);
+ resources_list->kds_resources = kmalloc_array(num_elems,
+ sizeof(struct kds_resource *), GFP_KERNEL);
+
+ if (NULL == resources_list->kds_resources)
+ return MALI_ERROR_OUT_OF_MEMORY;
+
+ KBASE_DEBUG_ASSERT(0 != num_elems);
+ resources_list->kds_access_bitmap = kzalloc(
+ sizeof(unsigned long) *
+ ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
+ GFP_KERNEL);
+
+ if (NULL == resources_list->kds_access_bitmap) {
+ kfree(resources_list->kds_access_bitmap);
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ for (res_id = 0; res_id < num_elems; res_id++, res++) {
+ int exclusive;
+ struct kbase_va_region *reg;
+ struct kds_resource *kds_res = NULL;
+
+ exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+ /* did we find a matching region object? */
+ if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+ break;
+
+ /* no need to check reg->alloc as only regions with an alloc has
+ * a size, and kbase_region_tracker_find_region_enclosing_address
+ * only returns regions with size > 0 */
+ switch (reg->alloc->type) {
+#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ kds_res = ump_dd_kds_resource_get(reg->alloc->imported.ump_handle);
+ break;
+#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
+ default:
+ break;
+ }
+
+ /* no kds resource for the region ? */
+ if (!kds_res)
+ break;
+
+ resources_list->kds_resources[res_id] = kds_res;
+
+ if (exclusive)
+ set_bit(res_id, resources_list->kds_access_bitmap);
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ /* did the loop run to completion? */
+ if (res_id == num_elems)
+ return MALI_ERROR_NONE;
+
+ /* Clean up as the resource list is not valid. */
+ kfree(resources_list->kds_resources);
+ kfree(resources_list->kds_access_bitmap);
+
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+static mali_bool kbasep_validate_kbase_pointer(
+ struct kbase_context *kctx, union kbase_pointer *p)
+{
+ if (kctx->is_compat) {
+ if (p->compat_value == 0)
+ return MALI_FALSE;
+ } else {
+ if (NULL == p->value)
+ return MALI_FALSE;
+ }
+ return MALI_TRUE;
+}
+
+static mali_error kbase_external_buffer_lock(struct kbase_context *kctx, struct kbase_uk_ext_buff_kds_data *args, u32 args_size)
+{
+ struct base_external_resource *ext_res_copy;
+ size_t ext_resource_size;
+ mali_error return_error = MALI_ERROR_FUNCTION_FAILED;
+ int fd;
+
+ if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ /* Check user space has provided valid data */
+ if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) ||
+ !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) ||
+ (0 == args->num_res) ||
+ (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ ext_resource_size = sizeof(struct base_external_resource) * args->num_res;
+
+ KBASE_DEBUG_ASSERT(0 != ext_resource_size);
+ ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL);
+
+ if (NULL != ext_res_copy) {
+ struct base_external_resource __user *ext_res_user;
+ int __user *file_descriptor_user;
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat) {
+ ext_res_user = compat_ptr(args->external_resource.compat_value);
+ file_descriptor_user = compat_ptr(args->file_descriptor.compat_value);
+ } else {
+#endif /* CONFIG_COMPAT */
+ ext_res_user = args->external_resource.value;
+ file_descriptor_user = args->file_descriptor.value;
+#ifdef CONFIG_COMPAT
+ }
+#endif /* CONFIG_COMPAT */
+
+ /* Copy the external resources to lock from user space */
+ if (0 == copy_from_user(ext_res_copy, ext_res_user, ext_resource_size)) {
+ struct kbasep_kds_resource_set_file_data *fdata;
+
+ /* Allocate data to be stored in the file */
+ fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
+
+ if (NULL != fdata) {
+ struct kbase_kds_resource_list_data resource_list_data;
+ /* Parse given elements and create resource and access lists */
+ return_error = kbasep_kds_allocate_resource_list_data(kctx, ext_res_copy, args->num_res, &resource_list_data);
+ if (MALI_ERROR_NONE == return_error) {
+ long err;
+
+ fdata->lock = NULL;
+
+ fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0);
+
+ err = copy_to_user(file_descriptor_user, &fd, sizeof(fd));
+
+ /* If the file descriptor was valid and we successfully copied it to user space, then we
+ * can try and lock the requested kds resources.
+ */
+ if ((fd >= 0) && (0 == err)) {
+ struct kds_resource_set *lock;
+
+ lock = kds_waitall(args->num_res, resource_list_data.kds_access_bitmap,
+ resource_list_data.kds_resources,
+ KDS_WAIT_BLOCKING);
+
+ if (IS_ERR_OR_NULL(lock)) {
+ return_error = MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ return_error = MALI_ERROR_NONE;
+ fdata->lock = lock;
+ }
+ } else {
+ return_error = MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ kfree(resource_list_data.kds_resources);
+ kfree(resource_list_data.kds_access_bitmap);
+ }
+
+ if (MALI_ERROR_NONE != return_error) {
+ /* If the file was opened successfully then close it which will clean up
+					 * the file data, otherwise we clean up the file data ourselves. */
+ if (fd >= 0)
+ sys_close(fd);
+ else
+ kfree(fdata);
+ }
+ } else {
+ return_error = MALI_ERROR_OUT_OF_MEMORY;
+ }
+ }
+ kfree(ext_res_copy);
+ }
+ return return_error;
+}
+#endif /* CONFIG_KDS */
+
+static mali_error kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+ struct kbase_device *kbdev;
+ union uk_header *ukh = args;
+ u32 id;
+
+ KBASE_DEBUG_ASSERT(ukh != NULL);
+
+ kbdev = kctx->kbdev;
+ id = ukh->id;
+ ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+ if (UKP_FUNC_ID_CHECK_VERSION == id) {
+ if (args_size == sizeof(struct uku_version_check_args)) {
+ struct uku_version_check_args *version_check = (struct uku_version_check_args *)args;
+
+ switch (version_check->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ case 6:
+ /* We are backwards compatible with version 6,
+ * so pretend to be the old version */
+ version_check->major = 6;
+ version_check->minor = 1;
+ break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ case 7:
+ /* We are backwards compatible with version 7,
+ * so pretend to be the old version */
+ version_check->major = 7;
+ version_check->minor = 1;
+ break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+ default:
+ /* We return our actual version regardless if it
+ * matches the version returned by userspace -
+ * userspace can bail if it can't handle this
+ * version */
+ version_check->major = BASE_UK_VERSION_MAJOR;
+ version_check->minor = BASE_UK_VERSION_MINOR;
+ }
+
+ ukh->ret = MALI_ERROR_NONE;
+ } else {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+ return MALI_ERROR_NONE;
+ }
+
+
+ if (!atomic_read(&kctx->setup_complete)) {
+ /* setup pending, try to signal that we'll do the setup */
+ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1)) {
+ /* setup was already in progress, err this call */
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ /* we're the one doing setup */
+
+ /* is it the only call we accept? */
+ if (id == KBASE_FUNC_SET_FLAGS) {
+ struct kbase_uk_set_flags *kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+ if (sizeof(*kbase_set_flags) != args_size) {
+ /* not matching the expected call, stay stuck in setup mode */
+ goto bad_size;
+ }
+
+ if (MALI_ERROR_NONE != kbase_context_set_create_flags(kctx, kbase_set_flags->create_flags)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ /* bad flags, will stay stuck in setup mode */
+ return MALI_ERROR_NONE;
+ } else {
+ /* we've done the setup, all OK */
+ atomic_set(&kctx->setup_complete, 1);
+ return MALI_ERROR_NONE;
+ }
+ } else {
+ /* unexpected call, will stay stuck in setup mode */
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ }
+
+ /* setup complete, perform normal operation */
+ switch (id) {
+ case KBASE_FUNC_MEM_ALLOC:
+ {
+ struct kbase_uk_mem_alloc *mem = args;
+ struct kbase_va_region *reg;
+
+ if (sizeof(*mem) != args_size)
+ goto bad_size;
+
+ reg = kbase_mem_alloc(kctx, mem->va_pages, mem->commit_pages, mem->extent, &mem->flags, &mem->gpu_va, &mem->va_alignment);
+ if (!reg)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_MEM_IMPORT:
+ {
+ struct kbase_uk_mem_import *mem_import = args;
+ int __user *phandle;
+ int handle;
+
+ if (sizeof(*mem_import) != args_size)
+ goto bad_size;
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ phandle = compat_ptr(mem_import->phandle.compat_value);
+ else
+#endif
+ phandle = mem_import->phandle.value;
+
+ switch (mem_import->type) {
+ case BASE_MEM_IMPORT_TYPE_UMP:
+ get_user(handle, phandle);
+ break;
+ case BASE_MEM_IMPORT_TYPE_UMM:
+ get_user(handle, phandle);
+ break;
+ default:
+ goto bad_type;
+ break;
+ }
+
+ if (kbase_mem_import(kctx, mem_import->type, handle, &mem_import->gpu_va, &mem_import->va_pages, &mem_import->flags)) {
+bad_type:
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+ break;
+ }
+ case KBASE_FUNC_MEM_ALIAS: {
+ struct kbase_uk_mem_alias *alias = args;
+ struct base_mem_aliasing_info __user *user_ai;
+ struct base_mem_aliasing_info *ai;
+
+ if (sizeof(*alias) != args_size)
+ goto bad_size;
+
+ if (alias->nents > 2048) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ user_ai = compat_ptr(alias->ai.compat_value);
+ else
+#endif
+ user_ai = alias->ai.value;
+
+ ai = vmalloc(sizeof(*ai) * alias->nents);
+
+ if (!ai) {
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ break;
+ }
+
+ if (copy_from_user(ai, user_ai,
+ sizeof(*ai) * alias->nents)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ goto copy_failed;
+ }
+
+ alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+ alias->stride,
+ alias->nents, ai,
+ &alias->va_pages);
+ if (!alias->gpu_va) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ goto no_alias;
+ }
+no_alias:
+copy_failed:
+ vfree(ai);
+ break;
+ }
+ case KBASE_FUNC_MEM_COMMIT:
+ {
+ struct kbase_uk_mem_commit *commit = args;
+
+ if (sizeof(*commit) != args_size)
+ goto bad_size;
+
+ if (commit->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_commit(kctx, commit->gpu_addr,
+ commit->pages,
+ (base_backing_threshold_status *)&commit->result_subcode))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ break;
+ }
+
+ case KBASE_FUNC_MEM_QUERY:
+ {
+ struct kbase_uk_mem_query *query = args;
+
+ if (sizeof(*query) != args_size)
+ goto bad_size;
+
+ if (query->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+ query->query != KBASE_MEM_QUERY_VA_SIZE &&
+ query->query != KBASE_MEM_QUERY_FLAGS) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ ukh->ret = kbase_mem_query(kctx, query->gpu_addr, query->query, &query->value);
+ break;
+ }
+ break;
+
+ case KBASE_FUNC_MEM_FLAGS_CHANGE:
+ {
+ struct kbase_uk_mem_flags_change *fc = args;
+
+ if (sizeof(*fc) != args_size)
+ goto bad_size;
+
+ if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_flags_change(kctx, fc->gpu_va, fc->flags, fc->mask))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ break;
+ }
+ case KBASE_FUNC_MEM_FREE:
+ {
+ struct kbase_uk_mem_free *mem = args;
+
+ if (sizeof(*mem) != args_size)
+ goto bad_size;
+
+ if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_free(kctx, mem->gpu_addr))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ case KBASE_FUNC_JOB_SUBMIT:
+ {
+ struct kbase_uk_job_submit *job = args;
+
+ if (sizeof(*job) != args_size)
+ goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if (MALI_ERROR_NONE != kbase_jd_submit(kctx, job, 0))
+#else
+ if (MALI_ERROR_NONE != kbase_jd_submit(kctx, job))
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ case KBASE_FUNC_JOB_SUBMIT_UK6:
+ {
+ struct kbase_uk_job_submit *job = args;
+
+ if (sizeof(*job) != args_size)
+ goto bad_size;
+
+ if (MALI_ERROR_NONE != kbase_jd_submit(kctx, job, 1))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+#endif
+
+ case KBASE_FUNC_SYNC:
+ {
+ struct kbase_uk_sync_now *sn = args;
+
+ if (sizeof(*sn) != args_size)
+ goto bad_size;
+
+ if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (MALI_ERROR_NONE != kbase_sync_now(kctx, &sn->sset))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ case KBASE_FUNC_DISJOINT_QUERY:
+ {
+ struct kbase_uk_disjoint_query *dquery = args;
+
+ if (sizeof(*dquery) != args_size)
+ goto bad_size;
+
+ /* Get the disjointness counter value. */
+ dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+ break;
+ }
+
+ case KBASE_FUNC_POST_TERM:
+ {
+ kbase_event_close(kctx);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_SETUP:
+ {
+ struct kbase_uk_hwcnt_setup *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (MALI_ERROR_NONE != kbase_instr_hwcnt_setup(kctx, setup))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_DUMP:
+ {
+ /* args ignored */
+ if (MALI_ERROR_NONE != kbase_instr_hwcnt_dump(kctx))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_CLEAR:
+ {
+ /* args ignored */
+ if (MALI_ERROR_NONE != kbase_instr_hwcnt_clear(kctx))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ case KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE:
+ {
+ struct kbase_uk_cpuprops *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (MALI_ERROR_NONE != kbase_cpuprops_uk_get_props(kctx, setup))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+ case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+ {
+ struct kbase_uk_gpuprops *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (MALI_ERROR_NONE != kbase_gpuprops_uk_get_props(kctx, setup))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_FIND_CPU_OFFSET:
+ {
+ struct kbase_uk_find_cpu_offset *find = args;
+
+ if (sizeof(*find) != args_size)
+ goto bad_size;
+
+ if (find->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+ goto out_bad;
+ }
+
+ if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ mali_error err;
+
+ err = kbasep_find_enclosing_cpu_mapping_offset(
+ kctx,
+ find->gpu_addr,
+ (uintptr_t) find->cpu_addr,
+ (size_t) find->size,
+ &find->offset);
+
+ if (err != MALI_ERROR_NONE)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+ break;
+ }
+ case KBASE_FUNC_GET_VERSION:
+ {
+ struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+ if (sizeof(*get_version) != args_size)
+ goto bad_size;
+
+ /* version buffer size check is made in compile time assert */
+ memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+ get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+ break;
+ }
+
+ case KBASE_FUNC_STREAM_CREATE:
+ {
+#ifdef CONFIG_SYNC
+ struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+ if (sizeof(*screate) != args_size)
+ goto bad_size;
+
+ if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+ /* not NULL terminated */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ ukh->ret = kbase_stream_create(screate->name, &screate->fd);
+#else /* CONFIG_SYNC */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+ break;
+ }
+ case KBASE_FUNC_FENCE_VALIDATE:
+ {
+#ifdef CONFIG_SYNC
+ struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+ if (sizeof(*fence_validate) != args_size)
+ goto bad_size;
+
+ ukh->ret = kbase_fence_validate(fence_validate->fd);
+#endif /* CONFIG_SYNC */
+ break;
+ }
+
+ case KBASE_FUNC_EXT_BUFFER_LOCK:
+ {
+#ifdef CONFIG_KDS
+ ukh->ret = kbase_external_buffer_lock(kctx, (struct kbase_uk_ext_buff_kds_data *)args, args_size);
+#endif /* CONFIG_KDS */
+ break;
+ }
+
+ case KBASE_FUNC_SET_TEST_DATA:
+ {
+#if MALI_UNIT_TEST
+ struct kbase_uk_set_test_data *set_data = args;
+
+ shared_kernel_test_data = set_data->test_data;
+ shared_kernel_test_data.kctx.value = (void __user *)kctx;
+ shared_kernel_test_data.mm.value = (void __user *)current->mm;
+ ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+ break;
+ }
+
+ case KBASE_FUNC_INJECT_ERROR:
+ {
+#ifdef CONFIG_MALI_ERROR_INJECT
+ unsigned long flags;
+ struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+ /*mutex lock */
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ ukh->ret = job_atom_inject_error(&params);
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+ /*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+ break;
+ }
+
+ case KBASE_FUNC_MODEL_CONTROL:
+ {
+#ifdef CONFIG_MALI_NO_MALI
+ unsigned long flags;
+ struct kbase_model_control_params params =
+ ((struct kbase_uk_model_control_params *)args)->params;
+
+ /*mutex lock */
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ ukh->ret = midg_model_control(kbdev->model, &params);
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+ /*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+ break;
+ }
+
+ case KBASE_FUNC_KEEP_GPU_POWERED:
+ {
+ struct kbase_uk_keep_gpu_powered *kgp =
+ (struct kbase_uk_keep_gpu_powered *)args;
+
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread.
+ *
+ * Nevertheless, we'd get the wrong pm_context_active/idle counting
+ * here if a suspend did happen, so let's assert it won't: */
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ if (kgp->enabled && !kctx->keep_gpu_powered) {
+ kbase_pm_context_active(kbdev);
+ atomic_inc(&kbdev->keep_gpu_powered_count);
+ kctx->keep_gpu_powered = MALI_TRUE;
+ } else if (!kgp->enabled && kctx->keep_gpu_powered) {
+ atomic_dec(&kbdev->keep_gpu_powered_count);
+ kbase_pm_context_idle(kbdev);
+ kctx->keep_gpu_powered = MALI_FALSE;
+ }
+
+ break;
+ }
+
+ case KBASE_FUNC_GET_PROFILING_CONTROLS:
+ {
+ struct kbase_uk_profiling_controls *controls =
+ (struct kbase_uk_profiling_controls *)args;
+ u32 i;
+
+ if (sizeof(*controls) != args_size)
+ goto bad_size;
+
+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+ controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+ break;
+ }
+
+ /* used only for testing purposes; these controls are to be set by gator through gator API */
+ case KBASE_FUNC_SET_PROFILING_CONTROLS:
+ {
+ struct kbase_uk_profiling_controls *controls =
+ (struct kbase_uk_profiling_controls *)args;
+ u32 i;
+
+ if (sizeof(*controls) != args_size)
+ goto bad_size;
+
+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+ _mali_profiling_control(i, controls->profiling_controls[i]);
+
+ break;
+ }
+
+ case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+ {
+ struct kbase_uk_debugfs_mem_profile_add *add_data =
+ (struct kbase_uk_debugfs_mem_profile_add *)args;
+ char *buf;
+ char __user *user_buf;
+
+ if (sizeof(*add_data) != args_size)
+ goto bad_size;
+
+ if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+ dev_err(kbdev->dev, "buffer too big");
+ goto out_bad;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ user_buf = compat_ptr(add_data->buf.compat_value);
+ else
+#endif
+ user_buf = add_data->buf.value;
+
+ buf = kmalloc(add_data->len, GFP_KERNEL);
+ if (!buf)
+ goto out_bad;
+
+ if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ kfree(buf);
+ goto out_bad;
+ }
+ kbasep_mem_profile_debugfs_insert(kctx, buf,
+ add_data->len);
+
+ break;
+ }
+
+ default:
+ dev_err(kbdev->dev, "unknown ioctl %u", id);
+ goto out_bad;
+ }
+
+ return MALI_ERROR_NONE;
+
+ bad_size:
+ dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+ return dev_get_drvdata(dev);
+}
+
+/*
+ * API to acquire device list semaphore and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+ down(&kbase_dev_list_lock);
+ return &kbase_dev_list;
+}
+
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+ up(&kbase_dev_list_lock);
+}
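+
+/*
+ * A minimal usage sketch for the pair above (do_something() is only a
+ * placeholder, not a real driver function):
+ *
+ *   const struct list_head *list = kbase_dev_list_get();
+ *   struct kbase_device *kbdev;
+ *
+ *   list_for_each_entry(kbdev, list, entry)
+ *           do_something(kbdev);
+ *   kbase_dev_list_put(list);
+ */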
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+ struct kbase_device *kbdev = NULL;
+ struct list_head *entry;
+
+ down(&kbase_dev_list_lock);
+ list_for_each(entry, &kbase_dev_list) {
+ struct kbase_device *tmp;
+
+ tmp = list_entry(entry, struct kbase_device, entry);
+ if (tmp->mdev.minor == minor || minor == -1) {
+ kbdev = tmp;
+ get_device(kbdev->dev);
+ break;
+ }
+ }
+ up(&kbase_dev_list_lock);
+
+ return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+ put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+ struct kbase_device *kbdev = NULL;
+ struct kbase_context *kctx;
+ int ret = 0;
+
+ kbdev = kbase_find_device(iminor(inode));
+
+ if (!kbdev)
+ return -ENODEV;
+
+ kctx = kbase_create_context(kbdev, is_compat_task());
+ if (!kctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ init_waitqueue_head(&kctx->event_queue);
+ filp->private_data = kctx;
+
+ dev_dbg(kbdev->dev, "created base context\n");
+
+ {
+ struct kbasep_kctx_list_element *element;
+
+ element = kzalloc(sizeof(*element), GFP_KERNEL);
+ if (element) {
+ mutex_lock(&kbdev->kctx_list_lock);
+ element->kctx = kctx;
+ list_add(&element->link, &kbdev->kctx_list);
+ mutex_unlock(&kbdev->kctx_list_lock);
+ } else {
+ /* we don't treat this as a fail - just warn about it */
+ dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+ }
+ }
+ return 0;
+
+ out:
+ kbase_release_device(kbdev);
+ return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+ struct kbase_context *kctx = filp->private_data;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_kctx_list_element *element, *tmp;
+ mali_bool found_element = MALI_FALSE;
+
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+ if (element->kctx == kctx) {
+ list_del(&element->link);
+ kfree(element);
+ found_element = MALI_TRUE;
+ }
+ }
+ mutex_unlock(&kbdev->kctx_list_lock);
+ if (!found_element)
+ dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+ filp->private_data = NULL;
+ kbase_destroy_context(kctx);
+
+ dev_dbg(kbdev->dev, "deleted base context\n");
+ kbase_release_device(kbdev);
+ return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */
+ u32 size = _IOC_SIZE(cmd);
+ struct kbase_context *kctx = filp->private_data;
+
+ if (size > CALL_MAX_SIZE)
+ return -ENOTTY;
+
+ if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+ dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+ return -EFAULT;
+ }
+
+ if (MALI_ERROR_NONE != kbase_dispatch(kctx, &msg, size))
+ return -EFAULT;
+
+ if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+ dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+ struct kbase_context *kctx = filp->private_data;
+ struct base_jd_event_v2 uevent;
+ int out_count = 0;
+
+ if (count < sizeof(uevent))
+ return -ENOBUFS;
+
+ do {
+ while (kbase_event_dequeue(kctx, &uevent)) {
+ if (out_count > 0)
+ goto out;
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(kctx->event_queue, kbase_event_pending(kctx)))
+ return -ERESTARTSYS;
+ }
+ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+ if (out_count == 0)
+ return -EPIPE;
+ goto out;
+ }
+
+ if (copy_to_user(buf, &uevent, sizeof(uevent)))
+ return -EFAULT;
+
+ buf += sizeof(uevent);
+ out_count++;
+ count -= sizeof(uevent);
+ } while (count >= sizeof(uevent));
+
+ out:
+ return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+ struct kbase_context *kctx = filp->private_data;
+
+ poll_wait(filp, &kctx->event_queue, wait);
+ if (kbase_event_pending(kctx))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+ wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup)
+
+static int kbase_check_flags(int flags)
+{
+ /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+ * closes the file descriptor in a child process.
+ */
+ if (0 == (flags & O_CLOEXEC))
+ return -EINVAL;
+
+ return 0;
+}
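+
+/*
+ * Consequently, userspace must open the device node with O_CLOEXEC, e.g.
+ * (the node path shown here is only illustrative):
+ *
+ *   int fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);
+ */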
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+ const unsigned long addr, const unsigned long len,
+ const unsigned long pgoff, const unsigned long flags)
+{
+#ifdef CONFIG_64BIT
+ /* based on get_unmapped_area, but simplified slightly because some
+ * values are known in advance */
+ struct kbase_context *kctx = filp->private_data;
+
+ if (!kctx->is_compat && !addr &&
+ kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long low_limit, high_limit, gap_start, gap_end;
+
+ /* Hardware has smaller VA than userspace, ensure the page
+ * comes from a VA which can be used on the GPU */
+
+ gap_end = (1UL<<33);
+ if (gap_end < len)
+ return -ENOMEM;
+ high_limit = gap_end - len;
+ low_limit = PAGE_SIZE + len;
+
+ gap_start = mm->highest_vm_end;
+ if (gap_start <= high_limit)
+ goto found_highest;
+
+ if (RB_EMPTY_ROOT(&mm->mm_rb))
+ return -ENOMEM;
+ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+ if (vma->rb_subtree_gap < len)
+ return -ENOMEM;
+
+ while (true) {
+ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+ struct vm_area_struct *right =
+ rb_entry(vma->vm_rb.rb_right,
+ struct vm_area_struct, vm_rb);
+ if (right->rb_subtree_gap >= len) {
+ vma = right;
+ continue;
+ }
+ }
+check_current:
+ gap_end = vma->vm_start;
+ if (gap_end < low_limit)
+ return -ENOMEM;
+ if (gap_start <= high_limit &&
+ gap_end - gap_start >= len)
+ goto found;
+
+ if (vma->vm_rb.rb_left) {
+ struct vm_area_struct *left =
+ rb_entry(vma->vm_rb.rb_left,
+ struct vm_area_struct, vm_rb);
+
+ if (left->rb_subtree_gap >= len) {
+ vma = left;
+ continue;
+ }
+ }
+ while (true) {
+ struct rb_node *prev = &vma->vm_rb;
+
+ if (!rb_parent(prev))
+ return -ENOMEM;
+ vma = rb_entry(rb_parent(prev),
+ struct vm_area_struct, vm_rb);
+ if (prev == vma->vm_rb.rb_right) {
+ gap_start = vma->vm_prev ?
+ vma->vm_prev->vm_end : 0;
+ goto check_current;
+ }
+ }
+ }
+
+found:
+ if (gap_end > (1UL<<33))
+ gap_end = (1UL<<33);
+
+found_highest:
+ gap_end -= len;
+
+ VM_BUG_ON(gap_end < PAGE_SIZE);
+ VM_BUG_ON(gap_end < gap_start);
+ return gap_end;
+ }
+#endif
+ /* No special requirements - fallback to the default version */
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+static const struct file_operations kbase_fops = {
+ .owner = THIS_MODULE,
+ .open = kbase_open,
+ .release = kbase_release,
+ .read = kbase_read,
+ .poll = kbase_poll,
+ .unlocked_ioctl = kbase_ioctl,
+ .compat_ioctl = kbase_ioctl,
+ .mmap = kbase_mmap,
+ .check_flags = kbase_check_flags,
+ .get_unmapped_area = kbase_get_unmapped_area,
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+ writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+ return readl(kbdev->reg + offset);
+}
+#endif
+
+#ifndef CONFIG_MALI_NO_MALI
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+ return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+ return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.driver_ready_for_irqs)
+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_job_done(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.driver_ready_for_irqs)
+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_mmu_interrupt(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.driver_ready_for_irqs)
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_gpu_interrupt(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+static irq_handler_t kbase_handler_table[] = {
+ [JOB_IRQ_TAG] = kbase_job_irq_handler,
+ [MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+ [GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * @brief Registers the given interrupt handler for the requested interrupt type
+ *
+ * If no custom handler is specified, the default handler is registered instead.
+ *
+ * @param[in] kbdev - Device for which the handler is to be registered
+ * @param[in] custom_handler - Handler to be registered
+ * @param[in] irq_type - Interrupt type
+ * @return MALI_ERROR_NONE on success, MALI_ERROR_FUNCTION_FAILED otherwise
+ */
+static mali_error kbase_set_custom_irq_handler(struct kbase_device *kbdev, irq_handler_t custom_handler, int irq_type)
+{
+ mali_error result = MALI_ERROR_NONE;
+ irq_handler_t requested_irq_handler = NULL;
+
+ KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && (GPU_IRQ_HANDLER >= irq_type));
+
+ /* Release previous handler */
+ if (kbdev->irqs[irq_type].irq)
+ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+ requested_irq_handler = (NULL != custom_handler) ? custom_handler : kbase_handler_table[irq_type];
+
+ if (0 != request_irq(kbdev->irqs[irq_type].irq,
+ requested_irq_handler,
+ kbdev->irqs[irq_type].flags | IRQF_SHARED,
+ dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+ result = MALI_ERROR_FUNCTION_FAILED;
+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler)
+
+/* test correct interrupt assignment and reception by the CPU */
+struct kbasep_irq_test {
+ struct hrtimer timer;
+ wait_queue_head_t wait;
+ int triggered;
+ u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT 500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbasep_irq_test_data.triggered = 1;
+ wake_up(&kbasep_irq_test_data.wait);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbasep_irq_test_data.triggered = 1;
+ wake_up(&kbasep_irq_test_data.wait);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+ struct kbasep_irq_test *test_data = container_of(timer, struct kbasep_irq_test, timer);
+
+ test_data->timeout = 1;
+ test_data->triggered = 1;
+ wake_up(&test_data->wait);
+ return HRTIMER_NORESTART;
+}
+
+static mali_error kbasep_common_test_interrupt(struct kbase_device * const kbdev, u32 tag)
+{
+ mali_error err = MALI_ERROR_NONE;
+ irq_handler_t test_handler;
+
+ u32 old_mask_val;
+ u16 mask_offset;
+ u16 rawstat_offset;
+
+ switch (tag) {
+ case JOB_IRQ_TAG:
+ test_handler = kbase_job_irq_test_handler;
+ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+ break;
+ case MMU_IRQ_TAG:
+ test_handler = kbase_mmu_irq_test_handler;
+ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+ mask_offset = MMU_REG(MMU_IRQ_MASK);
+ break;
+ case GPU_IRQ_TAG:
+ /* already tested by pm_driver - bail out */
+ default:
+ return MALI_ERROR_NONE;
+ }
+
+ /* store old mask */
+ old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+ /* mask interrupts */
+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+ if (kbdev->irqs[tag].irq) {
+ /* release original handler and install test handler */
+ if (MALI_ERROR_NONE != kbase_set_custom_irq_handler(kbdev, test_handler, tag)) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ kbasep_irq_test_data.timeout = 0;
+ hrtimer_init(&kbasep_irq_test_data.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbasep_irq_test_data.timer.function = kbasep_test_interrupt_timeout;
+
+ /* trigger interrupt */
+ kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+ kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+ hrtimer_start(&kbasep_irq_test_data.timer, HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), HRTIMER_MODE_REL);
+
+ wait_event(kbasep_irq_test_data.wait, kbasep_irq_test_data.triggered != 0);
+
+ if (kbasep_irq_test_data.timeout != 0) {
+ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", kbdev->irqs[tag].irq, tag);
+ err = MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", kbdev->irqs[tag].irq, tag);
+ }
+
+ hrtimer_cancel(&kbasep_irq_test_data.timer);
+ kbasep_irq_test_data.triggered = 0;
+
+ /* mask interrupts */
+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+ /* release test handler */
+ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+ }
+
+ /* restore original interrupt */
+ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], kbdev->irqs[tag].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", kbdev->irqs[tag].irq, tag);
+ err = MALI_ERROR_FUNCTION_FAILED;
+ }
+ }
+ /* restore old mask */
+ kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+ return err;
+}
+
+static mali_error kbasep_common_test_interrupt_handlers(struct kbase_device * const kbdev)
+{
+ mali_error err;
+
+ init_waitqueue_head(&kbasep_irq_test_data.wait);
+ kbasep_irq_test_data.triggered = 0;
+
+ /* A suspend won't happen during startup/insmod */
+ kbase_pm_context_active(kbdev);
+
+ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+ if (MALI_ERROR_NONE != err) {
+ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+ goto out;
+ }
+
+ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+ if (MALI_ERROR_NONE != err) {
+ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+ goto out;
+ }
+
+ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+ kbase_pm_context_idle(kbdev);
+
+ return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+static int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ int err;
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], kbdev->irqs[i].flags | IRQF_SHARED, dev_name(kbdev->dev), kbase_tag(kbdev, i));
+ if (err) {
+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+ goto release;
+ }
+ }
+
+ return 0;
+
+ release:
+ while (i-- > 0)
+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+ return err;
+}
+
+static void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ if (kbdev->irqs[i].irq)
+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+ }
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ if (kbdev->irqs[i].irq)
+ synchronize_irq(kbdev->irqs[i].irq);
+ }
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+#if KBASE_PM_EN
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_policy *current_policy;
+ const struct kbase_pm_policy *const *policy_list;
+ int policy_count;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ current_policy = kbase_pm_get_policy(kbdev);
+
+ policy_count = kbase_pm_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+ if (policy_list[i] == current_policy)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_policy *new_policy = NULL;
+ const struct kbase_pm_policy *const *policy_list;
+ int policy_count;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ policy_count = kbase_pm_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count; i++) {
+ if (sysfs_streq(policy_list[i]->name, buf)) {
+ new_policy = policy_list[i];
+ break;
+ }
+ }
+
+ if (!new_policy) {
+ dev_err(dev, "power_policy: policy not found\n");
+ return -EINVAL;
+ }
+
+ kbase_pm_set_policy(kbdev, new_policy);
+
+ return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
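+
+/*
+ * Illustrative use of the power_policy file from userspace, where <policy>
+ * stands for one of the names listed when reading the file (the active
+ * policy is shown in square brackets):
+ *
+ *   $ cat .../power_policy
+ *   $ echo <policy> > .../power_policy
+ */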
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_ca_policy *current_policy;
+ const struct kbase_pm_ca_policy *const *policy_list;
+ int policy_count;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ current_policy = kbase_pm_ca_get_policy(kbdev);
+
+ policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+ if (policy_list[i] == current_policy)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_ca_set_policy to change
+ * the policy.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_ca_policy *new_policy = NULL;
+ const struct kbase_pm_ca_policy *const *policy_list;
+ int policy_count;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count; i++) {
+ if (sysfs_streq(policy_list[i]->name, buf)) {
+ new_policy = policy_list[i];
+ break;
+ }
+ }
+
+ if (!new_policy) {
+ dev_err(dev, "core_availability_policy: policy not found\n");
+ return -EINVAL;
+ }
+
+ kbase_pm_ca_set_policy(kbdev, new_policy);
+
+ return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
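+
+/*
+ * The core_availability_policy file is used the same way as power_policy
+ * above: reading lists the core availability policies with the active one
+ * in brackets, and writing one of the listed names (shown here as
+ * <ca_policy>) selects it:
+ *
+ *   $ echo <ca_policy> > .../core_availability_policy
+ */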
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask);
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->shader_present_bitmap);
+
+ return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ u64 new_core_mask;
+ int rc;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ rc = kstrtoull(buf, 16, &new_core_mask);
+ if (rc)
+ return rc;
+
+ if ((new_core_mask & kbdev->shader_present_bitmap) != new_core_mask ||
+ !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) {
+ dev_err(dev, "power_policy: invalid core specification\n");
+ return -EINVAL;
+ }
+
+ if (kbdev->pm.debug_core_mask != new_core_mask) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->pm.debug_core_mask = new_core_mask;
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ }
+
+ return count;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
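+
+/*
+ * Illustrative use of the core_mask file; the value is parsed as a
+ * hexadecimal mask (kstrtoull, base 16) and must be a subset of the
+ * available cores reported when reading the file:
+ *
+ *   $ cat .../core_mask
+ *   $ echo 3 > .../core_mask        # restrict to shader cores 0 and 1
+ */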
+#endif /* KBASE_PM_EN */
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/* Import the external affinity mask variables */
+extern u64 mali_js0_affinity_mask;
+extern u64 mali_js1_affinity_mask;
+extern u64 mali_js2_affinity_mask;
+
+/**
+ * Structure containing a single shader affinity split configuration.
+ */
+struct sc_split_config {
+ char const *tag;
+ char const *human_readable;
+ u64 js0_mask;
+ u64 js1_mask;
+ u64 js2_mask;
+};
+
+/**
+ * Array of available shader affinity split configurations.
+ */
+static struct sc_split_config const sc_split_configs[] = {
+ /* All must be the first config (default). */
+ {
+ "all", "All cores",
+ 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
+ },
+ {
+ "mp1", "MP1 shader core",
+ 0x1, 0x1, 0x1
+ },
+ {
+ "mp2", "MP2 shader core",
+ 0x3, 0x3, 0x3
+ },
+ {
+ "mp4", "MP4 shader core",
+ 0xF, 0xF, 0xF
+ },
+ {
+ "mp1_vf", "MP1 vertex + MP1 fragment shader core",
+ 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL
+ },
+ {
+ "mp2_vf", "MP2 vertex + MP2 fragment shader core",
+ 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL
+ },
+ /* This must be the last config. */
+ {
+ NULL, NULL,
+ 0x0, 0x0, 0x0
+ },
+};
+
+/* Pointer to the currently active shader split configuration. */
+static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0];
+
+/** Show callback for the @c sc_split sysfs file
+ *
+ * Returns the current shader core affinity policy.
+ */
+static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ ssize_t ret;
+ /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed
+ * to be shorter than that at this time so no length check needed. */
+ ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag);
+ return ret;
+}
+
+/** Store callback for the @c sc_split sysfs file.
+ *
+ * This function is called when the @c sc_split sysfs file is written to.
+ * It modifies the system shader core affinity configuration to allow
+ * system profiling with different hardware configurations.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct sc_split_config const *config = &sc_split_configs[0];
+
+ /* Try to match: loop until we hit the last "NULL" entry */
+ while (config->tag) {
+ if (sysfs_streq(config->tag, buf)) {
+ current_sc_split_config = config;
+ mali_js0_affinity_mask = config->js0_mask;
+ mali_js1_affinity_mask = config->js1_mask;
+ mali_js2_affinity_mask = config->js2_mask;
+ dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag);
+ return count;
+ }
+ config++;
+ }
+
+ /* No match found in config list */
+ dev_err(dev, "sc_split: invalid value\n");
+ dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n");
+ return -ENOENT;
+}
+
+/** The sysfs file @c sc_split
+ *
+ * This is used for configuring/querying the current shader core work affinity
+ * configuration.
+ */
+static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split);
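+
+/*
+ * Illustrative use of the sc_split file; valid tags are those listed in
+ * sc_split_configs above ("all", "mp1", "mp2", "mp4", "mp1_vf", "mp2_vf"):
+ *
+ *   $ echo mp2 > .../sc_split
+ *   $ cat .../sc_split
+ *   Current sc_split: 'mp2'
+ */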
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called when the @c js_timeouts sysfs file is written to.
+ * The file contains eight values separated by whitespace. The values
+ * correspond to the KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ * KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS, KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL and
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS configuration values (in that order),
+ * with the difference that the js_timeouts values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfs file allows the values currently in use by the job
+ * scheduler to be overridden. Note that a value needs to be other than 0 for
+ * it to override the current job scheduler value.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int items;
+ unsigned long js_soft_stop_ms;
+ unsigned long js_soft_stop_ms_cl;
+ unsigned long js_hard_stop_ms_ss;
+ unsigned long js_hard_stop_ms_cl;
+ unsigned long js_hard_stop_ms_nss;
+ unsigned long js_reset_ms_ss;
+ unsigned long js_reset_ms_cl;
+ unsigned long js_reset_ms_nss;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ items = sscanf(buf, "%lu %lu %lu %lu %lu %lu %lu %lu",
+ &js_soft_stop_ms, &js_soft_stop_ms_cl,
+ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+ &js_hard_stop_ms_nss, &js_reset_ms_ss,
+ &js_reset_ms_cl, &js_reset_ms_nss);
+
+ if (items == 8) {
+ u64 ticks;
+
+ ticks = js_soft_stop_ms * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_soft_stop_ticks = ticks;
+
+ ticks = js_soft_stop_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_soft_stop_ticks_cl = ticks;
+
+ ticks = js_hard_stop_ms_ss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_hard_stop_ticks_ss = ticks;
+
+ ticks = js_hard_stop_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_hard_stop_ticks_cl = ticks;
+
+ ticks = js_hard_stop_ms_nss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_hard_stop_ticks_nss = ticks;
+
+ ticks = js_reset_ms_ss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_reset_ticks_ss = ticks;
+
+ ticks = js_reset_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_reset_ticks_cl = ticks;
+
+ ticks = js_reset_ms_nss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_tick_ns);
+ kbdev->js_reset_ticks_nss = ticks;
+
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_soft_stop_ticks, js_soft_stop_ms);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_soft_stop_ticks_cl, js_soft_stop_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_hard_stop_ticks_ss, js_hard_stop_ms_ss);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_hard_stop_ticks_cl, js_hard_stop_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_hard_stop_ticks_nss, js_hard_stop_ms_nss);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_reset_ticks_ss, js_reset_ms_ss);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_reset_ticks_cl, js_reset_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS with %lu ticks (%lu ms)\n", (unsigned long)kbdev->js_reset_ticks_nss, js_reset_ms_nss);
+
+ return count;
+ } else {
+ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\nUse format " "<soft_stop_ms> <hard_stop_ms_ss> <hard_stop_ms_nss> <reset_ms_ss> <reset_ms_nss>\n");
+ return -EINVAL;
+ }
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the values last written to the js_timeouts sysfs file.
+ * If the file has not been written to yet, the values will be 0.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+ u64 ms;
+ unsigned long js_soft_stop_ms;
+ unsigned long js_soft_stop_ms_cl;
+ unsigned long js_hard_stop_ms_ss;
+ unsigned long js_hard_stop_ms_cl;
+ unsigned long js_hard_stop_ms_nss;
+ unsigned long js_reset_ms_ss;
+ unsigned long js_reset_ms_cl;
+ unsigned long js_reset_ms_nss;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ms = (u64) kbdev->js_soft_stop_ticks * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_soft_stop_ms = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_soft_stop_ticks_cl * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_soft_stop_ms_cl = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_hard_stop_ticks_ss * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_ss = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_hard_stop_ticks_cl * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_cl = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_hard_stop_ticks_nss * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_nss = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_reset_ticks_ss * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_ss = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_reset_ticks_cl * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_cl = (unsigned long)ms;
+
+ ms = (u64) kbdev->js_reset_ticks_nss * kbdev->js_data.scheduling_tick_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_nss = (unsigned long)ms;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+ js_soft_stop_ms, js_soft_stop_ms_cl,
+ js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+ js_hard_stop_ms_nss, js_reset_ms_ss,
+ js_reset_ms_cl, js_reset_ms_nss);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS
+ * KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
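+
+/*
+ * Illustrative write to the js_timeouts file; eight whitespace-separated
+ * millisecond values are expected, in the order documented above (the
+ * numbers below are arbitrary example values, not recommended settings):
+ *
+ *   $ echo "500 500 5000 5000 100000 15000 15000 200000" > .../js_timeouts
+ */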
+
+
+
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (!strncmp("limit=", buf, MIN(6, count))) {
+ int force_replay_limit;
+ int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+ if (items == 1) {
+ kbdev->force_replay_random = MALI_FALSE;
+ kbdev->force_replay_limit = force_replay_limit;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ }
+ } else if (!strncmp("random_limit", buf, MIN(12, count))) {
+ kbdev->force_replay_random = MALI_TRUE;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ } else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+ kbdev->force_replay_random = MALI_FALSE;
+ kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ } else if (!strncmp("core_req=", buf, MIN(9, count))) {
+ unsigned int core_req;
+ int items = sscanf(buf, "core_req=%x", &core_req);
+
+ if (items == 1) {
+ kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+ return count;
+ }
+ }
+ dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+ return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the value last written to the force_replay sysfs file.
+ * If the file has not been written to yet, the values will be 0.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->force_replay_random)
+ ret = scnprintf(buf, PAGE_SIZE,
+ "limit=0\nrandom_limit\ncore_req=%x\n",
+ kbdev->force_replay_core_req);
+ else
+ ret = scnprintf(buf, PAGE_SIZE,
+ "limit=%u\nnorandom_limit\ncore_req=%x\n",
+ kbdev->force_replay_limit,
+ kbdev->force_replay_core_req);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay, set_force_replay);
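+
+/*
+ * Illustrative writes to the force_replay file; the accepted settings are
+ * exactly those parsed by set_force_replay() above:
+ *
+ *   $ echo limit=3 > .../force_replay
+ *   $ echo random_limit > .../force_replay
+ *   $ echo norandom_limit > .../force_replay
+ *   $ echo core_req=10 > .../force_replay     # core requirements, in hex
+ */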
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int items;
+ int softstop_always;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ items = sscanf(buf, "%d", &softstop_always);
+ if ((items == 1) && ((softstop_always == 0) || (softstop_always == 1))) {
+ kbdev->js_data.softstop_always = (mali_bool) softstop_always;
+ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", (kbdev->js_data.softstop_always == MALI_FALSE) ? "Disabled" : "Enabled");
+ return count;
+ } else {
+ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\nUse format " "<soft_stop_always>\n");
+ return -EINVAL;
+ }
+}
+
+static ssize_t show_js_softstop_always(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/**
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
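+
+/*
+ * Illustrative use of the js_softstop_always file; the only accepted
+ * values are 0 (disabled) and 1 (enabled):
+ *
+ *   $ echo 1 > .../js_softstop_always
+ */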
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+ KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+ /* This must be the last enum */
+ KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+ char *str;
+ kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+ {
+ .str = "dumptrace",
+ .func = &kbasep_trace_dump,
+ }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+ if (sysfs_streq(debug_commands[i].str, buf)) {
+ debug_commands[i].func(kbdev);
+ return count;
+ }
+ }
+
+ /* Debug Command not found */
+ dev_err(dev, "debug_command: command not known\n");
+ return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
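+
+/*
+ * Illustrative use of the debug_command file; reading it lists the known
+ * commands (currently only "dumptrace") and writing one of them issues it:
+ *
+ *   $ cat .../debug_command
+ *   dumptrace
+ *   $ echo dumptrace > .../debug_command
+ */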
+#endif /* CONFIG_MALI_DEBUG */
+
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ int err = -ENOMEM;
+
+ kbdev->reg_res = request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev));
+ if (!kbdev->reg_res) {
+ dev_err(kbdev->dev, "Register window unavailable\n");
+ err = -EIO;
+ goto out_region;
+ }
+
+ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+ if (!kbdev->reg) {
+ dev_err(kbdev->dev, "Can't remap register window\n");
+ err = -EINVAL;
+ goto out_ioremap;
+ }
+
+ return 0;
+
+ out_ioremap:
+ release_resource(kbdev->reg_res);
+ kfree(kbdev->reg_res);
+ out_region:
+ return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+ iounmap(kbdev->reg);
+ release_resource(kbdev->reg_res);
+ kfree(kbdev->reg_res);
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+static int kbase_common_device_init(struct kbase_device *kbdev)
+{
+ int err = -ENOMEM;
+ mali_error mali_err;
+ enum {
+ inited_mem = (1u << 0),
+ inited_job_slot = (1u << 1),
+ inited_pm = (1u << 2),
+ inited_js = (1u << 3),
+ inited_irqs = (1u << 4),
+ inited_debug = (1u << 5),
+ inited_js_softstop = (1u << 6),
+#if !MALI_CUSTOMER_RELEASE
+ inited_js_timeouts = (1u << 7),
+ inited_force_replay = (1u << 13),
+#endif /* !MALI_CUSTOMER_RELEASE */
+ inited_pm_runtime_init = (1u << 8),
+#ifdef CONFIG_DEBUG_FS
+ inited_gpu_memory = (1u << 9),
+#endif /* CONFIG_DEBUG_FS */
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ inited_sc_split = (1u << 11),
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ inited_timeline = (1u << 12),
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+ inited_pm_powerup = (1u << 14),
+ };
+
+ int inited = 0;
+
+ dev_set_drvdata(kbdev->dev, kbdev);
+
+ kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+ kbdev->mdev.name = kbdev->devname;
+ kbdev->mdev.fops = &kbase_fops;
+ kbdev->mdev.parent = get_device(kbdev->dev);
+
+ kbase_disjoint_init(kbdev);
+
+ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, kbase_dev_nr++);
+
+ if (misc_register(&kbdev->mdev)) {
+ dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname);
+ err = -EINVAL;
+ goto out_misc;
+ }
+#if KBASE_PM_EN
+ if (device_create_file(kbdev->dev, &dev_attr_power_policy)) {
+ dev_err(kbdev->dev, "Couldn't create power_policy sysfs file\n");
+ goto out_file;
+ }
+
+ if (device_create_file(kbdev->dev, &dev_attr_core_availability_policy)) {
+ dev_err(kbdev->dev, "Couldn't create core_availability_policy sysfs file\n");
+ goto out_file_core_availability_policy;
+ }
+
+ if (device_create_file(kbdev->dev, &dev_attr_core_mask)) {
+ dev_err(kbdev->dev, "Couldn't create core_mask sysfs file\n");
+ goto out_file_core_mask;
+ }
+#endif /* KBASE_PM_EN */
+ down(&kbase_dev_list_lock);
+ list_add(&kbdev->entry, &kbase_dev_list);
+ up(&kbase_dev_list_lock);
+ dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device));
+
+ mali_err = kbase_pm_init(kbdev);
+ if (MALI_ERROR_NONE != mali_err)
+ goto out_partial;
+
+ inited |= inited_pm;
+
+ if (kbdev->pm.callback_power_runtime_init) {
+ mali_err = kbdev->pm.callback_power_runtime_init(kbdev);
+ if (MALI_ERROR_NONE != mali_err)
+ goto out_partial;
+
+ inited |= inited_pm_runtime_init;
+ }
+
+ mali_err = kbase_mem_init(kbdev);
+ if (MALI_ERROR_NONE != mali_err)
+ goto out_partial;
+
+ inited |= inited_mem;
+
+ mali_err = kbase_job_slot_init(kbdev);
+ if (MALI_ERROR_NONE != mali_err)
+ goto out_partial;
+
+ inited |= inited_job_slot;
+
+ mali_err = kbasep_js_devdata_init(kbdev);
+ if (MALI_ERROR_NONE != mali_err)
+ goto out_partial;
+
+ inited |= inited_js;
+
+ err = kbase_install_interrupts(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_irqs;
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ if (device_create_file(kbdev->dev, &dev_attr_sc_split)) {
+ dev_err(kbdev->dev, "Couldn't create sc_split sysfs file\n");
+ goto out_partial;
+ }
+
+ inited |= inited_sc_split;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+#ifdef CONFIG_DEBUG_FS
+ if (kbasep_gpu_memory_debugfs_init(kbdev)) {
+ dev_err(kbdev->dev, "Couldn't create gpu_memory debugfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_gpu_memory;
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEBUG
+
+ if (device_create_file(kbdev->dev, &dev_attr_debug_command)) {
+ dev_err(kbdev->dev, "Couldn't create debug_command sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_debug;
+
+ if (device_create_file(kbdev->dev, &dev_attr_js_softstop_always)) {
+ dev_err(kbdev->dev, "Couldn't create js_softstop_always sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_softstop;
+#endif /* CONFIG_MALI_DEBUG */
+
+#if !MALI_CUSTOMER_RELEASE
+ if (device_create_file(kbdev->dev, &dev_attr_js_timeouts)) {
+ dev_err(kbdev->dev, "Couldn't create js_timeouts sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_timeouts;
+
+ if (device_create_file(kbdev->dev, &dev_attr_force_replay)) {
+ dev_err(kbdev->dev, "Couldn't create force_replay sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_force_replay;
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ if (kbasep_trace_timeline_debugfs_init(kbdev)) {
+ dev_err(kbdev->dev, "Couldn't create mali_timeline_defs debugfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_timeline;
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_MALI_DEVFREQ
+ kbase_devfreq_init(kbdev);
+#endif
+
+ mali_err = kbase_pm_powerup(kbdev);
+ if (MALI_ERROR_NONE == mali_err) {
+ inited |= inited_pm_powerup;
+#ifdef CONFIG_MALI_DEBUG
+#if !defined(CONFIG_MALI_NO_MALI)
+ if (MALI_ERROR_NONE != kbasep_common_test_interrupt_handlers(kbdev)) {
+ dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+ err = -EINVAL;
+ goto out_partial;
+ }
+#endif /* CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+ /* initialise the kctx list */
+ mutex_init(&kbdev->kctx_list_lock);
+ INIT_LIST_HEAD(&kbdev->kctx_list);
+ return 0;
+ } else {
+ /* Failed to power up the GPU. */
+ dev_err(kbdev->dev, "GPU power up failed.\n");
+ err = -ENODEV;
+ }
+
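+ /*
+ * Tear down in reverse order of initialisation. The 'inited' bitmask
+ * records which stages completed, so only those stages are undone here.
+ */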
+ out_partial:
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ if (inited & inited_timeline)
+ kbasep_trace_timeline_debugfs_term(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+#if !MALI_CUSTOMER_RELEASE
+ if (inited & inited_force_replay)
+ device_remove_file(kbdev->dev, &dev_attr_force_replay);
+ if (inited & inited_js_timeouts)
+ device_remove_file(kbdev->dev, &dev_attr_js_timeouts);
+#endif /* !MALI_CUSTOMER_RELEASE */
+#ifdef CONFIG_MALI_DEBUG
+ if (inited & inited_js_softstop)
+ device_remove_file(kbdev->dev, &dev_attr_js_softstop_always);
+
+ if (inited & inited_debug)
+ device_remove_file(kbdev->dev, &dev_attr_debug_command);
+
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_DEBUG_FS
+ if (inited & inited_gpu_memory)
+ kbasep_gpu_memory_debugfs_term(kbdev);
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ if (inited & inited_sc_split)
+ device_remove_file(kbdev->dev, &dev_attr_sc_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+ if (inited & inited_js)
+ kbasep_js_devdata_halt(kbdev);
+
+ if (inited & inited_job_slot)
+ kbase_job_slot_halt(kbdev);
+
+ if (inited & inited_mem)
+ kbase_mem_halt(kbdev);
+
+ if (inited & inited_pm_powerup)
+ kbase_pm_halt(kbdev);
+
+ if (inited & inited_irqs)
+ kbase_release_interrupts(kbdev);
+
+ if (inited & inited_js)
+ kbasep_js_devdata_term(kbdev);
+
+ if (inited & inited_job_slot)
+ kbase_job_slot_term(kbdev);
+
+ if (inited & inited_mem)
+ kbase_mem_term(kbdev);
+
+ if (inited & inited_pm_runtime_init) {
+ if (kbdev->pm.callback_power_runtime_term)
+ kbdev->pm.callback_power_runtime_term(kbdev);
+ }
+
+ if (inited & inited_pm)
+ kbase_pm_term(kbdev);
+
+ down(&kbase_dev_list_lock);
+ list_del(&kbdev->entry);
+ up(&kbase_dev_list_lock);
+#if KBASE_PM_EN
+ device_remove_file(kbdev->dev, &dev_attr_core_mask);
+ out_file_core_mask:
+ device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+ out_file_core_availability_policy:
+ device_remove_file(kbdev->dev, &dev_attr_power_policy);
+ out_file:
+#endif /*KBASE_PM_EN*/
+ misc_deregister(&kbdev->mdev);
+ out_misc:
+ put_device(kbdev->dev);
+ return err;
+}
+
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+ struct kbase_device *kbdev;
+ struct resource *reg_res;
+ struct kbase_attribute *platform_data;
+ int err;
+ int i;
+ struct mali_base_gpu_core_props *core_props;
+#ifdef CONFIG_MALI_NO_MALI
+ mali_error mali_err;
+#endif /* CONFIG_MALI_NO_MALI */
+#ifdef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ struct kbase_platform_config *config;
+ int attribute_count;
+
+ config = kbase_get_platform_config();
+ attribute_count = kbasep_get_config_attribute_count(config->attributes);
+
+ err = platform_device_add_data(pdev, config->attributes,
+ attribute_count * sizeof(config->attributes[0]));
+ if (err)
+ return err;
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+#endif /* CONFIG_OF */
+
+ kbdev = kbase_device_alloc();
+ if (!kbdev) {
+ dev_err(&pdev->dev, "Can't allocate device\n");
+ err = -ENOMEM;
+ goto out;
+ }
+#ifdef CONFIG_MALI_NO_MALI
+ mali_err = midg_device_create(kbdev);
+ if (MALI_ERROR_NONE != mali_err) {
+ dev_err(&pdev->dev, "Can't initialize dummy model\n");
+ err = -ENOMEM;
+ goto out_midg;
+ }
+#endif /* CONFIG_MALI_NO_MALI */
+
+ kbdev->dev = &pdev->dev;
+ platform_data = (struct kbase_attribute *)kbdev->dev->platform_data;
+
+ if (NULL == platform_data) {
+ dev_err(kbdev->dev, "Platform data not specified\n");
+ err = -ENOENT;
+ goto out_free_dev;
+ }
+
+ if (MALI_TRUE != kbasep_validate_configuration_attributes(kbdev, platform_data)) {
+ dev_err(kbdev->dev, "Configuration attributes failed to validate\n");
+ err = -EINVAL;
+ goto out_free_dev;
+ }
+ kbdev->config_attributes = platform_data;
+
+ /* 3 IRQ resources */
+ for (i = 0; i < 3; i++) {
+ struct resource *irq_res;
+ int irqtag;
+
+ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+ if (!irq_res) {
+ dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+ err = -ENOENT;
+ goto out_free_dev;
+ }
+
+#ifdef CONFIG_OF
+ if (!strcmp(irq_res->name, "JOB")) {
+ irqtag = JOB_IRQ_TAG;
+ } else if (!strcmp(irq_res->name, "MMU")) {
+ irqtag = MMU_IRQ_TAG;
+ } else if (!strcmp(irq_res->name, "GPU")) {
+ irqtag = GPU_IRQ_TAG;
+ } else {
+ dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+ irq_res->name);
+ err = -EINVAL;
+ goto out_free_dev;
+ }
+#else
+ irqtag = i;
+#endif /* CONFIG_OF */
+ kbdev->irqs[irqtag].irq = irq_res->start;
+ kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK);
+ }
+
+ /* the first memory resource is the physical address of the GPU registers */
+ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!reg_res) {
+ dev_err(kbdev->dev, "Invalid register resource\n");
+ err = -ENOENT;
+ goto out_free_dev;
+ }
+
+ kbdev->reg_start = reg_res->start;
+ kbdev->reg_size = resource_size(reg_res);
+
+ err = kbase_common_reg_map(kbdev);
+ if (err)
+ goto out_free_dev;
+
+ kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+ if (IS_ERR(kbdev->clock)) {
+ err = PTR_ERR(kbdev->clock);
+ if (err == -EPROBE_DEFER)
+ goto out_reg_map;
+ dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+ kbdev->clock = NULL;
+ /* Allow probe to continue without clock. */
+ } else {
+ err = clk_prepare_enable(kbdev->clock);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to prepare and enable clock (%d)\n", err);
+ goto out_clock_get;
+ }
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ kbdev->mali_debugfs_directory = debugfs_create_dir("mali", NULL);
+ if (NULL == kbdev->mali_debugfs_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+ err = -ENOMEM;
+ goto out_clock_enable;
+ }
+ kbdev->memory_profile_directory = debugfs_create_dir("mem",
+ kbdev->mali_debugfs_directory);
+ if (NULL == kbdev->memory_profile_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali mem debugfs directory\n");
+ err = -ENOMEM;
+ goto out_mali_debugfs_remove;
+ }
+ if (kbasep_jd_debugfs_init(kbdev)) {
+ dev_err(kbdev->dev, "Couldn't create mali jd debugfs entries\n");
+ err = -ENOMEM;
+ goto out_mem_profile_remove;
+ }
+#endif /* CONFIG_DEBUG_FS */
+
+
+ if (MALI_ERROR_NONE != kbase_device_init(kbdev)) {
+ dev_err(kbdev->dev, "Can't initialize device\n");
+
+ err = -ENOMEM;
+ goto out_debugfs_remove;
+ }
+
+ /* obtain min/max configured gpu frequencies */
+ core_props = &(kbdev->gpu_props.props.core_props);
+ core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+ kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+ err = kbase_common_device_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Failed kbase_common_device_init\n");
+ goto out_term_dev;
+ }
+ return 0;
+
+out_term_dev:
+ kbase_device_term(kbdev);
+out_debugfs_remove:
+#ifdef CONFIG_DEBUG_FS
+ kbasep_jd_debugfs_term(kbdev);
+out_mem_profile_remove:
+ debugfs_remove(kbdev->memory_profile_directory);
+out_mali_debugfs_remove:
+ debugfs_remove(kbdev->mali_debugfs_directory);
+out_clock_enable:
+#endif /* CONFIG_DEBUG_FS */
+ clk_disable_unprepare(kbdev->clock);
+out_clock_get:
+ clk_put(kbdev->clock);
+out_reg_map:
+ kbase_common_reg_unmap(kbdev);
+out_free_dev:
+#ifdef CONFIG_MALI_NO_MALI
+ midg_device_destroy(kbdev);
+out_midg:
+#endif /* CONFIG_MALI_NO_MALI */
+ kbase_device_free(kbdev);
+out:
+ return err;
+}
+
+static int kbase_common_device_remove(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+ kbase_devfreq_term(kbdev);
+#endif
+
+ if (kbdev->pm.callback_power_runtime_term)
+ kbdev->pm.callback_power_runtime_term(kbdev);
+#if KBASE_PM_EN
+ /* Remove the sys power policy file */
+ device_remove_file(kbdev->dev, &dev_attr_power_policy);
+ device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+ device_remove_file(kbdev->dev, &dev_attr_core_mask);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ kbasep_trace_timeline_debugfs_term(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#ifdef CONFIG_MALI_DEBUG
+ device_remove_file(kbdev->dev, &dev_attr_js_softstop_always);
+ device_remove_file(kbdev->dev, &dev_attr_debug_command);
+#endif /* CONFIG_MALI_DEBUG */
+#if !MALI_CUSTOMER_RELEASE
+ device_remove_file(kbdev->dev, &dev_attr_js_timeouts);
+ device_remove_file(kbdev->dev, &dev_attr_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+#ifdef CONFIG_DEBUG_FS
+ kbasep_gpu_memory_debugfs_term(kbdev);
+#endif
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ device_remove_file(kbdev->dev, &dev_attr_sc_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+ kbasep_js_devdata_halt(kbdev);
+ kbase_job_slot_halt(kbdev);
+ kbase_mem_halt(kbdev);
+ kbase_pm_halt(kbdev);
+
+ kbase_release_interrupts(kbdev);
+
+ kbasep_js_devdata_term(kbdev);
+ kbase_job_slot_term(kbdev);
+ kbase_mem_term(kbdev);
+ kbase_pm_term(kbdev);
+
+ down(&kbase_dev_list_lock);
+ list_del(&kbdev->entry);
+ up(&kbase_dev_list_lock);
+
+ misc_deregister(&kbdev->mdev);
+ put_device(kbdev->dev);
+ kbase_common_reg_unmap(kbdev);
+ kbase_device_term(kbdev);
+#ifdef CONFIG_DEBUG_FS
+ kbasep_jd_debugfs_term(kbdev);
+ debugfs_remove(kbdev->memory_profile_directory);
+ debugfs_remove(kbdev->mali_debugfs_directory);
+#endif /* CONFIG_DEBUG_FS */
+ if (kbdev->clock) {
+ clk_disable_unprepare(kbdev->clock);
+ clk_put(kbdev->clock);
+ kbdev->clock = NULL;
+ }
+#ifdef CONFIG_MALI_NO_MALI
+ midg_device_destroy(kbdev);
+#endif /* CONFIG_MALI_NO_MALI */
+ kbase_device_free(kbdev);
+
+ return 0;
+}
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ return kbase_common_device_remove(kbdev);
+}
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+ kbase_pm_suspend(kbdev);
+ return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_resume_device(kbdev->devfreq);
+#endif
+ return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+ if (kbdev->pm.callback_power_runtime_off) {
+ kbdev->pm.callback_power_runtime_off(kbdev);
+ dev_dbg(dev, "runtime suspend\n");
+ }
+ return 0;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev The device to resume
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+int kbase_device_runtime_resume(struct device *dev)
+{
+ int ret = 0;
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->pm.callback_power_runtime_on) {
+ ret = kbdev->pm.callback_power_runtime_on(kbdev);
+ dev_dbg(dev, "runtime resume\n");
+ }
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_resume_device(kbdev->devfreq);
+#endif
+
+ return ret;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime idle callback from the OS.
+ *
+ * This is called by Linux when the device appears to be inactive and might
+ * be placed into a low-power state.
+ *
+ * @param dev The device being checked for idleness
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_idle(struct device *dev)
+{
+ /* Avoid pm_runtime_suspend being called */
+ return 1;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+ .suspend = kbase_device_suspend,
+ .resume = kbase_device_resume,
+#ifdef CONFIG_PM_RUNTIME
+ .runtime_suspend = kbase_device_runtime_suspend,
+ .runtime_resume = kbase_device_runtime_resume,
+ .runtime_idle = kbase_device_runtime_idle,
+#endif /* CONFIG_PM_RUNTIME */
+};
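+
+/*
+ * Note (illustrative, not mandated by this file): the runtime_* callbacks
+ * above only take effect once runtime PM has been enabled for the device.
+ * A platform integration would typically do this from its
+ * callback_power_runtime_init hook, e.g. by calling
+ * pm_runtime_enable(kbdev->dev).
+ */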
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+ { .compatible = "arm,malit6xx" },
+ { .compatible = "arm,mali-midgard" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
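+
+/*
+ * Illustrative device-tree node (not part of the driver) matching the table
+ * above; the reg and interrupt values are hypothetical and platform-specific.
+ * The interrupt-names must match the "JOB", "MMU" and "GPU" strings parsed in
+ * kbase_platform_device_probe():
+ *
+ * gpu@2d000000 {
+ * compatible = "arm,mali-midgard";
+ * reg = <0x2d000000 0x4000>;
+ * interrupts = <0 33 4>, <0 34 4>, <0 32 4>;
+ * interrupt-names = "JOB", "MMU", "GPU";
+ * };
+ */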
+
+static struct platform_driver kbase_platform_driver = {
+ .probe = kbase_platform_device_probe,
+ .remove = kbase_platform_device_remove,
+ .driver = {
+ .name = kbase_drv_name,
+ .owner = THIS_MODULE,
+ .pm = &kbase_pm_ops,
+ .of_match_table = of_match_ptr(kbase_dt_ids),
+ },
+};
+
+/*
+ * When using Device Tree, the driver no longer provides a shortcut to create
+ * the Mali platform device.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else /* CONFIG_OF */
+
+extern int kbase_platform_early_init(void);
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+extern int kbase_platform_fake_register(void);
+extern void kbase_platform_fake_unregister(void);
+#endif
+
+static int __init kbase_driver_init(void)
+{
+ int ret;
+
+ ret = kbase_platform_early_init();
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ ret = kbase_platform_fake_register();
+ if (ret)
+ return ret;
+#endif
+ ret = platform_driver_register(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ if (ret)
+ kbase_platform_fake_unregister();
+#endif
+
+ return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+ platform_driver_unregister(&kbase_platform_driver);
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ kbase_platform_fake_unregister();
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+ trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+ trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+ trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+ trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+ trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+ trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+ trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+ trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c
new file mode 100755
index 000000000000..a1ef0109d2b4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_cpuprops.c
+ * Base kernel property query APIs
+ */
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_uku.h"
+#include <mali_kbase_config.h>
+#include <linux/cache.h>
+#include <linux/cpufreq.h>
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/cputype.h>
+#endif
+
+#define KBASE_DEFAULT_CPU_NUM 0
+
+#define L1_DCACHE_LINE_SIZE_LOG2 L1_CACHE_SHIFT
+
+/**
+ * @brief Macros used to extract cpu id info
+ * @see Doc's for Main ID register
+ */
+#define KBASE_CPUPROPS_ID_GET_REV(cpuid) ((cpuid) & 0x0F) /* [3:0] Revision */
+#define KBASE_CPUPROPS_ID_GET_PART_NR(cpuid) (((cpuid) >> 4) & 0xFFF) /* [15:4] Part number */
+#define KBASE_CPUPROPS_ID_GET_ARCH(cpuid) (((cpuid) >> 16) & 0x0F) /* [19:16] Architecture */
+#define KBASE_CPUPROPS_ID_GET_VARIANT(cpuid) (((cpuid) >> 20) & 0x0F) /* [23:20] Variant */
+#define KBASE_CPUPROPS_ID_GET_CODE(cpuid) (((cpuid) >> 24) & 0xFF) /* [31:24] ASCII code of implementer trademark */
+
+/* Below value sourced from OSK */
+#define L1_DCACHE_SIZE ((u32)0x00008000)
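+
+/*
+ * Worked example (illustrative only): for a Main ID register value of
+ * 0x410FC0F0 (a Cortex-A15 r0p0), the macros above give
+ * implementer = 0x41 ('A'), variant = 0x0, arch = 0xF, part = 0xC0F
+ * and rev = 0x0.
+ */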
+
+/**
+ * @brief Retrieves detailed CPU info from given cpu_val ( ID reg )
+ *
+ * @param kbase_props CPU props to be filled-in with cpu id info
+ *
+ */
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+ kbase_props->props.cpu_id.id = read_cpuid_id();
+
+ kbase_props->props.cpu_id.valid = 1;
+ kbase_props->props.cpu_id.rev = KBASE_CPUPROPS_ID_GET_REV(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.part = KBASE_CPUPROPS_ID_GET_PART_NR(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.arch = KBASE_CPUPROPS_ID_GET_ARCH(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.variant = KBASE_CPUPROPS_ID_GET_VARIANT(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.implementer = KBASE_CPUPROPS_ID_GET_CODE(kbase_props->props.cpu_id.id);
+}
+#else
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+ kbase_props->props.cpu_id.id = 0;
+ kbase_props->props.cpu_id.valid = 0;
+ kbase_props->props.cpu_id.rev = 0;
+ kbase_props->props.cpu_id.part = 0;
+ kbase_props->props.cpu_id.arch = 0;
+ kbase_props->props.cpu_id.variant = 0;
+ kbase_props->props.cpu_id.implementer = 'N';
+}
+#endif
+
+/**
+ * This function (and file!) is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+mali_error kbase_cpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props)
+{
+ unsigned int max_cpu_freq;
+
+ kbase_props->props.cpu_l1_dcache_line_size_log2 = L1_DCACHE_LINE_SIZE_LOG2;
+ kbase_props->props.cpu_l1_dcache_size = L1_DCACHE_SIZE;
+ kbase_props->props.cpu_flags = BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN;
+
+ kbase_props->props.nr_cores = NR_CPUS;
+ kbase_props->props.cpu_page_size_log2 = PAGE_SHIFT;
+ kbase_props->props.available_memory_size = totalram_pages << PAGE_SHIFT;
+
+ kbasep_cpuprops_uk_get_cpu_id_info(kbase_props);
+
+ /* check if kernel supports dynamic frequency scaling */
+ max_cpu_freq = cpufreq_quick_get_max(KBASE_DEFAULT_CPU_NUM);
+ if (max_cpu_freq != 0) {
+ /* convert from kHz to MHz */
+ kbase_props->props.max_cpu_clock_speed_mhz = max_cpu_freq / 1000;
+ } else {
+ /* fallback if CONFIG_CPU_FREQ turned off */
+ int result;
+ kbase_cpuprops_clock_speed_function kbase_cpuprops_uk_get_clock_speed;
+
+ kbase_cpuprops_uk_get_clock_speed = (kbase_cpuprops_clock_speed_function) kbasep_get_config_value(kctx->kbdev, kctx->kbdev->config_attributes, KBASE_CONFIG_ATTR_CPU_SPEED_FUNC);
+ result = kbase_cpuprops_uk_get_clock_speed(&kbase_props->props.max_cpu_clock_speed_mhz);
+ if (result != 0)
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ return MALI_ERROR_NONE;
+}
+
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
new file mode 100755
index 000000000000..b107f4ac5a29
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+/**
+ * @file mali_kbase_cpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_CPUPROPS_H_
+#define _KBASE_CPUPROPS_H_
+
+#include <malisw/mali_malisw.h>
+
+/* Forward declarations */
+struct kbase_uk_cpuprops;
+
+/**
+ * This file is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+/**
+ * @brief Provides CPU properties data.
+ *
+ * Fill the struct kbase_uk_cpuprops with values from CPU configuration.
+ *
+ * @param kctx The kbase context
+ * @param kbase_props A copy of the struct kbase_uk_cpuprops structure from userspace
+ *
+ * @return MALI_ERROR_NONE on success. Any other value indicates failure.
+ */
+mali_error kbase_cpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props);
+
+#endif /*_KBASE_CPUPROPS_H_*/
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100755
index 000000000000..ce571c9970be
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+ NULL,
+ NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+ kbasep_debug_assert_registered_cb.func = func;
+ kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+ if (kbasep_debug_assert_registered_cb.func != NULL)
+ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100755
index 000000000000..e4ced8c76145
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If set to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If non-zero, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+typedef struct kbasep_debug_assert_cb {
+ kbase_debug_assert_hook *func;
+ void *param;
+} kbasep_debug_assert_cb;
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION CSTD_FUNC
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+ do { \
+ pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+ pr_err(__VA_ARGS__);\
+ pr_err("\n");\
+ } while (MALI_FALSE)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook();
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP();
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+ KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+ /**
+ * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+ * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ * @param ... Message to display when @a expr is false, as a format string followed by format arguments.
+ */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+ do { \
+ if (!(expr)) { \
+ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+ KBASE_CALL_ASSERT_HOOK();\
+ BUG();\
+ } \
+ } while (MALI_FALSE)
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
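+
+/*
+ * Usage sketch (illustrative only): callers typically assert pointer and
+ * range assumptions, e.g.
+ *
+ * KBASE_DEBUG_ASSERT(kctx != NULL);
+ * KBASE_DEBUG_ASSERT_MSG(js < BASE_JM_MAX_NR_SLOTS,
+ * "job slot %d out of range", js);
+ */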
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif /* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
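+
+/*
+ * Registration sketch (illustrative only; my_assert_hook is a hypothetical
+ * caller-supplied function, not part of this driver):
+ *
+ * static void my_assert_hook(void *param)
+ * {
+ * struct kbase_device *kbdev = param;
+ *
+ * dev_err(kbdev->dev, "kbase assertion hit\n");
+ * }
+ *
+ * kbase_debug_assert_register_hook(my_assert_hook, kbdev);
+ */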
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif /* _KBASE_DEBUG_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100755
index 000000000000..0be33ace2821
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,967 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Definitions (types, defines, etc.) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mem_alloc.h>
+#include <mali_kbase_mmu_hw.h>
+
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif /* CONFIG_SYNC */
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT 1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT 500
+
+/**
+ * Prevent soft-stops from occurring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+/**
+ * Prevent hard-stops from occurring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS 16
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS 16
+
+/* mmu */
+#define ENTRY_IS_ATE 1ULL
+#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_PTE 3ULL
+
+#define MIDGARD_MMU_VA_BITS 48
+
+#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL 0
+#else
+#define MIDGARD_MMU_TOPLEVEL 1
+#endif
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
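+
+/*
+ * Illustrative use of the mask (not a quote from the trace code): ring
+ * buffer indices are advanced with wrap-around, e.g.
+ * next_in = (next_in + 1) & KBASE_TRACE_MASK;
+ */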
+
+#include "mali_kbase_js_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stopped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ * entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+
+struct kbase_jd_atom_dependency
+{
+ struct kbase_jd_atom *atom;
+ u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static INLINE const struct kbase_jd_atom* const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency* dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return (const struct kbase_jd_atom* const )(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static INLINE const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency* dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return dep->dep_type;
+}
+
+/**
+ * @brief Setter for a dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep The kbase jd atom dependency.
+ * @param[in] a The ATOM to be set as a dependency.
+ * @param type The ATOM dependency type to be set.
+ *
+ */
+static INLINE void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency* const_dep,
+ struct kbase_jd_atom * a,
+ u8 type)
+{
+ struct kbase_jd_atom_dependency* dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (REINTERPRET_CAST(struct kbase_jd_atom_dependency* )const_dep);
+
+ dep->atom = a;
+ dep->dep_type = type;
+}
+
+/**
+ * @brief Clears a dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep The kbase jd atom dependency to be cleared.
+ *
+ */
+static INLINE void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency* const_dep)
+{
+ struct kbase_jd_atom_dependency* dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (REINTERPRET_CAST(struct kbase_jd_atom_dependency* )const_dep);
+
+ dep->atom = NULL;
+ dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
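+
+/*
+ * Usage sketch (illustrative only, assuming a BASE_JD_DEP_TYPE_DATA
+ * dependency type defined in the base headers):
+ *
+ * kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+ * ...
+ * kbase_jd_katom_dep_clear(&katom->dep[0]);
+ */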
+
+struct kbase_ext_res
+{
+ mali_addr64 gpu_address;
+ struct kbase_mem_phy_alloc * alloc;
+};
+
+struct kbase_jd_atom {
+ struct work_struct work;
+ ktime_t start_timestamp;
+ u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+ struct base_jd_udata udata;
+ struct kbase_context *kctx;
+
+ struct list_head dep_head[2];
+ struct list_head dep_item[2];
+ const struct kbase_jd_atom_dependency dep[2];
+
+ u16 nr_extres;
+ struct kbase_ext_res * extres;
+
+ u32 device_nr;
+ u64 affinity;
+ u64 jc;
+ enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+ struct list_head node;
+ struct kds_resource_set *kds_rset;
+ mali_bool kds_dep_satisfied;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+ struct sync_fence *fence;
+ struct sync_fence_waiter sync_waiter;
+#endif /* CONFIG_SYNC */
+
+ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+ enum base_jd_event_code event_code;
+ base_jd_core_req core_req; /**< core requirements */
+ /** Job Slot to retry submitting to if submission from IRQ handler failed
+ *
+ * NOTE: see if this can be unified into the another member e.g. the event */
+ int retry_submit_on_slot;
+
+ union kbasep_js_policy_job_info sched_info;
+ /* atom priority scaled to nice range with +20 offset 0..39 */
+ int nice_prio;
+
+ int poking; /* BASE_HW_ISSUE_8316 */
+
+ wait_queue_head_t completed;
+ enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ int work_id;
+#endif
+ /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+ int slot_nr;
+
+ u32 atom_flags;
+
+ /* Number of times this atom has been retried. Used by replay soft job.
+ */
+ int retry_count;
+};
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+ struct mutex lock;
+ struct kbasep_js_kctx_info sched_info;
+ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+ /** Tracks all job-dispatch jobs. This includes those not tracked by
+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+ u32 job_nr;
+
+ /** Waitq that reflects whether there are no jobs (including SW-only
+ * dependency jobs). This is set when no jobs are present on the ctx,
+ * and clear when there are jobs.
+ *
+ * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+ * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+ * and SW-only dependency jobs.
+ *
+ * This waitq can be waited upon to find out when the context jobs are all
+ * done/cancelled (including those that might've been blocked on
+ * dependencies) - and so, whether it can be terminated. However, it should
+ * only be terminated once it is neither present in the policy-queue (see
+ * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+ * kbasep_js_kctx_info::ctx::is_scheduled).
+ *
+ * Since the waitq is only set under kbase_jd_context::lock,
+ * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+ * guarantee that the setter has completed its work on the kbase_context
+ *
+ * This must be updated atomically with:
+ * - kbase_jd_context::job_nr */
+ wait_queue_head_t zero_jobs_wait;
+
+ /** Job Done workqueue. */
+ struct workqueue_struct *job_done_wq;
+
+ spinlock_t tb_lock;
+ u32 *tb;
+ size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+ struct kds_callback kds_cb;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_t work_id;
+#endif
+};
+
+struct kbase_jm_slot {
+ /* The number of slots must be a power of two */
+#define BASE_JM_SUBMIT_SLOTS 16
+#define BASE_JM_SUBMIT_SLOTS_MASK (BASE_JM_SUBMIT_SLOTS - 1)
+
+ struct kbase_jd_atom *submitted[BASE_JM_SUBMIT_SLOTS];
+
+ struct kbase_context *last_context;
+
+ u8 submitted_head;
+ u8 submitted_nr;
+ u8 job_chain_flag;
+
+};
+
+struct kbase_device_info {
+ u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316 */
+enum {
+ KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0,
+ KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316 */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+ u64 transtab;
+ u64 memattr;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+ int number;
+
+ struct workqueue_struct *pf_wq;
+ struct work_struct work_pagefault;
+ struct work_struct work_busfault;
+ enum kbase_mmu_fault_type fault_type;
+ u32 fault_status;
+ mali_addr64 fault_addr;
+ struct mutex transaction_mutex;
+
+ struct kbase_mmu_setup current_setup;
+
+ /* BASE_HW_ISSUE_8316 */
+ struct workqueue_struct *poke_wq;
+ struct work_struct poke_work;
+ /** Protected by kbasep_js_device_data::runpool_irq::lock */
+ int poke_refcount;
+ /** Protected by kbasep_js_device_data::runpool_irq::lock */
+ kbase_as_poke_state poke_state;
+ struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+ return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+ return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+/**
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+ /** State where instrumentation is not active */
+ KBASE_INSTR_STATE_DISABLED = 0,
+ /** State machine is active and ready for a command. */
+ KBASE_INSTR_STATE_IDLE,
+ /** Hardware is currently dumping a frame. */
+ KBASE_INSTR_STATE_DUMPING,
+ /** We've requested a clean to occur on a workqueue */
+ KBASE_INSTR_STATE_REQUEST_CLEAN,
+ /** Hardware is currently cleaning and invalidating caches. */
+ KBASE_INSTR_STATE_CLEANING,
+ /** Cache clean completed, and either a) a dump is complete, or
+ * b) instrumentation can now be setup. */
+ KBASE_INSTR_STATE_CLEANED,
+ /** kbasep_reset_timeout_worker() has started (but not completed) a
+ * reset. This generally indicates the current action should be aborted, and
+ * kbasep_reset_timeout_worker() will handle the cleanup */
+ KBASE_INSTR_STATE_RESETTING,
+ /** An error has occurred during DUMPING (page fault). */
+ KBASE_INSTR_STATE_FAULT
+};
+
+void kbasep_reset_timeout_worker(struct work_struct *data);
+enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *data);
+
+struct kbasep_mem_device {
+ atomic_t used_pages; /* Tracks usage of OS shared memory. Updated
+ when OS memory is allocated/freed. */
+
+};
+
+
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+ * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef KBASE_TRACE_CODE_MAKE_CODE
+ /* Comma on its own, to extend the list */
+ ,
+ /* Must be the last in the enum */
+ KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1)
+
+struct kbase_trace {
+ struct timespec timestamp;
+ u32 thread_id;
+ u32 cpu;
+ void *ctx;
+ mali_bool katom;
+ int atom_number;
+ u64 atom_udata[2];
+ u64 gpu_addr;
+ unsigned long info_val;
+ u8 code;
+ u8 jobslot;
+ u8 refcount;
+ u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statistical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+ /* helper for tests */
+ KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+ /** Event reserved for backwards compatibility with 'init' events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+ /** The power state of the device has changed.
+ *
+ * Specifically, the device has reached a desired or available state.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+ /** The GPU is becoming active.
+ *
+ * This event is sent when the first context is about to use the GPU.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+ /** The GPU is becoming idle.
+ *
+ * This event is sent when the last context has finished using the GPU.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+ /** Event reserved for backwards compatibility with 'policy_change'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+ /** Event reserved for backwards compatibility with 'system_suspend'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+ /** Event reserved for backwards compatibility with 'system_resume'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+ /** The job scheduler is requesting to power up/down cores.
+ *
+ * This event is sent when:
+ * - powered down cores are needed to complete a job
+ * - powered up cores are not needed anymore
+ */
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+ KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+ atomic_t jd_atoms_in_flight;
+ u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+ /** DebugFS entry */
+ struct dentry *dentry;
+
+ /* Note: strictly speaking, not needed, because it's in sync with
+ * kbase_device::jm_slots[]::submitted_nr
+ *
+ * But it's kept as an example of how to add global timeline tracking
+ * information
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+ * accessing this */
+ u8 slot_atoms_submitted[BASE_JM_SUBMIT_SLOTS];
+
+ /* Last UID for each PM event */
+ atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+ /* Counter for generating PM event UIDs */
+ atomic_t pm_event_uid_counter;
+ /*
+ * L2 transition state - MALI_TRUE indicates that the transition is ongoing.
+ * Expected to be protected by pm.power_change_lock */
+ mali_bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+ struct list_head link;
+ struct kbase_context *kctx;
+};
+
+#define DEVNAME_SIZE 16
+
+struct kbase_device {
+ /** jm_slots is protected by kbasep_js_device_data::runpool_irq::lock */
+ struct kbase_jm_slot jm_slots[BASE_JM_MAX_NR_SLOTS];
+ s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+ struct list_head entry;
+ struct device *dev;
+ struct miscdevice mdev;
+ u64 reg_start;
+ size_t reg_size;
+ void __iomem *reg;
+ struct resource *reg_res;
+ struct {
+ int irq;
+ int flags;
+ } irqs[3];
+#ifdef CONFIG_HAVE_CLK
+ struct clk *clock;
+#endif
+ char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+ void *model;
+ struct kmem_cache *irq_slab;
+ struct workqueue_struct *irq_workq;
+ atomic_t serving_job_irq;
+ atomic_t serving_gpu_irq;
+ atomic_t serving_mmu_irq;
+ spinlock_t reg_op_lock;
+#endif /* CONFIG_MALI_NO_MALI */
+
+ struct kbase_pm_device_data pm;
+ struct kbasep_js_device_data js_data;
+ struct kbasep_mem_device memdev;
+ struct kbase_as as[BASE_MAX_NR_AS];
+
+ spinlock_t mmu_mask_change;
+
+ kbase_gpu_props gpu_props;
+
+ /** List of SW workarounds for HW issues */
+ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+ /** List of features available */
+ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+ /* Cached present bitmaps - these are the same as the corresponding hardware registers */
+ u64 shader_present_bitmap;
+ u64 tiler_present_bitmap;
+ u64 l2_present_bitmap;
+ u64 l3_present_bitmap;
+
+ /* Bitmaps of cores that are currently in use (running jobs).
+ * These should be kept up to date by the job scheduler.
+ *
+ * pm.power_change_lock should be held when accessing these members.
+ *
+ * kbase_pm_check_transitions_nolock() should be called when bits are
+ * cleared to update the power management system and allow transitions to
+ * occur. */
+ u64 shader_inuse_bitmap;
+
+ /* Refcount for cores in use */
+ u32 shader_inuse_cnt[64];
+
+ /* Bitmaps of cores the JS needs for jobs ready to run */
+ u64 shader_needed_bitmap;
+
+ /* Refcount for cores needed */
+ u32 shader_needed_cnt[64];
+
+ u32 tiler_inuse_cnt;
+
+ u32 tiler_needed_cnt;
+
+ /* struct for keeping track of the disjoint information
+ *
+ * The state is > 0 if the GPU is in a disjoint state. Otherwise 0
+ * The count is the number of disjoint events that have occurred on the GPU
+ */
+ struct {
+ atomic_t count;
+ atomic_t state;
+ } disjoint_event;
+
+ /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+ u32 l2_users_count;
+
+ /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+ * submitted to these cores). These are updated by the power management code. The job scheduler should avoid
+ * submitting new jobs to any cores that are not marked as available.
+ *
+ * pm.power_change_lock should be held when accessing these members.
+ */
+ u64 shader_available_bitmap;
+ u64 tiler_available_bitmap;
+ u64 l2_available_bitmap;
+
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+
+ s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */
+ s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */
+
+ /* Structure used for instrumentation and HW counters dumping */
+ struct {
+ /* The lock should be used when accessing any of the following members */
+ spinlock_t lock;
+
+ struct kbase_context *kctx;
+ u64 addr;
+ wait_queue_head_t wait;
+ int triggered;
+ enum kbase_instr_state state;
+ wait_queue_head_t cache_clean_wait;
+ struct workqueue_struct *cache_clean_wq;
+ struct work_struct cache_clean_work;
+
+ struct kbase_context *suspended_kctx;
+ struct kbase_uk_hwcnt_setup suspended_state;
+ } hwcnt;
+
+ /* Set when we're about to reset the GPU */
+ atomic_t reset_gpu;
+#define KBASE_RESET_GPU_NOT_PENDING 0 /* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_PREPARED 1 /* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_COMMITTED 2 /* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_HAPPENING 3 /* The GPU reset process is currently occurring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+
+ /* Work queue and work item for performing the reset in */
+ struct workqueue_struct *reset_workq;
+ struct work_struct reset_work;
+ wait_queue_head_t reset_wait;
+ struct hrtimer reset_timer;
+
+ /* Value to be written to the irq_throttle register each time an irq is served */
+ atomic_t irq_throttle_cycles;
+
+ const struct kbase_attribute *config_attributes;
+
+#if KBASE_TRACE_ENABLE
+ spinlock_t trace_lock;
+ u16 trace_first_out;
+ u16 trace_next_in;
+ struct kbase_trace *trace_rbuf;
+#endif
+
+#if !MALI_CUSTOMER_RELEASE
+ /* This is used to override the current job scheduler values for
+ * KBASE_CONFIG_ATTR_JS_STOP_STOP_TICKS_SS
+ * KBASE_CONFIG_ATTR_JS_STOP_STOP_TICKS_CL
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL
+ * KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL
+ * KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS.
+ *
+ * These values are set via the js_timeouts sysfs file.
+ */
+ u32 js_soft_stop_ticks;
+ u32 js_soft_stop_ticks_cl;
+ u32 js_hard_stop_ticks_ss;
+ u32 js_hard_stop_ticks_cl;
+ u32 js_hard_stop_ticks_nss;
+ u32 js_reset_ticks_ss;
+ u32 js_reset_ticks_cl;
+ u32 js_reset_ticks_nss;
+#endif
+
+ struct mutex cacheclean_lock;
+
+ /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+ void *platform_context;
+
+ /** Count of contexts keeping the GPU powered */
+ atomic_t keep_gpu_powered_count;
+
+ /* List of kbase_contexts created */
+ struct list_head kctx_list;
+ struct mutex kctx_list_lock;
+
+#ifdef CONFIG_MALI_MIDGARD_RT_PM
+ struct delayed_work runtime_pm_workqueue;
+#endif
+
+#ifdef CONFIG_PM_DEVFREQ
+ struct devfreq_dev_profile devfreq_profile;
+ struct devfreq *devfreq;
+ unsigned long freq;
+#ifdef CONFIG_DEVFREQ_THERMAL
+ struct devfreq_cooling_device *devfreq_cooling;
+#endif
+#endif
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+ /* directory for debugfs entries */
+ struct dentry *mali_debugfs_directory;
+ /* debugfs entry for gpu_memory */
+ struct dentry *gpu_memory_dentry;
+ /* debugfs entry for trace */
+ struct dentry *trace_dentry;
+ /* directory for per-ctx memory profiling data */
+ struct dentry *memory_profile_directory;
+ /* Root directory for job dispatcher data */
+ struct dentry *jd_directory;
+#endif /* CONFIG_DEBUG_FS */
+
+ /* fbdump profiling controls set by gator */
+ u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+ /* Number of jobs that are run before a job is forced to fail and
+ * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+ * failures. */
+ int force_replay_limit;
+ /* Count of jobs between forced failures. Incremented on each job. A
+ * job is forced to fail once this is greater than or equal to
+ * force_replay_limit. */
+ int force_replay_count;
+ /* Core requirement for jobs to be failed and replayed. May be zero. */
+ base_jd_core_req force_replay_core_req;
+ /* MALI_TRUE if force_replay_limit should be randomized. The random
+ * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ */
+ mali_bool force_replay_random;
+#endif
+
+ /* Total number of created contexts */
+ atomic_t ctx_num;
+};
+
+struct kbase_context {
+ struct kbase_device *kbdev;
+ int id; /* System wide unique id */
+ phys_addr_t pgd;
+ struct list_head event_list;
+ struct mutex event_mutex;
+ mali_bool event_closed;
+ struct workqueue_struct *event_workq;
+
+ u64 mem_attrs;
+ bool is_compat;
+
+ atomic_t setup_complete;
+ atomic_t setup_in_progress;
+
+ mali_bool keep_gpu_powered;
+
+ u64 *mmu_teardown_pages;
+
+ phys_addr_t aliasing_sink_page;
+
+ struct mutex reg_lock; /* To be converted to a rwlock? */
+ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+ unsigned long cookies;
+ struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+ wait_queue_head_t event_queue;
+ pid_t tgid;
+ pid_t pid;
+
+ struct kbase_jd_context jctx;
+ atomic_t used_pages;
+ atomic_t nonmapped_pages;
+
+ struct kbase_mem_allocator osalloc;
+ struct kbase_mem_allocator * pgd_allocator;
+
+ struct list_head waiting_soft_jobs;
+#ifdef CONFIG_KDS
+ struct list_head waiting_kds_resource;
+#endif
+ /** This is effectively part of the Run Pool, because it only has a valid
+ * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+ *
+ * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+ * this.
+ *
+ * If the context relating to this as_nr is required, you must use
+ * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+ * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+ * to ensure the context doesn't disappear (but this has restrictions on what other locks
+ * you can take whilst doing this) */
+ int as_nr;
+
+ /* NOTE:
+ *
+ * Flags are in jctx.sched_info.ctx.flags
+ * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+ *
+ * All other flags must be added there */
+ spinlock_t mm_update_lock;
+ struct mm_struct * process_mm;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+ /* debugfs entry for memory profile */
+ struct dentry *mem_dentry;
+ /* Content of mem_profile file */
+ char *mem_profile_data;
+ /* Size of @c mem_profile_data */
+ size_t mem_profile_size;
+ /* Spinlock guarding data */
+ spinlock_t mem_profile_lock;
+ /* Per-context directory for JD data */
+ struct dentry *jd_ctx_dir;
+#endif /* CONFIG_DEBUG_FS */
+};
+
+enum kbase_reg_access_type {
+ REG_READ,
+ REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+ /* (1ULL << 8) bit is reserved */
+ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */
+ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
+};
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
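+/* Illustrative use of the helpers above (sketch only; "timer" is a
+ * hypothetical, already-initialised struct hrtimer):
+ *
+ *   hrtimer_start(&timer, HR_TIMER_DELAY_MSEC(500), HRTIMER_MODE_REL);
+ *
+ * arms a relative 500 ms high resolution timer.
+ */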
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS 100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+#endif /* _KBASE_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/mali_kbase_devfreq.c
new file mode 100755
index 000000000000..67e3caebe166
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_devfreq.c
@@ -0,0 +1,254 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ struct opp *opp;
+ unsigned long freq = 0;
+ int err;
+
+ freq = *target_freq;
+
+ rcu_read_lock();
+ opp = devfreq_recommended_opp(dev, &freq, flags);
+ rcu_read_unlock();
+ if (IS_ERR_OR_NULL(opp)) {
+ dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+ return PTR_ERR(opp);
+ }
+
+ /*
+ * Only update if there is a change of frequency
+ */
+ if (kbdev->freq == freq) {
+ *target_freq = freq;
+ return 0;
+ }
+
+ err = clk_set_rate(kbdev->clock, freq);
+ if (err) {
+ dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+ freq, *target_freq);
+ return err;
+ }
+
+ kbdev->freq = freq;
+ *target_freq = freq;
+
+ kbase_pm_reset_dvfs_utilisation(kbdev);
+
+ return 0;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ *freq = kbdev->freq;
+
+ return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ stat->current_frequency = kbdev->freq;
+
+ kbase_pm_get_dvfs_utilisation(kbdev,
+ &stat->total_time, &stat->busy_time);
+
+ stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat));
+#endif
+
+ return 0;
+}
+
+/* Weak definition to be overridden by platforms */
+int __weak setup_opps(void)
+{
+ return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+ struct devfreq_dev_profile *dp)
+{
+ int err, count;
+ int i = 0;
+ unsigned long freq = 0;
+ struct opp *opp;
+
+ err = setup_opps();
+ if (err)
+ return err;
+
+ rcu_read_lock();
+ count = opp_get_opp_count(kbdev->dev);
+ if (count < 0) {
+ rcu_read_unlock();
+ return count;
+ }
+ rcu_read_unlock();
+
+ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+ GFP_KERNEL);
+ if (!dp->freq_table)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ for (i = 0; i < count; i++, freq++) {
+ opp = opp_find_freq_ceil(kbdev->dev, &freq);
+ if (IS_ERR(opp))
+ break;
+
+ dp->freq_table[i] = freq;
+ }
+ rcu_read_unlock();
+
+ if (count != i)
+ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+ count, i);
+
+ dp->max_state = i;
+
+ return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+ struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+ kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+ struct devfreq_dev_profile *dp;
+ int err;
+
+ dev_dbg(kbdev->dev, "Init Mali devfreq\n");
+
+ if (!kbdev->clock)
+ return -ENODEV;
+
+ kbdev->freq = clk_get_rate(kbdev->clock);
+
+ dp = &kbdev->devfreq_profile;
+
+ dp->initial_freq = kbdev->freq;
+ dp->polling_ms = 100;
+ dp->target = kbase_devfreq_target;
+ dp->get_dev_status = kbase_devfreq_status;
+ dp->get_cur_freq = kbase_devfreq_cur_freq;
+ dp->exit = kbase_devfreq_exit;
+
+ if (kbase_devfreq_init_freq_table(kbdev, dp))
+ return -EFAULT;
+
+ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+ "simple_ondemand", NULL);
+ if (IS_ERR_OR_NULL(kbdev->devfreq)) {
+ kbase_devfreq_term_freq_table(kbdev);
+ return PTR_ERR(kbdev->devfreq);
+ }
+
+ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to register OPP notifier (%d)\n", err);
+ goto opp_notifier_failed;
+ }
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ {
+ struct devfreq_cooling_ops *callbacks;
+
+ callbacks = (void *)kbasep_get_config_value(kbdev,
+ kbdev->config_attributes,
+ KBASE_CONFIG_ATTR_POWER_MODEL_CALLBACKS);
+
+ kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+ kbdev->dev->of_node,
+ kbdev->devfreq,
+ callbacks);
+ if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+ err = PTR_ERR(kbdev->devfreq_cooling);
+ dev_err(kbdev->dev,
+ "Failed to register cooling device (%d)\n",
+ err);
+ goto cooling_failed;
+ }
+ }
+#endif
+
+ return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+ err = devfreq_remove_device(kbdev->devfreq);
+ if (err)
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ else
+ kbdev->devfreq = NULL;
+
+ return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+ int err;
+
+ dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+ err = devfreq_remove_device(kbdev->devfreq);
+ if (err)
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ else
+ kbdev->devfreq = NULL;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/mali_kbase_devfreq.h
new file mode 100755
index 000000000000..e3b333ac3550
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100755
index 000000000000..39043c96dbbb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,817 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_device.c
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports the tracing feature provided by the base module.
+ * Please keep it in sync with the value used by the base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) || defined(CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ)
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+extern struct kbase_attribute config_attributes_hw_issue_8408[];
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS || CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ */
+
+#if KBASE_TRACE_ENABLE
+STATIC CONST char *kbasep_trace_code_string[] = {
+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+ * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+STATIC mali_error kbasep_trace_init(struct kbase_device *kbdev);
+STATIC void kbasep_trace_term(struct kbase_device *kbdev);
+STATIC void kbasep_trace_hook_wrapper(void *param);
+#if KBASE_TRACE_ENABLE
+STATIC void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+STATIC void kbasep_trace_debugfs_term(struct kbase_device *kbdev);
+#endif
+
+struct kbase_device *kbase_device_alloc(void)
+{
+ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+mali_error kbase_device_init(struct kbase_device * const kbdev)
+{
+ int i; /* i used after the for loop, don't reuse ! */
+
+ spin_lock_init(&kbdev->mmu_mask_change);
+
+ /* Initialize platform specific context */
+ if (MALI_FALSE == kbasep_platform_device_init(kbdev))
+ goto fail;
+
+ /* Ensure we can access the GPU registers */
+ kbase_pm_register_access_enable(kbdev);
+
+ /* Find out GPU properties based on the GPU feature registers */
+ kbase_gpuprops_set(kbdev);
+
+ /* Get the list of workarounds for issues on the current HW (identified by the GPU_ID register) */
+ if (MALI_ERROR_NONE != kbase_hw_set_issues_mask(kbdev)) {
+ kbase_pm_register_access_disable(kbdev);
+ goto free_platform;
+ }
+ /* Set the list of features available on the current HW (identified by the GPU_ID register) */
+ kbase_hw_set_features_mask(kbdev);
+
+#if defined(CONFIG_ARM64)
+ set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif
+
+	/* Work around a pre-3.13 Linux issue where dma_mask is NULL when our
+	 * device structure was created from device-tree
+ */
+ if (!kbdev->dev->dma_mask)
+ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+ if (dma_set_mask(kbdev->dev,
+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)))
+ goto dma_set_mask_failed;
+
+ if (dma_set_coherent_mask(kbdev->dev,
+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)))
+ goto dma_set_mask_failed;
+
+ if (kbase_mem_lowlevel_init(kbdev))
+ goto mem_lowlevel_init_failed;
+
+ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+ /* We're done accessing the GPU registers for now. */
+ kbase_pm_register_access_disable(kbdev);
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ const char format[] = "mali_mmu%d";
+ char name[sizeof(format)];
+ const char poke_format[] = "mali_mmu%d_poker"; /* BASE_HW_ISSUE_8316 */
+ char poke_name[sizeof(poke_format)]; /* BASE_HW_ISSUE_8316 */
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+ if (0 > snprintf(poke_name, sizeof(poke_name), poke_format, i))
+ goto free_workqs;
+ }
+
+ if (0 > snprintf(name, sizeof(name), format, i))
+ goto free_workqs;
+
+ kbdev->as[i].number = i;
+ kbdev->as[i].fault_addr = 0ULL;
+
+ kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+ if (NULL == kbdev->as[i].pf_wq)
+ goto free_workqs;
+
+ mutex_init(&kbdev->as[i].transaction_mutex);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+ struct hrtimer *poking_timer = &kbdev->as[i].poke_timer;
+
+ kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+ if (NULL == kbdev->as[i].poke_wq) {
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ goto free_workqs;
+ }
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&kbdev->as[i].poke_work));
+ INIT_WORK(&kbdev->as[i].poke_work, kbasep_as_do_poke);
+
+ hrtimer_init(poking_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+ poking_timer->function = kbasep_as_poke_timer_callback;
+
+ kbdev->as[i].poke_refcount = 0;
+ kbdev->as[i].poke_state = 0u;
+ }
+ }
+ /* don't change i after this point */
+
+ spin_lock_init(&kbdev->hwcnt.lock);
+
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_DISABLED;
+ init_waitqueue_head(&kbdev->reset_wait);
+ init_waitqueue_head(&kbdev->hwcnt.wait);
+ init_waitqueue_head(&kbdev->hwcnt.cache_clean_wait);
+ INIT_WORK(&kbdev->hwcnt.cache_clean_work, kbasep_cache_clean_worker);
+ kbdev->hwcnt.triggered = 0;
+
+ kbdev->hwcnt.cache_clean_wq = alloc_workqueue("Mali cache cleaning workqueue",
+ 0, 1);
+ if (NULL == kbdev->hwcnt.cache_clean_wq)
+ goto free_workqs;
+
+#if KBASE_GPU_RESET_EN
+ kbdev->reset_workq = alloc_workqueue("Mali reset workqueue", 0, 1);
+ if (NULL == kbdev->reset_workq)
+ goto free_cache_clean_workq;
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&kbdev->reset_work));
+ INIT_WORK(&kbdev->reset_work, kbasep_reset_timeout_worker);
+
+ hrtimer_init(&kbdev->reset_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbdev->reset_timer.function = kbasep_reset_timer_callback;
+
+ if (kbasep_trace_init(kbdev) != MALI_ERROR_NONE)
+ goto free_reset_workq;
+#else
+ if (kbasep_trace_init(kbdev) != MALI_ERROR_NONE)
+ goto free_cache_clean_workq;
+#endif /* KBASE_GPU_RESET_EN */
+
+ mutex_init(&kbdev->cacheclean_lock);
+ atomic_set(&kbdev->keep_gpu_powered_count, 0);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ for (i = 0; i < BASE_JM_SUBMIT_SLOTS; ++i)
+ kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+ for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+ atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+ /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+ for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+ kbdev->kbase_profiling_controls[i] = 0;
+
+ kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) || defined(CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ)
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ /* BASE_HW_ISSUE_8408 requires a configuration with different timeouts for
+ * the vexpress platform */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ kbdev->config_attributes = config_attributes_hw_issue_8408;
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS || CONFIG_MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ */
+
+ atomic_set(&kbdev->ctx_num, 0);
+
+ return MALI_ERROR_NONE;
+#if KBASE_GPU_RESET_EN
+free_reset_workq:
+ destroy_workqueue(kbdev->reset_workq);
+#endif /* KBASE_GPU_RESET_EN */
+free_cache_clean_workq:
+ destroy_workqueue(kbdev->hwcnt.cache_clean_wq);
+ free_workqs:
+ while (i > 0) {
+ i--;
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ destroy_workqueue(kbdev->as[i].poke_wq);
+ }
+ kbase_mem_lowlevel_term(kbdev);
+mem_lowlevel_init_failed:
+dma_set_mask_failed:
+free_platform:
+ kbasep_platform_device_term(kbdev);
+fail:
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+ int i;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+ kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+ kbasep_trace_term(kbdev);
+
+#if KBASE_GPU_RESET_EN
+ destroy_workqueue(kbdev->reset_workq);
+#endif
+
+ destroy_workqueue(kbdev->hwcnt.cache_clean_wq);
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ destroy_workqueue(kbdev->as[i].poke_wq);
+ }
+
+ kbase_mem_lowlevel_term(kbdev);
+ kbasep_platform_device_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+ kfree(kbdev);
+}
+
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size)
+{
+ unsigned long flags;
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(tb);
+
+ /* set up the header */
+ /* magic number in the first 4 bytes */
+ tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+	/* Store (write offset = 0, wrap counter = 0, transaction active = no).
+	 * A write offset of 0 means the buffer has never been written.
+	 * Offsets 1 to (wrap_offset - 1) are used to store values once tracing has started
+ */
+ tb[1] = 0;
+
+ /* install trace buffer */
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ kctx->jctx.tb_wrap_offset = size / 8;
+ kctx->jctx.tb = tb;
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ KBASE_DEBUG_ASSERT(kctx);
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ kctx->jctx.tb = NULL;
+ kctx->jctx.tb_wrap_offset = 0;
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ if (kctx->jctx.tb) {
+ u16 wrap_count;
+ u16 write_offset;
+ u32 *tb = kctx->jctx.tb;
+ u32 header_word;
+
+ header_word = tb[1];
+ KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+ wrap_count = (header_word >> 1) & 0x7FFF;
+ write_offset = (header_word >> 16) & 0xFFFF;
+
+ /* mark as transaction in progress */
+ tb[1] |= 0x1;
+ mb();
+
+ /* calculate new offset */
+ write_offset++;
+ if (write_offset == kctx->jctx.tb_wrap_offset) {
+ /* wrap */
+ write_offset = 1;
+ wrap_count++;
+ wrap_count &= 0x7FFF; /* 15bit wrap counter */
+ }
+
+ /* store the trace entry at the selected offset */
+ tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+ tb[write_offset * 2 + 1] = reg_value;
+ mb();
+
+ /* new header word */
+ header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */
+ tb[1] = header_word;
+ }
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
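+
+/*
+ * Illustrative sketch (not used by the driver): a consumer of the trace
+ * buffer installed above could unpack the header word and the most recent
+ * entry roughly as follows; the local names are hypothetical.
+ *
+ *   u32 header_word    = tb[1];
+ *   u16 write_offset   = (header_word >> 16) & 0xFFFF;  // 0 means never written
+ *   u16 wrap_count     = (header_word >> 1) & 0x7FFF;   // 15-bit wrap counter
+ *   int in_transaction = header_word & 0x1;
+ *
+ *   if (!in_transaction && write_offset != 0) {
+ *           u32 reg_word   = tb[write_offset * 2 + 0];
+ *           u32 reg_value  = tb[write_offset * 2 + 1];
+ *           u16 reg_offset = reg_word & ~0x3;            // register offset
+ *           int is_write   = reg_word & 0x1;             // REG_WRITE vs REG_READ
+ *   }
+ */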
+
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered);
+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+ kbase_os_reg_write(kbdev, offset, value);
+ if (kctx && kctx->jctx.tb)
+ kbase_device_trace_register_access(kctx, REG_WRITE, offset, value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write)
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, struct kbase_context *kctx)
+{
+ u32 val;
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered);
+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ val = kbase_os_reg_read(kbdev, offset);
+ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+ if (kctx && kctx->jctx.tb)
+ kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+ return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read)
+
+#if KBASE_PM_EN
+void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+ u32 status;
+ u64 address;
+
+ status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+ address = (u64) kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+ address |= kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", status & 0xFF, kbase_exception_name(status), address);
+ if (multiple)
+ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+ if (val & GPU_FAULT)
+ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+ if (val & RESET_COMPLETED)
+ kbase_pm_reset_done(kbdev);
+
+ if (val & PRFCNT_SAMPLE_COMPLETED)
+ kbase_instr_hwcnt_sample_done(kbdev);
+
+ if (val & CLEAN_CACHES_COMPLETED)
+ kbase_clean_caches_done(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+ /* kbase_pm_check_transitions must be called after the IRQ has been cleared. This is because it might trigger
+ * further power transitions and we don't want to miss the interrupt raised to notify us that these further
+ * transitions have finished.
+ */
+ if (val & POWER_CHANGED_ALL) {
+ mali_bool cores_are_available;
+ unsigned long flags;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+ if (cores_are_available) {
+ /* Fast-path Job Scheduling on PM IRQ */
+ int js;
+ /* Log timelining information that a change in state has completed */
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ /* A simplified check to ensure the last context hasn't exited
+ * after dropping the PM lock whilst doing a PM IRQ: any bits set
+ * in 'submit_allowed' indicate that we have a context in the
+ * runpool (which can't leave whilst we hold this lock). It is
+ * sometimes zero even when we have a context in the runpool, but
+ * that's no problem because we'll be unable to submit jobs
+ * anyway */
+ if (kbdev->js_data.runpool_irq.submit_allowed)
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; ++js) {
+ mali_bool needs_retry;
+ s8 submitted_count = 0;
+ needs_retry = kbasep_js_try_run_next_job_on_slot_irq_nolock(kbdev, js, &submitted_count);
+ /* Don't need to retry outside of IRQ context - this can
+ * only happen if we submitted too many in one IRQ, such
+ * that they were completing faster than we could
+ * submit. In this case, a job IRQ will fire to cause more
+ * work to be submitted in some way */
+ CSTD_UNUSED(needs_retry);
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+ }
+ }
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
+#endif /* KBASE_PM_EN */
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+STATIC mali_error kbasep_trace_init(struct kbase_device *kbdev)
+{
+ void *rbuf;
+
+ rbuf = kmalloc(sizeof(struct kbase_trace) * KBASE_TRACE_SIZE, GFP_KERNEL);
+
+ if (!rbuf)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ kbdev->trace_rbuf = rbuf;
+ spin_lock_init(&kbdev->trace_lock);
+ kbasep_trace_debugfs_init(kbdev);
+ return MALI_ERROR_NONE;
+}
+
+STATIC void kbasep_trace_term(struct kbase_device *kbdev)
+{
+ kbasep_trace_debugfs_term(kbdev);
+ kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+ s32 written = 0;
+
+ /* Initial part of message */
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+ if (trace_msg->katom != MALI_FALSE) {
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+ }
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+ /* NOTE: Could add function callbacks to handle different message types */
+ /* Jobslot present */
+ if ((trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) != MALI_FALSE)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+ /* Refcount present */
+ if ((trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT) != MALI_FALSE)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+ /* Rest of message */
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+ char buffer[DEBUG_MESSAGE_SIZE];
+
+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+ dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+ unsigned long irqflags;
+ struct kbase_trace *trace_msg;
+
+ spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+ trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+ /* Fill the message */
+ trace_msg->thread_id = task_pid_nr(current);
+ trace_msg->cpu = task_cpu(current);
+
+ getnstimeofday(&trace_msg->timestamp);
+
+ trace_msg->code = code;
+ trace_msg->ctx = ctx;
+
+ if (NULL == katom) {
+ trace_msg->katom = MALI_FALSE;
+ } else {
+ trace_msg->katom = MALI_TRUE;
+ trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+ trace_msg->atom_udata[0] = katom->udata.blob[0];
+ trace_msg->atom_udata[1] = katom->udata.blob[1];
+ }
+
+ trace_msg->gpu_addr = gpu_addr;
+ trace_msg->jobslot = jobslot;
+ trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+ trace_msg->info_val = info_val;
+ trace_msg->flags = flags;
+
+ /* Update the ringbuffer indices */
+ kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+ if (kbdev->trace_next_in == kbdev->trace_first_out)
+ kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+ /* Done */
+
+ spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ kbdev->trace_first_out = kbdev->trace_next_in;
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 start;
+ u32 end;
+
+ dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ start = kbdev->trace_first_out;
+ end = kbdev->trace_next_in;
+
+ while (start != end) {
+ struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+ kbasep_trace_dump_msg(kbdev, trace_msg);
+
+ start = (start + 1) & KBASE_TRACE_MASK;
+ }
+ dev_dbg(kbdev->dev, "TRACE_END");
+
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+ KBASE_TRACE_CLEAR(kbdev);
+}
+
+STATIC void kbasep_trace_hook_wrapper(void *param)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)param;
+ kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+ struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+ u32 start;
+ u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct trace_seq_state *state = s->private;
+ int i;
+
+ if (*pos > KBASE_TRACE_SIZE)
+ return NULL;
+ i = state->start + *pos;
+ if ((state->end >= state->start && i >= state->end) ||
+ i >= state->end + KBASE_TRACE_SIZE)
+ return NULL;
+
+ i &= KBASE_TRACE_MASK;
+
+ return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+ struct trace_seq_state *state = s->private;
+ int i;
+
+ (*pos)++;
+
+ i = (state->start + *pos) & KBASE_TRACE_MASK;
+ if (i == state->end)
+ return NULL;
+
+ return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+ struct kbase_trace *trace_msg = data;
+ char buffer[DEBUG_MESSAGE_SIZE];
+
+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+ seq_printf(s, "%s\n", buffer);
+ return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+ .start = kbasep_trace_seq_start,
+ .next = kbasep_trace_seq_next,
+ .stop = kbasep_trace_seq_stop,
+ .show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+ struct kbase_device *kbdev = inode->i_private;
+ unsigned long flags;
+
+ struct trace_seq_state *state;
+
+ state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+ if (!state)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ state->start = kbdev->trace_first_out;
+ state->end = kbdev->trace_next_in;
+ memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+ return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+ .open = kbasep_trace_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+STATIC void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+ kbdev->trace_dentry = debugfs_create_file("mali_trace", S_IRUGO,
+ kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_trace_debugfs_fops);
+}
+
+STATIC void kbasep_trace_debugfs_term(struct kbase_device *kbdev)
+{
+ debugfs_remove(kbdev->trace_dentry);
+ kbdev->trace_dentry = NULL;
+}
+#else
+STATIC void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+
+}
+STATIC void kbasep_trace_debugfs_term(struct kbase_device *kbdev)
+{
+
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#else /* KBASE_TRACE_ENABLE */
+STATIC mali_error kbasep_trace_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+ return MALI_ERROR_NONE;
+}
+
+STATIC void kbasep_trace_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+STATIC void kbasep_trace_hook_wrapper(void *param)
+{
+ CSTD_UNUSED(param);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(code);
+ CSTD_UNUSED(ctx);
+ CSTD_UNUSED(katom);
+ CSTD_UNUSED(gpu_addr);
+ CSTD_UNUSED(flags);
+ CSTD_UNUSED(refcount);
+ CSTD_UNUSED(jobslot);
+ CSTD_UNUSED(info_val);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+ switch (control) {
+ case FBDUMP_CONTROL_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RATE:
+ /* fall through */
+ case SW_COUNTER_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ kbdev->kbase_profiling_controls[control] = value;
+ break;
+ default:
+ dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+ break;
+ }
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+ u32 ret_value = 0;
+
+ switch (control) {
+ case FBDUMP_CONTROL_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RATE:
+ /* fall through */
+ case SW_COUNTER_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ ret_value = kbdev->kbase_profiling_controls[control];
+ break;
+ default:
+ dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+ break;
+ }
+
+ return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+ struct kbase_device *kbdev = NULL;
+
+	/* find the first device, i.e. call with -1 */
+ kbdev = kbase_find_device(-1);
+
+ if (NULL != kbdev) {
+ kbase_set_profiling_control(kbdev, action, value);
+ }
+}
+
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100755
index 000000000000..22b62277cc92
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_disjoint_events.c
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_set(&kbdev->disjoint_event.count, 0);
+ atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_inc(&kbdev->disjoint_event.state);
+
+ kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+ kbase_disjoint_event(kbdev);
+
+ atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ if (atomic_read(&kbdev->disjoint_event.state))
+ kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100755
index 000000000000..27312ca7a125
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,186 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+
+STATIC struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct base_jd_udata data;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+ data = katom->udata;
+
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+ mutex_lock(&kctx->jctx.lock);
+ katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+ mutex_unlock(&kctx->jctx.lock);
+
+ wake_up(&katom->completed);
+
+ return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+ int ret;
+
+ KBASE_DEBUG_ASSERT(ctx);
+
+ mutex_lock(&ctx->event_mutex);
+ ret = (!list_empty(&ctx->event_list)) || (MALI_TRUE == ctx->event_closed);
+ mutex_unlock(&ctx->event_mutex);
+
+ return ret;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending)
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+ struct kbase_jd_atom *atom;
+
+ KBASE_DEBUG_ASSERT(ctx);
+
+ mutex_lock(&ctx->event_mutex);
+
+ if (list_empty(&ctx->event_list)) {
+ if (ctx->event_closed) {
+ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+ mutex_unlock(&ctx->event_mutex);
+ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+ memset(&uevent->udata, 0, sizeof(uevent->udata));
+ dev_dbg(ctx->kbdev->dev,
+ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+ BASE_JD_EVENT_DRV_TERMINATED);
+ return 0;
+ } else {
+ mutex_unlock(&ctx->event_mutex);
+ return -1;
+ }
+ }
+
+ /* normal event processing */
+ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+ list_del(ctx->event_list.next);
+
+ mutex_unlock(&ctx->event_mutex);
+
+ dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+ uevent->event_code = atom->event_code;
+ uevent->atom_number = (atom - ctx->jctx.atoms);
+ uevent->udata = kbase_event_process(ctx, atom);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue)
+
+static void kbase_event_post_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *atom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_context *ctx = atom->kctx;
+
+ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+ kbase_jd_free_external_resources(atom);
+
+ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+ if (atom->event_code == BASE_JD_EVENT_DONE) {
+ /* Don't report the event */
+ kbase_event_process(ctx, atom);
+ return;
+ }
+ }
+
+ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+ /* Don't report the event */
+ kbase_event_process(ctx, atom);
+ return;
+ }
+
+ mutex_lock(&ctx->event_mutex);
+ list_add_tail(&atom->dep_item[0], &ctx->event_list);
+ mutex_unlock(&ctx->event_mutex);
+
+ kbase_event_wakeup(ctx);
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+ KBASE_DEBUG_ASSERT(ctx);
+ KBASE_DEBUG_ASSERT(ctx->event_workq);
+ KBASE_DEBUG_ASSERT(atom);
+
+ INIT_WORK(&atom->work, kbase_event_post_worker);
+ queue_work(ctx->event_workq, &atom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_post)
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+ mutex_lock(&kctx->event_mutex);
+ kctx->event_closed = MALI_TRUE;
+ mutex_unlock(&kctx->event_mutex);
+ kbase_event_wakeup(kctx);
+}
+
+mali_error kbase_event_init(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+ INIT_LIST_HEAD(&kctx->event_list);
+ mutex_init(&kctx->event_mutex);
+ kctx->event_closed = MALI_FALSE;
+ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+ if (NULL == kctx->event_workq)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ return MALI_ERROR_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init)
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+ flush_workqueue(kctx->event_workq);
+ destroy_workqueue(kctx->event_workq);
+
+ /* We use kbase_event_dequeue to remove the remaining events as that
+ * deals with all the cleanup needed for the atoms.
+ *
+	 * Note: use of kctx->event_list without a lock is safe because this must be the last
+	 * thread using it (the context is about to be terminated)
+ */
+ while (!list_empty(&kctx->event_list)) {
+ struct base_jd_event_v2 event;
+
+ kbase_event_dequeue(kctx, &event);
+ }
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100755
index 000000000000..c931af6dfa08
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator */
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping and s/w counter reporting.
+ * We cannot use the enums in mali_uk_types.h because they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP 2
+#define GATOR_JOB_SLOT_SOFT_STOPPED 3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif /* _KBASE_GATOR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100755
index 000000000000..74419b2d4628
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,315 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+
+#define MALI_MAX_CORES_PER_GROUP 4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8
+#define MALI_COUNTERS_PER_BLOCK 64
+#define MALI_BYTES_PER_COUNTER 4
+
+struct kbase_gator_hwcnt_handles {
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ mali_addr64 hwcnt_gpu_va;
+ void *hwcnt_cpu_va;
+ struct kbase_vmap_struct hwcnt_map;
+};
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_number_of_counters)
+{
+ uint32_t gpu_id;
+ const char * const *hardware_counter_names;
+ struct kbase_device *kbdev;
+
+ if (!total_number_of_counters)
+ return NULL;
+
+ /* Get the first device - it doesn't matter in this case */
+ kbdev = kbase_find_device(-1);
+ if (!kbdev)
+ return NULL;
+
+ gpu_id = kbdev->gpu_props.props.core_props.product_id;
+
+ switch (gpu_id) {
+ /* If we are using a Mali-T60x device */
+ case GPU_ID_PI_T60X:
+ hardware_counter_names = hardware_counter_names_mali_t60x;
+ *total_number_of_counters = ARRAY_SIZE(hardware_counter_names_mali_t60x);
+ break;
+ /* If we are using a Mali-T62x device */
+ case GPU_ID_PI_T62X:
+ hardware_counter_names = hardware_counter_names_mali_t62x;
+ *total_number_of_counters = ARRAY_SIZE(hardware_counter_names_mali_t62x);
+ break;
+ /* If we are using a Mali-T72x device */
+ case GPU_ID_PI_T72X:
+ hardware_counter_names = hardware_counter_names_mali_t72x;
+ *total_number_of_counters = ARRAY_SIZE(hardware_counter_names_mali_t72x);
+ break;
+ /* If we are using a Mali-T76x device */
+ case GPU_ID_PI_T76X:
+ hardware_counter_names = hardware_counter_names_mali_t76x;
+ *total_number_of_counters = ARRAY_SIZE(hardware_counter_names_mali_t76x);
+ break;
+#ifdef MALI_INCLUDE_TFRX
+ /* If we are using a Mali-TFRX device - for now just mimic the T760 counters */
+ case GPU_ID_PI_TFRX:
+ hardware_counter_names = hardware_counter_names_mali_t76x;
+ *total_number_of_counters = ARRAY_SIZE(hardware_counter_names_mali_t76x);
+ break;
+#endif /* MALI_INCLUDE_TFRX */
+ /* If we are using a Mali-T86X device - for now just mimic the T760 counters */
+ case GPU_ID_PI_T86X:
+ hardware_counter_names = hardware_counter_names_mali_t76x;
+ *total_number_of_counters = ARRAY_SIZE(hardware_counter_names_mali_t76x);
+ break;
+ default:
+ hardware_counter_names = NULL;
+ *total_number_of_counters = 0;
+ dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id);
+ break;
+ }
+
+ /* Release the kbdev reference. */
+ kbase_release_device(kbdev);
+
+ /* If we return a string array take a reference on the module (or fail). */
+ if (hardware_counter_names && !try_module_get(THIS_MODULE))
+ return NULL;
+
+ return hardware_counter_names;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names)
+
+void kbase_gator_hwcnt_term_names(void)
+{
+ /* Release the module reference. */
+ module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names)
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+ struct kbase_gator_hwcnt_handles *hand;
+ struct kbase_uk_hwcnt_setup setup;
+ mali_error err;
+ uint32_t dump_size = 0, i = 0;
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+ u16 va_alignment = 0;
+
+ if (!in_out_info)
+ return NULL;
+
+ hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+ if (!hand)
+ return NULL;
+
+ /* Get the first device */
+ hand->kbdev = kbase_find_device(-1);
+ if (!hand->kbdev)
+ goto free_hand;
+
+ /* Create a kbase_context */
+ hand->kctx = kbase_create_context(hand->kbdev, true);
+ if (!hand->kctx)
+ goto release_device;
+
+ in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+ in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+ in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+ /* If we are using a Mali-T6xx or Mali-T72x device */
+ if (in_out_info->gpu_id == GPU_ID_PI_T60X ||
+ in_out_info->gpu_id == GPU_ID_PI_T62X ||
+ in_out_info->gpu_id == GPU_ID_PI_T72X) {
+ uint32_t cg, j;
+ uint64_t core_mask;
+
+		/* There are 8 hardware counter blocks per core group */
+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+ MALI_MAX_NUM_BLOCKS_PER_GROUP *
+ in_out_info->nr_core_groups, GFP_KERNEL);
+
+ if (!in_out_info->hwc_layout)
+ goto destroy_context;
+
+ dump_size = in_out_info->nr_core_groups *
+ MALI_MAX_NUM_BLOCKS_PER_GROUP *
+ MALI_COUNTERS_PER_BLOCK *
+ MALI_BYTES_PER_COUNTER;
+
+ for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+ for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+ if (core_mask & (1u << j))
+ in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ }
+
+ in_out_info->hwc_layout[i++] = TILER_BLOCK;
+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+			/* There are no implementations with L3 cache */
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+ if (0 == cg)
+ in_out_info->hwc_layout[i++] = JM_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ }
+ /* If we are using a Mali-T76x device */
+ } else if (
+ (in_out_info->gpu_id == GPU_ID_PI_T76X)
+#ifdef MALI_INCLUDE_TFRX
+ || (in_out_info->gpu_id == GPU_ID_PI_TFRX)
+#endif /* MALI_INCLUDE_TFRX */
+ || (in_out_info->gpu_id == GPU_ID_PI_T86X)
+#ifdef MALI_INCLUDE_TGAL
+ || (in_out_info->gpu_id == GPU_ID_PI_TGAL)
+#endif
+ ) {
+ uint32_t nr_l2, nr_sc, j;
+ uint64_t core_mask;
+
+ nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+ nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+
+ /* For Mali-T76x, the job manager and tiler sets of counters are always present */
+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL);
+
+ if (!in_out_info->hwc_layout)
+ goto destroy_context;
+
+ dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+ in_out_info->hwc_layout[i++] = JM_BLOCK;
+ in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+ for (j = 0; j < nr_l2; j++)
+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+ while (core_mask != 0ull) {
+ if ((core_mask & 1ull) != 0ull)
+ in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ core_mask >>= 1;
+ }
+ }
+
+ in_out_info->nr_hwc_blocks = i;
+
+ in_out_info->size = dump_size;
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ nr_pages = PFN_UP(dump_size);
+ reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
+ &flags, &hand->hwcnt_gpu_va, &va_alignment);
+ if (!reg)
+ goto free_layout;
+
+ hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
+ dump_size, &hand->hwcnt_map);
+
+ if (!hand->hwcnt_cpu_va)
+ goto free_buffer;
+
+ in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
+
+ /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
+ setup.dump_buffer = hand->hwcnt_gpu_va;
+ setup.jm_bm = in_out_info->bitmask[0];
+ setup.tiler_bm = in_out_info->bitmask[1];
+ setup.shader_bm = in_out_info->bitmask[2];
+ setup.mmu_l2_bm = in_out_info->bitmask[3];
+
+ /* There are no implementations with L3 cache */
+ setup.l3_cache_bm = 0;
+
+ err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
+ if (err != MALI_ERROR_NONE)
+ goto free_unmap;
+
+ kbase_instr_hwcnt_clear(hand->kctx);
+
+ return hand;
+
+free_unmap:
+ kbase_vunmap(hand->kctx, &hand->hwcnt_map);
+
+free_buffer:
+ kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);
+
+free_layout:
+ kfree(in_out_info->hwc_layout);
+
+destroy_context:
+ kbase_destroy_context(hand->kctx);
+
+release_device:
+ kbase_release_device(hand->kbdev);
+
+free_hand:
+ kfree(hand);
+
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init)
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+ if (in_out_info)
+ kfree(in_out_info->hwc_layout);
+
+ if (opaque_handles) {
+ kbase_instr_hwcnt_disable(opaque_handles->kctx);
+ kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map);
+ kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va);
+ kbase_destroy_context(opaque_handles->kctx);
+ kbase_release_device(opaque_handles->kbdev);
+ kfree(opaque_handles);
+ }
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term)
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success)
+{
+ if (opaque_handles && success)
+ return (kbase_instr_hwcnt_dump_complete(opaque_handles->kctx, success) != 0);
+ return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete)
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+ if (opaque_handles)
+ return (kbase_instr_hwcnt_dump_irq(opaque_handles->kctx) == MALI_ERROR_NONE);
+ return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100755
index 000000000000..75251e7c1959
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to collect hardware counter data from a Mali device.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ * Version 1 API: DDK versions r1px, r2px
+ * Version 2 API: DDK versions r3px, r4px
+ * Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the previously
+ * requested dump has been successful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may not have been successful. If it returns zero the counter dump was
+ * abandoned because the device was busy (typically because another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ * kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ * In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ * hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ * hwcnt_name # pointer to name list
+ *
+ * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ * # Iterate over each 64-counter block in this GPU configuration
+ * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ * hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ * # Skip reserved type blocks - they contain no counters at all
+ * if( type == RESERVED_BLOCK ) {
+ * continue;
+ * }
+ *
+ * size_t name_offset = type * 64;
+ * size_t data_offset = i * 64;
+ *
+ * # Iterate over the names of the counters in this block type
+ * for( j = 0; j < 64; j++) {
+ * const char * name = hwcnt_name[name_offset+j];
+ *
+ * # Skip empty name strings - there is no counter here
+ * if( name[0] == '\0' ) {
+ * continue;
+ * }
+ *
+ * u32 data = hwcnt_data[data_offset+j];
+ *
+ * printk( "COUNTER: %s DATA: %u\n", name, data );
+ * }
+ * }
+ *
+ *
+ * Note that in most implementations you typically want to either SUM or
+ * AVERAGE multiple instances of the same counter if, for example, you have
+ * multiple shader cores or multiple L2 caches. The most sensible view for
+ * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ * counters.
+ *
+ * 7] Go to step 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling kbase_gator_hwcnt_term_names().
+ * This function must only be called if init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+ JM_BLOCK = 0,
+ TILER_BLOCK,
+ SHADER_BLOCK,
+ MMU_L2_BLOCK,
+ RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+
+ /* Passed from Gator to kbase */
+
+ /* the bitmask of enabled hardware counters for each counter block */
+ uint16_t bitmask[4];
+
+ /* Passed from kbase to Gator */
+
+ /* ptr to counter dump memory */
+ void *kernel_dump_buffer;
+
+ /* size of counter dump memory */
+ uint32_t size;
+
+ /* the ID of the Mali device */
+ uint32_t gpu_id;
+
+ /* the number of shader cores in the GPU */
+ uint32_t nr_cores;
+
+ /* the number of core groups */
+ uint32_t nr_core_groups;
+
+ /* the memory layout of the performance counters */
+ enum hwc_type *hwc_layout;
+
+ /* the total number of hardware counter blocks */
+ uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ * specific information that will be returned to Gator. On entry Gator must have populated the
+ * 'bitmask' field with the counters it wishes to enable for each class of counter block.
+ * Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ * the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ * kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU.
+ *
+ * @return Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the
+ * Mali specific information that will be returned to Gator.
+ * @param opaque_handles A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ * @param[out] success Non-zero on success, zero on failure.
+ *
+ * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ * completed dump may not have dumped successfully, so the caller must test for both
+ * a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ *
+ * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_number_of_counters The total number of counter short names in the Mali device's list.
+ *
+ * @return Pointer to an array of strings of length *total_number_of_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_number_of_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
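
The call sequence documented in this header can be exercised from another kernel module roughly as sketched below. This is only an illustration built from the declarations above: the all-ones enable masks, the busy-poll and the error handling are assumptions, not the gator implementation.

/* Hedged sketch of the documented API sequence; enable masks and
 * polling strategy are illustrative assumptions only. */
static int example_collect_counters(void)
{
	struct kbase_gator_hwcnt_info info = { { 0 } };
	struct kbase_gator_hwcnt_handles *handles;
	const char * const *names;
	uint32_t n_names, success;
	int i;

	names = kbase_gator_hwcnt_init_names(&n_names);		/* step 1 */
	if (!names)
		return -1;

	for (i = 0; i < 4; i++)
		info.bitmask[i] = 0xffff;			/* step 2: enable all counters */

	handles = kbase_gator_hwcnt_init(&info);		/* step 3 */
	if (!handles) {
		kbase_gator_hwcnt_term_names();
		return -1;
	}

	if (kbase_gator_instr_hwcnt_dump_irq(handles)) {	/* step 4 */
		while (!kbase_gator_instr_hwcnt_dump_complete(handles, &success))
			;	/* step 5: poll; a real client would sleep or reschedule */
		/* step 6: on success, walk info.kernel_dump_buffer exactly as in
		 * the pseudo code above, using the names array for labels */
	}

	kbase_gator_hwcnt_term(&info, handles);			/* step 8 */
	kbase_gator_hwcnt_term_names();				/* step 9 */
	return 0;
}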
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100755
index 000000000000..97784bb16c89
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,1095 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counter_names_mali_t60x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T60x_MESSAGES_SENT",
+ "T60x_MESSAGES_RECEIVED",
+ "T60x_GPU_ACTIVE",
+ "T60x_IRQ_ACTIVE",
+ "T60x_JS0_JOBS",
+ "T60x_JS0_TASKS",
+ "T60x_JS0_ACTIVE",
+ "",
+ "T60x_JS0_WAIT_READ",
+ "T60x_JS0_WAIT_ISSUE",
+ "T60x_JS0_WAIT_DEPEND",
+ "T60x_JS0_WAIT_FINISH",
+ "T60x_JS1_JOBS",
+ "T60x_JS1_TASKS",
+ "T60x_JS1_ACTIVE",
+ "",
+ "T60x_JS1_WAIT_READ",
+ "T60x_JS1_WAIT_ISSUE",
+ "T60x_JS1_WAIT_DEPEND",
+ "T60x_JS1_WAIT_FINISH",
+ "T60x_JS2_JOBS",
+ "T60x_JS2_TASKS",
+ "T60x_JS2_ACTIVE",
+ "",
+ "T60x_JS2_WAIT_READ",
+ "T60x_JS2_WAIT_ISSUE",
+ "T60x_JS2_WAIT_DEPEND",
+ "T60x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T60x_TI_JOBS_PROCESSED",
+ "T60x_TI_TRIANGLES",
+ "T60x_TI_QUADS",
+ "T60x_TI_POLYGONS",
+ "T60x_TI_POINTS",
+ "T60x_TI_LINES",
+ "T60x_TI_VCACHE_HIT",
+ "T60x_TI_VCACHE_MISS",
+ "T60x_TI_FRONT_FACING",
+ "T60x_TI_BACK_FACING",
+ "T60x_TI_PRIM_VISIBLE",
+ "T60x_TI_PRIM_CULLED",
+ "T60x_TI_PRIM_CLIPPED",
+ "T60x_TI_LEVEL0",
+ "T60x_TI_LEVEL1",
+ "T60x_TI_LEVEL2",
+ "T60x_TI_LEVEL3",
+ "T60x_TI_LEVEL4",
+ "T60x_TI_LEVEL5",
+ "T60x_TI_LEVEL6",
+ "T60x_TI_LEVEL7",
+ "T60x_TI_COMMAND_1",
+ "T60x_TI_COMMAND_2",
+ "T60x_TI_COMMAND_3",
+ "T60x_TI_COMMAND_4",
+ "T60x_TI_COMMAND_4_7",
+ "T60x_TI_COMMAND_8_15",
+ "T60x_TI_COMMAND_16_63",
+ "T60x_TI_COMMAND_64",
+ "T60x_TI_COMPRESS_IN",
+ "T60x_TI_COMPRESS_OUT",
+ "T60x_TI_COMPRESS_FLUSH",
+ "T60x_TI_TIMESTAMPS",
+ "T60x_TI_PCACHE_HIT",
+ "T60x_TI_PCACHE_MISS",
+ "T60x_TI_PCACHE_LINE",
+ "T60x_TI_PCACHE_STALL",
+ "T60x_TI_WRBUF_HIT",
+ "T60x_TI_WRBUF_MISS",
+ "T60x_TI_WRBUF_LINE",
+ "T60x_TI_WRBUF_PARTIAL",
+ "T60x_TI_WRBUF_STALL",
+ "T60x_TI_ACTIVE",
+ "T60x_TI_LOADING_DESC",
+ "T60x_TI_INDEX_WAIT",
+ "T60x_TI_INDEX_RANGE_WAIT",
+ "T60x_TI_VERTEX_WAIT",
+ "T60x_TI_PCACHE_WAIT",
+ "T60x_TI_WRBUF_WAIT",
+ "T60x_TI_BUS_READ",
+ "T60x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_TI_UTLB_STALL",
+ "T60x_TI_UTLB_REPLAY_MISS",
+ "T60x_TI_UTLB_REPLAY_FULL",
+ "T60x_TI_UTLB_NEW_MISS",
+ "T60x_TI_UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T60x_FRAG_ACTIVE",
+ "T60x_FRAG_PRIMITIVES",
+ "T60x_FRAG_PRIMITIVES_DROPPED",
+ "T60x_FRAG_CYCLES_DESC",
+ "T60x_FRAG_CYCLES_PLR",
+ "T60x_FRAG_CYCLES_VERT",
+ "T60x_FRAG_CYCLES_TRISETUP",
+ "T60x_FRAG_CYCLES_RAST",
+ "T60x_FRAG_THREADS",
+ "T60x_FRAG_DUMMY_THREADS",
+ "T60x_FRAG_QUADS_RAST",
+ "T60x_FRAG_QUADS_EZS_TEST",
+ "T60x_FRAG_QUADS_EZS_KILLED",
+ "T60x_FRAG_THREADS_LZS_TEST",
+ "T60x_FRAG_THREADS_LZS_KILLED",
+ "T60x_FRAG_CYCLES_NO_TILE",
+ "T60x_FRAG_NUM_TILES",
+ "T60x_FRAG_TRANS_ELIM",
+ "T60x_COMPUTE_ACTIVE",
+ "T60x_COMPUTE_TASKS",
+ "T60x_COMPUTE_THREADS",
+ "T60x_COMPUTE_CYCLES_DESC",
+ "T60x_TRIPIPE_ACTIVE",
+ "T60x_ARITH_WORDS",
+ "T60x_ARITH_CYCLES_REG",
+ "T60x_ARITH_CYCLES_L0",
+ "T60x_ARITH_FRAG_DEPEND",
+ "T60x_LS_WORDS",
+ "T60x_LS_ISSUES",
+ "T60x_LS_RESTARTS",
+ "T60x_LS_REISSUES_MISS",
+ "T60x_LS_REISSUES_VD",
+ "T60x_LS_REISSUE_ATTRIB_MISS",
+ "T60x_LS_NO_WB",
+ "T60x_TEX_WORDS",
+ "T60x_TEX_BUBBLES",
+ "T60x_TEX_WORDS_L0",
+ "T60x_TEX_WORDS_DESC",
+ "T60x_TEX_ISSUES",
+ "T60x_TEX_RECIRC_FMISS",
+ "T60x_TEX_RECIRC_DESC",
+ "T60x_TEX_RECIRC_MULTI",
+ "T60x_TEX_RECIRC_PMISS",
+ "T60x_TEX_RECIRC_CONF",
+ "T60x_LSC_READ_HITS",
+ "T60x_LSC_READ_MISSES",
+ "T60x_LSC_WRITE_HITS",
+ "T60x_LSC_WRITE_MISSES",
+ "T60x_LSC_ATOMIC_HITS",
+ "T60x_LSC_ATOMIC_MISSES",
+ "T60x_LSC_LINE_FETCHES",
+ "T60x_LSC_DIRTY_LINE",
+ "T60x_LSC_SNOOPS",
+ "T60x_AXI_TLB_STALL",
+ "T60x_AXI_TLB_MIESS",
+ "T60x_AXI_TLB_TRANSACTION",
+ "T60x_LS_TLB_MISS",
+ "T60x_LS_TLB_HIT",
+ "T60x_AXI_BEATS_READ",
+ "T60x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T60x_MMU_HIT",
+ "T60x_MMU_NEW_MISS",
+ "T60x_MMU_REPLAY_FULL",
+ "T60x_MMU_REPLAY_MISS",
+ "T60x_MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_UTLB_HIT",
+ "T60x_UTLB_NEW_MISS",
+ "T60x_UTLB_REPLAY_FULL",
+ "T60x_UTLB_REPLAY_MISS",
+ "T60x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_L2_EXT_WRITE_BEATS",
+ "T60x_L2_EXT_READ_BEATS",
+ "T60x_L2_ANY_LOOKUP",
+ "T60x_L2_READ_LOOKUP",
+ "T60x_L2_SREAD_LOOKUP",
+ "T60x_L2_READ_REPLAY",
+ "T60x_L2_READ_SNOOP",
+ "T60x_L2_READ_HIT",
+ "T60x_L2_CLEAN_MISS",
+ "T60x_L2_WRITE_LOOKUP",
+ "T60x_L2_SWRITE_LOOKUP",
+ "T60x_L2_WRITE_REPLAY",
+ "T60x_L2_WRITE_SNOOP",
+ "T60x_L2_WRITE_HIT",
+ "T60x_L2_EXT_READ_FULL",
+ "T60x_L2_EXT_READ_HALF",
+ "T60x_L2_EXT_WRITE_FULL",
+ "T60x_L2_EXT_WRITE_HALF",
+ "T60x_L2_EXT_READ",
+ "T60x_L2_EXT_READ_LINE",
+ "T60x_L2_EXT_WRITE",
+ "T60x_L2_EXT_WRITE_LINE",
+ "T60x_L2_EXT_WRITE_SMALL",
+ "T60x_L2_EXT_BARRIER",
+ "T60x_L2_EXT_AR_STALL",
+ "T60x_L2_EXT_R_BUF_FULL",
+ "T60x_L2_EXT_RD_BUF_FULL",
+ "T60x_L2_EXT_R_RAW",
+ "T60x_L2_EXT_W_STALL",
+ "T60x_L2_EXT_W_BUF_FULL",
+ "T60x_L2_EXT_R_W_HAZARD",
+ "T60x_L2_TAG_HAZARD",
+ "T60x_L2_SNOOP_FULL",
+ "T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counter_names_mali_t62x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T62x_MESSAGES_SENT",
+ "T62x_MESSAGES_RECEIVED",
+ "T62x_GPU_ACTIVE",
+ "T62x_IRQ_ACTIVE",
+ "T62x_JS0_JOBS",
+ "T62x_JS0_TASKS",
+ "T62x_JS0_ACTIVE",
+ "",
+ "T62x_JS0_WAIT_READ",
+ "T62x_JS0_WAIT_ISSUE",
+ "T62x_JS0_WAIT_DEPEND",
+ "T62x_JS0_WAIT_FINISH",
+ "T62x_JS1_JOBS",
+ "T62x_JS1_TASKS",
+ "T62x_JS1_ACTIVE",
+ "",
+ "T62x_JS1_WAIT_READ",
+ "T62x_JS1_WAIT_ISSUE",
+ "T62x_JS1_WAIT_DEPEND",
+ "T62x_JS1_WAIT_FINISH",
+ "T62x_JS2_JOBS",
+ "T62x_JS2_TASKS",
+ "T62x_JS2_ACTIVE",
+ "",
+ "T62x_JS2_WAIT_READ",
+ "T62x_JS2_WAIT_ISSUE",
+ "T62x_JS2_WAIT_DEPEND",
+ "T62x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T62x_TI_JOBS_PROCESSED",
+ "T62x_TI_TRIANGLES",
+ "T62x_TI_QUADS",
+ "T62x_TI_POLYGONS",
+ "T62x_TI_POINTS",
+ "T62x_TI_LINES",
+ "T62x_TI_VCACHE_HIT",
+ "T62x_TI_VCACHE_MISS",
+ "T62x_TI_FRONT_FACING",
+ "T62x_TI_BACK_FACING",
+ "T62x_TI_PRIM_VISIBLE",
+ "T62x_TI_PRIM_CULLED",
+ "T62x_TI_PRIM_CLIPPED",
+ "T62x_TI_LEVEL0",
+ "T62x_TI_LEVEL1",
+ "T62x_TI_LEVEL2",
+ "T62x_TI_LEVEL3",
+ "T62x_TI_LEVEL4",
+ "T62x_TI_LEVEL5",
+ "T62x_TI_LEVEL6",
+ "T62x_TI_LEVEL7",
+ "T62x_TI_COMMAND_1",
+ "T62x_TI_COMMAND_2",
+ "T62x_TI_COMMAND_3",
+ "T62x_TI_COMMAND_4",
+ "T62x_TI_COMMAND_5_7",
+ "T62x_TI_COMMAND_8_15",
+ "T62x_TI_COMMAND_16_63",
+ "T62x_TI_COMMAND_64",
+ "T62x_TI_COMPRESS_IN",
+ "T62x_TI_COMPRESS_OUT",
+ "T62x_TI_COMPRESS_FLUSH",
+ "T62x_TI_TIMESTAMPS",
+ "T62x_TI_PCACHE_HIT",
+ "T62x_TI_PCACHE_MISS",
+ "T62x_TI_PCACHE_LINE",
+ "T62x_TI_PCACHE_STALL",
+ "T62x_TI_WRBUF_HIT",
+ "T62x_TI_WRBUF_MISS",
+ "T62x_TI_WRBUF_LINE",
+ "T62x_TI_WRBUF_PARTIAL",
+ "T62x_TI_WRBUF_STALL",
+ "T62x_TI_ACTIVE",
+ "T62x_TI_LOADING_DESC",
+ "T62x_TI_INDEX_WAIT",
+ "T62x_TI_INDEX_RANGE_WAIT",
+ "T62x_TI_VERTEX_WAIT",
+ "T62x_TI_PCACHE_WAIT",
+ "T62x_TI_WRBUF_WAIT",
+ "T62x_TI_BUS_READ",
+ "T62x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_TI_UTLB_STALL",
+ "T62x_TI_UTLB_REPLAY_MISS",
+ "T62x_TI_UTLB_REPLAY_FULL",
+ "T62x_TI_UTLB_NEW_MISS",
+ "T62x_TI_UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "T62x_SHADER_CORE_ACTIVE",
+ "T62x_FRAG_ACTIVE",
+ "T62x_FRAG_PRIMITIVES",
+ "T62x_FRAG_PRIMITIVES_DROPPED",
+ "T62x_FRAG_CYCLES_DESC",
+ "T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T62x_FRAG_CYCLES_VERT",
+ "T62x_FRAG_CYCLES_TRISETUP",
+ "T62x_FRAG_CYCLES_EZS_ACTIVE",
+ "T62x_FRAG_THREADS",
+ "T62x_FRAG_DUMMY_THREADS",
+ "T62x_FRAG_QUADS_RAST",
+ "T62x_FRAG_QUADS_EZS_TEST",
+ "T62x_FRAG_QUADS_EZS_KILLED",
+ "T62x_FRAG_THREADS_LZS_TEST",
+ "T62x_FRAG_THREADS_LZS_KILLED",
+ "T62x_FRAG_CYCLES_NO_TILE",
+ "T62x_FRAG_NUM_TILES",
+ "T62x_FRAG_TRANS_ELIM",
+ "T62x_COMPUTE_ACTIVE",
+ "T62x_COMPUTE_TASKS",
+ "T62x_COMPUTE_THREADS",
+ "T62x_COMPUTE_CYCLES_DESC",
+ "T62x_TRIPIPE_ACTIVE",
+ "T62x_ARITH_WORDS",
+ "T62x_ARITH_CYCLES_REG",
+ "T62x_ARITH_CYCLES_L0",
+ "T62x_ARITH_FRAG_DEPEND",
+ "T62x_LS_WORDS",
+ "T62x_LS_ISSUES",
+ "T62x_LS_RESTARTS",
+ "T62x_LS_REISSUES_MISS",
+ "T62x_LS_REISSUES_VD",
+ "T62x_LS_REISSUE_ATTRIB_MISS",
+ "T62x_LS_NO_WB",
+ "T62x_TEX_WORDS",
+ "T62x_TEX_BUBBLES",
+ "T62x_TEX_WORDS_L0",
+ "T62x_TEX_WORDS_DESC",
+ "T62x_TEX_ISSUES",
+ "T62x_TEX_RECIRC_FMISS",
+ "T62x_TEX_RECIRC_DESC",
+ "T62x_TEX_RECIRC_MULTI",
+ "T62x_TEX_RECIRC_PMISS",
+ "T62x_TEX_RECIRC_CONF",
+ "T62x_LSC_READ_HITS",
+ "T62x_LSC_READ_MISSES",
+ "T62x_LSC_WRITE_HITS",
+ "T62x_LSC_WRITE_MISSES",
+ "T62x_LSC_ATOMIC_HITS",
+ "T62x_LSC_ATOMIC_MISSES",
+ "T62x_LSC_LINE_FETCHES",
+ "T62x_LSC_DIRTY_LINE",
+ "T62x_LSC_SNOOPS",
+ "T62x_AXI_TLB_STALL",
+ "T62x_AXI_TLB_MIESS",
+ "T62x_AXI_TLB_TRANSACTION",
+ "T62x_LS_TLB_MISS",
+ "T62x_LS_TLB_HIT",
+ "T62x_AXI_BEATS_READ",
+ "T62x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T62x_MMU_HIT",
+ "T62x_MMU_NEW_MISS",
+ "T62x_MMU_REPLAY_FULL",
+ "T62x_MMU_REPLAY_MISS",
+ "T62x_MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_UTLB_HIT",
+ "T62x_UTLB_NEW_MISS",
+ "T62x_UTLB_REPLAY_FULL",
+ "T62x_UTLB_REPLAY_MISS",
+ "T62x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_L2_EXT_WRITE_BEATS",
+ "T62x_L2_EXT_READ_BEATS",
+ "T62x_L2_ANY_LOOKUP",
+ "T62x_L2_READ_LOOKUP",
+ "T62x_L2_SREAD_LOOKUP",
+ "T62x_L2_READ_REPLAY",
+ "T62x_L2_READ_SNOOP",
+ "T62x_L2_READ_HIT",
+ "T62x_L2_CLEAN_MISS",
+ "T62x_L2_WRITE_LOOKUP",
+ "T62x_L2_SWRITE_LOOKUP",
+ "T62x_L2_WRITE_REPLAY",
+ "T62x_L2_WRITE_SNOOP",
+ "T62x_L2_WRITE_HIT",
+ "T62x_L2_EXT_READ_FULL",
+ "T62x_L2_EXT_READ_HALF",
+ "T62x_L2_EXT_WRITE_FULL",
+ "T62x_L2_EXT_WRITE_HALF",
+ "T62x_L2_EXT_READ",
+ "T62x_L2_EXT_READ_LINE",
+ "T62x_L2_EXT_WRITE",
+ "T62x_L2_EXT_WRITE_LINE",
+ "T62x_L2_EXT_WRITE_SMALL",
+ "T62x_L2_EXT_BARRIER",
+ "T62x_L2_EXT_AR_STALL",
+ "T62x_L2_EXT_R_BUF_FULL",
+ "T62x_L2_EXT_RD_BUF_FULL",
+ "T62x_L2_EXT_R_RAW",
+ "T62x_L2_EXT_W_STALL",
+ "T62x_L2_EXT_W_BUF_FULL",
+ "T62x_L2_EXT_R_W_HAZARD",
+ "T62x_L2_TAG_HAZARD",
+ "T62x_L2_SNOOP_FULL",
+ "T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counter_names_mali_t72x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T72x_GPU_ACTIVE",
+ "T72x_IRQ_ACTIVE",
+ "T72x_JS0_JOBS",
+ "T72x_JS0_TASKS",
+ "T72x_JS0_ACTIVE",
+ "T72x_JS1_JOBS",
+ "T72x_JS1_TASKS",
+ "T72x_JS1_ACTIVE",
+ "T72x_JS2_JOBS",
+ "T72x_JS2_TASKS",
+ "T72x_JS2_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T72x_TI_JOBS_PROCESSED",
+ "T72x_TI_TRIANGLES",
+ "T72x_TI_QUADS",
+ "T72x_TI_POLYGONS",
+ "T72x_TI_POINTS",
+ "T72x_TI_LINES",
+ "T72x_TI_FRONT_FACING",
+ "T72x_TI_BACK_FACING",
+ "T72x_TI_PRIM_VISIBLE",
+ "T72x_TI_PRIM_CULLED",
+ "T72x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T72x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T72x_FRAG_ACTIVE",
+ "T72x_FRAG_PRIMITIVES",
+ "T72x_FRAG_PRIMITIVES_DROPPED",
+ "T72x_FRAG_THREADS",
+ "T72x_FRAG_DUMMY_THREADS",
+ "T72x_FRAG_QUADS_RAST",
+ "T72x_FRAG_QUADS_EZS_TEST",
+ "T72x_FRAG_QUADS_EZS_KILLED",
+ "T72x_FRAG_THREADS_LZS_TEST",
+ "T72x_FRAG_THREADS_LZS_KILLED",
+ "T72x_FRAG_CYCLES_NO_TILE",
+ "T72x_FRAG_NUM_TILES",
+ "T72x_FRAG_TRANS_ELIM",
+ "T72x_COMPUTE_ACTIVE",
+ "T72x_COMPUTE_TASKS",
+ "T72x_COMPUTE_THREADS",
+ "T72x_TRIPIPE_ACTIVE",
+ "T72x_ARITH_WORDS",
+ "T72x_ARITH_CYCLES_REG",
+ "T72x_LS_WORDS",
+ "T72x_LS_ISSUES",
+ "T72x_LS_RESTARTS",
+ "T72x_LS_REISSUES_MISS",
+ "T72x_TEX_WORDS",
+ "T72x_TEX_BUBBLES",
+ "T72x_TEX_ISSUES",
+ "T72x_LSC_READ_HITS",
+ "T72x_LSC_READ_MISSES",
+ "T72x_LSC_WRITE_HITS",
+ "T72x_LSC_WRITE_MISSES",
+ "T72x_LSC_ATOMIC_HITS",
+ "T72x_LSC_ATOMIC_MISSES",
+ "T72x_LSC_LINE_FETCHES",
+ "T72x_LSC_DIRTY_LINE",
+ "T72x_LSC_SNOOPS",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T72x_L2_EXT_WRITE_BEAT",
+ "T72x_L2_EXT_READ_BEAT",
+ "T72x_L2_READ_SNOOP",
+ "T72x_L2_READ_HIT",
+ "T72x_L2_WRITE_SNOOP",
+ "T72x_L2_WRITE_HIT",
+ "T72x_L2_EXT_WRITE_SMALL",
+ "T72x_L2_EXT_BARRIER",
+ "T72x_L2_EXT_AR_STALL",
+ "T72x_L2_EXT_W_STALL",
+ "T72x_L2_SNOOP_FULL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ""
+};
+
+static const char * const hardware_counter_names_mali_t76x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T76x_MESSAGES_SENT",
+ "T76x_MESSAGES_RECEIVED",
+ "T76x_GPU_ACTIVE",
+ "T76x_IRQ_ACTIVE",
+ "T76x_JS0_JOBS",
+ "T76x_JS0_TASKS",
+ "T76x_JS0_ACTIVE",
+ "",
+ "T76x_JS0_WAIT_READ",
+ "T76x_JS0_WAIT_ISSUE",
+ "T76x_JS0_WAIT_DEPEND",
+ "T76x_JS0_WAIT_FINISH",
+ "T76x_JS1_JOBS",
+ "T76x_JS1_TASKS",
+ "T76x_JS1_ACTIVE",
+ "",
+ "T76x_JS1_WAIT_READ",
+ "T76x_JS1_WAIT_ISSUE",
+ "T76x_JS1_WAIT_DEPEND",
+ "T76x_JS1_WAIT_FINISH",
+ "T76x_JS2_JOBS",
+ "T76x_JS2_TASKS",
+ "T76x_JS2_ACTIVE",
+ "",
+ "T76x_JS2_WAIT_READ",
+ "T76x_JS2_WAIT_ISSUE",
+ "T76x_JS2_WAIT_DEPEND",
+ "T76x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Tiler */
+ "",
+ "",
+ "",
+ "T76x_TI_JOBS_PROCESSED",
+ "T76x_TI_TRIANGLES",
+ "T76x_TI_QUADS",
+ "T76x_TI_POLYGONS",
+ "T76x_TI_POINTS",
+ "T76x_TI_LINES",
+ "T76x_TI_VCACHE_HIT",
+ "T76x_TI_VCACHE_MISS",
+ "T76x_TI_FRONT_FACING",
+ "T76x_TI_BACK_FACING",
+ "T76x_TI_PRIM_VISIBLE",
+ "T76x_TI_PRIM_CULLED",
+ "T76x_TI_PRIM_CLIPPED",
+ "T76x_TI_LEVEL0",
+ "T76x_TI_LEVEL1",
+ "T76x_TI_LEVEL2",
+ "T76x_TI_LEVEL3",
+ "T76x_TI_LEVEL4",
+ "T76x_TI_LEVEL5",
+ "T76x_TI_LEVEL6",
+ "T76x_TI_LEVEL7",
+ "T76x_TI_COMMAND_1",
+ "T76x_TI_COMMAND_2",
+ "T76x_TI_COMMAND_3",
+ "T76x_TI_COMMAND_4",
+ "T76x_TI_COMMAND_5_7",
+ "T76x_TI_COMMAND_8_15",
+ "T76x_TI_COMMAND_16_63",
+ "T76x_TI_COMMAND_64",
+ "T76x_TI_COMPRESS_IN",
+ "T76x_TI_COMPRESS_OUT",
+ "T76x_TI_COMPRESS_FLUSH",
+ "T76x_TI_TIMESTAMPS",
+ "T76x_TI_PCACHE_HIT",
+ "T76x_TI_PCACHE_MISS",
+ "T76x_TI_PCACHE_LINE",
+ "T76x_TI_PCACHE_STALL",
+ "T76x_TI_WRBUF_HIT",
+ "T76x_TI_WRBUF_MISS",
+ "T76x_TI_WRBUF_LINE",
+ "T76x_TI_WRBUF_PARTIAL",
+ "T76x_TI_WRBUF_STALL",
+ "T76x_TI_ACTIVE",
+ "T76x_TI_LOADING_DESC",
+ "T76x_TI_INDEX_WAIT",
+ "T76x_TI_INDEX_RANGE_WAIT",
+ "T76x_TI_VERTEX_WAIT",
+ "T76x_TI_PCACHE_WAIT",
+ "T76x_TI_WRBUF_WAIT",
+ "T76x_TI_BUS_READ",
+ "T76x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T76x_TI_UTLB_HIT",
+ "T76x_TI_UTLB_NEW_MISS",
+ "T76x_TI_UTLB_REPLAY_FULL",
+ "T76x_TI_UTLB_REPLAY_MISS",
+ "T76x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T76x_FRAG_ACTIVE",
+ "T76x_FRAG_PRIMITIVES",
+ "T76x_FRAG_PRIMITIVES_DROPPED",
+ "T76x_FRAG_CYCLES_DESC",
+ "T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T76x_FRAG_CYCLES_VERT",
+ "T76x_FRAG_CYCLES_TRISETUP",
+ "T76x_FRAG_CYCLES_EZS_ACTIVE",
+ "T76x_FRAG_THREADS",
+ "T76x_FRAG_DUMMY_THREADS",
+ "T76x_FRAG_QUADS_RAST",
+ "T76x_FRAG_QUADS_EZS_TEST",
+ "T76x_FRAG_QUADS_EZS_KILLED",
+ "T76x_FRAG_THREADS_LZS_TEST",
+ "T76x_FRAG_THREADS_LZS_KILLED",
+ "T76x_FRAG_CYCLES_NO_TILE",
+ "T76x_FRAG_NUM_TILES",
+ "T76x_FRAG_TRANS_ELIM",
+ "T76x_COMPUTE_ACTIVE",
+ "T76x_COMPUTE_TASKS",
+ "T76x_COMPUTE_THREADS",
+ "T76x_COMPUTE_CYCLES_DESC",
+ "T76x_TRIPIPE_ACTIVE",
+ "T76x_ARITH_WORDS",
+ "T76x_ARITH_CYCLES_REG",
+ "T76x_ARITH_CYCLES_L0",
+ "T76x_ARITH_FRAG_DEPEND",
+ "T76x_LS_WORDS",
+ "T76x_LS_ISSUES",
+ "T76x_LS_REISSUE_ATTR",
+ "T76x_LS_REISSUES_VARY",
+ "T76x_LS_VARY_RV_MISS",
+ "T76x_LS_VARY_RV_HIT",
+ "T76x_LS_NO_UNPARK",
+ "T76x_TEX_WORDS",
+ "T76x_TEX_BUBBLES",
+ "T76x_TEX_WORDS_L0",
+ "T76x_TEX_WORDS_DESC",
+ "T76x_TEX_ISSUES",
+ "T76x_TEX_RECIRC_FMISS",
+ "T76x_TEX_RECIRC_DESC",
+ "T76x_TEX_RECIRC_MULTI",
+ "T76x_TEX_RECIRC_PMISS",
+ "T76x_TEX_RECIRC_CONF",
+ "T76x_LSC_READ_HITS",
+ "T76x_LSC_READ_OP",
+ "T76x_LSC_WRITE_HITS",
+ "T76x_LSC_WRITE_OP",
+ "T76x_LSC_ATOMIC_HITS",
+ "T76x_LSC_ATOMIC_OP",
+ "T76x_LSC_LINE_FETCHES",
+ "T76x_LSC_DIRTY_LINE",
+ "T76x_LSC_SNOOPS",
+ "T76x_AXI_TLB_STALL",
+ "T76x_AXI_TLB_MIESS",
+ "T76x_AXI_TLB_TRANSACTION",
+ "T76x_LS_TLB_MISS",
+ "T76x_LS_TLB_HIT",
+ "T76x_AXI_BEATS_READ",
+ "T76x_AXI_BEATS_WRITTEN",
+
+ /* L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T76x_MMU_HIT",
+ "T76x_MMU_NEW_MISS",
+ "T76x_MMU_REPLAY_FULL",
+ "T76x_MMU_REPLAY_MISS",
+ "T76x_MMU_TABLE_WALK",
+ "T76x_MMU_REQUESTS",
+ "",
+ "",
+ "T76x_UTLB_HIT",
+ "T76x_UTLB_NEW_MISS",
+ "T76x_UTLB_REPLAY_FULL",
+ "T76x_UTLB_REPLAY_MISS",
+ "T76x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T76x_L2_EXT_WRITE_BEATS",
+ "T76x_L2_EXT_READ_BEATS",
+ "T76x_L2_ANY_LOOKUP",
+ "T76x_L2_READ_LOOKUP",
+ "T76x_L2_SREAD_LOOKUP",
+ "T76x_L2_READ_REPLAY",
+ "T76x_L2_READ_SNOOP",
+ "T76x_L2_READ_HIT",
+ "T76x_L2_CLEAN_MISS",
+ "T76x_L2_WRITE_LOOKUP",
+ "T76x_L2_SWRITE_LOOKUP",
+ "T76x_L2_WRITE_REPLAY",
+ "T76x_L2_WRITE_SNOOP",
+ "T76x_L2_WRITE_HIT",
+ "T76x_L2_EXT_READ_FULL",
+ "",
+ "T76x_L2_EXT_WRITE_FULL",
+ "T76x_L2_EXT_R_W_HAZARD",
+ "T76x_L2_EXT_READ",
+ "T76x_L2_EXT_READ_LINE",
+ "T76x_L2_EXT_WRITE",
+ "T76x_L2_EXT_WRITE_LINE",
+ "T76x_L2_EXT_WRITE_SMALL",
+ "T76x_L2_EXT_BARRIER",
+ "T76x_L2_EXT_AR_STALL",
+ "T76x_L2_EXT_R_BUF_FULL",
+ "T76x_L2_EXT_RD_BUF_FULL",
+ "T76x_L2_EXT_R_RAW",
+ "T76x_L2_EXT_W_STALL",
+ "T76x_L2_EXT_W_BUF_FULL",
+ "T76x_L2_EXT_R_BUF_FULL",
+ "T76x_L2_TAG_HAZARD",
+ "T76x_L2_SNOOP_FULL",
+ "T76x_L2_REPLAY_FULL"
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100755
index 000000000000..6a9781ffd29d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if it successfully prints data into the debugfs entry file,
+ * -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+ ssize_t ret = 0;
+ struct list_head *entry;
+ const struct list_head *kbdev_list;
+ kbdev_list = kbase_dev_list_get();
+ list_for_each(entry, kbdev_list) {
+ struct kbase_device *kbdev = NULL;
+ struct kbasep_kctx_list_element *element;
+
+ kbdev = list_entry(entry, struct kbase_device, entry);
+ /* output the total memory usage for this device */
+ ret = seq_printf(sfile, "%-16s %10u\n", \
+ kbdev->devname, \
+ atomic_read(&(kbdev->memdev.used_pages)));
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry(element, &kbdev->kctx_list, link) {
+ /* output the memory usage for each kctx
+ * opened on this device */
+ ret = seq_printf(sfile, " %s-0x%p %10u\n", \
+ "kctx",
+ element->kctx, \
+ atomic_read(&(element->kctx->used_pages)));
+ }
+ mutex_unlock(&kbdev->kctx_list_lock);
+ }
+ kbase_dev_list_put(kbdev_list);
+ return ret;
+}
+
+/*
+ * File operations related to debugfs entry for gpu_memory
+ */
+STATIC int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_gpu_memory_seq_show, NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+ .open = kbasep_gpu_memory_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*
+ * Initialize debugfs entry for gpu_memory
+ */
+mali_error kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+ kbdev->gpu_memory_dentry = debugfs_create_file("gpu_memory", \
+ S_IRUGO, \
+ kbdev->mali_debugfs_directory, \
+ NULL, \
+ &kbasep_gpu_memory_debugfs_fops);
+ if (IS_ERR(kbdev->gpu_memory_dentry))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ return MALI_ERROR_NONE;
+}
+
+/*
+ * Terminate debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_term(struct kbase_device *kbdev)
+{
+ debugfs_remove(kbdev->gpu_memory_dentry);
+}
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+mali_error kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+ return MALI_ERROR_NONE;
+}
+void kbasep_gpu_memory_debugfs_term(struct kbase_device *kbdev)
+{
+}
+#endif
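
From user space, the entry created above can simply be read like a regular file. The sketch below assumes debugfs is mounted at /sys/kernel/debug and that the driver's debugfs directory is named "mali"; the actual directory (kbdev->mali_debugfs_directory) is created elsewhere in the driver, so the path is an assumption for illustration only.

/* User-space sketch: dump the gpu_memory debugfs entry. The path is an
 * assumption; the real directory name is set up elsewhere in the driver. */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/debug/mali/gpu_memory", "r");

	if (!f) {
		perror("gpu_memory");
		return 1;
	}
	/* One "<devname> <used pages>" line per device, followed by one
	 * "kctx-<ptr> <used pages>" line per open context. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}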
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100755
index 000000000000..7ceb380a2653
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_H
+#define _KBASE_GPU_MEMORY_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+mali_error kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_term(struct kbase_device *kbdev);
+
+#endif /*_KBASE_GPU_MEMORY_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100755
index 000000000000..2d2ff64b90cf
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,335 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.c
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+
+/**
+ * @brief Extracts bits from a 32-bit bitfield.
+ * @hideinitializer
+ *
+ * @param[in] value The value from which to extract bits.
+ * @param[in] offset The first bit to extract (0 being the LSB).
+ * @param[in] size The number of bits to extract.
+ * @return Bits [@a offset, @a offset + @a size) from @a value.
+ *
+ * @pre offset + size <= 32.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+mali_error kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+ kbase_gpuprops_clock_speed_function get_gpu_speed_mhz;
+ u32 gpu_speed_mhz;
+ int rc = 1;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+ /* Current GPU speed is requested from the system integrator via the KBASE_CONFIG_ATTR_GPU_SPEED_FUNC function.
+ * If that function fails, or the function is not provided by the system integrator, we report the maximum
+ * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+ */
+ get_gpu_speed_mhz = (kbase_gpuprops_clock_speed_function) kbasep_get_config_value(kctx->kbdev, kctx->kbdev->config_attributes, KBASE_CONFIG_ATTR_GPU_SPEED_FUNC);
+ if (get_gpu_speed_mhz != NULL) {
+ rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+ /* Issue a warning message when the reported GPU speed falls outside the min/max range */
+ if (rc == 0) {
+ u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+ if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min || gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+ dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu kHz, min %lu kHz, max %lu kHz)\n", (unsigned long)gpu_speed_khz, (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min, (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+ }
+#endif /* CONFIG_MALI_DEBUG */
+ }
+ if (rc != 0)
+ gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+ kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+ memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+ return MALI_ERROR_NONE;
+}
+
+STATIC void kbase_gpuprops_dump_registers(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump)
+{
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != regdump);
+
+ /* Fill regdump with the content of the relevant registers */
+ regdump->gpu_id = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
+
+ regdump->l2_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES));
+ regdump->l3_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_FEATURES));
+ regdump->tiler_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES));
+ regdump->mem_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(MEM_FEATURES));
+ regdump->mmu_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(MMU_FEATURES));
+ regdump->as_present = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(AS_PRESENT));
+ regdump->js_present = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(JS_PRESENT));
+
+ for (i = 0; i < MIDG_MAX_JOB_SLOTS; i++)
+ regdump->js_features[i] = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(JS_FEATURES_REG(i)));
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ regdump->texture_features[i] = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)));
+
+ regdump->thread_max_threads = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_THREADS));
+ regdump->thread_max_workgroup_size = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE));
+ regdump->thread_max_barrier_size = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE));
+ regdump->thread_features = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(THREAD_FEATURES));
+
+ regdump->shader_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_LO));
+ regdump->shader_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(SHADER_PRESENT_HI));
+
+ regdump->tiler_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_LO));
+ regdump->tiler_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(TILER_PRESENT_HI));
+
+ regdump->l2_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_LO));
+ regdump->l2_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L2_PRESENT_HI));
+
+ regdump->l3_present_lo = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_LO));
+ regdump->l3_present_hi = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(L3_PRESENT_HI));
+}
+
+STATIC void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+ struct mali_base_gpu_coherent_group *current_group;
+ u64 group_present;
+ u64 group_mask;
+ u64 first_set, first_set_prev;
+ u32 num_groups = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != props);
+
+ props->coherency_info.coherency = props->raw_props.mem_features;
+ props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+ if (props->coherency_info.coherency & GROUPS_L3_COHERENT) {
+ /* Group is l3 coherent */
+ group_present = props->raw_props.l3_present;
+ } else if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+ /* Group is l2 coherent */
+ group_present = props->raw_props.l2_present;
+ } else {
+ /* Group is l1 coherent */
+ group_present = props->raw_props.shader_present;
+ }
+
+ /*
+ * The coherent group mask can be computed from the l2/l3 present
+ * register.
+ *
+ * For the coherent group n:
+ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+ * where first_set is group_present with only its nth set-bit kept
+ * (i.e. the position from where a new group starts).
+ *
+ * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+ * The first mask is:
+ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+ * = (0x0..010 - 1) & ~(0x0..01 - 1)
+ * = 0x0..00f
+ * The second mask is:
+ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+ * = (0x0..100 - 1) & ~(0x0..010 - 1)
+ * = 0x0..0f0
+ * And so on until all the bits from group_present have been cleared
+ * (i.e. there is no group left).
+ */
+
+ current_group = props->coherency_info.group;
+ first_set = group_present & ~(group_present - 1);
+
+ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+ group_present -= first_set; /* Clear the current group bit */
+ first_set_prev = first_set;
+
+ first_set = group_present & ~(group_present - 1);
+ group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+ /* Populate the coherent_group structure for each group */
+ current_group->core_mask = group_mask & props->raw_props.shader_present;
+ current_group->num_cores = hweight64(current_group->core_mask);
+
+ num_groups++;
+ current_group++;
+ }
+
+ if (group_present != 0)
+ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+ props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * @brief Get the GPU configuration
+ *
+ * Fill the base_gpu_props structure with values from the GPU configuration registers.
+ * Only the raw properties are filled in by this function.
+ *
+ * @param gpu_props The base_gpu_props structure
+ * @param kbdev The struct kbase_device structure for the device
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+ struct kbase_gpuprops_regdump regdump;
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+ /* Dump relevant registers */
+ kbase_gpuprops_dump_registers(kbdev, &regdump);
+ gpu_props->raw_props.gpu_id = regdump.gpu_id;
+ gpu_props->raw_props.tiler_features = regdump.tiler_features;
+ gpu_props->raw_props.mem_features = regdump.mem_features;
+ gpu_props->raw_props.mmu_features = regdump.mmu_features;
+ gpu_props->raw_props.l2_features = regdump.l2_features;
+ gpu_props->raw_props.l3_features = regdump.l3_features;
+
+ gpu_props->raw_props.as_present = regdump.as_present;
+ gpu_props->raw_props.js_present = regdump.js_present;
+ gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+ gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+ gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+ gpu_props->raw_props.l3_present = ((u64) regdump.l3_present_hi << 32) + regdump.l3_present_lo;
+
+ for (i = 0; i < MIDG_MAX_JOB_SLOTS; i++)
+ gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+ gpu_props->raw_props.thread_features = regdump.thread_features;
+}
+
+/**
+ * @brief Calculate the derived properties
+ *
+ * Fill the base_gpu_props structure with values derived from the GPU configuration registers
+ *
+ * @param gpu_props The base_gpu_props structure
+ * @param kbdev The struct kbase_device structure for the device
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+ int i;
+
+ /* Populate the base_gpu_props structure */
+ gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+ gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+ gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+ gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+ gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+ gpu_props->l2_props.num_l2_slices = 1;
+ if (gpu_props->core_props.product_id == GPU_ID_PI_T76X) {
+ gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+ }
+
+ gpu_props->l3_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l3_features, 0U, 8);
+ gpu_props->l3_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l3_features, 16U, 8);
+
+ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+ if (gpu_props->raw_props.thread_max_threads == 0)
+ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+ else
+ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+ if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+ else
+ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+ if (gpu_props->raw_props.thread_max_barrier_size == 0)
+ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+ else
+ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+ gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+ gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+ gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+ gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+ /* If values are not specified, then use defaults */
+ if (gpu_props->thread_props.max_registers == 0) {
+ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+ }
+ /* Initialize the coherent_group structure for each group */
+ kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+ kbase_gpu_props *gpu_props;
+ struct midg_raw_gpu_props *raw;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ gpu_props = &kbdev->gpu_props;
+ raw = &gpu_props->props.raw_props;
+
+ /* Initialize the base_gpu_props structure from the hardware */
+ kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+ /* Populate the derived properties */
+ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+ /* Populate kbase-only fields */
+ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+ gpu_props->l3_props.associativity = KBASE_UBFX32(raw->l3_features, 8U, 8);
+ gpu_props->l3_props.external_bus_width = KBASE_UBFX32(raw->l3_features, 24U, 8);
+
+ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+ gpu_props->mem.supergroup = KBASE_UBFX32(raw->mem_features, 1U, 1);
+
+ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+ gpu_props->num_cores = hweight64(raw->shader_present);
+ gpu_props->num_core_groups = hweight64(raw->l2_present);
+ gpu_props->num_supergroups = hweight64(raw->l3_present);
+ gpu_props->num_address_spaces = hweight32(raw->as_present);
+ gpu_props->num_job_slots = hweight32(raw->js_present);
+}
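
The group-mask derivation described in the comment inside kbase_gpuprops_construct_coherent_groups() can be checked in isolation. The stand-alone sketch below mirrors that loop for the example l2_present = 0x1111 used in the comment, with a made-up shader_present of 0xffff; both values are illustrative only.

/* Stand-alone illustration of the coherent group mask computation above.
 * l2_present and shader_present are example values, not real hardware. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t shader_present = 0xffff;	/* example: 16 shader cores */
	uint64_t group_present = 0x1111;	/* example: 4 L2-coherent groups */
	uint64_t first_set = group_present & ~(group_present - 1);

	while (group_present != 0) {
		uint64_t first_set_prev, group_mask;

		group_present -= first_set;	/* clear the current group bit */
		first_set_prev = first_set;
		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		/* prints 0xf, 0xf0, 0xf00, 0xf000 for the example values */
		printf("core_mask 0x%llx\n",
		       (unsigned long long)(group_mask & shader_present));
	}
	return 0;
}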
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100755
index 000000000000..fe2676cd9b99
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to user side through a UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx The struct kbase_context structure
+ * @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return MALI_ERROR_NONE on success. Any other value indicates failure.
+ */
+mali_error kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif /* _KBASE_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100755
index 000000000000..6ae5a20d5d4e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,96 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ 123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+typedef struct kbase_gpuprops_regdump {
+ u32 gpu_id;
+ u32 l2_features;
+ u32 l3_features;
+ u32 tiler_features;
+ u32 mem_features;
+ u32 mmu_features;
+ u32 as_present;
+ u32 js_present;
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+ u32 js_features[MIDG_MAX_JOB_SLOTS];
+ u32 shader_present_lo;
+ u32 shader_present_hi;
+ u32 tiler_present_lo;
+ u32 tiler_present_hi;
+ u32 l2_present_lo;
+ u32 l2_present_hi;
+ u32 l3_present_lo;
+ u32 l3_present_hi;
+} kbase_gpuprops_regdump;
+
+typedef struct kbase_gpu_cache_props {
+ u8 associativity;
+ u8 external_bus_width;
+} kbase_gpu_cache_props;
+
+typedef struct kbase_gpu_mem_props {
+ u8 core_group;
+ u8 supergroup;
+} kbase_gpu_mem_props;
+
+typedef struct kbase_gpu_mmu_props {
+ u8 va_bits;
+ u8 pa_bits;
+} kbase_gpu_mmu_props;
+
+typedef struct mali_kbase_gpu_props {
+ /* kernel-only properties */
+ u8 num_cores;
+ u8 num_core_groups;
+ u8 num_supergroups;
+ u8 num_address_spaces;
+ u8 num_job_slots;
+
+ struct kbase_gpu_cache_props l2_props;
+ struct kbase_gpu_cache_props l3_props;
+
+ struct kbase_gpu_mem_props mem;
+ struct kbase_gpu_mmu_props mmu;
+
+ /**
+ * Implementation-specific IRQ throttle value (us); it should be adjusted during integration.
+ */
+ int irq_throttle_time_us;
+
+ /* Properties shared with userspace */
+ base_gpu_props props;
+} kbase_gpu_props;
+
+#endif /* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100755
index 000000000000..6b431cc60f2e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,191 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-around helpers
+ */
+
+#include <mali_base_hwconfig.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+ const enum base_hw_feature *features;
+ u32 gpu_id;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+ gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ switch (gpu_id) {
+ case GPU_ID_PI_T76X:
+ features = base_hw_features_t76x;
+ break;
+#ifdef MALI_INCLUDE_TFRX
+ case GPU_ID_PI_TFRX:
+ /* Fall through */
+#endif /* MALI_INCLUDE_TFRX */
+ case GPU_ID_PI_T86X:
+ features = base_hw_features_tFxx;
+ break;
+ case GPU_ID_PI_T72X:
+ features = base_hw_features_t72x;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+ features = base_hw_features_t62x;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+ features = base_hw_features_t60x;
+ break;
+ default:
+ features = base_hw_features_generic;
+ break;
+ }
+
+ for (; *features != BASE_HW_FEATURE_END; features++)
+ set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+mali_error kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+ const enum base_hw_issue *issues;
+ u32 gpu_id;
+ u32 impl_tech;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+ if (impl_tech != IMPLEMENTATION_MODEL) {
+ switch (gpu_id) {
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+ issues = base_hw_issues_t60x_r0p0_15dev0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+ issues = base_hw_issues_t60x_r0p0_eac;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+ issues = base_hw_issues_t60x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+ issues = base_hw_issues_t62x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+ issues = base_hw_issues_t62x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+ issues = base_hw_issues_t62x_r1p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 0):
+ issues = base_hw_issues_t76x_r0p0_beta;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+ issues = base_hw_issues_t76x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+ issues = base_hw_issues_t76x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+ issues = base_hw_issues_t76x_r0p1_50rel0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+ issues = base_hw_issues_t76x_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+ issues = base_hw_issues_t76x_r0p3;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+ issues = base_hw_issues_t76x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+ issues = base_hw_issues_t72x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+ issues = base_hw_issues_t72x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+ issues = base_hw_issues_t72x_r1p1;
+ break;
+#ifdef MALI_INCLUDE_TFRX
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 0, 1):
+ issues = base_hw_issues_tFRx_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+ issues = base_hw_issues_tFRx_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+ issues = base_hw_issues_tFRx_r0p2;
+ break;
+#endif /* MALI_INCLUDE_TFRX */
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 0, 1):
+ issues = base_hw_issues_t86x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+ issues = base_hw_issues_t86x_r0p2;
+ break;
+ default:
+ dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ } else {
+ /* Software model */
+ switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) {
+ case GPU_ID_PI_T60X:
+ issues = base_hw_issues_model_t60x;
+ break;
+ case GPU_ID_PI_T62X:
+ issues = base_hw_issues_model_t62x;
+ break;
+ case GPU_ID_PI_T72X:
+ issues = base_hw_issues_model_t72x;
+ break;
+ case GPU_ID_PI_T76X:
+ issues = base_hw_issues_model_t76x;
+ break;
+#ifdef MALI_INCLUDE_TFRX
+ case GPU_ID_PI_TFRX:
+ issues = base_hw_issues_model_tFRx;
+ break;
+#endif /* MALI_INCLUDE_TFRX */
+ case GPU_ID_PI_T86X:
+ issues = base_hw_issues_model_t86x;
+ break;
+ default:
+ dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ }
+
+ dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+ for (; *issues != BASE_HW_ISSUE_END; issues++)
+ set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+ return MALI_ERROR_NONE;
+}
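
The GPU_ID matching above relies on the register field layout that mali_kbase_gpuprops.c also decodes with KBASE_UBFX32(): status in bits 0-3, minor revision in bits 4-11, major revision in bits 12-15 and product ID in bits 16-31. A stand-alone decode of a made-up register value might look as follows; the sample value is illustrative only.

/* Stand-alone decode of the GPU_ID fields used by the switch above.
 * The register value is a made-up example. */
#include <stdio.h>
#include <stdint.h>

#define UBFX32(value, offset, size) \
	(((uint32_t)(value) >> (uint32_t)(offset)) & (uint32_t)((1ULL << (uint32_t)(size)) - 1))

int main(void)
{
	uint32_t gpu_id = 0x08601000;	/* example raw GPU_ID value */

	printf("GPU identified as 0x%04x r%up%u status %u\n",
	       (unsigned)UBFX32(gpu_id, 16, 16),	/* product ID */
	       (unsigned)UBFX32(gpu_id, 12, 4),		/* major revision */
	       (unsigned)UBFX32(gpu_id, 4, 8),		/* minor revision */
	       (unsigned)UBFX32(gpu_id, 0, 4));		/* version status */
	return 0;
}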
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100755
index 000000000000..1074290702ae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-around helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+ test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+ test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+mali_error kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c
new file mode 100755
index 000000000000..a5cdba6fa6bd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c
@@ -0,0 +1,619 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_instr.c
+ * Base kernel instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+/**
+ * @brief Issue Cache Clean & Invalidate command to hardware
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ unsigned long pm_flags;
+ u32 irq_mask;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Wait for any reset to complete */
+ while (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.cache_clean_wait,
+ kbdev->hwcnt.state != KBASE_INSTR_STATE_RESETTING);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+ /* Enable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+	/* Clean and invalidate the caches so we're sure the MMU tables for the dump buffer are valid */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANING;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
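+
+/* Cache-clean state flow: a clean is requested by setting the state to
+ * KBASE_INSTR_STATE_REQUEST_CLEAN and queueing cache_clean_work; the worker
+ * calls the function above, which moves the state to
+ * KBASE_INSTR_STATE_CLEANING; the CLEAN_CACHES_COMPLETED interrupt
+ * (kbase_clean_caches_done()) advances it to KBASE_INSTR_STATE_CLEANED; and
+ * kbasep_cache_clean_worker() finally marks it KBASE_INSTR_STATE_IDLE and
+ * wakes any waiter.
+ */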
+
+STATIC mali_error kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup)
+{
+ unsigned long flags, pm_flags;
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ struct kbasep_js_device_data *js_devdata;
+ u32 irq_mask;
+ int ret;
+ u64 shader_cores_needed;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != setup);
+ KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
+
+ shader_cores_needed = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+ js_devdata = &kbdev->js_data;
+
+	/* Reject a missing dump buffer or one that is not 2048-byte aligned */
+ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+ goto out_err;
+
+ /* Override core availability policy to ensure all cores are available */
+ kbase_pm_ca_instr_enable(kbdev);
+
+ /* Mark the context as active so the GPU is kept turned on */
+ /* A suspend won't happen here, because we're in a syscall from a userspace
+ * thread. */
+ kbase_pm_context_active(kbdev);
+
+ /* Request the cores early on synchronously - we'll release them on any errors
+ * (e.g. instrumentation already active) */
+ kbase_pm_request_cores_sync(kbdev, MALI_TRUE, shader_cores_needed);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ if (kbdev->hwcnt.state != KBASE_INSTR_STATE_DISABLED) {
+ /* Instrumentation is already enabled */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out_unrequest_cores;
+ }
+
+ /* Enable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* In use, this context is the owner */
+ kbdev->hwcnt.kctx = kctx;
+ /* Remember the dump address so we can reprogram it later */
+ kbdev->hwcnt.addr = setup->dump_buffer;
+ /* Remember all the settings for suspend/resume */
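+	/* (skip the copy when re-enabling with the saved state itself, as
+	 * happens on resume) */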
+ if (&kbdev->hwcnt.suspended_state != setup)
+ memcpy(&kbdev->hwcnt.suspended_state, setup, sizeof(kbdev->hwcnt.suspended_state));
+
+ /* Request the clean */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ kbdev->hwcnt.triggered = 0;
+	/* Clean and invalidate the caches so we're sure the MMU tables for the dump buffer are valid */
+ ret = queue_work(kbdev->hwcnt.cache_clean_wq, &kbdev->hwcnt.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Wait for cacheclean to complete */
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE);
+
+ /* Schedule the context in */
+ kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+
+ /* Configure */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), setup->dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), setup->dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), setup->jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), setup->shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), setup->l3_cache_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), setup->mmu_l2_bm, kctx);
+ /* Due to PRLAM-8186 we need to disable the Tiler before we enable the HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), setup->tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), setup->tiler_bm, kctx);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.triggered = 1;
+ wake_up(&kbdev->hwcnt.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ err = MALI_ERROR_NONE;
+
+ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+ return err;
+ out_unrequest_cores:
+ kbase_pm_unrequest_cores(kbdev, MALI_TRUE, shader_cores_needed);
+ kbase_pm_context_idle(kbdev);
+ out_err:
+ return err;
+}
+
+/**
+ * @brief Enable HW counters collection
+ *
+ * Note: will wait for a cache clean to complete
+ */
+mali_error kbase_instr_hwcnt_enable(struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup)
+{
+ struct kbase_device *kbdev;
+ mali_bool access_allowed;
+
+ kbdev = kctx->kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ /* Determine if the calling task has access to this capability */
+ access_allowed = kbase_security_has_capability(kctx, KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, KBASE_SEC_FLAG_NOAUDIT);
+ if (MALI_FALSE == access_allowed)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ return kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable)
+
+/**
+ * @brief Disable HW counters collection
+ *
+ * Note: might sleep, waiting for an ongoing dump to complete
+ */
+mali_error kbase_instr_hwcnt_disable(struct kbase_context *kctx)
+{
+ unsigned long flags, pm_flags;
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ u32 irq_mask;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ while (1) {
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_DISABLED) {
+ /* Instrumentation is not enabled */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out;
+ }
+
+ if (kbdev->hwcnt.kctx != kctx) {
+ /* Instrumentation has been setup for another context */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out;
+ }
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE)
+ break;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ongoing dump/setup - wait for its completion */
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+ }
+
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_DISABLED;
+ kbdev->hwcnt.triggered = 0;
+
+ /* Disable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* Disable the counters */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+ kbdev->hwcnt.kctx = NULL;
+ kbdev->hwcnt.addr = 0ULL;
+
+ kbase_pm_ca_instr_disable(kbdev);
+
+ kbase_pm_unrequest_cores(kbdev, MALI_TRUE, kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Release the context. This had its own Power Manager Active reference */
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+ /* Also release our Power Manager Active reference */
+ kbase_pm_context_idle(kbdev);
+
+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
+
+ err = MALI_ERROR_NONE;
+
+ out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable)
+
+/**
+ * @brief Configure HW counters collection
+ */
+mali_error kbase_instr_hwcnt_setup(struct kbase_context *kctx, struct kbase_uk_hwcnt_setup *setup)
+{
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ if (NULL == setup) {
+ /* Bad parameter - abort */
+ goto out;
+ }
+
+ if (setup->dump_buffer != 0ULL) {
+ /* Enable HW counters */
+ err = kbase_instr_hwcnt_enable(kctx, setup);
+ } else {
+ /* Disable HW counters */
+ err = kbase_instr_hwcnt_disable(kctx);
+ }
+
+ out:
+ return err;
+}
+
+/**
+ * @brief Issue Dump command to hardware
+ *
+ * Notes:
+ * - does not sleep
+ */
+mali_error kbase_instr_hwcnt_dump_irq(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.kctx != kctx) {
+ /* The instrumentation has been setup for another context */
+ goto unlock;
+ }
+
+ if (kbdev->hwcnt.state != KBASE_INSTR_STATE_IDLE) {
+ /* HW counters are disabled or another dump is ongoing, or we're resetting */
+ goto unlock;
+ }
+
+ kbdev->hwcnt.triggered = 0;
+
+ /* Mark that we're dumping - the PF handler can signal that we faulted */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_DUMPING;
+
+ /* Reconfigure the dump address */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kbdev->hwcnt.addr >> 32, NULL);
+
+ /* Start dumping */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, kbdev->hwcnt.addr, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+	dev_dbg(kbdev->dev, "HW counters dump requested for context %p", kctx);
+
+ err = MALI_ERROR_NONE;
+
+ unlock:
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_irq)
+
+/**
+ * @brief Tell whether the HW counters dump has completed
+ *
+ * Notes:
+ * - does not sleep
+ * - returns MALI_TRUE once the dump has completed (or faulted) and
+ *   MALI_FALSE while it is still in progress
+ * - on completion, *success is set to MALI_TRUE if the dump succeeded or
+ *   MALI_FALSE on failure
+ */
+mali_bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, mali_bool * const success)
+{
+ unsigned long flags;
+ mali_bool complete = MALI_FALSE;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != success);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE) {
+ *success = MALI_TRUE;
+ complete = MALI_TRUE;
+ } else if (kbdev->hwcnt.state == KBASE_INSTR_STATE_FAULT) {
+ *success = MALI_FALSE;
+ complete = MALI_TRUE;
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete)
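+
+/*
+ * A non-sleeping caller can instead drive a dump by issuing it with
+ * kbase_instr_hwcnt_dump_irq() and then polling
+ * kbase_instr_hwcnt_dump_complete() until it returns MALI_TRUE;
+ * kbase_instr_hwcnt_dump() below is the blocking equivalent of that sequence.
+ */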
+
+/**
+ * @brief Issue Dump command to hardware and wait for completion
+ */
+mali_error kbase_instr_hwcnt_dump(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ err = kbase_instr_hwcnt_dump_irq(kctx);
+ if (MALI_ERROR_NONE != err) {
+ /* Can't dump HW counters */
+ goto out;
+ }
+
+ /* Wait for dump & cacheclean to complete */
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_FAULT) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
+ } else {
+ /* Dump done */
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_IDLE);
+ err = MALI_ERROR_NONE;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump)
+
+/**
+ * @brief Clear the HW counters
+ */
+mali_error kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ /* Check it's the context previously set up and we're not already dumping */
+ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.state != KBASE_INSTR_STATE_IDLE)
+ goto out;
+
+ /* Clear the counters */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+ err = MALI_ERROR_NONE;
+
+ out:
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear)
+
+/**
+ * Workqueue for handling cache cleaning
+ */
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+ struct kbase_device *kbdev;
+ unsigned long flags;
+
+ kbdev = container_of(data, struct kbase_device, hwcnt.cache_clean_work);
+
+ mutex_lock(&kbdev->cacheclean_lock);
+ kbasep_instr_hwcnt_cacheclean(kbdev);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Wait for our condition, and any reset to complete */
+ while (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING ||
+ kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANING) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.cache_clean_wait,
+ (kbdev->hwcnt.state != KBASE_INSTR_STATE_RESETTING
+ && kbdev->hwcnt.state != KBASE_INSTR_STATE_CLEANING));
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANED);
+
+ /* All finished and idle */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.triggered = 1;
+ wake_up(&kbdev->hwcnt.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+/**
+ * @brief Dump complete interrupt received
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_FAULT) {
+ kbdev->hwcnt.triggered = 1;
+ wake_up(&kbdev->hwcnt.wait);
+ } else if (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING) {
+ int ret;
+ /* Always clean and invalidate the cache after a successful dump */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ ret = queue_work(kbdev->hwcnt.cache_clean_wq, &kbdev->hwcnt.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+ }
+	/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, we're in a reset,
+ * and the instrumentation state hasn't been restored yet -
+ * kbasep_reset_timeout_worker() will do the rest of the work */
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+/**
+ * @brief Cache clean interrupt received
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+ u32 irq_mask;
+
+ if (kbdev->hwcnt.state != KBASE_INSTR_STATE_DISABLED) {
+ unsigned long flags;
+ unsigned long pm_flags;
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Disable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* Wakeup... */
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_CLEANING) {
+ /* Only wake if we weren't resetting */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.cache_clean_wait);
+ }
+		/* NOTE: In the state KBASE_INSTR_STATE_RESETTING, we're in a reset,
+ * and the instrumentation state hasn't been restored yet -
+ * kbasep_reset_timeout_worker() will do the rest of the work */
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ }
+}
+
+
+/* Disable instrumentation and wait for any existing dump to complete.
+ * It's assumed that there's only one privileged context.
+ * Safe to do this without the lock when doing an OS suspend, because the
+ * state only changes in response to user-space IOCTLs. */
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx);
+
+ kctx = kbdev->hwcnt.kctx;
+ kbdev->hwcnt.suspended_kctx = kctx;
+
+ /* Relevant state was saved into hwcnt.suspended_state when enabling the
+ * counters */
+
+ if (kctx) {
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED);
+ kbase_instr_hwcnt_disable(kctx);
+ }
+}
+
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ kctx = kbdev->hwcnt.suspended_kctx;
+ kbdev->hwcnt.suspended_kctx = NULL;
+
+ if (kctx) {
+ mali_error err;
+ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &kbdev->hwcnt.suspended_state);
+ WARN(err != MALI_ERROR_NONE,
+ "Failed to restore instrumented hardware counters on resume\n");
+ }
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100755
index 000000000000..fbd1ca7345f7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1727 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#include <mali_kbase_js_affinity.h>
+#include <mali_kbase_10969_workaround.h>
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif /* CONFIG_UMP */
+#include <linux/random.h>
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/*
+ * This is the kernel side of the API. The only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ * completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ return compat_ptr(p->compat_value);
+ else
+#endif
+ return p->value;
+}
+
+/* Runs an atom, either by handing it to the JS or, in the case of soft-jobs,
+ * by running it immediately
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and,
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED, must call jd_done_nolock()
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+ /* Dependency only atom */
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ return 0;
+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ /* Soft-job */
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (!kbase_replay_process(katom))
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ } else if (kbase_process_soft_job(katom) == 0) {
+ kbase_finish_soft_job(katom);
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ } else {
+ /* The job has not completed */
+ list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+ }
+ return 0;
+ }
+
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ /* Queue an action about whether we should try scheduling a context */
+ return kbasep_js_add_job(kctx, katom);
+}
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(katom);
+
+ kctx = katom->kctx;
+
+ list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+ list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_jd_context *ctx;
+ struct kbase_device *kbdev;
+
+ katom = (struct kbase_jd_atom *)callback_parameter;
+ KBASE_DEBUG_ASSERT(katom);
+ ctx = &katom->kctx->jctx;
+ kbdev = katom->kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ mutex_lock(&ctx->lock);
+
+ /* KDS resource has already been satisfied (e.g. due to zapping) */
+ if (katom->kds_dep_satisfied)
+ goto out;
+
+ /* This atom's KDS dependency has now been met */
+ katom->kds_dep_satisfied = MALI_TRUE;
+
+ /* Check whether the atom's other dependencies were already met */
+ if (!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+ !kbase_jd_katom_dep_atom(&katom->dep[1])) {
+ /* katom dep complete, attempt to run it */
+ mali_bool resched = MALI_FALSE;
+
+ resched = jd_run_atom(katom);
+
+ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+ /* The atom has already finished */
+ resched |= jd_done_nolock(katom);
+ }
+
+ if (resched)
+ kbasep_js_try_schedule_head_ctx(kbdev);
+ }
+ out:
+ mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+
+	/* Prevent jd_done_nolock from being called twice on an atom when
+ * there is a race between job completion and cancellation */
+
+ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+ /* Wait was cancelled - zap the atom */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ if (jd_done_nolock(katom))
+ kbasep_js_try_schedule_head_ctx(katom->kctx->kbdev);
+ }
+}
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static mali_error kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct sg_table *sgt;
+ struct scatterlist *s;
+ int i;
+ phys_addr_t *pa;
+ mali_error err;
+ size_t count = 0;
+
+ KBASE_DEBUG_ASSERT(reg->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+ KBASE_DEBUG_ASSERT(NULL == reg->alloc->imported.umm.sgt);
+ sgt = dma_buf_map_attachment(reg->alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL);
+
+ if (IS_ERR_OR_NULL(sgt))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ /* save for later */
+ reg->alloc->imported.umm.sgt = sgt;
+
+ pa = kbase_get_phy_pages(reg);
+ KBASE_DEBUG_ASSERT(pa);
+
+ for_each_sg(sgt->sgl, s, sgt->nents, i) {
+ int j;
+ size_t pages = PFN_UP(sg_dma_len(s));
+
+ WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+ "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+ sg_dma_len(s));
+
+ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+ (unsigned long long) sg_dma_address(s));
+
+ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
+ *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+ WARN_ONCE(j < pages,
+ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+ reg->alloc->imported.umm.dma_buf->size);
+ }
+
+ if (WARN_ONCE(count < reg->nr_pages,
+ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+ reg->alloc->imported.umm.dma_buf->size)) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out;
+ }
+
+ /* Update nents as we now have pages to map */
+ reg->alloc->nents = count;
+
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+ if (MALI_ERROR_NONE != err) {
+ dma_buf_unmap_attachment(reg->alloc->imported.umm.dma_attachment, reg->alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+ reg->alloc->imported.umm.sgt = NULL;
+ }
+
+ return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+ KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+ alloc->imported.umm.sgt = NULL;
+ alloc->nents = 0;
+}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+ if (katom->kds_rset) {
+ struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+ /*
+ * As the atom is no longer waiting, remove it from
+ * the waiting list.
+ */
+
+ mutex_lock(&jctx->lock);
+ kbase_jd_kds_waiters_remove(katom);
+ mutex_unlock(&jctx->lock);
+
+ /* Release the kds resource or cancel if zapping */
+ kds_resource_set_release_sync(&katom->kds_rset);
+ }
+#endif /* CONFIG_KDS */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+ /* Prevent the KDS resource from triggering the atom in case of zapping */
+ if (katom->kds_rset)
+ katom->kds_dep_satisfied = MALI_TRUE;
+#endif /* CONFIG_KDS */
+
+ kbase_gpu_vm_lock(katom->kctx);
+ /* only roll back if extres is non-NULL */
+ if (katom->extres) {
+ u32 res_no;
+
+ res_no = katom->nr_extres;
+ while (res_no-- > 0) {
+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ alloc->imported.umm.current_mapping_usage_count--;
+
+ if (0 == alloc->imported.umm.current_mapping_usage_count) {
+ struct kbase_va_region *reg;
+
+ reg = kbase_region_tracker_find_region_base_address(
+ katom->kctx,
+ katom->extres[res_no].gpu_address);
+
+ if (reg && reg->alloc == alloc)
+ kbase_mmu_teardown_pages(
+ katom->kctx,
+ reg->start_pfn,
+ kbase_reg_current_backed_size(reg));
+
+ kbase_jd_umm_unmap(katom->kctx, alloc);
+ }
+ }
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ kbase_mem_phy_alloc_put(katom->extres[res_no].alloc);
+ }
+ kfree(katom->extres);
+ katom->extres = NULL;
+ }
+ kbase_gpu_vm_unlock(katom->kctx);
+}
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, mali_bool exclusive)
+{
+ u32 i;
+
+ for (i = 0; i < *kds_res_count; i++) {
+ /* Duplicate resource, ignore */
+ if (kds_resources[i] == kds_res)
+ return;
+ }
+
+ kds_resources[*kds_res_count] = kds_res;
+ if (exclusive)
+ set_bit(*kds_res_count, kds_access_bitmap);
+ (*kds_res_count)++;
+}
+#endif
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static mali_error kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+ mali_error err_ret_val = MALI_ERROR_FUNCTION_FAILED;
+ u32 res_no;
+#ifdef CONFIG_KDS
+ u32 kds_res_count = 0;
+ struct kds_resource **kds_resources = NULL;
+ unsigned long *kds_access_bitmap = NULL;
+#endif /* CONFIG_KDS */
+ struct base_external_resource *input_extres;
+
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+ /* no resources encoded, early out */
+ if (!katom->nr_extres)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+ if (NULL == katom->extres) {
+ err_ret_val = MALI_ERROR_OUT_OF_MEMORY;
+ goto early_err_out;
+ }
+
+ /* copy user buffer to the end of our real buffer.
+ * Make sure the struct sizes haven't changed in a way
+ * we don't support */
+ BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+ input_extres = (struct base_external_resource *)
+ (((unsigned char *)katom->extres) +
+ (sizeof(*katom->extres) - sizeof(*input_extres)) *
+ katom->nr_extres);
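+	/* (the input entries are smaller than the output entries, so placing
+	 * them at the tail lets the array be expanded in place, front to back;
+	 * a destination element can only overlap its own input entry, which is
+	 * why the loop below reads everything from an entry before writing it) */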
+
+ if (copy_from_user(input_extres,
+ get_compat_pointer(katom->kctx, &user_atom->extres_list),
+ sizeof(*input_extres) * katom->nr_extres) != 0) {
+ err_ret_val = MALI_ERROR_FUNCTION_FAILED;
+ goto early_err_out;
+ }
+#ifdef CONFIG_KDS
+ /* assume we have to wait for all */
+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+ kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+ if (NULL == kds_resources) {
+ err_ret_val = MALI_ERROR_OUT_OF_MEMORY;
+ goto early_err_out;
+ }
+
+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+ kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL);
+
+ if (NULL == kds_access_bitmap) {
+ err_ret_val = MALI_ERROR_OUT_OF_MEMORY;
+ goto early_err_out;
+ }
+#endif /* CONFIG_KDS */
+
+ /* need to keep the GPU VM locked while we set up UMM buffers */
+ kbase_gpu_vm_lock(katom->kctx);
+ for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+ struct base_external_resource *res;
+ struct kbase_va_region *reg;
+
+ res = &input_extres[res_no];
+ reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+ res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+ /* did we find a matching region object? */
+ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+ /* roll back */
+ goto failed_loop;
+ }
+
+ /* decide what needs to happen for this resource */
+ switch (reg->alloc->type) {
+ case BASE_TMEM_IMPORT_TYPE_UMP:
+ {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+ struct kds_resource *kds_res;
+
+ kds_res = ump_dd_kds_resource_get(reg->alloc->imported.ump_handle);
+ if (kds_res)
+ add_kds_resource(kds_res, kds_resources, &kds_res_count,
+ kds_access_bitmap,
+ res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
+#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+ break;
+ }
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case BASE_TMEM_IMPORT_TYPE_UMM:
+ {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct kds_resource *kds_res;
+
+ kds_res = get_dma_buf_kds_resource(reg->alloc->imported.umm.dma_buf);
+ if (kds_res)
+ add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
+#endif
+ reg->alloc->imported.umm.current_mapping_usage_count++;
+ if (1 == reg->alloc->imported.umm.current_mapping_usage_count) {
+ /* use a local variable to not pollute err_ret_val
+ * with a potential success value as some other gotos depend
+ * on the default error code stored in err_ret_val */
+ mali_error tmp;
+ tmp = kbase_jd_umm_map(katom->kctx, reg);
+ if (MALI_ERROR_NONE != tmp) {
+ /* failed to map this buffer, roll back */
+ err_ret_val = tmp;
+ reg->alloc->imported.umm.current_mapping_usage_count--;
+ goto failed_loop;
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ goto failed_loop;
+ }
+
+		/* finish by updating our array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourselves, so no writes
+		 * until the last read for an element.
+		 */
+ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+ katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->alloc);
+ }
+ /* successfully parsed the extres array */
+ /* drop the vm lock before we call into kds */
+ kbase_gpu_vm_unlock(katom->kctx);
+
+#ifdef CONFIG_KDS
+ if (kds_res_count) {
+ int wait_failed;
+
+ /* We have resources to wait for with kds */
+ katom->kds_dep_satisfied = MALI_FALSE;
+
+ wait_failed = kds_async_waitall(&katom->kds_rset,
+ &katom->kctx->jctx.kds_cb, katom, NULL,
+ kds_res_count, kds_access_bitmap,
+ kds_resources);
+
+ if (wait_failed)
+ goto failed_kds_setup;
+ else
+ kbase_jd_kds_waiters_add(katom);
+ } else {
+ /* Nothing to wait for, so kds dep met */
+ katom->kds_dep_satisfied = MALI_TRUE;
+ }
+ kfree(kds_resources);
+ kfree(kds_access_bitmap);
+#endif /* CONFIG_KDS */
+
+ /* all done OK */
+ return MALI_ERROR_NONE;
+
+/* error handling section */
+
+#ifdef CONFIG_KDS
+ failed_kds_setup:
+
+ /* lock before we unmap */
+ kbase_gpu_vm_lock(katom->kctx);
+#endif /* CONFIG_KDS */
+
+ failed_loop:
+ /* undo the loop work */
+ while (res_no-- > 0) {
+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ alloc->imported.umm.current_mapping_usage_count--;
+
+ if (0 == alloc->imported.umm.current_mapping_usage_count) {
+ struct kbase_va_region *reg;
+
+ reg = kbase_region_tracker_find_region_base_address(
+ katom->kctx,
+ katom->extres[res_no].gpu_address);
+
+ if (reg && reg->alloc == alloc)
+ kbase_mmu_teardown_pages(katom->kctx,
+ reg->start_pfn,
+ kbase_reg_current_backed_size(reg));
+
+ kbase_jd_umm_unmap(katom->kctx, alloc);
+ }
+ }
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ kbase_mem_phy_alloc_put(alloc);
+ }
+ kbase_gpu_vm_unlock(katom->kctx);
+
+ early_err_out:
+ kfree(katom->extres);
+ katom->extres = NULL;
+#ifdef CONFIG_KDS
+ kfree(kds_resources);
+ kfree(kds_access_bitmap);
+#endif /* CONFIG_KDS */
+ return err_ret_val;
+}
+
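+/* Resolve dependency slot @d of every atom waiting on @katom: the dependency
+ * is cleared, and if @katom failed (and the dependency is not order-only, or
+ * the context is dying) the dependent atom is failed too, inheriting @katom's
+ * event code and losing its other dependency. Dependents that are now failed,
+ * or that have no remaining dependencies, are appended to @out_list for the
+ * caller to run or complete.
+ */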
+STATIC INLINE void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, u8 d, bool ctx_is_dying)
+{
+ u8 other_d = !d;
+
+ while (!list_empty(&katom->dep_head[d])) {
+ struct kbase_jd_atom *dep_atom;
+ u8 dep_type;
+
+ dep_atom = list_entry(katom->dep_head[d].next,
+ struct kbase_jd_atom, dep_item[d]);
+ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+
+ list_del(katom->dep_head[d].next);
+
+ kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+ if (katom->event_code != BASE_JD_EVENT_DONE &&
+ (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) {
+ /* Atom failed, so remove the other dependencies and immediately fail the atom */
+ if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+ list_del(&dep_atom->dep_item[other_d]);
+ kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]);
+ }
+#ifdef CONFIG_KDS
+ if (!dep_atom->kds_dep_satisfied) {
+ /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+ * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+ */
+ dep_atom->kds_dep_satisfied = MALI_TRUE;
+ }
+#endif
+ /* at this point a dependency to the failed job is already removed */
+ dep_atom->event_code = katom->event_code;
+ KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED);
+ dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+ list_add_tail(&dep_atom->dep_item[0], out_list);
+ } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+#ifdef CONFIG_KDS
+ if (dep_atom->kds_dep_satisfied)
+#endif
+ list_add_tail(&dep_atom->dep_item[0], out_list);
+ }
+ }
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep)
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ kbdev->force_replay_count++;
+
+ if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+ kbdev->force_replay_count = 0;
+ katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+ if (kbdev->force_replay_random)
+ kbdev->force_replay_limit =
+ (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+ dev_info(kbdev->dev, "force_replay : promoting to error\n");
+ }
+}
+
+/** Test to see if an atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ int i;
+
+ if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+ (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+ return;
+
+ for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+ if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+ kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+ struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+ if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+ BASE_JD_REQ_SOFT_REPLAY &&
+ (dep_atom->core_req & kbdev->force_replay_core_req)
+ == kbdev->force_replay_core_req) {
+ jd_force_failure(kbdev, katom);
+ return;
+ }
+ }
+ }
+}
+#endif
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+mali_bool jd_done_nolock(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct list_head completed_jobs;
+ struct list_head runnable_jobs;
+ mali_bool need_to_try_schedule_context = MALI_FALSE;
+ int i;
+
+ INIT_LIST_HEAD(&completed_jobs);
+ INIT_LIST_HEAD(&runnable_jobs);
+
+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+ jd_check_force_failure(katom);
+#endif
+
+
+	/* This is needed in case an atom is failed due to being invalid; this
+ * can happen *before* the jobs that the atom depends on have completed */
+ for (i = 0; i < 2; i++) {
+ if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+ list_del(&katom->dep_item[i]);
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+ }
+ }
+
+ /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+ * BASE_JD_EVENT_TILE_RANGE_FAULT.
+ *
+ * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+ * error code to BASE_JD_EVENT_DONE
+ */
+
+ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+ if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+ /* Promote the failure to job done */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+ }
+ }
+
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ list_add_tail(&katom->dep_item[0], &completed_jobs);
+
+ while (!list_empty(&completed_jobs)) {
+ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+ list_del(completed_jobs.prev);
+
+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+ for (i = 0; i < 2; i++)
+ jd_resolve_dep(&runnable_jobs, katom, i,
+ js_kctx_info->ctx.is_dying);
+
+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+ kbase_jd_post_external_resources(katom);
+
+ while (!list_empty(&runnable_jobs)) {
+ struct kbase_jd_atom *node;
+
+ node = list_entry(runnable_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+
+ list_del(runnable_jobs.prev);
+
+ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) {
+ need_to_try_schedule_context |= jd_run_atom(node);
+ } else {
+ node->event_code = katom->event_code;
+
+ if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(node))
+ /* Don't complete this atom */
+ continue;
+ } else if (node->core_req &
+ BASE_JD_REQ_SOFT_JOB) {
+ /* If this is a fence wait then remove it from the list of sync waiters. */
+ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+ list_del(&node->dep_item[0]);
+
+ kbase_finish_soft_job(node);
+ }
+ node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ }
+
+ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED)
+ list_add_tail(&node->dep_item[0], &completed_jobs);
+ }
+
+ /* Register a completed job as a disjoint event when the GPU
+ * is in a disjoint state (ie. being reset or replaying jobs).
+ */
+ kbase_disjoint_event_potential(kctx->kbdev);
+ kbase_event_post(kctx, katom);
+
+ /* Decrement and check the TOTAL number of jobs. This includes
+ * those not tracked by the scheduler: 'not ready to run' and
+ * 'dependency-only' jobs. */
+ if (--kctx->jctx.job_nr == 0)
+ wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter
+ * that we've got no more jobs (so we can be safely terminated) */
+ }
+
+ return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock)
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+ CORE_REQ_DEP_ONLY,
+ CORE_REQ_SOFT,
+ CORE_REQ_COMPUTE,
+ CORE_REQ_FRAGMENT,
+ CORE_REQ_VERTEX,
+ CORE_REQ_TILER,
+ CORE_REQ_FRAGMENT_VERTEX,
+ CORE_REQ_FRAGMENT_VERTEX_TILER,
+ CORE_REQ_FRAGMENT_TILER,
+ CORE_REQ_VERTEX_TILER,
+ CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+ "Dependency Only Job",
+ "Soft Job",
+ "Compute Shader Job",
+ "Fragment Shader Job",
+ "Vertex/Geometry Shader Job",
+ "Tiler Job",
+ "Fragment Shader + Vertex/Geometry Shader Job",
+ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+ "Fragment Shader + Tiler Job",
+ "Vertex/Geometry Shader Job + Tiler Job",
+ "Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+ if (core_req & BASE_JD_REQ_SOFT_JOB)
+ return core_req_strings[CORE_REQ_SOFT];
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ return core_req_strings[CORE_REQ_COMPUTE];
+ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+ case BASE_JD_REQ_DEP:
+ return core_req_strings[CORE_REQ_DEP_ONLY];
+ case BASE_JD_REQ_FS:
+ return core_req_strings[CORE_REQ_FRAGMENT];
+ case BASE_JD_REQ_CS:
+ return core_req_strings[CORE_REQ_VERTEX];
+ case BASE_JD_REQ_T:
+ return core_req_strings[CORE_REQ_TILER];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+ case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_VERTEX_TILER];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+ }
+ return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+mali_bool jd_submit_atom(struct kbase_context *kctx,
+ const struct base_jd_atom_v2 *user_atom,
+ struct kbase_jd_atom *katom)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ base_jd_core_req core_req;
+ int queued = 0;
+ int i;
+ mali_bool ret;
+
+ /* Update the TOTAL number of jobs. This includes those not tracked by
+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+ jctx->job_nr++;
+
+ core_req = user_atom->core_req;
+
+ katom->start_timestamp.tv64 = 0;
+ katom->time_spent_us = 0;
+ katom->udata = user_atom->udata;
+ katom->kctx = kctx;
+ katom->nr_extres = user_atom->nr_extres;
+ katom->extres = NULL;
+ katom->device_nr = user_atom->device_nr;
+ katom->affinity = 0;
+ katom->jc = user_atom->jc;
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+ katom->core_req = core_req;
+ katom->nice_prio = user_atom->prio;
+ katom->atom_flags = 0;
+ katom->retry_count = 0;
+
+#ifdef CONFIG_KDS
+ /* Start by assuming that the KDS dependencies are satisfied,
+ * kbase_jd_pre_external_resources will correct this if there are dependencies */
+ katom->kds_dep_satisfied = MALI_TRUE;
+ katom->kds_rset = NULL;
+#endif /* CONFIG_KDS */
+
+	/* Don't do anything if the dependencies are messed up.
+	 * This is done in a separate pass so that both dependencies are checked
+	 * up front; otherwise it would add extra complexity to deal with the 1st
+	 * dependency (just added to the list) if only the 2nd one has an invalid
+	 * config.
+	 */
+ for (i = 0; i < 2; i++) {
+ int dep_atom_number = user_atom->pre_dep[i].atom_id;
+ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+ if (dep_atom_number) {
+			if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+					dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+ }
+
+ /* Add dependencies */
+ for (i = 0; i < 2; i++) {
+ int dep_atom_number = user_atom->pre_dep[i].atom_id;
+ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+ if (dep_atom_number) {
+ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+ if (dep_atom->event_code != BASE_JD_EVENT_DONE) {
+					/* don't stop this atom if it only has an order
+					 * dependency on the failed one; try to submit it
+					 * through the normal path */
+					if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+							dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+ continue;
+ }
+
+ if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) {
+ /* Remove the previous dependency */
+ list_del(&katom->dep_item[0]);
+ kbase_jd_katom_dep_clear(&katom->dep[0]);
+ }
+
+ /* Atom has completed, propagate the error code if any */
+ katom->event_code = dep_atom->event_code;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ if ((katom->core_req &
+ BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(katom)) {
+ ret = MALI_FALSE;
+ goto out;
+ }
+ }
+ ret = jd_done_nolock(katom);
+
+ goto out;
+ }
+ } else {
+ /* Atom is in progress, add this atom to the list */
+ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+ queued = 1;
+ }
+ }
+ }
+
+ /* These must occur after the above loop to ensure that an atom that
+ * depends on a previous atom with the same number behaves as expected */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+ /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+ if (0 == katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+
+ /* Reject atoms with an invalid device_nr */
+ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+ dev_warn(kctx->kbdev->dev, "Rejecting atom with invalid device_nr %d", katom->device_nr);
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+
+ /*
+	 * If the priority is increased we need to check that the caller has the
+	 * security caps to do this; if the priority is decreased then this is OK,
+	 * as the result will have no negative impact on other running processes.
+ */
+ if (0 > katom->nice_prio) {
+ mali_bool access_allowed;
+
+ access_allowed = kbase_security_has_capability(kctx,
+ KBASE_SEC_MODIFY_PRIORITY, KBASE_SEC_FLAG_NOAUDIT);
+
+ if (!access_allowed) {
+ /* For unprivileged processes - a negative priority is interpreted as zero */
+ katom->nice_prio = 0;
+ }
+ }
+
+ /* Scale priority range to use NICE range */
+ if (katom->nice_prio) {
+ /* Remove sign for calculation */
+ int nice_priority = katom->nice_prio + 128;
+
+		/* Fixed-point maths to scale from 0..255 to 0..39 (NICE range with +20 offset) */
+ katom->nice_prio = (((20 << 16) / 128) * nice_priority) >> 16;
+ }
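+	/* e.g. with the scaling factor above ((20 << 16) / 128 == 10240):
+	 * prio 127 -> (127 + 128) * 10240 >> 16 == 39, prio -128 -> 0, and
+	 * prio 0 is left at 0 by the check above. */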
+
+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+ /* handle what we need to do to access the external resources */
+ if (MALI_ERROR_NONE != kbase_jd_pre_external_resources(katom, user_atom)) {
+ /* setup failed (no access, bad resource, unknown resource types, etc.) */
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+
+ /* Initialize the jobscheduler policy for this atom. Function will
+ * return error if the atom is malformed.
+ *
+ * Soft-jobs never enter the job scheduler but have their own initialize method.
+ *
+	 * If either fails then we immediately complete the atom with an error.
+ */
+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+ union kbasep_js_policy *js_policy = &(kctx->kbdev->js_data.policy);
+
+ if (MALI_ERROR_NONE != kbasep_js_policy_init_job(js_policy, kctx, katom)) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ } else {
+ /* Soft-job */
+ if (MALI_ERROR_NONE != kbase_prepare_soft_job(katom)) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+ katom->work_id = atomic_inc_return(&jctx->work_id);
+ trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+ if (queued) {
+ ret = MALI_FALSE;
+ goto out;
+ }
+#ifdef CONFIG_KDS
+ if (!katom->kds_dep_satisfied) {
+ /* Queue atom due to KDS dependency */
+ ret = MALI_FALSE;
+ goto out;
+ }
+#endif /* CONFIG_KDS */
+
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(katom))
+ ret = MALI_FALSE;
+ else
+ ret = jd_done_nolock(katom);
+
+ goto out;
+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ if (kbase_process_soft_job(katom) == 0) {
+ kbase_finish_soft_job(katom);
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ /* The job has not yet completed */
+ list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+ ret = MALI_FALSE;
+ } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ ret = kbasep_js_add_job(kctx, katom);
+ } else {
+ /* This is a pure dependency. Resolve it immediately */
+ ret = jd_done_nolock(katom);
+ }
+
+ out:
+ return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+mali_error kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data,
+ int uk6_atom)
+#else
+mali_error kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ mali_error err = MALI_ERROR_NONE;
+ int i;
+ mali_bool need_to_try_schedule_context = MALI_FALSE;
+ struct kbase_device *kbdev;
+ void __user *user_addr;
+
+ /*
+	 * kbase_jd_submit isn't expected to fail, so all errors with the jobs
+	 * are reported by immediately failing them (through the event system)
+ */
+ kbdev = kctx->kbdev;
+
+ beenthere(kctx, "%s", "Enter");
+
+ if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if ((uk6_atom && submit_data->stride !=
+ sizeof(struct base_jd_atom_v2_uk6)) ||
+ submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+ if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+ for (i = 0; i < submit_data->nr_atoms; i++) {
+ struct base_jd_atom_v2 user_atom;
+ struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if (uk6_atom) {
+ struct base_jd_atom_v2_uk6 user_atom_v6;
+ base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+ if (copy_from_user(&user_atom_v6, user_addr,
+ sizeof(user_atom_v6))) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+ atomic_sub_return(
+ submit_data->nr_atoms - i,
+ &kctx->timeline.jd_atoms_in_flight));
+ break;
+ }
+ /* Convert from UK6 atom format to UK7 format */
+ user_atom.jc = user_atom_v6.jc;
+ user_atom.udata = user_atom_v6.udata;
+ user_atom.extres_list = user_atom_v6.extres_list;
+ user_atom.nr_extres = user_atom_v6.nr_extres;
+ user_atom.core_req = user_atom_v6.core_req;
+
+ /* atom number 0 is used for no dependency atoms */
+ if (!user_atom_v6.pre_dep[0])
+ dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+ base_jd_atom_dep_set(&user_atom.pre_dep[0],
+ user_atom_v6.pre_dep[0],
+ dep_types[0]);
+
+ /* atom number 0 is used for no dependency atoms */
+ if (!user_atom_v6.pre_dep[1])
+ dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+ base_jd_atom_dep_set(&user_atom.pre_dep[1],
+ user_atom_v6.pre_dep[1],
+ dep_types[1]);
+
+ user_atom.atom_number = user_atom_v6.atom_number;
+ user_atom.prio = user_atom_v6.prio;
+ user_atom.device_nr = user_atom_v6.device_nr;
+ } else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+ break;
+ }
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ }
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+ user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+ mutex_lock(&jctx->lock);
+ katom = &jctx->atoms[user_atom.atom_number];
+
+ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+ /* Atom number is already in use, wait for the atom to
+ * complete
+ */
+ mutex_unlock(&jctx->lock);
+
+ /* This thread will wait for the atom to complete. Due
+ * to thread scheduling we are not sure that the other
+ * thread that owns the atom will also schedule the
+ * context, so we force the scheduler to be active and
+ * hence eventually schedule this context at some point
+ * later.
+ */
+ kbasep_js_try_schedule_head_ctx(kctx->kbdev);
+ if (wait_event_killable(katom->completed,
+ katom->status == KBASE_JD_ATOM_STATE_UNUSED)) {
+ /* We're being killed so the result code
+ * doesn't really matter
+ */
+ return MALI_ERROR_NONE;
+ }
+ mutex_lock(&jctx->lock);
+ }
+
+ need_to_try_schedule_context |=
+ jd_submit_atom(kctx, &user_atom, katom);
+
+ /* Register a completed job as a disjoint event when the GPU is in a disjoint state
+		 * (i.e. being reset or replaying jobs).
+ */
+ kbase_disjoint_event_potential(kbdev);
+
+ mutex_unlock(&jctx->lock);
+ }
+
+ if (need_to_try_schedule_context)
+ kbasep_js_try_schedule_head_ctx(kbdev);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit)
+
+static void kbasep_jd_cacheclean(struct kbase_device *kbdev)
+{
+ /* Limit the number of loops to avoid a hang if the interrupt is missed */
+ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+ mutex_lock(&kbdev->cacheclean_lock);
+
+ /* use GPU_COMMAND completion solution */
+ /* clean & invalidate the caches */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+ /* wait for cache flush to complete before continuing */
+ while (--max_loops && (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & CLEAN_CACHES_COMPLETED) == 0)
+ ;
+
+ /* clear the CLEAN_CACHES_COMPLETED irq */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, CLEAN_CACHES_COMPLETED);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED, NULL);
+ KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.state != KBASE_INSTR_STATE_CLEANING,
+ "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+ mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+/**
+ * This function:
+ * - requeues the job from the runpool (if it was soft-stopped/removed from NEXT registers)
+ * - removes it from the system if it finished/failed/was cancelled
+ * - resolves dependencies to add dependent jobs to the context, potentially starting them if necessary (which may add more references to the context)
+ * - releases the reference to the context from the no-longer-running job
+ * - handles retrying submission outside of IRQ context if it failed from within IRQ context
+ */
+static void jd_done_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ u64 cache_jc = katom->jc;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ jctx = &kctx->jctx;
+ kbdev = kctx->kbdev;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) && katom->event_code != BASE_JD_EVENT_DONE && !(katom->event_code & BASE_JD_SW_EVENT))
+ kbasep_jd_cacheclean(kbdev); /* cache flush when jobs complete with non-done codes */
+ else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+ if (kbdev->gpu_props.num_core_groups > 1 &&
+ !(katom->affinity & kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+ (katom->affinity & kbdev->gpu_props.props.coherency_info.group[1].core_mask)) {
+ dev_dbg(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+ kbasep_jd_cacheclean(kbdev);
+ }
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
+ (katom->core_req & BASE_JD_REQ_FS) &&
+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT &&
+ (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+ !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+ dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+ if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+			/* The job had a TILE_RANGE_FAULT after it was soft-stopped.
+ * Due to an HW issue we try to execute the job
+ * again.
+ */
+ dev_dbg(kbdev->dev, "Clamping has been executed, try to rerun the job\n");
+ katom->event_code = BASE_JD_EVENT_STOPPED;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+
+			/* The atom will be requeued, but requeuing does not submit more
+ * jobs. If this was the last job, we must also ensure that more
+ * jobs will be run on slot 0 - this is a Fragment job. */
+ kbasep_js_set_job_retry_submit_slot(katom, 0);
+ }
+ }
+
+ /*
+ * Begin transaction on JD context and JS context
+ */
+ mutex_lock(&jctx->lock);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* This worker only gets called on contexts that are scheduled *in*. This is
+ * because it only happens in response to an IRQ from a job that was
+ * running.
+ */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE);
+
+ /* If job was rejected due to BASE_JD_EVENT_PM_EVENT but was not
+ * specifically targeting core group 1, then re-submit targeting core
+ * group 0 */
+ if (katom->event_code == BASE_JD_EVENT_PM_EVENT && !(katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) {
+ katom->event_code = BASE_JD_EVENT_STOPPED;
+ /* Don't need to worry about any previously set retry-slot - it's
+ * impossible for it to have been set previously, because we guarantee
+ * kbase_jd_done() was called with done_code==0 on this atom */
+ kbasep_js_set_job_retry_submit_slot(katom, 1);
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+ /* Release cores this job was using (this might power down unused cores, and
+	 * cause extra latency if a job submitted here - such as dependent jobs -
+ * would use those cores) */
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+
+ /* Retain state before the katom disappears */
+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+ if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+ unsigned long flags;
+
+ /* Requeue the atom on soft-stop / removed from NEXT registers */
+ dev_dbg(kbdev->dev, "JS: Soft Stopped/Removed from next on Ctx %p; Requeuing", kctx);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ kbasep_js_clear_job_retry_submit(katom);
+ /* An atom that has been hard-stopped might have previously
+ * been soft-stopped and has just finished before the hard-stop
+ * occurred. For this reason, clear the hard-stopped flag */
+ katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+
+ KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, katom));
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_policy_enqueue_job(js_policy, katom);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* A STOPPED/REMOVED job must cause a re-submit to happen, in case it
+ * was the last job left. Crucially, work items on work queues can run
+ * out of order e.g. on different CPUs, so being able to submit from
+ * the IRQ handler is not a good indication that we don't need to run
+ * jobs; the submitted job could be processed on the work-queue
+ * *before* the stopped job, even though it was submitted after. */
+ {
+ int tmp;
+ KBASE_DEBUG_ASSERT(kbasep_js_get_atom_retry_submit_slot(&katom_retained_state, &tmp) != MALI_FALSE);
+ CSTD_UNUSED(tmp);
+ }
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ } else {
+ /* Remove the job from the system for all other reasons */
+ mali_bool need_to_try_schedule_context;
+
+ kbasep_js_remove_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+
+ need_to_try_schedule_context = jd_done_nolock(katom);
+
+		/* This ctx is already scheduled in, so the return value is guaranteed to be FALSE */
+ KBASE_DEBUG_ASSERT(need_to_try_schedule_context == MALI_FALSE);
+ }
+ /* katom may have been freed now, do not use! */
+
+ /*
+ * Transaction complete
+ */
+ mutex_unlock(&jctx->lock);
+
+ /* Job is now no longer running, so can now safely release the context
+ * reference, and handle any actions that were logged against the atom's retained state */
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * Work queue job cancel function
+ * Only called as part of 'Zapping' a context (which occurs on termination)
+ * Operates serially with the jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * @note We don't need to release most of the resources that kbase_jd_done()
+ * or jd_done_worker() would release, because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled). The only resources that are an exception to this are:
+ * - those held by kbasep_js_job_check_ref_cores(), because these resources are
+ * held for non-running atoms as well as running atoms.
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool need_to_try_schedule_context;
+ struct kbase_device *kbdev;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ kbdev = kctx->kbdev;
+ jctx = &kctx->jctx;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+ /* This only gets called on contexts that are scheduled out. Hence, we must
+ * make sure we don't de-ref the number of running jobs (there aren't
+ * any), nor must we try to schedule out the context (it's already
+ * scheduled out).
+ */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE);
+
+ /* Release cores this job was using (this might power down unused cores) */
+ kbasep_js_job_check_deref_cores(kctx->kbdev, katom);
+
+ /* Scheduler: Remove the job from the system */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&jctx->lock);
+
+ need_to_try_schedule_context = jd_done_nolock(katom);
+ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+ * schedule the context. There's also no need for the jsctx_mutex to have been taken
+ * around this too. */
+ KBASE_DEBUG_ASSERT(need_to_try_schedule_context == MALI_FALSE);
+
+ /* katom may have been freed now, do not use! */
+ mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Complete a job that has been removed from the Hardware
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * - An IRQ indicates that the job finished (for both error and 'done' codes)
+ * - The job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a workqueue
+ *
+ * This can be called safely from atomic context.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+ kbasep_js_atom_done_code done_code)
+{
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+ kbase_timeline_job_slot_done(kbdev, kctx, katom, slot_nr, done_code);
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+ kbase_job_check_leave_disjoint(kbdev, katom);
+ kbasep_js_job_done_slot_irq(katom, slot_nr, end_timestamp, done_code);
+
+ katom->slot_nr = slot_nr;
+
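+	/* The remaining completion work cannot run in IRQ context; defer it to the per-context job-done workqueue */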
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_done_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done)
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+ /* This should only be done from a context that is not scheduled */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE);
+
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
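+	/* Defer the clean-up to the work queue so it runs serially with jd_done_worker() (see jd_cancel_worker() above) */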
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_cancel_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+struct zap_reset_data {
+ /* The stages are:
+ * 1. The timer has never been called
+ * 2. The zap has timed out, all slots are soft-stopped - the GPU reset will happen.
+ * The GPU has been reset when kbdev->reset_waitq is signalled
+ *
+ * (-1 - The timer has been cancelled)
+ */
+ int stage;
+ struct kbase_device *kbdev;
+ struct hrtimer timer;
+ spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+ struct zap_reset_data *reset_data = container_of(timer, struct zap_reset_data, timer);
+ struct kbase_device *kbdev = reset_data->kbdev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&reset_data->lock, flags);
+
+ if (reset_data->stage == -1)
+ goto out;
+
+#if KBASE_GPU_RESET_EN
+ if (kbase_prepare_to_reset_gpu(kbdev)) {
+ dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", ZAP_TIMEOUT);
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ reset_data->stage = 2;
+
+ out:
+ spin_unlock_irqrestore(&reset_data->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+ struct kbase_jd_atom *katom;
+ struct list_head *entry, *tmp;
+ struct kbase_device *kbdev;
+ struct zap_reset_data reset_data;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ kbdev = kctx->kbdev;
+
+ KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+ kbase_job_zap_context(kctx);
+
+ mutex_lock(&kctx->jctx.lock);
+
+ /*
+ * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+ * queued outside the job scheduler.
+ */
+
+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+ katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
+ kbase_cancel_soft_job(katom);
+ }
+
+
+#ifdef CONFIG_KDS
+
+ /* For each job waiting on a kds resource, cancel the wait and force the job to
+ * complete early, this is done so that we don't leave jobs outstanding waiting
+ * on kds resources which may never be released when contexts are zapped, resulting
+ * in a hang.
+ *
+ * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+	 * this prevents items being removed when calling jd_done_nolock in kbase_cancel_kds_wait_job.
+ */
+
+ list_for_each(entry, &kctx->waiting_kds_resource) {
+ katom = list_entry(entry, struct kbase_jd_atom, node);
+
+ kbase_cancel_kds_wait_job(katom);
+ }
+#endif
+
+ mutex_unlock(&kctx->jctx.lock);
+
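+	/* Arm a timeout: if the jobs have not been killed within ZAP_TIMEOUT ms, zap_timeout_callback() requests a GPU reset */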
+ hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ reset_data.timer.function = zap_timeout_callback;
+
+ spin_lock_init(&reset_data.lock);
+
+ reset_data.kbdev = kbdev;
+ reset_data.stage = 1;
+
+ hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT), HRTIMER_MODE_REL);
+
+ /* Wait for all jobs to finish, and for the context to be not-scheduled
+	 * (due to kbase_job_zap_context(), we also guarantee it's not in the JS
+	 * policy queue either) */
+ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, kctx->jctx.sched_info.ctx.is_scheduled == MALI_FALSE);
+
+ spin_lock_irqsave(&reset_data.lock, flags);
+ if (reset_data.stage == 1) {
+ /* The timer hasn't run yet - so cancel it */
+ reset_data.stage = -1;
+ }
+ spin_unlock_irqrestore(&reset_data.lock, flags);
+
+ hrtimer_cancel(&reset_data.timer);
+
+ if (reset_data.stage == 2) {
+ /* The reset has already started.
+ * Wait for the reset to complete
+ */
+ wait_event(kbdev->reset_wait, atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_NOT_PENDING);
+ }
+ destroy_hrtimer_on_stack(&reset_data.timer);
+
+ dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+ /* Ensure that the signallers of the waitqs have finished */
+ mutex_lock(&kctx->jctx.lock);
+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context)
+
+mali_error kbase_jd_init(struct kbase_context *kctx)
+{
+ int i;
+ mali_error mali_err = MALI_ERROR_NONE;
+#ifdef CONFIG_KDS
+ int err;
+#endif /* CONFIG_KDS */
+
+ KBASE_DEBUG_ASSERT(kctx);
+
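+	/* max_active of 1: job-done and cancel work items for this context are intended to run one at a time (see jd_cancel_worker()) */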
+ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1);
+ if (NULL == kctx->jctx.job_done_wq) {
+ mali_err = MALI_ERROR_OUT_OF_MEMORY;
+ goto out1;
+ }
+
+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+ init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+ }
+
+ mutex_init(&kctx->jctx.lock);
+
+ init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+ spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+ err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+ if (0 != err) {
+ mali_err = MALI_ERROR_FUNCTION_FAILED;
+ goto out2;
+ }
+#endif /* CONFIG_KDS */
+
+ kctx->jctx.job_nr = 0;
+
+ return MALI_ERROR_NONE;
+
+#ifdef CONFIG_KDS
+ out2:
+ destroy_workqueue(kctx->jctx.job_done_wq);
+#endif /* CONFIG_KDS */
+ out1:
+ return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init)
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+ kds_callback_term(&kctx->jctx.kds_cb);
+#endif /* CONFIG_KDS */
+ /* Work queue is emptied by this */
+ destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100755
index 000000000000..f78910668216
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * @brief Show callback for the @c JD atoms debugfs file.
+ *
+ * This function is called to get the contents of the @c JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context::atoms .
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if the data was successfully printed to the debugfs entry file,
+ * failure otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *kctx = sfile->private;
+ struct kbase_jd_atom *atoms;
+ unsigned long irq_flags;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* Print table heading */
+ seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+ atoms = kctx->jctx.atoms;
+ /* General atom states */
+ mutex_lock(&kctx->jctx.lock);
+ /* JS-related states */
+ spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+ struct kbase_jd_atom *atom = &atoms[i];
+ s64 start_timestamp = 0;
+
+ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+ continue;
+
+ /* start_timestamp is cleared as soon as the atom leaves UNUSED state
+		 * and set before a job is submitted to the h/w; a non-zero value means
+ * it is valid */
+ if (ktime_to_ns(atom->start_timestamp))
+ start_timestamp = ktime_to_ns(
+ ktime_sub(ktime_get(), atom->start_timestamp));
+
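+		/* Pre-dependencies are printed as indices into the atoms array, with 0 meaning no dependency */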
+ seq_printf(sfile,
+ "%i,%u,%u,%u,%lu %lu,%lli,%llu\n",
+ i, atom->core_req, atom->status, atom->coreref_state,
+ atom->dep[0].atom ? atom->dep[0].atom - atoms : 0,
+ atom->dep[1].atom ? atom->dep[1].atom - atoms : 0,
+ (signed long long)start_timestamp,
+ (unsigned long long)(atom->time_spent_us ?
+ atom->time_spent_us * 1000 : start_timestamp)
+ );
+ }
+ spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+ mutex_unlock(&kctx->jctx.lock);
+
+ return 0;
+}
+
+
+/**
+ * @brief File operations related to debugfs entry for atoms
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+ .open = kbasep_jd_debugfs_atoms_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+
+int kbasep_jd_debugfs_init(struct kbase_device *kbdev)
+{
+ kbdev->jd_directory = debugfs_create_dir(
+ "jd", kbdev->mali_debugfs_directory);
+ if (IS_ERR(kbdev->jd_directory)) {
+ dev_err(kbdev->dev, "Couldn't create mali jd debugfs directory\n");
+ goto err;
+ }
+
+ return 0;
+
+err:
+ return -1;
+}
+
+
+void kbasep_jd_debugfs_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ if (!IS_ERR(kbdev->jd_directory))
+ debugfs_remove_recursive(kbdev->jd_directory);
+}
+
+
+int kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+{
+ /* Refer below for format string, %u is 10 chars max */
+ char dir_name[10 * 2 + 2];
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* Create per-context directory */
+ scnprintf(dir_name, sizeof(dir_name), "%u_%u", kctx->pid, kctx->id);
+ kctx->jd_ctx_dir = debugfs_create_dir(dir_name, kctx->kbdev->jd_directory);
+ if (IS_ERR(kctx->jd_ctx_dir))
+ goto err;
+
+ /* Expose all atoms */
+ if (IS_ERR(debugfs_create_file("atoms", S_IRUGO,
+ kctx->jd_ctx_dir, kctx, &kbasep_jd_debugfs_atoms_fops)))
+ goto err_jd_ctx_dir;
+
+ return 0;
+
+err_jd_ctx_dir:
+ debugfs_remove_recursive(kctx->jd_ctx_dir);
+err:
+ return -1;
+}
+
+
+void kbasep_jd_debugfs_ctx_remove(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ if (!IS_ERR(kctx->jd_ctx_dir))
+ debugfs_remove_recursive(kctx->jd_ctx_dir);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/**
+ * @brief Stub functions for when debugfs is disabled
+ */
+int kbasep_jd_debugfs_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+void kbasep_jd_debugfs_term(struct kbase_device *kbdev)
+{
+}
+int kbasep_jd_debugfs_ctx_add(struct kbase_context *ctx)
+{
+ return 0;
+}
+void kbasep_jd_debugfs_ctx_remove(struct kbase_context *ctx)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100755
index 000000000000..c045736139b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+/**
+ * @brief Initialize JD debugfs entries
+ *
+ * This should be called during device probing after the main mali debugfs
+ * directory has been created.
+ *
+ * @param[in] kbdev Pointer to kbase_device
+ */
+int kbasep_jd_debugfs_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Clean up all JD debugfs entries and related data
+ *
+ * This should be called during device removal, before the main mali debugfs
+ * directory is removed.
+ *
+ * @param[in] kbdev Pointer to kbase_device
+ */
+void kbasep_jd_debugfs_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Add new entry to JD debugfs
+ *
+ * @param[in] kctx Pointer to kbase_context
+ *
+ * @return 0 on success, -1 otherwise
+ */
+int kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+
+/**
+ * @brief Remove entry from JD debugfs
+ *
+ * @param[in] kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_remove(struct kbase_context *kctx);
+
+#endif /*_KBASE_JD_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100755
index 000000000000..19bbf1a8941e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,1532 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_jm.c
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_js_affinity.h>
+#include <mali_kbase_hw.h>
+
+#include "mali_kbase_jm.h"
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+#endif
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+ sprintf(js_string, "job_slot_%i", js);
+ return js_string;
+}
+#endif
+
+static void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
+{
+ struct kbase_context *kctx;
+ u32 cfg;
+ u64 jc_head = katom->jc;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(katom);
+
+ kctx = katom->kctx;
+
+ /* Command register must be available */
+ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+ /* Affinity is not violating */
+ kbase_js_debug_log_current_affinities(kbdev);
+ KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, katom->affinity));
+
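+	/* Program the 64-bit job chain address into the slot's NEXT registers */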
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), jc_head & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32, kctx);
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ {
+ u64 mask;
+ u32 value;
+
+ if (0 == js)
+ {
+ mask = mali_js0_affinity_mask;
+ }
+ else if (1 == js)
+ {
+ mask = mali_js1_affinity_mask;
+ }
+ else
+ {
+ mask = mali_js2_affinity_mask;
+ }
+
+ value = katom->affinity & (mask & 0xFFFFFFFF);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), value, kctx);
+
+ value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), value, kctx);
+ }
+#else
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), katom->affinity & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), katom->affinity >> 32, kctx);
+#endif
+
+ /* start MMU, medium priority, cache clean/flush on end, clean/flush on start */
+ cfg = kctx->as_nr | JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE | JS_CONFIG_START_MMU | JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | JS_CONFIG_THREAD_PRI(8);
+
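+	/* On hardware with job chain disambiguation, alternate the job chain flag between consecutive jobs on the slot so a later soft/hard-stop can target the correct chain */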
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+ if (!kbdev->jm_slots[js].job_chain_flag) {
+ cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->jm_slots[js].job_chain_flag = MALI_TRUE;
+ } else {
+ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->jm_slots[js].job_chain_flag = MALI_FALSE;
+ }
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+ /* Write an approximate start timestamp.
+ * It's approximate because there might be a job in the HEAD register. In
+ * such cases, we'll try to make a better approximation in the IRQ handler
+ * (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+ katom->start_timestamp = ktime_get();
+
+ /* GO ! */
+ dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", katom, kctx, js, jc_head, katom->affinity);
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32) katom->affinity);
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom));
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ if (kbasep_jm_nr_jobs_submitted(&kbdev->jm_slots[js]) == 1)
+ {
+ /* If this is the only job on the slot, trace it as starting */
+ char js_string[16];
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string), ktime_to_ns(katom->start_timestamp), (u32)katom->kctx, 0, katom->work_id);
+ kbdev->jm_slots[js].last_context = katom->kctx;
+ }
+#endif
+ kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START, katom->kctx);
+}
+
+void kbase_job_submit_nolock(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
+{
+ struct kbase_jm_slot *jm_slots;
+#if KBASE_PM_EN
+ base_jd_core_req core_req;
+#endif
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(katom);
+
+ jm_slots = kbdev->jm_slots;
+
+#if KBASE_PM_EN
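+	/* Update power-management metrics: the job is accounted as either compute (CL) or graphics (GL) work */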
+ core_req = katom->core_req;
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ unsigned long flags;
+ int device_nr = (core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? katom->device_nr : 0;
+ KBASE_DEBUG_ASSERT(device_nr < 2);
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbasep_pm_record_job_status(kbdev);
+ kbdev->pm.metrics.active_cl_ctx[device_nr]++;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+ } else {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbasep_pm_record_job_status(kbdev);
+ kbdev->pm.metrics.active_gl_ctx++;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+ }
+#endif
+
+ /*
+ * We can have:
+ * - one job already done (pending interrupt),
+ * - one running,
+ * - one ready to be run.
+ * Hence a maximum of 3 inflight jobs. We have a 4 job
+ * queue, which I hope will be enough...
+ */
+ kbasep_jm_enqueue_submit_slot(&jm_slots[js], katom);
+ kbase_job_hw_submit(kbdev, katom, js);
+}
+
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp)
+{
+ struct kbase_jm_slot *slot;
+ struct kbase_jd_atom *katom;
+ mali_addr64 jc_head;
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (completion_code != BASE_JD_EVENT_DONE && completion_code != BASE_JD_EVENT_STOPPED)
+ dev_err(kbdev->dev, "GPU fault 0x%02lx from job slot %d\n", (unsigned long)completion_code, s);
+
+ /* IMPORTANT: this function must only contain work necessary to complete a
+ * job from a Real IRQ (and not 'fake' completion, e.g. from
+ * Soft-stop). For general work that must happen no matter how the job was
+ * removed from the hardware, place it in kbase_jd_done() */
+
+ slot = &kbdev->jm_slots[s];
+ katom = kbasep_jm_dequeue_submit_slot(slot);
+
+	/* If the completed katom was a dummy job for HW workarounds, then take no further action */
+ if (kbasep_jm_is_dummy_workaround_job(kbdev, katom)) {
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, NULL, NULL, 0, s, completion_code);
+ return;
+ }
+
+ jc_head = katom->jc;
+ kctx = katom->kctx;
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, jc_head, s, completion_code);
+
+ if (completion_code != BASE_JD_EVENT_DONE && completion_code != BASE_JD_EVENT_STOPPED) {
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+ KBASE_TRACE_DUMP(kbdev);
+#endif
+ }
+ if (job_tail != 0) {
+ mali_bool was_updated = (job_tail != jc_head);
+ /* Some of the job has been executed, so we update the job chain address to where we should resume from */
+ katom->jc = job_tail;
+ if (was_updated)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, kctx, katom, job_tail, s);
+ }
+
+ /* Only update the event code for jobs that weren't cancelled */
+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+ katom->event_code = (base_jd_event_code) completion_code;
+
+ kbase_device_trace_register_access(kctx, REG_WRITE, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 1 << s);
+
+ /* Complete the job, and start new ones
+ *
+ * Also defer remaining work onto the workqueue:
+ * - Re-queue Soft-stopped jobs
+ * - For any other jobs, queue the job back into the dependency system
+ * - Schedule out the parent context if necessary, and schedule a new one in.
+ */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ if (kbasep_jm_nr_jobs_submitted(slot) != 0) {
+ struct kbase_jd_atom *katom;
+ char js_string[16];
+ katom = kbasep_jm_peek_idx_submit_slot(slot, 0); /* The atom in the HEAD */
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(s, js_string), ktime_to_ns(*end_timestamp), (u32)katom->kctx, 0, katom->work_id);
+ slot->last_context = katom->kctx;
+ } else {
+ char js_string[16];
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(s, js_string), ktime_to_ns(ktime_get()), 0, 0, 0);
+ slot->last_context = 0;
+ }
+#endif
+ kbase_jd_done(katom, s, end_timestamp, KBASE_JS_ATOM_DONE_START_NEW_ATOMS);
+}
+
+/**
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate of the calculated time spent)
+ */
+STATIC void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, struct kbase_jm_slot *slot, ktime_t end_timestamp)
+{
+ KBASE_DEBUG_ASSERT(slot);
+
+ if (kbasep_jm_nr_jobs_submitted(slot) > 0) {
+ struct kbase_jd_atom *katom;
+ ktime_t new_timestamp;
+ ktime_t timestamp_diff;
+ katom = kbasep_jm_peek_idx_submit_slot(slot, 0); /* The atom in the HEAD */
+
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ if (kbasep_jm_is_dummy_workaround_job(kbdev, katom) != MALI_FALSE) {
+ /* Don't access the members of HW workaround 'dummy' jobs */
+ return;
+ }
+
+ /* Account for any IRQ Throttle time - makes an overestimate of the time spent by the job */
+ new_timestamp = ktime_sub_ns(end_timestamp, KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+ timestamp_diff = ktime_sub(new_timestamp, katom->start_timestamp);
+ if (ktime_to_ns(timestamp_diff) >= 0) {
+ /* Only update the timestamp if it's a better estimate than what's currently stored.
+ * This is because our estimate that accounts for the throttle time may be too much
+ * of an overestimate */
+ katom->start_timestamp = new_timestamp;
+ }
+ }
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+ unsigned long flags;
+ int i;
+ u32 count = 0;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+ memset(&kbdev->slot_submit_count_irq[0], 0, sizeof(kbdev->slot_submit_count_irq));
+
+ /* write irq throttle register, this will prevent irqs from occurring until
+ * the given number of gpu clock cycles have passed */
+ {
+ int irq_throttle_cycles = atomic_read(&kbdev->irq_throttle_cycles);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), irq_throttle_cycles, NULL);
+ }
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ while (done) {
+ struct kbase_jm_slot *slot;
+ u32 failed = done >> 16;
+
+ /* treat failed slots as finished slots */
+ u32 finished = (done & 0xFFFF) | failed;
+
+ /* Note: This is inherently unfair, as we always check
+ * for lower numbered interrupts before the higher
+ * numbered ones.*/
+ i = ffs(finished) - 1;
+ KBASE_DEBUG_ASSERT(i >= 0);
+
+ slot = &kbdev->jm_slots[i];
+
+ do {
+ int nr_done;
+ u32 active;
+ u32 completion_code = BASE_JD_EVENT_DONE; /* assume OK */
+ u64 job_tail = 0;
+
+ if (failed & (1u << i)) {
+ /* read out the job slot status code if the job slot reported failure */
+ completion_code = kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+ switch (completion_code) {
+ case BASE_JD_EVENT_STOPPED:
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0);
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+ /* Soft-stopped job - read the value of JS<n>_TAIL so that the job chain can be resumed */
+ job_tail = (u64) kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_TAIL_LO), NULL) | ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_TAIL_HI), NULL) << 32);
+ break;
+ case BASE_JD_EVENT_NOT_STARTED:
+ /* PRLAM-10673 can cause a TERMINATED job to come back as NOT_STARTED, but the error interrupt helps us detect it */
+ completion_code = BASE_JD_EVENT_TERMINATED;
+					/* fall through */
+ default:
+ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", i, completion_code, kbase_exception_name(completion_code));
+ }
+ }
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done & ((1 << i) | (1 << (i + 16))), NULL);
+ active = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL);
+
+ if (((active >> i) & 1) == 0 && (((done >> (i + 16)) & 1) == 0)) {
+ /* There is a potential race we must work around:
+ *
+ * 1. A job slot has a job in both current and next registers
+ * 2. The job in current completes successfully, the IRQ handler reads RAWSTAT
+ * and calls this function with the relevant bit set in "done"
+ * 3. The job in the next registers becomes the current job on the GPU
+ * 4. Sometime before the JOB_IRQ_CLEAR line above the job on the GPU _fails_
+ * 5. The IRQ_CLEAR clears the done bit but not the failed bit. This atomically sets
+ * JOB_IRQ_JS_STATE. However since both jobs have now completed the relevant bits
+ * for the slot are set to 0.
+ *
+ * If we now did nothing then we'd incorrectly assume that _both_ jobs had completed
+ * successfully (since we haven't yet observed the fail bit being set in RAWSTAT).
+ *
+ * So at this point if there are no active jobs left we check to see if RAWSTAT has a failure
+ * bit set for the job slot. If it does we know that there has been a new failure that we
+ * didn't previously know about, so we make sure that we record this in active (but we wait
+ * for the next loop to deal with it).
+ *
+ * If we were handling a job failure (i.e. done has the relevant high bit set) then we know that
+ * the value read back from JOB_IRQ_JS_STATE is the correct number of remaining jobs because
+				 * the failed job will have prevented any further jobs from starting execution.
+ */
+ u32 rawstat = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+ if ((rawstat >> (i + 16)) & 1) {
+ /* There is a failed job that we've missed - add it back to active */
+ active |= (1u << i);
+ }
+ }
+
+ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", completion_code);
+
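+			/* Completed jobs = jobs submitted to this slot minus any still active in the current/next registers */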
+ nr_done = kbasep_jm_nr_jobs_submitted(slot);
+ nr_done -= (active >> i) & 1;
+ nr_done -= (active >> (i + 16)) & 1;
+
+ if (nr_done <= 0) {
+ dev_warn(kbdev->dev, "Spurious interrupt on slot %d", i);
+ goto spurious;
+ }
+
+ count += nr_done;
+
+ while (nr_done) {
+ if (nr_done == 1) {
+ kbase_job_done_slot(kbdev, i, completion_code, job_tail, &end_timestamp);
+ } else {
+					/* More than one job has completed. Since this is not the last job being reported this time, it
+					 * must have passed. This is because the hardware will not allow further jobs in a job slot to
+					 * complete until the failed job is cleared from the IRQ status.
+ */
+ kbase_job_done_slot(kbdev, i, BASE_JD_EVENT_DONE, 0, &end_timestamp);
+ }
+ nr_done--;
+ }
+
+ spurious:
+ done = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+ /* Workaround for missing interrupt caused by PRLAM-10883 */
+ if (((active >> i) & 1) && (0 == kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS), NULL))) {
+ /* Force job slot to be processed again */
+ done |= (1u << i);
+ }
+ }
+
+ failed = done >> 16;
+ finished = (done & 0xFFFF) | failed;
+ } while (finished & (1 << i));
+
+ kbasep_job_slot_update_head_start_timestamp(kbdev, slot, end_timestamp);
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+#if KBASE_GPU_RESET_EN
+ if (atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_COMMITTED) {
+ /* If we're trying to reset the GPU then we might be able to do it early
+ * (without waiting for a timeout) because some jobs have completed
+ */
+ kbasep_try_reset_gpu_early(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done)
+
+static mali_bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, u16 core_reqs)
+{
+ mali_bool soft_stops_allowed = MALI_TRUE;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+ if ((core_reqs & BASE_JD_REQ_T) != 0)
+ soft_stops_allowed = MALI_FALSE;
+ }
+ return soft_stops_allowed;
+}
+
+static mali_bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, u16 core_reqs)
+{
+ mali_bool hard_stops_allowed = MALI_TRUE;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+ if ((core_reqs & BASE_JD_REQ_T) != 0)
+ hard_stops_allowed = MALI_FALSE;
+ }
+ return hard_stops_allowed;
+}
+
+static void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+ struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+ u32 status_reg_before;
+ u64 job_in_head_before;
+ u32 status_reg_after;
+
+ KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+ /* Check the head pointer */
+ job_in_head_before = ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+ | (((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) << 32);
+ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), NULL);
+#endif
+
+ if (action == JS_COMMAND_SOFT_STOP) {
+ mali_bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, core_reqs);
+ if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", (unsigned int)core_reqs);
+#endif /* CONFIG_MALI_DEBUG */
+ return;
+ }
+
+ /* We are about to issue a soft stop, so mark the atom as having been soft stopped */
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+ }
+
+ if (action == JS_COMMAND_HARD_STOP) {
+ mali_bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs);
+ if (!hard_stop_allowed) {
+ /* Jobs can be hard-stopped for the following reasons:
+ * * CFS decides the job has been running too long (and soft-stop has not occurred).
+ * In this case the GPU will be reset by CFS if the job remains on the GPU.
+ *
+ * * The context is destroyed, kbase_jd_zap_context will attempt to hard-stop the job. However
+ * it also has a watchdog which will cause the GPU to be reset if the job remains on the GPU.
+ *
+ * * An (unhandled) MMU fault occurred. As long as BASE_HW_ISSUE_8245 is defined then
+ * the GPU will be reset.
+ *
+ * All three cases result in the GPU being reset if the hard-stop fails,
+ * so it is safe to just return and ignore the hard-stop request.
+ */
+ dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", (unsigned int)core_reqs);
+ return;
+ }
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && action == JS_COMMAND_SOFT_STOP) {
+ int i;
+ struct kbase_jm_slot *slot;
+ slot = &kbdev->jm_slots[js];
+
+ for (i = 0; i < kbasep_jm_nr_jobs_submitted(slot); i++) {
+ struct kbase_jd_atom *katom;
+
+ katom = kbasep_jm_peek_idx_submit_slot(slot, i);
+
+ KBASE_DEBUG_ASSERT(katom);
+
+ if (kbasep_jm_is_dummy_workaround_job(kbdev, katom) != MALI_FALSE) {
+ /* Don't access the members of HW workaround 'dummy' jobs
+ *
+ * This assumes that such jobs can't cause HW_ISSUE_8316, and could only be blocked
+				 * by other jobs causing HW_ISSUE_8316 (which will get poked, or eventually get killed) */
+ continue;
+ }
+
+ /* For HW_ISSUE_8316, only 'bad' jobs attacking the system can
+ * cause this issue: normally, all memory should be allocated in
+ * multiples of 4 pages, and growable memory should be changed size
+ * in multiples of 4 pages.
+ *
+ * Whilst such 'bad' jobs can be cleared by a GPU reset, the
+ * locking up of a uTLB entry caused by the bad job could also
+ * stall other ASs, meaning that other ASs' jobs don't complete in
+ * the 'grace' period before the reset. We don't want to lose other
+ * ASs' jobs when they would normally complete fine, so we must
+ * 'poke' the MMU regularly to help other ASs complete */
+ kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, katom);
+ }
+ }
+
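+	/* With job chain disambiguation the stop command must name the chain (0 or 1) the target atom was submitted with */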
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+ if (action == JS_COMMAND_SOFT_STOP)
+ action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_SOFT_STOP_1 :
+ JS_COMMAND_SOFT_STOP_0;
+ else
+ action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_HARD_STOP_1 :
+ JS_COMMAND_HARD_STOP_0;
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), NULL);
+ if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+ struct kbase_jm_slot *slot;
+ struct kbase_jd_atom *head;
+ struct kbase_context *head_kctx;
+
+ slot = &kbdev->jm_slots[js];
+ head = kbasep_jm_peek_idx_submit_slot(slot, slot->submitted_nr - 1);
+ head_kctx = head->kctx;
+
+ /* We don't need to check kbasep_jm_is_dummy_workaround_job( head ) here:
+ * - Members are not indirected through
+ * - The members will all be zero anyway
+ */
+ if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js);
+ else
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js);
+
+ switch (action) {
+ case JS_COMMAND_SOFT_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js);
+ else
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js);
+
+ switch (action) {
+ case JS_COMMAND_SOFT_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+#endif
+}
+
+/* Helper macros used by kbasep_job_slot_soft_or_hard_stop */
+#define JM_SLOT_MAX_JOB_SUBMIT_REGS 2
+#define JM_JOB_IS_CURRENT_JOB_INDEX(n) (1 == n) /* Index of the last job to process */
+#define JM_JOB_IS_NEXT_JOB_INDEX(n) (2 == n) /* Index of the second-to-last job to process */
+
+/** Soft or hard-stop a slot
+ *
+ * This function safely ensures that the correct job is either hard or soft-stopped.
+ * It deals with evicting jobs from the next registers where appropriate.
+ *
+ * This does not attempt to stop or evict jobs that are 'dummy' jobs for HW workarounds.
+ *
+ * @param kbdev The kbase device
+ * @param kctx The context to soft/hard-stop job(s) from (or NULL if all jobs should be targeted)
+ * @param js The slot that the job(s) are on
+ * @param target_katom The atom that should be targeted (or NULL if all jobs from the context should be targeted)
+ * @param action The action to perform, either JS_COMMAND_HARD_STOP or JS_COMMAND_SOFT_STOP
+ */
+static void kbasep_job_slot_soft_or_hard_stop(struct kbase_device *kbdev, struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom, u32 action)
+{
+ struct kbase_jd_atom *katom;
+ u8 i;
+ u8 jobs_submitted;
+ struct kbase_jm_slot *slot;
+ u16 core_reqs;
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool can_safely_stop = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION);
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ KBASE_DEBUG_ASSERT(hw_action == JS_COMMAND_HARD_STOP ||
+ hw_action == JS_COMMAND_SOFT_STOP);
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ slot = &kbdev->jm_slots[js];
+ KBASE_DEBUG_ASSERT(slot);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ jobs_submitted = kbasep_jm_nr_jobs_submitted(slot);
+
+ KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 1);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SLOT_SOFT_OR_HARD_STOP, kctx, NULL, 0u, js, jobs_submitted);
+
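+	/* Only the last JM_SLOT_MAX_JOB_SUBMIT_REGS entries can still be in the HEAD/NEXT registers, so start from there */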
+ if (jobs_submitted > JM_SLOT_MAX_JOB_SUBMIT_REGS)
+ i = jobs_submitted - JM_SLOT_MAX_JOB_SUBMIT_REGS;
+ else
+ i = 0;
+
+ /* Loop through all jobs that have been submitted to the slot and haven't completed */
+ for (; i < jobs_submitted; i++) {
+ katom = kbasep_jm_peek_idx_submit_slot(slot, i);
+
+ if (kctx && katom->kctx != kctx)
+ continue;
+
+ if (target_katom && katom != target_katom)
+ continue;
+
+ if (kbasep_jm_is_dummy_workaround_job(kbdev, katom))
+ continue;
+
+ core_reqs = katom->core_req;
+
+ /* This will be repeated for anything removed from the next
+ * registers, since their normal flow was also interrupted, and
+ * this function might not enter disjoint state e.g. if we
+ * don't actually do a hard stop on the head atom
+ */
+ kbase_job_check_enter_disjoint(kbdev, action, core_reqs, katom);
+
+ if (JM_JOB_IS_CURRENT_JOB_INDEX(jobs_submitted - i)) {
+ /* The last job in the slot, check if there is a job in the next register */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), NULL) == 0) {
+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev,
+ js, hw_action, core_reqs, katom);
+ } else {
+ /* The job is in the next registers */
+ beenthere(kctx, "clearing job from next registers on slot %d", js);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP, NULL);
+ /* Check to see if we did remove a job from the next registers */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) != 0) {
+ /* The job was successfully cleared from the next registers, requeue it */
+ struct kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot);
+ KBASE_DEBUG_ASSERT(dequeued_katom == katom);
+ jobs_submitted--;
+
+ /* Set the next registers to NULL */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), 0, NULL);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), 0, NULL);
+
+ /* As the job is removed from the next registers we undo the associated
+ * update to the job_chain_flag for the job slot. */
+ if (can_safely_stop)
+ slot->job_chain_flag = !slot->job_chain_flag;
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js);
+
+ kbase_job_check_enter_disjoint(kbdev, action, 0u, dequeued_katom);
+ /* Complete the job, indicate it took no time, but don't submit any more at this point */
+ kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+ } else {
+ /* The job transitioned into the current registers before we managed to evict it,
+ * in this case we fall back to soft/hard-stopping the job */
+ beenthere(kctx, "missed job in next register, soft/hard-stopping slot %d", js);
+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev,
+ js, hw_action, core_reqs, katom);
+ }
+ }
+ } else if (JM_JOB_IS_NEXT_JOB_INDEX(jobs_submitted - i)) {
+ /* There's a job after this one, check to see if that
+ * job is in the next registers. If so, we need to pay
+			 * attention to not accidentally stop that one when
+			 * issuing the command to stop the one pointed to by
+ * the head registers (as the one in the head may
+ * finish in the mean time and the one in the next
+ * moves to the head). Either the hardware has support
+ * for this using job chain disambiguation or we need
+ * to evict the job from the next registers first to
+ * ensure we can safely stop the one pointed to by the
+ * head registers.
+ */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), NULL) != 0) {
+ struct kbase_jd_atom *check_next_atom;
+ /* It is - we should remove that job and soft/hard-stop the slot */
+
+ /* Only proceed when the next job isn't a HW workaround 'dummy' job
+ *
+ * This can't be an ASSERT due to MMU fault code:
+ * - This first hard-stops the job that caused the fault
+ * - Under HW Issue 8245, it will then reset the GPU
+ * - This causes a Soft-stop to occur on all slots
+ * - By the time of the soft-stop, we may (depending on timing) still have:
+ * - The original job in HEAD, if it's not finished the hard-stop
+ * - The dummy workaround job in NEXT
+ *
+ * Other cases could be coded in future that cause back-to-back Soft/Hard
+ * stops with dummy workaround jobs in place, e.g. MMU handler code and Job
+ * Scheduler watchdog timer running in parallel.
+ *
+ * Note, the index i+1 is valid to peek from: i == jobs_submitted-2, therefore
+ * i+1 == jobs_submitted-1 */
+ check_next_atom = kbasep_jm_peek_idx_submit_slot(slot, i + 1);
+ if (kbasep_jm_is_dummy_workaround_job(kbdev, check_next_atom) != MALI_FALSE)
+ continue;
+
+ if (!can_safely_stop) {
+ beenthere(kctx, "clearing job from next registers on slot %d", js);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP, NULL);
+
+ /* Check to see if we did remove a job from the next registers */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) != 0) {
+ /* We did remove a job from the next registers, requeue it */
+ struct kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot);
+ KBASE_DEBUG_ASSERT(dequeued_katom != NULL);
+ jobs_submitted--;
+
+ /* Set the next registers to NULL */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), 0, NULL);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), 0, NULL);
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js);
+
+ kbase_job_check_enter_disjoint(kbdev, action, 0u, dequeued_katom);
+ /* Complete the job, indicate it took no time, but don't submit any more at this point */
+ kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+ } else {
+ /* We missed the job: the one we're interested in left the hardware before
+ * we managed to do anything, so we can proceed to the next job */
+ continue;
+ }
+ }
+
+ /* Next is now free, so we can soft/hard-stop the slot */
+ beenthere(kctx, "soft/hard-stopped slot %d (there was a job in next which was successfully cleared)\n", js);
+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev,
+ js, hw_action, core_reqs, katom);
+ }
+ /* If there was no job in the next registers, then the job we were
+ * interested in has finished, so we need not take any action
+ */
+ }
+ }
+
+ KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 0);
+}
+
+void kbase_job_kill_jobs_from_context(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* Cancel any remaining running jobs for this kctx */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Invalidate all jobs in context, to prevent re-submitting */
+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++)
+ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_hardstop(kctx, i, NULL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+void kbase_job_zap_context(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ int i;
+ mali_bool evict_success;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /*
+ * Critical assumption: No more submission is possible outside of the
+ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+ * whilst the struct kbase_context is terminating.
+ */
+
+ /* First, atomically do the following:
+ * - mark the context as dying
+ * - try to evict it from the policy queue */
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ js_kctx_info->ctx.is_dying = MALI_TRUE;
+
+ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+ mutex_lock(&js_devdata->queue_mutex);
+ evict_success = kbasep_js_policy_try_evict_ctx(&js_devdata->policy, kctx);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ /*
+ * At this point we know:
+ * - If eviction succeeded, it was in the policy queue, but now no longer is
+ * - We must cancel the jobs here. No Power Manager active reference to
+ * release.
+ * - This happens asynchronously - kbase_jd_zap_context() will wait for
+ * those jobs to be killed.
+ * - If eviction failed, then it wasn't in the policy queue. It is one of
+ * the following:
+ * - a. it didn't have any jobs, and so is not in the Policy Queue or the
+ * Run Pool (not scheduled)
+ * - Hence, no more work required to cancel jobs. No Power Manager active
+ * reference to release.
+ * - b. it was in the middle of a scheduling transaction (and thus must
+ * have at least 1 job). This can happen from a syscall or a kernel thread.
+ * We still hold the jsctx_mutex, and so the thread must be waiting inside
+ * kbasep_js_try_schedule_head_ctx(), before checking whether the runpool
+ * is full. That thread will continue after we drop the mutex, and will
+ * notice the context is dying. It will rollback the transaction, killing
+ * all jobs at the same time. kbase_jd_zap_context() will wait for those
+ * jobs to be killed.
+ * - Hence, no more work required to cancel jobs, or to release the Power
+ * Manager active reference.
+ * - c. it is scheduled, and may or may not be running jobs
+ * - We must cause it to leave the runpool by stopping it from submitting
+ * any more jobs. When it finally does leave,
+ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+ * (because it is dying), release the Power Manager active reference, and
+ * will not requeue the context in the policy queue. kbase_jd_zap_context()
+ * will wait for those jobs to be killed.
+ * - Hence, work required just to make it leave the runpool. Cancelling
+ * jobs and releasing the Power manager active reference will be handled
+ * when it leaves the runpool.
+ */
+
+ if (evict_success != MALI_FALSE || js_kctx_info->ctx.is_scheduled == MALI_FALSE) {
+ /* The following events require us to kill off remaining jobs and
+ * update PM book-keeping:
+ * - we evicted it correctly (it must have jobs to be in the Policy Queue)
+ *
+ * These events need no action, but take this path anyway:
+ * - Case a: it didn't have any jobs, and was never in the Queue
+ * - Case b: scheduling transaction will be partially rolled-back (this
+ * already cancels the jobs)
+ */
+
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, js_kctx_info->ctx.is_scheduled);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p evict_success=%d, scheduled=%d", kctx, evict_success, js_kctx_info->ctx.is_scheduled);
+
+ if (evict_success != MALI_FALSE) {
+ /* Only cancel jobs when we evicted from the policy queue. No Power
+ * Manager active reference was held.
+ *
+ * Having is_dying set ensures that this kills, and doesn't requeue */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_FALSE);
+ }
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ } else {
+ unsigned long flags;
+ mali_bool was_retained;
+ /* Case c: didn't evict, but it is scheduled - it's in the Run Pool */
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, js_kctx_info->ctx.is_scheduled);
+ dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+ /* Disable the ctx from submitting any more jobs */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+ /* Retain and (later) release the context whilst it is now disallowed from submitting
+ * jobs - this ensures that someone somewhere will be removing the context later on */
+ was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+ /* Since it's scheduled and we have the jsctx_mutex, it must be retained successfully */
+ KBASE_DEBUG_ASSERT(was_retained != MALI_FALSE);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+ /* Cancel any remaining running jobs for this kctx - if any. Submit is disallowed
+ * which takes effect immediately, so no more new jobs will appear after we do this. */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_hardstop(kctx, i, NULL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", kctx);
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+ /* After this, you must wait on both the kbase_jd_context::zero_jobs_wait
+ * and the kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the
+ * jobs to be destroyed, and the context to be de-scheduled (if it was on
+ * the runpool).
+ *
+ * kbase_jd_zap_context() will do this. */
+}
+KBASE_EXPORT_TEST_API(kbase_job_zap_context)
+
+mali_error kbase_job_slot_init(struct kbase_device *kbdev)
+{
+ int i;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbasep_jm_init_submit_slot(&kbdev->jm_slots[i]);
+
+ return MALI_ERROR_NONE;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init)
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term)
+
+/**
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ *
+ * @param kbdev The kbase device
+ * @param js The job slot to soft-stop
+ * @param target_katom The job that should be soft-stopped (or NULL for any job)
+ * @param sw_flags Flags to pass in about the soft-stop
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+ kbasep_job_slot_soft_or_hard_stop(kbdev, NULL, js, target_katom,
+ JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * Soft-stop the specified job slot
+ *
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ *
+ * @param kbdev The kbase device
+ * @param js The job slot to soft-stop
+ * @param target_katom The job that should be soft-stopped (or NULL for any job)
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom)
+{
+ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * Hard-stop the specified job slot
+ *
+ * The job slot lock must be held when calling this function.
+ *
+ * @param kctx The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @param js The job slot to hard-stop
+ * @param target_katom The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ kbasep_job_slot_soft_or_hard_stop(kbdev, kctx, js, target_katom,
+ JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+ (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) &&
+ (target_katom == NULL || target_katom->core_req & BASE_JD_REQ_FS_AFBC))) {
+ /* MIDBASE-2916: if a fragment job with AFBC encoding is
+ * hard-stopped, also do a soft reset in order to
+ * clear the GPU status.
+ * The workaround for HW issue 8401 has an issue of its own, so after
+ * hard-stopping just reset the GPU. This ensures that the
+ * jobs leave the GPU. */
+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+ dev_err(kbdev->dev, "Issuing GPU soft-reset after hard stopping due to hardware issue");
+ kbase_reset_gpu_locked(kbdev);
+ }
+ }
+#endif
+}
+
+/**
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ /* For hard-stop, don't enter if hard-stop not allowed */
+ if (hw_action == JS_COMMAND_HARD_STOP &&
+ !kbasep_hard_stop_allowed(kbdev, core_reqs))
+ return;
+
+ /* For soft-stop, don't enter if soft-stop not allowed, or isn't
+ * causing disjoint */
+ if (hw_action == JS_COMMAND_SOFT_STOP &&
+ !(kbasep_soft_stop_allowed(kbdev, core_reqs) &&
+ (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+ return;
+
+ /* Nothing to do if already logged disjoint state on this atom */
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+ return;
+
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom)
+{
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_down(kbdev);
+ }
+}
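+
+/*
+ * Illustrative sketch only (not part of the driver flow): the enter/leave
+ * pair above is expected to bracket a soft/hard-stop action, mirroring the
+ * eviction paths earlier in this file; the usual job-slot locking is assumed
+ * to be held by the caller:
+ *
+ *	kbase_job_check_enter_disjoint(kbdev, action, core_reqs, katom);
+ *	kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+ *						    core_reqs, katom);
+ *	... once the stop has taken effect and the atom completes ...
+ *	kbase_job_check_leave_disjoint(kbdev, katom);
+ */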
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+ int i;
+ dev_err(kbdev->dev, "Register state:");
+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+ for (i = 0; i < 3; i++) {
+ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+ NULL),
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+ NULL));
+ }
+ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ int i;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
+ enum kbase_instr_state bckp_state;
+
+ KBASE_DEBUG_ASSERT(data);
+
+ kbdev = container_of(data, struct kbase_device, reset_work);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+ /* Make sure the timer has completed - this cannot be done from interrupt context,
+ * so this cannot be done within kbasep_try_reset_gpu_early. */
+ hrtimer_cancel(&kbdev->reset_timer);
+
+ if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* This would re-activate the GPU. Since it's already idle, there's no
+ * need to reset it */
+ atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
+ kbase_disjoint_state_down(kbdev);
+ wake_up(&kbdev->reset_wait);
+ return;
+ }
+
+ mutex_lock(&kbdev->pm.lock);
+ /* We hold the pm lock, so there ought to be a current policy */
+ KBASE_DEBUG_ASSERT(kbdev->pm.pm_current_policy);
+
+ /* All slots have been soft-stopped and we've waited SOFT_STOP_RESET_TIMEOUT for the slots to clear; at this point
+ * we assume that anything still left on the GPU is stuck there and we'll kill it when we reset the GPU */
+
+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) { /* the same interrupt handler preempted itself */
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ /* Save the HW counters setup */
+ if (kbdev->hwcnt.kctx != NULL) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+ hwcnt_setup.dump_buffer = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) & 0xffffffff;
+ hwcnt_setup.dump_buffer |= (mali_addr64) kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) << 32;
+ hwcnt_setup.jm_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
+ hwcnt_setup.shader_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
+ hwcnt_setup.tiler_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
+ hwcnt_setup.l3_cache_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), kctx);
+ hwcnt_setup.mmu_l2_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
+ }
+
+ /* Output the state of some interesting registers to help in the
+ * debugging of GPU resets */
+ kbase_debug_dump_registers(kbdev);
+
+ bckp_state = kbdev->hwcnt.state;
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_RESETTING;
+ kbdev->hwcnt.triggered = 0;
+ /* Disable IRQs to prevent IRQ handlers from kicking in after releasing the spinlock;
+ * this also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ensure that any IRQ handlers have finished.
+ * This must be done without holding any locks that IRQ handlers will take */
+ kbase_synchronize_irqs(kbdev);
+
+ /* Reset the GPU */
+ kbase_pm_init_hw(kbdev, MALI_TRUE);
+ /* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Restore the HW counters setup */
+ if (kbdev->hwcnt.kctx != NULL) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), hwcnt_setup.dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), hwcnt_setup.jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), hwcnt_setup.shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), hwcnt_setup.l3_cache_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), hwcnt_setup.mmu_l2_bm, kctx);
+
+ /* Due to PRLAM-8186 we need to disable the Tiler before we enable the HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), hwcnt_setup.tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), hwcnt_setup.tiler_bm, kctx);
+ }
+ kbdev->hwcnt.state = bckp_state;
+ switch (kbdev->hwcnt.state) {
+ /* Cases for waking kbasep_cache_clean_worker worker */
+ case KBASE_INSTR_STATE_CLEANED:
+ /* Cache-clean IRQ occurred, but we reset:
+ * Wake up in case the waiter saw RESETTING */
+ case KBASE_INSTR_STATE_REQUEST_CLEAN:
+ /* After a clean was requested, but before the regs were written:
+ * Wake up in case the waiter saw RESETTING */
+ wake_up(&kbdev->hwcnt.cache_clean_wait);
+ break;
+ case KBASE_INSTR_STATE_CLEANING:
+ /* Either:
+ * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
+ * 2) We got it whilst resetting: it was voluntarily lost
+ *
+ * So, move to the next state and wakeup: */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.cache_clean_wait);
+ break;
+
+ /* Cases for waking anyone else */
+ case KBASE_INSTR_STATE_DUMPING:
+ /* If dumping, abort the dump, because we may've lost the IRQ */
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.triggered = 1;
+ wake_up(&kbdev->hwcnt.wait);
+ break;
+ case KBASE_INSTR_STATE_DISABLED:
+ case KBASE_INSTR_STATE_IDLE:
+ case KBASE_INSTR_STATE_FAULT:
+ /* Every other reason: wakeup in that state */
+ kbdev->hwcnt.triggered = 1;
+ wake_up(&kbdev->hwcnt.wait);
+ break;
+
+ /* Unhandled cases */
+ case KBASE_INSTR_STATE_RESETTING:
+ default:
+ BUG();
+ break;
+ }
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Complete any jobs that were still on the GPU */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+ int nr_done;
+ struct kbase_jm_slot *slot = &kbdev->jm_slots[i];
+
+ nr_done = kbasep_jm_nr_jobs_submitted(slot);
+ while (nr_done) {
+ dev_err(kbdev->dev, "Job stuck in slot %d on the GPU was cancelled", i);
+ kbase_job_done_slot(kbdev, i, BASE_JD_EVENT_JOB_CANCELLED, 0, &end_timestamp);
+ nr_done--;
+ }
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Reprogram the GPU's MMU */
+ for (i = 0; i < BASE_MAX_NR_AS; i++) {
+ if (js_devdata->runpool_irq.per_as_data[i].kctx) {
+ struct kbase_as *as = &kbdev->as[i];
+ mutex_lock(&as->transaction_mutex);
+ kbase_mmu_update(js_devdata->runpool_irq.per_as_data[i].kctx);
+ mutex_unlock(&as->transaction_mutex);
+ }
+ }
+
+ atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
+
+ kbase_disjoint_state_down(kbdev);
+ wake_up(&kbdev->reset_wait);
+ dev_err(kbdev->dev, "Reset complete");
+
+ /* Find out what cores are required now */
+ kbase_pm_update_cores_state(kbdev);
+
+ /* Synchronously request and wait for those cores, because if
+ * instrumentation is enabled it would need them immediately. */
+ kbase_pm_check_transitions_sync(kbdev);
+
+ /* Try submitting some jobs to restart processing */
+ if (js_devdata->nr_user_contexts_running > 0) {
+ KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_try_run_next_job_nolock(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ }
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&kbdev->pm.lock);
+
+ kbase_pm_context_idle(kbdev);
+ KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_device *kbdev = container_of(timer, struct kbase_device, reset_timer);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Reset still pending? */
+ if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == KBASE_RESET_GPU_COMMITTED)
+ queue_work(kbdev->reset_workq, &kbdev->reset_work);
+
+ return HRTIMER_NORESTART;
+}
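+
+/*
+ * Summary of the reset_gpu state machine as driven by the functions in this
+ * file (for reference; the authoritative behaviour is the code itself):
+ *
+ *	NOT_PENDING --kbase_prepare_to_reset_gpu[_locked]()--> PREPARED
+ *	PREPARED    --kbase_reset_gpu[_locked]()-------------> COMMITTED
+ *	COMMITTED   --reset timer / early reset--------------> HAPPENING
+ *	HAPPENING   --kbasep_reset_timeout_worker() done-----> NOT_PENDING
+ */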
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+ int i;
+ int pending_jobs = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Count the number of jobs */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+ struct kbase_jm_slot *slot = &kbdev->jm_slots[i];
+ pending_jobs += kbasep_jm_nr_jobs_submitted(slot);
+ }
+
+ if (pending_jobs > 0) {
+ /* There are still jobs on the GPU - wait */
+ return;
+ }
+
+ /* Check that the reset has been committed to (i.e. kbase_reset_gpu has been called), and that no other
+ * thread beat this thread to starting the reset */
+ if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != KBASE_RESET_GPU_COMMITTED) {
+ /* Reset has already occurred */
+ return;
+ }
+ queue_work(kbdev->reset_workq, &kbdev->reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_try_reset_gpu_early_locked(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+/*
+ * Prepare for resetting the GPU.
+ * This function just soft-stops all the slots to ensure that as many jobs as possible are saved.
+ *
+ * The function returns a boolean which should be interpreted as follows:
+ * - MALI_TRUE - Prepared for reset, kbase_reset_gpu should be called.
+ * - MALI_FALSE - Another thread is performing a reset, kbase_reset_gpu should not be called.
+ *
+ * @return See description
+ */
+mali_bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ int i;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_PREPARED) != KBASE_RESET_GPU_NOT_PENDING) {
+ /* Some other thread is already resetting the GPU */
+ return MALI_FALSE;
+ }
+
+ kbase_disjoint_state_up(kbdev);
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_softstop(kbdev, i, NULL);
+
+ return MALI_TRUE;
+}
+
+mali_bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ mali_bool ret;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu)
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu iff it returns MALI_TRUE.
+ * It should never be called without a corresponding call to kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu returned MALI_FALSE),
+ * the caller should wait for kbdev->reset_wait to be signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+ u32 timeout_ms;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for a race to be occurring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED);
+
+ timeout_ms = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS);
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", timeout_ms);
+ hrtimer_start(&kbdev->reset_timer, HR_TIMER_DELAY_MSEC(timeout_ms), HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu)
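+
+/*
+ * A minimal usage sketch of the prepare/reset pair (the locked variants used
+ * in kbase_job_slot_hardstop() follow the same pattern); error handling and
+ * locking are assumed to be as described on the functions above:
+ *
+ *	if (kbase_prepare_to_reset_gpu(kbdev)) {
+ *		dev_err(kbdev->dev, "Resetting GPU");
+ *		kbase_reset_gpu(kbdev);
+ *	}
+ */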
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ u32 timeout_ms;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for a race to be occurring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED);
+
+ timeout_ms = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS);
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", timeout_ms);
+ hrtimer_start(&kbdev->reset_timer, HR_TIMER_DELAY_MSEC(timeout_ms), HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early_locked(kbdev);
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100755
index 000000000000..d17183381311
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,199 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_jm.h
+ * Job Manager Low-level APIs.
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_jm Job Manager Low-level APIs
+ * @{
+ *
+ */
+
+static INLINE int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= js && js < kbdev->gpu_props.num_job_slots);
+
+ return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+/**
+ * This checks that:
+ * - there is enough space in the GPU's buffers (JS_NEXT and JS_HEAD registers) to accommodate the job.
+ * - there is enough space to track the job in our Submit Slots. Note that we have to maintain space to
+ * requeue one job in case the next registers on the hardware need to be cleared.
+ */
+static INLINE mali_bool kbasep_jm_is_submit_slots_free(struct kbase_device *kbdev, int js, struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= js && js < kbdev->gpu_props.num_job_slots);
+
+ if (atomic_read(&kbdev->reset_gpu) != KBASE_RESET_GPU_NOT_PENDING) {
+ /* The GPU is being reset - so prevent submission */
+ return MALI_FALSE;
+ }
+
+ return (mali_bool) (kbasep_jm_is_js_free(kbdev, js, kctx)
+ && kbdev->jm_slots[js].submitted_nr < (BASE_JM_SUBMIT_SLOTS - 2));
+}
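+
+/*
+ * Worked example (illustrative; assuming, say, BASE_JM_SUBMIT_SLOTS == 16):
+ * submission is refused once submitted_nr reaches 14, so even after the job
+ * being submitted is tracked there is still a spare entry left for requeueing
+ * a job that has to be evicted from the NEXT registers.
+ */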
+
+/**
+ * Initialize a submit slot
+ */
+static INLINE void kbasep_jm_init_submit_slot(struct kbase_jm_slot *slot)
+{
+ slot->submitted_nr = 0;
+ slot->submitted_head = 0;
+}
+
+/**
+ * Find the atom at the idx'th element in the queue without removing it, starting at the head with idx==0.
+ */
+static INLINE struct kbase_jd_atom *kbasep_jm_peek_idx_submit_slot(struct kbase_jm_slot *slot, u8 idx)
+{
+ u8 pos;
+ struct kbase_jd_atom *katom;
+
+ KBASE_DEBUG_ASSERT(idx < BASE_JM_SUBMIT_SLOTS);
+
+ pos = (slot->submitted_head + idx) & BASE_JM_SUBMIT_SLOTS_MASK;
+ katom = slot->submitted[pos];
+
+ return katom;
+}
+
+/**
+ * Pop the front of the submitted queue
+ */
+static INLINE struct kbase_jd_atom *kbasep_jm_dequeue_submit_slot(struct kbase_jm_slot *slot)
+{
+ u8 pos;
+ struct kbase_jd_atom *katom;
+
+ pos = slot->submitted_head & BASE_JM_SUBMIT_SLOTS_MASK;
+ katom = slot->submitted[pos];
+ slot->submitted[pos] = NULL; /* Just to catch bugs... */
+ KBASE_DEBUG_ASSERT(katom);
+
+ /* rotate the buffers */
+ slot->submitted_head = (slot->submitted_head + 1) & BASE_JM_SUBMIT_SLOTS_MASK;
+ slot->submitted_nr--;
+
+ dev_dbg(katom->kctx->kbdev->dev, "katom %p new head %u", (void *)katom, (unsigned int)slot->submitted_head);
+
+ return katom;
+}
+
+/* Pop back of the submitted queue (unsubmit a job)
+ */
+static INLINE struct kbase_jd_atom *kbasep_jm_dequeue_tail_submit_slot(struct kbase_jm_slot *slot)
+{
+ u8 pos;
+
+ slot->submitted_nr--;
+
+ pos = (slot->submitted_head + slot->submitted_nr) & BASE_JM_SUBMIT_SLOTS_MASK;
+
+ return slot->submitted[pos];
+}
+
+static INLINE u8 kbasep_jm_nr_jobs_submitted(struct kbase_jm_slot *slot)
+{
+ return slot->submitted_nr;
+}
+
+/**
+ * Push onto the back of the submitted queue
+ */
+static INLINE void kbasep_jm_enqueue_submit_slot(struct kbase_jm_slot *slot, struct kbase_jd_atom *katom)
+{
+ u8 nr;
+ u8 pos;
+ nr = slot->submitted_nr++;
+ KBASE_DEBUG_ASSERT(nr < BASE_JM_SUBMIT_SLOTS);
+
+ pos = (slot->submitted_head + nr) & BASE_JM_SUBMIT_SLOTS_MASK;
+ slot->submitted[pos] = katom;
+}
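+
+/*
+ * Sketch of how the ring buffer indices evolve (illustrative only; the
+ * wrap-around is handled by BASE_JM_SUBMIT_SLOTS_MASK):
+ *
+ *	kbasep_jm_init_submit_slot(slot);          // head = 0, nr = 0
+ *	kbasep_jm_enqueue_submit_slot(slot, a);    // a at index 0, nr = 1
+ *	kbasep_jm_enqueue_submit_slot(slot, b);    // b at index 1, nr = 2
+ *	kbasep_jm_dequeue_submit_slot(slot);       // returns a, head = 1, nr = 1
+ *	kbasep_jm_dequeue_tail_submit_slot(slot);  // returns b, nr = 0
+ */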
+
+/**
+ * @brief Query whether a job peeked/dequeued from the submit slots is a
+ * 'dummy' job that is used for hardware workaround purposes.
+ *
+ * Any time a job is peeked/dequeued from the submit slots, this should be
+ * queried on that job.
+ *
+ * If \a atom is indicated as being a dummy job, then you <b>must not attempt
+ * to use \a atom</b>. This is because its members will not necessarily be
+ * initialized, and so could lead to a fault if they were used.
+ *
+ * @param[in] kbdev kbase device pointer
+ * @param[in] atom The atom to query
+ *
+ * @return MALI_TRUE if \a atom is for a dummy job, in which case you must not
+ * attempt to use it.
+ * @return MALI_FALSE otherwise, and \a atom is safe to use.
+ */
+static INLINE mali_bool kbasep_jm_is_dummy_workaround_job(struct kbase_device *kbdev, struct kbase_jd_atom *atom)
+{
+ /* Query the set of workaround jobs here */
+ /* none exists today */
+ return MALI_FALSE;
+}
+
+/**
+ * @brief Submit a job to a certain job-slot
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != MALI_FALSE before calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
+
+/**
+ * @brief Complete the head job on a particular job-slot
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp);
+
+ /** @} *//* end group kbase_jm */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100755
index 000000000000..0e9071eaf497
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,2156 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_affinity.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_hw.h>
+
+#include "mali_kbase_jm.h"
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+/*
+ * Private types
+ */
+
+/** Bitpattern indicating the result of releasing a context */
+enum {
+ /** The context was descheduled - caller should try scheduling in a new one
+ * to keep the runpool full */
+ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+};
+
+typedef u32 kbasep_js_release_result;
+
+/*
+ * Private function prototypes
+ */
+STATIC INLINE void kbasep_js_deref_permon_check_and_disable_cycle_counter(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+STATIC INLINE void kbasep_js_ref_permon_check_and_enable_cycle_counter(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+STATIC kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/** Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+STATIC int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return refcnt;
+}
+#else /* KBASE_TRACE_ENABLE */
+STATIC int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+/*
+ * Private types
+ */
+enum {
+ JS_DEVDATA_INIT_NONE = 0,
+ JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+ JS_DEVDATA_INIT_POLICY = (1 << 1),
+ JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+ JS_KCTX_INIT_NONE = 0,
+ JS_KCTX_INIT_CONSTANTS = (1 << 0),
+ JS_KCTX_INIT_POLICY = (1 << 1),
+ JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * Check if the job had performance monitoring enabled and decrement the count. If no jobs require
+ * performance monitoring, then the cycle counters will be disabled in the GPU.
+ *
+ * No locks need to be held - locking is handled further down
+ *
+ * This function does not sleep.
+ */
+
+STATIC INLINE void kbasep_js_deref_permon_check_and_disable_cycle_counter(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ if (katom->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * Check if the job has performance monitoring enabled and keep a count of it. If at least one
+ * job requires performance monitoring, then the cycle counters will be enabled in the GPU.
+ *
+ * No locks need to be held - locking is handled further down
+ *
+ * The L2 Cache must be ON when this function is called
+ *
+ * This function does not sleep.
+ */
+
+STATIC INLINE void kbasep_js_ref_permon_check_and_enable_cycle_counter(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ if (katom->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+STATIC INLINE void runpool_inc_context_count(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex));
+
+ /* Track total contexts */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+ ++(js_devdata->nr_all_contexts_running);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ /* Track contexts that can submit jobs */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < S8_MAX);
+ ++(js_devdata->nr_user_contexts_running);
+ }
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+STATIC INLINE void runpool_dec_context_count(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex));
+
+ /* Track total contexts */
+ --(js_devdata->nr_all_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ /* Track contexts that can submit jobs */
+ --(js_devdata->nr_user_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+ }
+}
+
+/**
+ * @brief check whether the runpool is full for a specified context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to MALI_FALSE.
+ *
+ * The following locking conditions are made on the caller:
+ * - In all cases, the caller must hold kbasep_js_device_data::runpool_mutex
+ * - When kctx != NULL the caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but it doesn't do any harm to do so).
+ */
+STATIC mali_bool check_is_runpool_full(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool is_runpool_full;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+ BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex));
+
+ /* Regardless of whether a context is submitting or not, we can't have more contexts
+ * running than there are HW address spaces */
+ is_runpool_full = (mali_bool) (js_devdata->nr_all_contexts_running >= kbdev->nr_hw_address_spaces);
+
+ if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ BUG_ON(!mutex_is_locked(&kctx->jctx.sched_info.ctx.jsctx_mutex));
+ /* Contexts that submit might use fewer of the available address spaces, due to HW
+ * workarounds. In that case, the runpool is also full when the number of
+ * submitting contexts exceeds the number of submittable address spaces.
+ *
+ * Both checks must be made: can have nr_user_address_spaces == nr_hw_address spaces,
+ * and at the same time can have nr_user_contexts_running < nr_all_contexts_running. */
+ is_runpool_full |= (mali_bool) (js_devdata->nr_user_contexts_running >= kbdev->nr_user_address_spaces);
+ }
+
+ return is_runpool_full;
+}
+
+STATIC base_jd_core_req core_reqs_from_jsn_features(u16 features) /* JS<n>_FEATURE register value */
+{
+ base_jd_core_req core_req = 0u;
+
+ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+ core_req |= BASE_JD_REQ_V;
+
+ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+ core_req |= BASE_JD_REQ_CF;
+
+ if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+ core_req |= BASE_JD_REQ_CS;
+
+ if ((features & JS_FEATURE_TILER_JOB) != 0)
+ core_req |= BASE_JD_REQ_T;
+
+ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+ core_req |= BASE_JD_REQ_FS;
+
+ return core_req;
+}
+
+/**
+ * Picks and reserves an address space.
+ *
+ * When this function returns, the address space returned is reserved and
+ * cannot be picked for another context until it is released.
+ *
+ * The caller must ensure there \b is a free address space before calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_mutex
+ *
+ * @return a non-NULL pointer to a struct kbase_as that is not in use by any other context
+ */
+STATIC struct kbase_as *pick_free_addr_space(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_as *current_as;
+ long ffs_result;
+
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Find the free address space */
+ ffs_result = ffs(js_devdata->as_free) - 1;
+
+ /* ASSERT that we should've found a free one */
+ KBASE_DEBUG_ASSERT(0 <= ffs_result && ffs_result < kbdev->nr_hw_address_spaces);
+ /* Ensure no-one else picks this one */
+ js_devdata->as_free &= ~((u16) (1u << ffs_result));
+
+ current_as = &kbdev->as[ffs_result];
+
+ return current_as;
+}
+
+/**
+ * Release an address space, making it available for being picked again.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_mutex
+ */
+STATIC INLINE void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u16 as_bit = (1u << kctx_as_nr);
+
+ js_devdata = &kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* The address space must not already be free */
+ KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+ js_devdata->as_free |= as_bit;
+}
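+
+/*
+ * Illustrative example of the free-AS bitmask (not driver flow): with
+ * as_free == 0x6 (AS1 and AS2 free), pick_free_addr_space() computes
+ * ffs(0x6) - 1 == 1, clears bit 1 so as_free becomes 0x4 and returns
+ * &kbdev->as[1]; a later release_addr_space(kbdev, 1) sets bit 1 again,
+ * restoring as_free to 0x6.
+ */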
+
+/**
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes:
+ * - setting up the global runpool_irq structure and the context on the AS
+ * - Activating the MMU on the AS
+ * - Allowing jobs to be submitted on the AS
+ *
+ * Locking conditions:
+ * - Caller must hold the kbasep_js_kctx_info::jsctx_mutex
+ * - Caller must hold the kbasep_js_device_data::runpool_mutex
+ * - Caller must hold AS transaction mutex
+ * - Caller must hold Runpool IRQ lock
+ */
+STATIC void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *current_as)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(current_as != NULL);
+
+ js_devdata = &kbdev->js_data;
+ as_nr = current_as->number;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&current_as->transaction_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ /* Attribute handling */
+ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+ /* Assign addr space */
+ kctx->as_nr = as_nr;
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+ /* Activate this address space on the MMU */
+ kbase_mmu_update(kctx);
+
+ /* Allow it to run jobs */
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ /* Book-keeping */
+ js_per_as_data->kctx = kctx;
+ js_per_as_data->as_busy_refcount = 0;
+
+ /* Lastly, add the context to the policy's runpool - this really allows it to run jobs */
+ kbasep_js_policy_runpool_add_ctx(&js_devdata->policy, kctx);
+}
+
+void kbasep_js_try_run_next_job_nolock(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int js;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ /* It's cheap and simple to retest this here - otherwise we burden the
+ * caller with it. In some cases, we do this higher up to optimize out the
+ * spinlock. */
+ if (js_devdata->nr_user_contexts_running == 0)
+ return; /* No contexts present - the GPU might be powered off, so just return */
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; ++js)
+ kbasep_js_try_run_next_job_on_slot_nolock(kbdev, js);
+}
+
+/** Hold the kbasep_js_device_data::runpool_irq::lock for this */
+mali_bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ mali_bool result = MALI_FALSE;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ int new_refcnt;
+
+ KBASE_DEBUG_ASSERT(as_nr >= 0);
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+ new_refcnt = ++(js_per_as_data->as_busy_refcount);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, NULL, 0u, new_refcnt);
+ result = MALI_TRUE;
+ }
+
+ return result;
+}
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ if (js_devdata->nr_user_contexts_running != 0) {
+ /* Only try running jobs when we have contexts present, otherwise the GPU might be powered off. */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_try_run_next_job_nolock(kbdev);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ }
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+void kbase_js_try_run_jobs_on_slot(struct kbase_device *kbdev, int js)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ if (js_devdata->nr_user_contexts_running != 0) {
+ /* Only try running jobs when we have contexts present, otherwise the GPU might be powered off. */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_try_run_next_job_on_slot_nolock(kbdev, js);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ }
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+mali_error kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ mali_error err;
+ int i;
+ u16 as_present;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(js_devdata->init_status == JS_DEVDATA_INIT_NONE);
+
+ /* These two must be recalculated if nr_hw_address_spaces changes (e.g. for HW workarounds) */
+ as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ mali_bool use_workaround_for_security;
+
+ use_workaround_for_security = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+ if (use_workaround_for_security != MALI_FALSE) {
+ dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+ kbdev->nr_user_address_spaces = 1;
+ }
+ }
+#ifdef CONFIG_MALI_DEBUG
+ /* Soft-stop will be disabled on a single context by default unless softstop_always is set */
+ js_devdata->softstop_always = MALI_FALSE;
+#endif /* CONFIG_MALI_DEBUG */
+ js_devdata->nr_all_contexts_running = 0;
+ js_devdata->nr_user_contexts_running = 0;
+ js_devdata->as_free = as_present; /* All ASs initially free */
+ js_devdata->runpool_irq.submit_allowed = 0u; /* No ctx allowed to submit */
+ memset(js_devdata->runpool_irq.ctx_attr_ref_count, 0, sizeof(js_devdata->runpool_irq.ctx_attr_ref_count));
+ memset(js_devdata->runpool_irq.slot_affinities, 0, sizeof(js_devdata->runpool_irq.slot_affinities));
+ js_devdata->runpool_irq.slots_blocked_on_affinity = 0u;
+ memset(js_devdata->runpool_irq.slot_affinity_refcount, 0, sizeof(js_devdata->runpool_irq.slot_affinity_refcount));
+ INIT_LIST_HEAD(&js_devdata->suspended_soft_jobs_list);
+
+ /* Config attributes */
+ js_devdata->scheduling_tick_ns = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS);
+ js_devdata->soft_stop_ticks = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS);
+ js_devdata->soft_stop_ticks_cl = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL);
+ js_devdata->hard_stop_ticks_ss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS);
+ js_devdata->hard_stop_ticks_cl = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL);
+ js_devdata->hard_stop_ticks_nss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS);
+ js_devdata->gpu_reset_ticks_ss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS);
+ js_devdata->gpu_reset_ticks_cl = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL);
+ js_devdata->gpu_reset_ticks_nss = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS);
+ js_devdata->ctx_timeslice_ns = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS);
+ js_devdata->cfs_ctx_runtime_init_slices = DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+ js_devdata->cfs_ctx_runtime_min_slices = DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+
+ dev_dbg(kbdev->dev, "JS Config Attribs: ");
+ dev_dbg(kbdev->dev, "\tscheduling_tick_ns:%u", js_devdata->scheduling_tick_ns);
+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", js_devdata->soft_stop_ticks);
+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", js_devdata->soft_stop_ticks_cl);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", js_devdata->hard_stop_ticks_ss);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", js_devdata->hard_stop_ticks_cl);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_nss:%u", js_devdata->hard_stop_ticks_nss);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", js_devdata->gpu_reset_ticks_ss);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", js_devdata->gpu_reset_ticks_cl);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_nss:%u", js_devdata->gpu_reset_ticks_nss);
+ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", js_devdata->ctx_timeslice_ns);
+ dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u", js_devdata->cfs_ctx_runtime_init_slices);
+ dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", js_devdata->cfs_ctx_runtime_min_slices);
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", js_devdata->soft_stop_ticks, js_devdata->scheduling_tick_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_nss==%u at %uns per tick. Other hard-stops may still occur.", js_devdata->hard_stop_ticks_ss, js_devdata->hard_stop_ticks_nss, js_devdata->scheduling_tick_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+ /* Set up the number of IRQ throttle cycles based on the given time */
+ {
+ int irq_throttle_time_us = kbdev->gpu_props.irq_throttle_time_us;
+ int irq_throttle_cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev, irq_throttle_time_us);
+
+ atomic_set(&kbdev->irq_throttle_cycles, irq_throttle_cycles);
+ }
+
+ /* Clear the AS data, including setting NULL pointers */
+ memset(&js_devdata->runpool_irq.per_as_data[0], 0, sizeof(js_devdata->runpool_irq.per_as_data));
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+ js_devdata->js_reqs[i] = core_reqs_from_jsn_features(kbdev->gpu_props.props.raw_props.js_features[i]);
+
+ js_devdata->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+ /* On error, we could continue, provided none of the below resources
+ * rely on the ones above */
+
+ mutex_init(&js_devdata->runpool_mutex);
+ mutex_init(&js_devdata->queue_mutex);
+ spin_lock_init(&js_devdata->runpool_irq.lock);
+
+ err = kbasep_js_policy_init(kbdev);
+ if (err == MALI_ERROR_NONE)
+ js_devdata->init_status |= JS_DEVDATA_INIT_POLICY;
+
+ /* On error, do no cleanup; this will be handled by the caller(s), since
+ * we've designed this resource to be safe to terminate on init-fail */
+ if (js_devdata->init_status != JS_DEVDATA_INIT_ALL)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ return MALI_ERROR_NONE;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+ /* The caller must de-register all contexts before calling this */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+ KBASE_DEBUG_ASSERT(memcmp(js_devdata->runpool_irq.ctx_attr_ref_count, zero_ctx_attr_ref_count, sizeof(js_devdata->runpool_irq.ctx_attr_ref_count)) == 0);
+ CSTD_UNUSED(zero_ctx_attr_ref_count);
+ }
+ if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+ kbasep_js_policy_term(&js_devdata->policy);
+
+ js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+mali_error kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_error err;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+ KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+ js_kctx_info->ctx.nr_jobs = 0;
+ js_kctx_info->ctx.is_scheduled = MALI_FALSE;
+ js_kctx_info->ctx.is_dying = MALI_FALSE;
+ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+ /* Initially, the context is disabled from submission until the create flags are set */
+ js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+
+ js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+ /* On error, we could continue, provided that none of the resources below
+ * rely on the ones above */
+ mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+ err = kbasep_js_policy_init_ctx(kbdev, kctx);
+ if (err == MALI_ERROR_NONE)
+ js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+ /* On error, do no cleanup; this will be handled by the caller(s), since
+ * we've designed this resource to be safe to terminate on init-fail */
+ if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ return MALI_ERROR_NONE;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+ /* The caller must de-register all jobs before calling this */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE);
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+ }
+
+ if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+ kbasep_js_policy_term_ctx(js_policy, kctx);
+
+ js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+}
+
+/* Evict jobs from the NEXT registers
+ *
+ * The caller must hold:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex
+ * - kbasep_js_device_data::runpool_mutex
+ */
+STATIC void kbasep_js_runpool_evict_next_jobs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ int js;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+
+ BUG_ON(!mutex_is_locked(&kctx->jctx.sched_info.ctx.jsctx_mutex));
+ BUG_ON(!mutex_is_locked(&js_devdata->runpool_mutex));
+
+ /* Take the runpool IRQ lock so that the NEXT-register eviction below is atomic */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* There's no need to prevent contexts in the runpool from submitting jobs,
+ * because we complete this operation by the time we release the
+ * runpool_irq.lock */
+
+ /* Evict jobs from the NEXT registers */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ struct kbase_jm_slot *slot;
+ struct kbase_jd_atom *tail;
+
+ if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), NULL)) {
+ /* No job in the NEXT register */
+ continue;
+ }
+
+ slot = &kbdev->jm_slots[js];
+ tail = kbasep_jm_peek_idx_submit_slot(slot, slot->submitted_nr - 1);
+
+ KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 1);
+ /* Clearing job from next registers */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP, NULL);
+
+ /* Check whether the job is still present in the NEXT registers, i.e. it had not yet started when the NOP was written */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) != 0) {
+ /* The NOP landed before the job started, so the job can be requeued */
+ struct kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot);
+
+ KBASE_DEBUG_ASSERT(dequeued_katom == tail);
+
+ /* Set the next registers to NULL */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), 0, NULL);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), 0, NULL);
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js);
+
+ /* Complete the job, indicate that it took no time, and don't start
+ * new atoms */
+ kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
+ }
+ KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 0);
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+/**
+ * Fast start a higher priority job
+ * If the runpool is full, the lower priority contexts with no running jobs
+ * will be evicted from the runpool
+ *
+ * If \a kctx_new is NULL, the first context with no running jobs will be evicted
+ *
+ * The following locking conditions are made on the caller:
+ * - The caller must \b not hold \a kctx_new's
+ * kbasep_js_kctx_info::ctx::jsctx_mutex, or that mutex of any ctx in the
+ * runpool. This is because \a kctx_new's jsctx_mutex and one of the other
+ * scheduled ctx's jsctx_mutex will be obtained internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used
+ * internally).
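+ *
+ * As an illustration of how this is used later in this file:
+ * kbasep_js_add_job() calls this with the newly queued context as \a kctx_new,
+ * whereas kbasep_js_schedule_privileged_ctx() calls it with \a kctx_new set to
+ * NULL so that any non-running context may be evicted.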
+ */
+STATIC void kbasep_js_runpool_attempt_fast_start_ctx(struct kbase_device *kbdev, struct kbase_context *kctx_new)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_new;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ int evict_as_nr;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ if (kctx_new != NULL) {
+ js_kctx_new = &kctx_new->jctx.sched_info;
+ mutex_lock(&js_kctx_new->ctx.jsctx_mutex);
+ } else {
+ js_kctx_new = NULL;
+ CSTD_UNUSED(js_kctx_new);
+ }
+
+ /* Setup a dummy katom_retained_state */
+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* If the runpool is full and either there is no specified context or the specified context is not dying, then
+ attempt to fast start the specified context or evict the first context with no running jobs. */
+ if (check_is_runpool_full(kbdev, kctx_new) &&
+ (!js_kctx_new || !js_kctx_new->ctx.is_dying)) {
+ /* No free address spaces - attempt to evict non-running lower priority context */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ for (evict_as_nr = 0; evict_as_nr < kbdev->nr_hw_address_spaces; evict_as_nr++) {
+ struct kbase_context *kctx_evict;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[evict_as_nr];
+ kctx_evict = js_per_as_data->kctx;
+
+ /* Look for the AS which is not currently running */
+ if (0 == js_per_as_data->as_busy_refcount && kctx_evict != NULL) {
+ /* Now compare the priority of the context we are considering evicting with
+ * the new ctx's priority, taking into account whether the scheduled context
+ * uses a realtime policy.
+ * Note that the lower the number, the higher the priority
+ */
+ if ((kctx_new == NULL) || kbasep_js_policy_ctx_has_priority(js_policy, kctx_evict, kctx_new)) {
+ mali_bool retain_result;
+ kbasep_js_release_result release_result;
+
+ KBASE_TRACE_ADD(kbdev, JS_FAST_START_EVICTS_CTX, kctx_evict, NULL, 0u, (uintptr_t)kctx_new);
+
+ /* Retain the ctx to work on it - this shouldn't be able to fail */
+ retain_result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx_evict);
+ KBASE_DEBUG_ASSERT(retain_result != MALI_FALSE);
+ CSTD_UNUSED(retain_result);
+
+ /* This will cause the context to be scheduled out on the next runpool_release_ctx(),
+ * and also stop its refcount increasing */
+ kbasep_js_clear_submit_allowed(js_devdata, kctx_evict);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if (kctx_new != NULL)
+ mutex_unlock(&js_kctx_new->ctx.jsctx_mutex);
+
+ /* Stop working on the target context, start working on the kctx_evict context */
+
+ mutex_lock(&kctx_evict->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx_evict, &katom_retained_state);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ /* Only requeue if actually descheduled, which is more robust in case
+ * something else retains it (e.g. two high priority contexts racing
+ * to evict the same lower priority context) */
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx_evict, MALI_TRUE);
+
+ mutex_unlock(&kctx_evict->jctx.sched_info.ctx.jsctx_mutex);
+
+ /* release_result isn't propagated further:
+ * - the caller will be scheduling in a context anyway
+ * - which will also cause new jobs to run */
+
+ /* ctx fast start has taken place */
+ return;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ }
+
+ /* ctx fast start has not taken place */
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if (kctx_new != NULL)
+ mutex_unlock(&js_kctx_new->ctx.jsctx_mutex);
+}
+
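+/* Register an atom with the Job Scheduler.
+ *
+ * Returns MALI_TRUE when the Policy Queue was updated (i.e. the context went
+ * from having no jobs to having one and was enqueued), in which case the
+ * caller should try to schedule the head context; a fast-start attempt is
+ * already made below before returning. */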
+mali_bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+ unsigned long flags;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+
+ mali_bool policy_queue_updated = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ kbdev = kctx->kbdev;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ /* Policy-specific initialization of atoms (which cannot fail). Anything that
+ * could've failed must've been done at kbasep_jd_policy_init_job() time. */
+ kbasep_js_policy_register_job(js_policy, kctx, atom);
+
+ /*
+ * Begin Runpool transaction
+ */
+ mutex_lock(&js_devdata->runpool_mutex);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+ /* Refcount ctx.nr_jobs */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+ ++(js_kctx_info->ctx.nr_jobs);
+
+ /* Setup any scheduling information */
+ kbasep_js_clear_job_retry_submit(atom);
+
+ /* Lock for state available during IRQ */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Context Attribute Refcounting */
+ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+ /* Enqueue the job in the policy, causing it to be scheduled if the
+ * parent context gets scheduled */
+ kbasep_js_policy_enqueue_job(js_policy, atom);
+
+ if (js_kctx_info->ctx.is_scheduled != MALI_FALSE) {
+ /* Handle an already running context - try to run the new job, in case it
+ * matches requirements that aren't matched by any other job in the Run
+ * Pool */
+ kbasep_js_try_run_next_job_nolock(kbdev);
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ /* End runpool transaction */
+
+ if (js_kctx_info->ctx.is_scheduled == MALI_FALSE) {
+ if (js_kctx_info->ctx.is_dying) {
+ /* A job got added while/after kbase_job_zap_context() was called
+ * on a non-scheduled context (e.g. KDS dependency resolved). Kill
+ * that job by killing the context. */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_FALSE);
+ } else if (js_kctx_info->ctx.nr_jobs == 1) {
+ /* Handle Refcount going from 0 to 1: schedule the context on the Policy Queue */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE);
+ dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+ mutex_lock(&js_devdata->queue_mutex);
+ kbasep_js_policy_enqueue_ctx(js_policy, kctx);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ /* Policy Queue was updated - caller must try to schedule the head context
+ * We also try to encourage a fast-start from here. */
+ policy_queue_updated = MALI_TRUE;
+ }
+ }
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* If the runpool is full and this context has a higher priority than a
+ * non-running context in the runpool, evict that context so this higher
+ * priority context's jobs start sooner. Fast-starting requires the
+ * jsctx_mutex to be dropped, because it works on multiple ctxs
+ *
+ * Note: If the context is being killed with kbase_job_zap_context(), then
+ * kctx can't disappear after the jsctx_mutex was dropped. This is because
+ * the caller holds kctx->jctx.lock */
+ if (policy_queue_updated)
+ kbasep_js_runpool_attempt_fast_start_ctx(kbdev, kctx);
+
+ return policy_queue_updated;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+ /* De-refcount ctx.nr_jobs */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+ --(js_kctx_info->ctx.nr_jobs);
+
+ /* De-register the job from the system */
+ kbasep_js_policy_deregister_job(js_policy, kctx, atom);
+}
+
+void kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool attr_state_changed;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+ kbasep_js_remove_job(kbdev, kctx, katom);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* The atom has 'finished' (will not be re-run), so no need to call
+ * kbasep_js_has_atom_finished().
+ *
+ * This is because it returns MALI_FALSE for soft-stopped atoms, but we
+ * want to override that, because we're cancelling an atom regardless of
+ * whether it was soft-stopped or not */
+ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &katom_retained_state);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ if (attr_state_changed != MALI_FALSE) {
+ /* A change in runpool ctx attributes might mean we can run more jobs
+ * than before. */
+ kbase_js_try_run_jobs(kbdev);
+ }
+}
+
+mali_bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool result;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+ kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return result;
+}
+
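+/* Look up the context currently assigned to address space \a as_nr.
+ *
+ * If a context is found, its busy refcount is incremented under
+ * runpool_irq.lock, so the caller is responsible for dropping that reference
+ * again later (for example via kbasep_js_runpool_release_ctx()). */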
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx = NULL;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ found_kctx = js_per_as_data->kctx;
+
+ if (found_kctx != NULL)
+ ++(js_per_as_data->as_busy_refcount);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return found_kctx;
+}
+
+/**
+ * @brief Try running more jobs after releasing a context and/or atom
+ *
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data::runpool_irq::lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change
+ * - The released atom caused a ctx attribute change
+ * - Slots were previously blocked due to affinity restrictions
+ * - Submission during IRQ handling failed
+ */
+STATIC void kbasep_js_run_jobs_after_ctx_and_atom_release(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state, mali_bool runpool_ctx_attr_change)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (js_devdata->nr_user_contexts_running != 0) {
+ mali_bool retry_submit;
+ int retry_jobslot;
+
+ retry_submit = kbasep_js_get_atom_retry_submit_slot(katom_retained_state, &retry_jobslot);
+
+ if (runpool_ctx_attr_change != MALI_FALSE) {
+ /* A change in runpool ctx attributes might mean we can run more jobs
+ * than before */
+ kbasep_js_try_run_next_job_nolock(kbdev);
+
+ /* A retry submit on all slots has now happened, so don't need to do it again */
+ retry_submit = MALI_FALSE;
+ }
+
+ /* Submit on any slots that might've had atoms blocked by the affinity of
+ * a completed atom.
+ *
+ * If no atom has recently completed, then this is harmless */
+ kbase_js_affinity_submit_to_blocked_slots(kbdev);
+
+ /* If the IRQ handler failed to get a job from the policy, try again from
+ * outside the IRQ handler
+ * NOTE: We may've already cleared retry_submit from submitting above */
+ if (retry_submit != MALI_FALSE) {
+ KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, kctx, NULL, 0u, retry_jobslot);
+ kbasep_js_try_run_next_job_on_slot_nolock(kbdev, retry_jobslot);
+ }
+ }
+}
+
+/**
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool mutex and the relevant AS transaction mutex
+ *
+ * This also starts more jobs running in the case of a ctx-attribute state change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
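+ *
+ * As a sketch of the expected calling pattern (this mirrors the wrappers
+ * further down in this file):
+ *
+ *   mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ *   mutex_lock(&js_devdata->runpool_mutex);
+ *   release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ *                                                           katom_retained_state);
+ *   mutex_unlock(&js_devdata->runpool_mutex);
+ *   if (release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED)
+ *           kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_TRUE);
+ *   mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);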
+ */
+STATIC kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ kbasep_js_release_result release_result = 0u;
+ mali_bool runpool_ctx_attr_change = MALI_FALSE;
+ int kctx_as_nr;
+ struct kbase_as *current_as;
+ int new_ref_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE);
+
+ /* kctx->as_nr and js_per_as_data are only read from here. The caller's
+ * jsctx_mutex provides a barrier that ensures they are up-to-date.
+ *
+ * They will not change whilst we're reading them, because the refcount
+ * is non-zero (and we ASSERT on that last fact).
+ */
+ kctx_as_nr = kctx->as_nr;
+ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+ KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+ /*
+ * Transaction begins on AS and runpool_irq
+ *
+ * Assert about our calling contract
+ */
+ current_as = &kbdev->as[kctx_as_nr];
+ mutex_lock(&current_as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+ KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+ /* Update refcount */
+ new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+ /* Release the atom if it finished (i.e. wasn't soft-stopped) */
+ if (kbasep_js_has_atom_finished(katom_retained_state) != MALI_FALSE)
+ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, katom_retained_state);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, new_ref_count);
+
+ if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED &&
+ !kbase_pm_is_suspending(kbdev)) {
+ /* Context is kept scheduled into an address space even when there are no jobs, in this case we have
+ * to handle the situation where all jobs have been evicted from the GPU and submission is disabled.
+ *
+ * At this point we re-enable submission to allow further jobs to be executed
+ */
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+ }
+
+ /* Make a set of checks to see if the context should be scheduled out */
+ if (new_ref_count == 0 && (kctx->jctx.sched_info.ctx.nr_jobs == 0 || kbasep_js_is_submit_allowed(js_devdata, kctx) == MALI_FALSE)) {
+ /* Last reference, and we've been told to remove this context from the Run Pool */
+ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d", kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+ kbasep_js_policy_runpool_remove_ctx(js_policy, kctx);
+
+ /* Stop any more refcounts occurring on the context */
+ js_per_as_data->kctx = NULL;
+
+ /* Ensure we prevent the context from submitting any new jobs
+ * e.g. from kbasep_js_try_run_next_job_on_slot_irq_nolock() */
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+ /* Disable the MMU on the affected address space, and indicate it's invalid */
+ kbase_mmu_disable(kctx);
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+
+ /* Ctx Attribute handling
+ *
+ * Releasing the atom's attributes must happen either before this, or after
+ * 'is_scheduled' is changed, otherwise we double-decrement the attributes */
+ runpool_ctx_attr_change |= kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+ /* Early update of context count, to optimize the
+ * kbasep_js_run_jobs_after_ctx_and_atom_release() call */
+ runpool_dec_context_count(kbdev, kctx);
+
+ /* Releasing the context and katom retained state can allow more jobs to run */
+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change);
+
+ /*
+ * Transaction ends on AS and runpool_irq:
+ *
+ * By this point, the AS-related data is now clear and ready for re-use.
+ *
+ * Since releases only occur once for each previous successful retain, and no more
+ * retains are allowed on this context, no other thread will be operating in this
+ * code whilst we are
+ */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&current_as->transaction_mutex);
+
+ /* Free up the address space */
+ release_addr_space(kbdev, kctx_as_nr);
+ /* Note: Don't reuse kctx_as_nr now */
+
+ /* Synchronize with any policy timers */
+ kbasep_js_policy_runpool_timers_sync(js_policy);
+
+ /* update book-keeping info */
+ js_kctx_info->ctx.is_scheduled = MALI_FALSE;
+ /* Signal any waiter that the context is not scheduled, so is safe for
+ * termination - once the jsctx_mutex is also dropped, and jobs have
+ * finished. */
+ wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+ /* Queue an action to occur after we've dropped the lock */
+ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED;
+
+ } else {
+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&current_as->transaction_mutex);
+ }
+
+ return release_result;
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, mali_bool has_pm_ref)
+{
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_policy = &kbdev->js_data.policy;
+ js_devdata = &kbdev->js_data;
+
+ /* This is called if and only if you've detached the context from
+ * the Runpool or the Policy Queue, and not added it back to the Runpool */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled == MALI_FALSE);
+
+ if (js_kctx_info->ctx.is_dying != MALI_FALSE) {
+ /* Dying: don't requeue, but kill all jobs on the context. This happens
+ * asynchronously */
+ dev_dbg(kbdev->dev, "JS: ** Killing Context %p on RunPool Remove **", kctx);
+ kbasep_js_policy_foreach_ctx_job(js_policy, kctx, &kbase_jd_cancel, MALI_TRUE);
+ } else if (js_kctx_info->ctx.nr_jobs > 0) {
+ /* Not dying, has jobs: de-ref core counts from each job before adding
+ * back to the queue */
+ kbasep_js_policy_foreach_ctx_job(js_policy, kctx, &kbasep_js_job_check_deref_cores, MALI_FALSE);
+
+ dev_dbg(kbdev->dev, "JS: Requeue Context %p", kctx);
+ mutex_lock(&js_devdata->queue_mutex);
+ kbasep_js_policy_enqueue_ctx(js_policy, kctx);
+ mutex_unlock(&js_devdata->queue_mutex);
+ } else {
+ /* Not dying, no jobs: don't add back to the queue */
+ dev_dbg(kbdev->dev, "JS: Idling Context %p (not requeued)", kctx);
+ }
+
+ if (has_pm_ref) {
+ /* In all cases where we had a pm active refcount, release it */
+ kbase_pm_context_idle(kbdev);
+ }
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ kbasep_js_release_result release_result;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state);
+
+ /* Drop the runpool mutex to allow requeueing kctx */
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_TRUE);
+
+ /* Drop the jsctx_mutex to allow scheduling in a new context */
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) {
+ /* We've freed up an address space, so let's try to schedule in another
+ * context
+ *
+ * Note: if there's a context to schedule in, then it also tries to run
+ * another job, in case the new context has jobs satisfying requirements
+ * that no other context/job in the runpool does */
+ kbasep_js_try_schedule_head_ctx(kbdev);
+ }
+}
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+}
+
+/** Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbasep_js_try_schedule_head_ctx() */
+STATIC void kbasep_js_runpool_release_ctx_no_schedule(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ kbasep_js_release_result release_result;
+ struct kbasep_js_atom_retained_state katom_retained_state_struct;
+ struct kbasep_js_atom_retained_state *katom_retained_state = &katom_retained_state_struct;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, katom_retained_state);
+
+ /* Drop the runpool mutex to allow requeueing kctx */
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_TRUE);
+
+ /* Drop the jsctx_mutex to allow scheduling in a new context */
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* NOTE: could return release_result if the caller would like to know
+ * whether it should schedule a new context, but currently no callers do */
+}
+
+
+/**
+ * @brief Handle retaining cores for power management and affinity management,
+ * ensuring that cores are powered up and won't violate affinity restrictions.
+ *
+ * This function enters at the following @ref enum kbase_atom_coreref_state states:
+ *
+ * - NO_CORES_REQUESTED,
+ * - WAITING_FOR_REQUESTED_CORES,
+ * - RECHECK_AFFINITY,
+ *
+ * The transitions are as follows:
+ * - NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES
+ * - WAITING_FOR_REQUESTED_CORES -> ( WAITING_FOR_REQUESTED_CORES or RECHECK_AFFINITY )
+ * - RECHECK_AFFINITY -> ( WAITING_FOR_REQUESTED_CORES or CHECK_AFFINITY_VIOLATIONS )
+ * - CHECK_AFFINITY_VIOLATIONS -> ( RECHECK_AFFINITY or READY )
+ *
+ * The caller must hold:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return MALI_FALSE when the function makes a transition to the same or lower state, indicating
+ * that the cores are not ready.
+ * @return MALI_TRUE once READY state is reached, indicating that the cores are 'ready' and won't
+ * violate affinity restrictions.
+ *
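+ * As a concrete example: when the chosen cores are already powered and no
+ * affinity violation would result, a single call falls through every state
+ * in turn:
+ *   NO_CORES_REQUESTED -> WAITING_FOR_REQUESTED_CORES -> RECHECK_AFFINITY
+ *     -> CHECK_AFFINITY_VIOLATIONS -> READY
+ * and returns MALI_TRUE.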
+ */
+STATIC mali_bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, int js, struct kbase_jd_atom *katom)
+{
+ /* The most recently checked affinity. Having this at this scope allows us
+ * to guarantee that we've checked the affinity in this function call. */
+ u64 recently_chosen_affinity = 0;
+ mali_bool chosen_affinity = MALI_FALSE;
+ mali_bool retry;
+
+ do {
+ retry = MALI_FALSE;
+
+ /* NOTE: The following uses a number of FALLTHROUGHs to optimize the
+ * calls to this function. Ending of the function is indicated by BREAK OUT */
+ switch (katom->coreref_state) {
+ /* State when job is first attempted to be run */
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+ /* Compute affinity */
+ if (MALI_FALSE == kbase_js_choose_affinity(&recently_chosen_affinity, kbdev, katom, js)) {
+ /* No cores are currently available */
+ /* *** BREAK OUT: No state transition *** */
+ break;
+ }
+
+ chosen_affinity = MALI_TRUE;
+
+ /* Request the cores */
+ kbase_pm_request_cores(kbdev, katom->core_req & BASE_JD_REQ_T, recently_chosen_affinity);
+
+ katom->affinity = recently_chosen_affinity;
+
+ /* Proceed to next state */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ {
+ enum kbase_pm_cores_ready cores_ready;
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T));
+
+ cores_ready = kbase_pm_register_inuse_cores(kbdev, katom->core_req & BASE_JD_REQ_T, katom->affinity);
+ if (cores_ready == KBASE_NEW_AFFINITY) {
+ /* Affinity no longer valid - return to previous state */
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_INUSE_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity);
+ /* *** BREAK OUT: Return to previous state, retry *** */
+ retry = MALI_TRUE;
+ break;
+ }
+ if (cores_ready == KBASE_CORES_NOT_READY) {
+ /* Stay in this state and return, to retry at this state later */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_INUSE_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity);
+ /* *** BREAK OUT: No state transition *** */
+ break;
+ }
+ /* Proceed to next state */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ }
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T));
+
+ /* Optimize out choosing the affinity twice in the same function call */
+ if (chosen_affinity == MALI_FALSE) {
+ /* See if the affinity changed since a previous call. */
+ if (MALI_FALSE == kbase_js_choose_affinity(&recently_chosen_affinity, kbdev, katom, js)) {
+ /* No cores are currently available */
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, katom->kctx, katom, katom->jc, js, (u32) recently_chosen_affinity);
+ /* *** BREAK OUT: Transition to lower state *** */
+ break;
+ }
+ chosen_affinity = MALI_TRUE;
+ }
+
+ /* Now see if this requires a different set of cores */
+ if (recently_chosen_affinity != katom->affinity) {
+ enum kbase_pm_cores_ready cores_ready;
+
+ kbase_pm_request_cores(kbdev, katom->core_req & BASE_JD_REQ_T, recently_chosen_affinity);
+
+ /* Register new cores whilst we still hold the old ones, to minimize power transitions */
+ cores_ready = kbase_pm_register_inuse_cores(kbdev, katom->core_req & BASE_JD_REQ_T, recently_chosen_affinity);
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+
+ /* Fixup the state that was reduced by deref_cores: */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ katom->affinity = recently_chosen_affinity;
+ if (cores_ready == KBASE_NEW_AFFINITY) {
+ /* Affinity no longer valid - return to previous state */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_INUSE_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity);
+ /* *** BREAK OUT: Return to previous state, retry *** */
+ retry = MALI_TRUE;
+ break;
+ }
+ /* Now might be waiting for powerup again, with a new affinity */
+ if (cores_ready == KBASE_CORES_NOT_READY) {
+ /* Return to previous state */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, katom->kctx, katom, katom->jc, js, (u32) katom->affinity);
+ /* *** BREAK OUT: Transition to lower state *** */
+ break;
+ }
+ }
+ /* Proceed to next state */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+ case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T));
+ KBASE_DEBUG_ASSERT(katom->affinity == recently_chosen_affinity);
+
+ /* Note: this is where the caller must've taken the runpool_irq.lock */
+
+ /* Check for affinity violations - if there are any, then we just ask
+ * the caller to requeue and try again later */
+ if (kbase_js_affinity_would_violate(kbdev, js, katom->affinity) != MALI_FALSE) {
+ /* Cause a re-attempt to submit from this slot on the next job complete */
+ kbase_js_affinity_slot_blocked_an_atom(kbdev, js);
+ /* Return to previous state */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ /* *** BREAK OUT: Transition to lower state *** */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_CORE_REF_AFFINITY_WOULD_VIOLATE, katom->kctx, katom, katom->jc, js, (u32) katom->affinity);
+ break;
+ }
+
+ /* No affinity violations would result, so the cores are ready */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+ /* *** BREAK OUT: Cores Ready *** */
+ break;
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(MALI_FALSE, "Unhandled kbase_atom_coreref_state %d", katom->coreref_state);
+ break;
+ }
+ } while (retry != MALI_FALSE);
+
+ return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
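+/* Undo any core requests/registrations made by kbasep_js_job_check_ref_cores(),
+ * returning the atom to the NO_CORES_REQUESTED state. Which power-management
+ * call is made depends on how far the atom had progressed through the coreref
+ * state machine. */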
+void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ switch (katom->coreref_state) {
+ case KBASE_ATOM_COREREF_STATE_READY:
+ /* State where atom was submitted to the HW - just proceed to power-down */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T));
+
+ /* *** FALLTHROUGH *** */
+
+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+ /* State where cores were registered */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T));
+ kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, katom->affinity);
+
+ /* Note: We do not clear the state for kbase_js_affinity_slot_blocked_an_atom().
+ * That is handled after finishing the job. This might be slightly
+ * suboptimal for some corner cases, but is otherwise not a problem
+ * (and resolves itself after the next job completes). */
+
+ break;
+
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ /* State where cores were requested, but not registered */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 || (katom->core_req & BASE_JD_REQ_T));
+ kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, katom->affinity);
+ break;
+
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ /* Initial state - nothing required */
+ KBASE_DEBUG_ASSERT(katom->affinity == 0);
+ break;
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(MALI_FALSE, "Unhandled coreref_state: %d", katom->coreref_state);
+ break;
+ }
+
+ katom->affinity = 0;
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+/*
+ * Note: this function is quite similar to kbasep_js_try_run_next_job_on_slot()
+ */
+mali_bool kbasep_js_try_run_next_job_on_slot_irq_nolock(struct kbase_device *kbdev, int js, s8 *submit_count)
+{
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool cores_ready;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ /* The caller of this function may not be aware of Ctx Attribute state changes so we
+ * must recheck if the given slot is still valid. Otherwise do not try to run.
+ */
+ if (kbase_js_can_run_job_on_slot_no_lock(kbdev, js)) {
+ /* Keep submitting while there's space to run a job on this job-slot,
+ * and there are jobs to get that match its requirements (see 'break'
+ * statement below) */
+ while (*submit_count < KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ && kbasep_jm_is_submit_slots_free(kbdev, js, NULL) != MALI_FALSE) {
+ struct kbase_jd_atom *dequeued_atom;
+ mali_bool has_job = MALI_FALSE;
+
+ /* Dequeue a job that matches the requirements */
+ has_job = kbasep_js_policy_dequeue_job(kbdev, js, &dequeued_atom);
+
+ if (has_job != MALI_FALSE) {
+ /* NOTE: since the runpool_irq lock is currently held and acts across
+ * all address spaces, any context whose busy refcount has reached
+ * zero won't yet be scheduled out whilst we're trying to run jobs
+ * from it */
+ struct kbase_context *parent_ctx = dequeued_atom->kctx;
+ mali_bool retain_success;
+
+ /* Retain/power up the cores it needs, check if cores are ready */
+ cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, dequeued_atom);
+
+ if (dequeued_atom->event_code == BASE_JD_EVENT_PM_EVENT || cores_ready != MALI_TRUE) {
+ /* The job either can't be submitted until the cores are ready, or
+ * the job will fail due to the specified core group being unavailable.
+ * To avoid recursion this will be handled outside of IRQ context by
+ * kbasep_js_try_run_next_job_on_slot_nolock */
+ kbasep_js_policy_enqueue_job(&kbdev->js_data.policy, dequeued_atom);
+ return MALI_TRUE;
+ }
+
+ /* ASSERT that the Policy picked a job from an allowed context */
+ KBASE_DEBUG_ASSERT(kbasep_js_is_submit_allowed(js_devdata, parent_ctx));
+
+ /* Retain the context to stop it from being scheduled out
+ * This is released when the job finishes */
+ retain_success = kbasep_js_runpool_retain_ctx_nolock(kbdev, parent_ctx);
+ KBASE_DEBUG_ASSERT(retain_success != MALI_FALSE);
+ CSTD_UNUSED(retain_success);
+
+ /* Retain the affinity on the slot */
+ kbase_js_affinity_retain_slot_cores(kbdev, js, dequeued_atom->affinity);
+
+ /* Check if this job needs the cycle counter enabled before submission */
+ kbasep_js_ref_permon_check_and_enable_cycle_counter(kbdev, dequeued_atom);
+
+ /* Submit the job */
+ kbase_job_submit_nolock(kbdev, dequeued_atom, js);
+
+ ++(*submit_count);
+ } else {
+ /* No more jobs - stop submitting for this slot */
+ break;
+ }
+ }
+ }
+
+ /* Indicate whether submission should be retried from outside the IRQ
+ * handler (i.e. via a different dequeue function). These are the reasons why it *must* happen:
+ * - the KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ threshold was reached
+ * and new scheduling must be performed outside of IRQ mode.
+ *
+ * Failure to indicate this correctly could stop further jobs being processed.
+ *
+ * However, we do not _need_ to indicate a retry for the following:
+ * - kbasep_js_policy_dequeue_job() couldn't get a job. In which case,
+ * there's no point re-trying outside of IRQ, because the result will be
+ * the same until job dependencies are resolved, or user-space provides
+ * more jobs. In both those cases, we try to run jobs anyway, so
+ * processing does not stop.
+ * - kbasep_jm_is_submit_slots_free() was MALI_FALSE, indicating jobs were
+ * already running. When those jobs complete, that will still cause events
+ * that cause us to resume job submission.
+ * - kbase_js_can_run_job_on_slot_no_lock() was MALI_FALSE - this is for
+ * Ctx Attribute handling. That _can_ change outside of IRQ context, but
+ * is handled explicitly by kbasep_js_runpool_release_ctx_and_katom_retained_state().
+ */
+ return (mali_bool) (*submit_count >= KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ);
+}
+
+void kbasep_js_try_run_next_job_on_slot_nolock(struct kbase_device *kbdev, int js)
+{
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool has_job;
+ mali_bool cores_ready;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running > 0);
+
+ /* Keep submitting while there's space to run a job on this job-slot,
+ * and there are jobs to get that match its requirements (see 'break'
+ * statement below) */
+ if (kbasep_jm_is_submit_slots_free(kbdev, js, NULL) != MALI_FALSE) {
+ /* The caller of this function may not be aware of Ctx Attribute state changes so we
+ * must recheck if the given slot is still valid. Otherwise do not try to run.
+ */
+ if (kbase_js_can_run_job_on_slot_no_lock(kbdev, js)) {
+ do {
+ struct kbase_jd_atom *dequeued_atom;
+
+ /* Dequeue a job that matches the requirements */
+ has_job = kbasep_js_policy_dequeue_job(kbdev, js, &dequeued_atom);
+
+ if (has_job != MALI_FALSE) {
+ /* NOTE: since the runpool_irq lock is currently held and acts across
+ * all address spaces, any context whose busy refcount has reached
+ * zero won't yet be scheduled out whilst we're trying to run jobs
+ * from it */
+ struct kbase_context *parent_ctx = dequeued_atom->kctx;
+ mali_bool retain_success;
+
+ /* Retain/power up the cores it needs, check if cores are ready */
+ cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, dequeued_atom);
+
+ if (cores_ready != MALI_TRUE && dequeued_atom->event_code != BASE_JD_EVENT_PM_EVENT) {
+ /* The job can't be submitted until the cores are ready, requeue the job */
+ kbasep_js_policy_enqueue_job(&kbdev->js_data.policy, dequeued_atom);
+ break;
+ }
+ /* ASSERT that the Policy picked a job from an allowed context */
+ KBASE_DEBUG_ASSERT(kbasep_js_is_submit_allowed(js_devdata, parent_ctx));
+
+ /* Retain the context to stop it from being scheduled out
+ * This is released when the job finishes */
+ retain_success = kbasep_js_runpool_retain_ctx_nolock(kbdev, parent_ctx);
+ KBASE_DEBUG_ASSERT(retain_success != MALI_FALSE);
+ CSTD_UNUSED(retain_success);
+
+ /* Retain the affinity on the slot */
+ kbase_js_affinity_retain_slot_cores(kbdev, js, dequeued_atom->affinity);
+
+ /* Check if this job needs the cycle counter enabled before submission */
+ kbasep_js_ref_permon_check_and_enable_cycle_counter(kbdev, dequeued_atom);
+
+ if (dequeued_atom->event_code == BASE_JD_EVENT_PM_EVENT) {
+ dev_warn(kbdev->dev, "Rejecting atom due to BASE_JD_EVENT_PM_EVENT\n");
+ /* The job has failed due to the specified core group being unavailable */
+ kbase_jd_done(dequeued_atom, js, NULL, 0);
+ } else {
+ /* Submit the job */
+ kbase_job_submit_nolock(kbdev, dequeued_atom, js);
+ }
+ }
+
+ } while (kbasep_jm_is_submit_slots_free(kbdev, js, NULL) != MALI_FALSE && has_job != MALI_FALSE);
+ }
+ }
+}
+
+void kbasep_js_try_schedule_head_ctx(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool has_kctx;
+ struct kbase_context *head_kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool is_runpool_full;
+ struct kbase_as *new_address_space;
+ unsigned long flags;
+ mali_bool head_kctx_suspended = MALI_FALSE;
+ int pm_active_err;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ /* We *don't* make a speculative check on whether we can fit a context in the
+ * runpool, because most of our use-cases assume 2 or fewer contexts, and
+ * so we will usually have enough address spaces free.
+ *
+ * In any case, the check will be done later on once we have a context */
+
+ /* Grab the context off head of queue - if there is one */
+ mutex_lock(&js_devdata->queue_mutex);
+ has_kctx = kbasep_js_policy_dequeue_head_ctx(&js_devdata->policy, &head_kctx);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ if (has_kctx == MALI_FALSE) {
+ /* No ctxs to run - nothing to do */
+ return;
+ }
+ js_kctx_info = &head_kctx->jctx.sched_info;
+
+ dev_dbg(kbdev->dev, "JS: Dequeue Context %p", head_kctx);
+
+ pm_active_err = kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE);
+
+ /*
+ * Atomic transaction on the Context and Run Pool begins
+ */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Check to see if we shouldn't add the context to the Run Pool:
+ * - it can't take the specified context, and so is 'full'. This may be
+ * 'full' even when there are address spaces available, since some contexts
+ * are allowed in whereas others may not due to HW workarounds
+ * - A suspend is taking place
+ * - The context is dying due to kbase_job_zap_context() */
+ is_runpool_full = check_is_runpool_full(kbdev, head_kctx);
+ if (is_runpool_full || pm_active_err || js_kctx_info->ctx.is_dying) {
+ /* Roll back the transaction so far and return */
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /* Note: If a Power Management active reference was taken, it's released by
+ * this: */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, head_kctx, !pm_active_err);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ return;
+ }
+
+ /* From this point on, the Power Management active reference is released
+ * only if kbasep_js_runpool_release_ctx() causes the context to be removed
+ * from the runpool */
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, head_kctx, NULL, 0u, kbasep_js_trace_get_refcnt(kbdev, head_kctx));
+
+#if !MALI_CUSTOMER_RELEASE
+ if (js_devdata->nr_user_contexts_running == 0) {
+ /* Only when there are no other contexts submitting jobs:
+ * Latch in run-time job scheduler timeouts that were set through the js_timeouts sysfs file */
+ if (kbdev->js_soft_stop_ticks != 0)
+ js_devdata->soft_stop_ticks = kbdev->js_soft_stop_ticks;
+
+ if (kbdev->js_soft_stop_ticks_cl != 0)
+ js_devdata->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl;
+
+ if (kbdev->js_hard_stop_ticks_ss != 0)
+ js_devdata->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss;
+
+ if (kbdev->js_hard_stop_ticks_cl != 0)
+ js_devdata->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl;
+
+ if (kbdev->js_hard_stop_ticks_nss != 0)
+ js_devdata->hard_stop_ticks_nss = kbdev->js_hard_stop_ticks_nss;
+
+ if (kbdev->js_reset_ticks_ss != 0)
+ js_devdata->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss;
+
+ if (kbdev->js_reset_ticks_cl != 0)
+ js_devdata->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl;
+
+ if (kbdev->js_reset_ticks_nss != 0)
+ js_devdata->gpu_reset_ticks_nss = kbdev->js_reset_ticks_nss;
+ }
+#endif
+
+ runpool_inc_context_count(kbdev, head_kctx);
+ /* Cause any future waiter-on-termination to wait until the context is
+ * descheduled */
+ js_kctx_info->ctx.is_scheduled = MALI_TRUE;
+ wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+ /* Pick the free address space (guaranteed free by check_is_runpool_full() ) */
+ new_address_space = pick_free_addr_space(kbdev);
+
+ /* Lock the address space whilst working on it */
+ mutex_lock(&new_address_space->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Do everything necessary to assign the address space (including updating book-keeping info)
+ * Add the context to the Run Pool, and allow it to run jobs */
+ assign_and_activate_kctx_addr_space(kbdev, head_kctx, new_address_space);
+
+ /* NOTE: If Linux allows, then we can drop the new_address_space->transaction mutex here */
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+ /* We need to retain it to keep the corresponding address space */
+ kbasep_js_runpool_retain_ctx_nolock(kbdev, head_kctx);
+ }
+
+ /* Re-check for suspending: a suspend could've occurred after we
+ * pm_context_active'd, and all the contexts could've been removed from the
+ * runpool before we took this lock. In this case, we don't want to allow
+ * this context to run jobs, we just want it out immediately.
+ *
+ * The DMB required to read the suspend flag was issued recently as part of
+ * the runpool_irq locking. If a suspend occurs *after* that lock was taken
+ * (i.e. this condition doesn't execute), then the kbasep_js_suspend() code
+ * will cleanup this context instead (by virtue of it being called strictly
+ * after the suspend flag is set, and will wait for this lock to drop) */
+ if (kbase_pm_is_suspending(kbdev)) {
+ /* Cause it to leave at some later point */
+ mali_bool retained;
+
+ retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, head_kctx);
+ KBASE_DEBUG_ASSERT(retained);
+ kbasep_js_clear_submit_allowed(js_devdata, head_kctx);
+ head_kctx_suspended = MALI_TRUE;
+ }
+
+ /* Try to run the next job, in case this context has jobs that match the
+ * job slot requirements, but none of the other currently running contexts
+ * do */
+ kbasep_js_try_run_next_job_nolock(kbdev);
+
+ /* Transaction complete */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&new_address_space->transaction_mutex);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ /* Note: after this point, the context could potentially get scheduled out immediately */
+
+ if (head_kctx_suspended) {
+ /* Finish forcing out the context due to a suspend. Use a variant of
+ * kbasep_js_runpool_release_ctx() that doesn't schedule a new context,
+ * to prevent a risk of recursion back into this function */
+ kbasep_js_runpool_release_ctx_no_schedule(kbdev, head_kctx);
+ }
+}
+
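+/* Schedule \a kctx in and keep it resident by marking it privileged; this
+ * waits, if necessary, until the context holds an address space. It must be
+ * balanced by a later call to kbasep_js_release_privileged_ctx(). */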
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool is_scheduled;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* This must never be attempted whilst suspending - i.e. it should only
+ * happen in response to a syscall from a user-space thread */
+ BUG_ON(kbase_pm_is_suspending(kbdev));
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ /* Mark the context as privileged */
+ js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
+
+ is_scheduled = js_kctx_info->ctx.is_scheduled;
+ if (is_scheduled == MALI_FALSE) {
+ mali_bool is_runpool_full;
+
+ /* Add the context to the runpool */
+ mutex_lock(&js_devdata->queue_mutex);
+ kbasep_js_policy_enqueue_ctx(&js_devdata->policy, kctx);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ {
+ is_runpool_full = check_is_runpool_full(kbdev, kctx);
+ if (is_runpool_full != MALI_FALSE) {
+ /* Evict jobs from the NEXT registers to free an AS asap */
+ kbasep_js_runpool_evict_next_jobs(kbdev, kctx);
+ }
+ }
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ /* Fast-starting requires the jsctx_mutex to be dropped, because it works on multiple ctxs */
+
+ if (is_runpool_full != MALI_FALSE) {
+ /* Evict non-running contexts from the runpool */
+ kbasep_js_runpool_attempt_fast_start_ctx(kbdev, NULL);
+ }
+ /* Try to schedule the context in */
+ kbasep_js_try_schedule_head_ctx(kbdev);
+
+ /* Wait for the context to be scheduled in */
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, kctx->jctx.sched_info.ctx.is_scheduled == MALI_TRUE);
+ } else {
+ /* Already scheduled in - We need to retain it to keep the corresponding address space */
+ kbasep_js_runpool_retain_ctx(kbdev, kctx);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ }
+}
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* We don't need to use the address space anymore */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ kbase_pm_release_l2_caches(kbdev);
+
+ /* Release the context - it will be scheduled out if there is no pending job */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+}
+
+void kbasep_js_job_done_slot_irq(struct kbase_jd_atom *katom, int slot_nr,
+ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+ struct kbase_device *kbdev;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool submit_retry_needed = MALI_TRUE; /* If we don't start jobs here, start them from the workqueue */
+ ktime_t tick_diff;
+ u64 microseconds_spent = 0u;
+ struct kbase_context *parent_ctx;
+
+ KBASE_DEBUG_ASSERT(katom);
+ parent_ctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(parent_ctx);
+ kbdev = parent_ctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ /*
+ * Release resources before submitting new jobs (bounds the refcount of
+ * the resource to BASE_JM_SUBMIT_SLOTS)
+ */
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, slot_nr), NULL, 0);
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+ /* Check if submitted jobs no longer require the cycle counter to be enabled */
+ kbasep_js_deref_permon_check_and_disable_cycle_counter(kbdev, katom);
+
+ /* Release the affinity from the slot - must happen before next submission to this slot */
+ kbase_js_affinity_release_slot_cores(kbdev, slot_nr, katom->affinity);
+ kbase_js_debug_log_current_affinities(kbdev);
+ /* Calculate the job's time used */
+ if (end_timestamp != NULL) {
+ /* Only calculate it for jobs that really ran on the HW (jobs evicted from
+ * the NEXT registers never actually ran, so they really did take zero time) */
+ tick_diff = ktime_sub(*end_timestamp, katom->start_timestamp);
+
+ microseconds_spent = ktime_to_ns(tick_diff);
+ do_div(microseconds_spent, 1000);
+
+ /* Round up time spent to the minimum timer resolution */
+ if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+ microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+ }
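+ /* Example of the accounting above: an atom measured at 2300ns becomes
+ * microseconds_spent == 2, and anything shorter than the tick resolution is
+ * rounded up, so a completed job is always accounted as at least
+ * KBASEP_JS_TICK_RESOLUTION_US. */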
+
+ /* Log the result of the job (completion status, and time spent). */
+ kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+ /* Determine whether the parent context's timeslice is up */
+ if (kbasep_js_policy_should_remove_ctx(js_policy, parent_ctx) != MALI_FALSE)
+ kbasep_js_clear_submit_allowed(js_devdata, parent_ctx);
+
+ if (done_code & KBASE_JS_ATOM_DONE_START_NEW_ATOMS) {
+ /* Submit a new job (if there is one) to help keep the GPU's HEAD and NEXT registers full */
+ KBASE_TRACE_ADD_SLOT(kbdev, JS_JOB_DONE_TRY_RUN_NEXT_JOB, parent_ctx, katom, katom->jc, slot_nr);
+
+ submit_retry_needed = kbasep_js_try_run_next_job_on_slot_irq_nolock(kbdev, slot_nr, &kbdev->slot_submit_count_irq[slot_nr]);
+ }
+
+ if (submit_retry_needed != MALI_FALSE || katom->event_code == BASE_JD_EVENT_STOPPED) {
+ /* The extra condition on STOPPED jobs is needed because they may be
+ * the only job present, but they won't get re-run until the JD work
+ * queue activates. Crucially, work queues can run items out of order
+ * e.g. on different CPUs, so being able to submit from the IRQ handler
+ * is not a good indication that we don't need to run jobs; the
+ * submitted job could be processed on the work-queue *before* the
+ * stopped job, even though it was submitted after.
+ *
+ * Therefore, we must try to run it, otherwise it might not get run at
+ * all after this. */
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JS_JOB_DONE_RETRY_NEEDED, parent_ctx, katom, katom->jc, slot_nr);
+ kbasep_js_set_job_retry_submit_slot(katom, slot_nr);
+ }
+}
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+ u16 retained = 0u;
+ int nr_privileged_ctx = 0;
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Prevent all contexts from submitting */
+ js_devdata->runpool_irq.submit_allowed = 0;
+
+	/* Retain each of the contexts, so we can cause them to leave even if they
+	 * had no refcount to begin with */
+ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+ struct kbasep_js_per_as_data *js_per_as_data = &js_devdata->runpool_irq.per_as_data[i];
+ struct kbase_context *kctx = js_per_as_data->kctx;
+
+ retained = retained << 1;
+
+ if (kctx) {
+ ++(js_per_as_data->as_busy_refcount);
+ retained |= 1u;
+ /* We can only cope with up to 1 privileged context - the
+ * instrumented context. It'll be suspended by disabling
+ * instrumentation */
+ if (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED)
+ KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1);
+ }
+ }
+ CSTD_UNUSED(nr_privileged_ctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* De-ref the previous retain to ensure each context gets pulled out
+ * sometime later. */
+ for (i = 0;
+ i < BASE_MAX_NR_AS;
+ ++i, retained = retained >> 1) {
+ struct kbasep_js_per_as_data *js_per_as_data = &js_devdata->runpool_irq.per_as_data[i];
+ struct kbase_context *kctx = js_per_as_data->kctx;
+
+ if (retained & 1u)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+
+ /* Caller must wait for all Power Manager active references to be dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ /* Schedule in as many contexts as address spaces. This also starts atoms. */
+ for (i = 0 ; i < kbdev->nr_hw_address_spaces; ++i)
+ kbasep_js_try_schedule_head_ctx(kbdev);
+
+ /* JS Resume complete */
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100755
index 000000000000..3eb01c38e057
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,931 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include <malisw/mali_malisw.h>
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+mali_error kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero initialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+mali_error kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero initialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the job is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guaranteed not to update the Policy Queue. And so, the caller is
+ * guaranteed not to need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is MALI_FALSE.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return MALI_TRUE indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return MALI_FALSE indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+mali_bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ * the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ * remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ * - it has not been removed with kbasep_js_policy_dequeue_job()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
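
For orientation, here is a minimal sketch of the three-step removal sequence described above, as it might look in a completion path. It is illustrative only and not part of the patch; kbdev, kctx and katom are assumed to be valid, and the snapshot uses the inline helper kbasep_js_atom_retained_state_copy() defined later in this header.

	struct kbasep_js_atom_retained_state retained;

	/* 1: capture the atom's retained state before it is torn down */
	kbasep_js_atom_retained_state_copy(&retained, katom);

	/* 2: partially remove the atom (caller must hold jsctx_mutex) */
	mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
	kbasep_js_remove_job(kbdev, kctx, katom);
	mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);

	/* 3: drop the runpool refcount along with the retained state */
	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &retained);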
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ * - it is not being killed with kbasep_jd_cancel()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ */
+void kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return value != MALI_FALSE if the retain succeeded, and the context will not be scheduled out.
+ * @return MALI_FALSE if the retain failed (because the context is being/has been scheduled out).
+ */
+mali_bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != MALI_FALSE if the retain succeeded, and the context will not be scheduled out.
+ * @return MALI_FALSE if the retain failed (because the context is being/has been scheduled out).
+ */
+mali_bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that it stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
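
As an illustration of the lookup/retain/release pairing, here is a sketch of how code running in process context might briefly pin the context owning an address space; as_nr is a placeholder and the work done while pinned is elided. This is illustrative only, not part of the patch.

	struct kbase_context *kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_nr);

	if (kctx != NULL) {
		/* kctx is refcounted as busy and cannot be scheduled out here */
		/* ... inspect or update state tied to this address space ... */

		/* the release must happen from process context, not IRQ context
		 * (see kbasep_js_runpool_release_ctx() below) */
		kbasep_js_runpool_release_ctx(kbdev, kctx);
	}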
+
+/**
+ * @brief Handle the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, mali_bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero, the context \em might be scheduled out
+ * (depending on whether the Scheduling Policy has deemed it so, or whether it
+ * has run out of jobs).
+ *
+ * If the context does get scheduled out, then the following actions will be
+ * taken as part of descheduling the context:
+ * - For the context being descheduled:
+ * - If the context is in the process of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ * - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ * - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ * - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * @brief Try to submit the next job on a \b particular slot whilst in IRQ
+ * context, and whilst the caller already holds the runpool IRQ spinlock.
+ *
+ * \a *submit_count will be checked against
+ * KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ to see whether too many jobs have
+ * been submitted. This is to prevent the IRQ handler looping over lots of GPU
+ * NULL jobs, which may complete whilst the IRQ handler is still processing. \a
+ * submit_count itself should point to kbase_device::slot_submit_count_irq[ \a js ],
+ * which is initialized to zero on entry to the IRQ handler.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return truthful (i.e. != MALI_FALSE) if too many jobs were submitted from
+ * IRQ. Therefore, this indicates that submission should be retried from a
+ * work-queue, by using
+ * kbasep_js_try_run_next_job_on_slot_nolock()/kbase_js_try_run_jobs_on_slot().
+ * @return MALI_FALSE if submission had no problems: the GPU is either already
+ * full of jobs in the HEAD and NEXT registers, or we were able to get enough
+ * jobs from the Run Pool to fill the GPU's HEAD and NEXT registers.
+ */
+mali_bool kbasep_js_try_run_next_job_on_slot_irq_nolock(struct kbase_device *kbdev, int js, s8 *submit_count);
+
+/**
+ * @brief Try to submit the next job on a particular slot, outside of IRQ context
+ *
+ * This obtains the Job Slot lock for the duration of the call only.
+ *
+ * Unlike kbasep_js_try_run_next_job_on_slot_irq_nolock(), there is no limit on
+ * submission, because eventually IRQ_THROTTLE will kick in to prevent us
+ * getting stuck in a loop of submitting GPU NULL jobs. This is because the IRQ
+ * handler will be delayed, and so this function will eventually fill up the
+ * space in our software 'submitted' slot (kbase_jm_slot::submitted).
+ *
+ * In addition, there's no return value - we'll run the maintenance functions
+ * on the Policy's Run Pool, but if there's nothing there after that, then the
+ * Run Pool is truly empty, and so no more action needs to be taken.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_mutex
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This must only be called whilst the GPU is powered - for example, when
+ * kbdev->js_data.nr_user_contexts_running > 0.
+ *
+ * @note The caller \em might be holding one of the
+ * kbasep_js_kctx_info::ctx::jsctx_mutex locks.
+ *
+ */
+void kbasep_js_try_run_next_job_on_slot_nolock(struct kbase_device *kbdev, int js);
+
+/**
+ * @brief Try to submit the next job for each slot in the system, outside of IRQ context
+ *
+ * This will internally call kbasep_js_try_run_next_job_on_slot_nolock(), so similar
+ * locking conditions on the caller are required.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_mutex
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * @note The caller \em might be holding one of the
+ * kbasep_js_kctx_info::ctx::jsctx_mutex locks.
+ *
+ */
+void kbasep_js_try_run_next_job_nolock(struct kbase_device *kbdev);
+
+/**
+ * @brief Try to schedule the next context onto the Run Pool
+ *
+ * This checks whether there's space in the Run Pool to accommodate a new
+ * context. If so, it attempts to dequeue a context from the Policy Queue, and
+ * submit this to the Run Pool.
+ *
+ * If the scheduling succeeds, then it also makes a call to
+ * kbasep_js_try_run_next_job_nolock(), in case the new context has jobs
+ * matching the job slot requirements, but no other currently scheduled context
+ * has such jobs.
+ *
+ * Whilst attempting to obtain a context from the policy queue, or add a
+ * context to the runpool, this function takes a Power Manager active
+ * reference. If for any reason a context cannot be added to the runpool, any
+ * reference obtained is released once the context is safely back in the policy
+ * queue. If no context was available on the policy queue, any reference
+ * obtained is released too.
+ *
+ * Only if the context gets placed in the runpool does the Power Manager active
+ * reference stay held (and is effectively now owned by the
+ * context/runpool). It is only released once the context is removed
+ * completely, or added back to the policy queue
+ * (e.g. kbasep_js_runpool_release_ctx(),
+ * kbasep_js_runpool_requeue_or_kill_ctx(), etc)
+ *
+ * If any of these actions fail (Run Pool Full, Policy Queue empty, can't get
+ * PM active reference due to a suspend, etc) then any actions taken are rolled
+ * back and the function just returns normally.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_try_schedule_head_ctx(struct kbase_device *kbdev);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx() is called.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
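
The two privileged-context calls above are intended to bracket work that needs the context pinned in an address space (for example, instrumentation setup). A minimal sketch follows, assuming kbdev and kctx are in scope; the work in between is a placeholder.

	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	/* ... kctx stays scheduled in, with its address space reserved, until
	 * the matching release below ... */

	kbasep_js_release_privileged_ctx(kbdev, kctx);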
+
+/**
+ * @brief Handle the Job Scheduler component for the IRQ of a job finishing
+ *
+ * This does the following:
+ * -# Releases resources held by the atom
+ * -# if \a end_timestamp != NULL, updates the runpool's notion of time spent by a running ctx
+ * -# determines whether a context should be marked for scheduling out
+ * -# examines done_code to determine whether to submit the next job on the slot
+ * (picking from all ctxs in the runpool)
+ *
+ * In addition, if submission didn't happen (the submit-from-IRQ function
+ * failed or done_code didn't specify to start new jobs), then this sets a
+ * message on katom that submission needs to be retried from the worker thread.
+ *
+ * Normally, the time calculated from end_timestamp is rounded up to the
+ * minimum time precision. Therefore, to ensure the job is recorded as not
+ * spending any time, set end_timestamp to NULL. For example, this is necessary when
+ * evicting jobs from JS_HEAD_NEXT (because they didn't actually run).
+ *
+ * NOTE: It's possible to move steps (2) and (3) (including calculating the job's time
+ * used) into the worker (outside of IRQ context), but this may allow a context
+ * to use up to twice as much timeslice as is allowed by the policy. For
+ * policies that order by time spent, this is not a problem for overall
+ * 'fairness', but can still increase latency between contexts.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_js_job_done_slot_irq(struct kbase_jd_atom *katom, int slot_nr,
+ ktime_t *end_timestamp,
+ kbasep_js_atom_done_code done_code);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Try to submit the next job on a specific slot
+ *
+ * The following locking conditions are made on the caller:
+ *
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this
+ * will be obtained internally)
+ *
+ */
+void kbase_js_try_run_jobs_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * @brief Handle releasing cores for power management and affinity management,
+ * ensuring that cores are powered down and affinity tracking is updated.
+ *
+ * This must only be called on an atom that is not currently running, and has
+ * not been re-queued onto the context (and so does not need locking)
+ *
+ * This function enters at the following @ref enum kbase_atom_coreref_state states:
+ * - NO_CORES_REQUESTED
+ * - WAITING_FOR_REQUESTED_CORES
+ * - RECHECK_AFFINITY
+ * - READY
+ *
+ * It transitions the above states back to NO_CORES_REQUESTED by the end of the
+ * function call (possibly via intermediate states).
+ *
+ * No locks need be held by the caller, since this takes the necessary Power
+ * Management locks itself. The runpool_irq.lock is not taken (the work that
+ * requires it is handled by kbase_js_affinity_submit_to_blocked_slots() ).
+ *
+ * @note The corresponding kbasep_js_job_check_ref_cores() is private to the
+ * Job Scheduler, and is called automatically when running the next job.
+ */
+void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Management active refcounts to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any mali_bool, never compare the return value against MALI_TRUE; test
+ * it against MALI_FALSE instead.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static INLINE mali_bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 test_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled != MALI_FALSE);
+
+ test_bit = (u16) (1u << kctx->as_nr);
+
+ return (mali_bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static INLINE void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 set_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled != MALI_FALSE);
+
+ set_bit = (u16) (1u << kctx->as_nr);
+
+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static INLINE void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 clear_bit;
+ u16 clear_mask;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled != MALI_FALSE);
+
+ clear_bit = (u16) (1u << kctx->as_nr);
+ clear_mask = ~clear_bit;
+
+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
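
Since all three submit-allowed helpers above require kbasep_js_device_data::runpool_irq::lock, a typical call site looks roughly like the following sketch; js_devdata and kctx are assumed to be in scope, and timeslice_expired is a placeholder condition rather than an existing symbol.

	unsigned long flags;

	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);

	if (kbasep_js_is_submit_allowed(js_devdata, kctx) && timeslice_expired)
		kbasep_js_clear_submit_allowed(js_devdata, kctx);

	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);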
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static INLINE void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+ atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs from being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static INLINE void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+ KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+ KBASE_DEBUG_ASSERT(atom->retry_submit_on_slot == KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID
+ || atom->retry_submit_on_slot == js);
+
+ atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static INLINE void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+ retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+ retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static INLINE void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+ retained_state->event_code = katom->event_code;
+ retained_state->core_req = katom->core_req;
+ retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+ retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return MALI_FALSE if the atom has not finished
+ * @return !=MALI_FALSE if the atom has finished
+ */
+static INLINE mali_bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (mali_bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return MALI_FALSE if the retained state is invalid, and can be ignored
+ * @return !=MALI_FALSE if the retained state is valid
+ */
+static INLINE mali_bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (mali_bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static INLINE mali_bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+ int js = katom_retained_state->retry_submit_on_slot;
+ *res = js;
+ return (mali_bool) (js >= 0);
+}
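
Tying the retained-state helpers together, here is a sketch of how a worker might consume a snapshot taken at completion time; retained_state is assumed to have been filled in earlier with kbasep_js_atom_retained_state_copy(), and kbdev is in scope. Illustrative only.

	int retry_js;

	if (kbasep_js_atom_retained_state_is_valid(&retained_state)) {
		if (!kbasep_js_has_atom_finished(&retained_state)) {
			/* soft-stopped or evicted: the atom will be resubmitted later */
		}
		if (kbasep_js_get_atom_retry_submit_slot(&retained_state, &retry_js)) {
			/* submission from the IRQ handler was not possible, so
			 * retry this slot from process context */
			kbase_js_try_run_jobs_on_slot(kbdev, retry_js);
		}
	}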
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guaranteed to be kept constant.
+ */
+static INLINE int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int result = -1;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID)
+ result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return result;
+}
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static INLINE struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ found_kctx = js_per_as_data->kctx;
+ KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static INLINE u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return us * (gpu_freq / 1000);
+}
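
A quick worked example of the conversion above, under hypothetical numbers that are not taken from any real GPU configuration:

	/* Suppose gpu_freq_khz_min == 100000, i.e. a 100 MHz minimum clock. */
	u32 ticks = kbasep_js_convert_us_to_gpu_ticks_min_freq(kbdev, 50);
	/* ticks == 50 * (100000 / 1000) == 5000: waiting for at most 5000 GPU
	 * cycles can never take longer than 50 us, because any faster clock
	 * only makes those cycles shorter. */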
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static INLINE u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static INLINE u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static INLINE u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return ticks / gpu_freq * 1000;
+}
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.c
new file mode 100755
index 000000000000..7a4cae3be870
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.c
@@ -0,0 +1,382 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_affinity.c
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+
+
+STATIC INLINE mali_bool affinity_job_uses_high_cores(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ struct kbase_context *kctx;
+ kbase_context_flags ctx_flags;
+
+ kctx = katom->kctx;
+ ctx_flags = kctx->jctx.sched_info.ctx.flags;
+
+ /* In this HW Workaround, compute-only jobs/contexts use the high cores
+ * during a core-split, all other contexts use the low cores. */
+ return (mali_bool) ((katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) != 0 || (ctx_flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != 0);
+ }
+ return MALI_FALSE;
+}
+
+/**
+ * @brief Decide whether a split in core affinity is required across job slots
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * @param kbdev The kbase device structure of the device
+ * @return MALI_FALSE if a core split is not required
+ * @return != MALI_FALSE if a core split is required.
+ */
+STATIC INLINE mali_bool kbase_affinity_requires_split(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ s8 nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE);
+ s8 nr_noncompute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+ /* In this case, a mix of Compute+Non-Compute determines whether a
+ * core-split is required, to ensure jobs with different numbers of RMUs
+ * don't use the same cores.
+ *
+ * When it's entirely compute, or entirely non-compute, then no split is
+ * required.
+ *
+ * A context can be both Compute and Non-compute, in which case this will
+ * correctly decide that a core-split is required. */
+
+ return (mali_bool) (nr_compute_ctxs > 0 && nr_noncompute_ctxs > 0);
+ }
+ return MALI_FALSE;
+}
+
+mali_bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js)
+{
+ /*
+ * Here are the reasons for using job slot 2:
+ * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+ * - In absence of the above, then:
+ * - Atoms with BASE_JD_REQ_COHERENT_GROUP
+ * - But, only when there aren't contexts with
+ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+ * all cores on slot 1 could be blocked by those using a coherent group
+ * on slot 2
+ * - And, only when you actually have 2 or more coregroups - if you only
+ * have 1 coregroup, then having jobs for slot 2 implies they'd also be
+ * for slot 1, meaning you'll get interference from them. Jobs able to
+ * run on slot 2 could also block jobs that can only run on slot 1
+ * (tiler jobs)
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return MALI_TRUE;
+
+ if (js != 2)
+ return MALI_TRUE;
+
+ /* Only deal with js==2 now: */
+ if (kbdev->gpu_props.num_core_groups > 1) {
+ /* Only use slot 2 in the 2+ coregroup case */
+ if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) == MALI_FALSE) {
+ /* ...But only when we *don't* have atoms that run on all cores */
+
+ /* No specific check for BASE_JD_REQ_COHERENT_GROUP atoms - the policy will sort that out */
+ return MALI_TRUE;
+ }
+ }
+
+ /* Above checks failed mean we shouldn't use slot 2 */
+ return MALI_FALSE;
+}
+
+/*
+ * Until the deeper rework of the job scheduler, power manager and
+ * affinity manager has been implemented, this function is just an
+ * intermediate step that assumes:
+ * - all working cores will be powered on when this is called.
+ * - the largest current configuration is 2 core groups.
+ * - it has been decided not to have hardcoded values, so the low
+ *   and high cores in a core split will be evenly distributed.
+ * - odd combinations of core requirements have been filtered out
+ *   and do not get to this function (e.g. CS+T+NSS is not
+ *   supported here).
+ * - this function is frequently called and can be optimized
+ *   (see notes in the loops), but as the functionality will likely
+ *   be modified, optimization has not been addressed.
+*/
+mali_bool kbase_js_choose_affinity(u64 * const affinity, struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js)
+{
+ base_jd_core_req core_req = katom->core_req;
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+ u64 core_availability_mask;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+ /*
+ * If no cores are currently available (core availability policy is
+ * transitioning) then fail.
+ */
+ if (0 == core_availability_mask) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ *affinity = 0;
+ return MALI_FALSE;
+ }
+
+ KBASE_DEBUG_ASSERT(js >= 0);
+
+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ /* Tiler only job, bit 0 needed to enable tiler but no shader cores required */
+ *affinity = 1;
+ return MALI_TRUE;
+ }
+
+ if (1 == kbdev->gpu_props.num_cores) {
+ /* trivial case only one core, nothing to do */
+ *affinity = core_availability_mask;
+ } else if (kbase_affinity_requires_split(kbdev) == MALI_FALSE) {
+ if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+ if (js == 0 || num_core_groups == 1) {
+ /* js[0] and single-core-group systems just get the first core group */
+ *affinity = kbdev->gpu_props.props.coherency_info.group[0].core_mask & core_availability_mask;
+ } else {
+ /* js[1], js[2] use core groups 0, 1 for dual-core-group systems */
+ u32 core_group_idx = ((u32) js) - 1;
+
+ KBASE_DEBUG_ASSERT(core_group_idx < num_core_groups);
+ *affinity = kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask & core_availability_mask;
+
+ /* If the job is specifically targeting core group 1 and the core
+ * availability policy is keeping that core group off, then fail */
+ if (*affinity == 0 && core_group_idx == 1 && kbdev->pm.cg1_disabled == MALI_TRUE)
+ katom->event_code = BASE_JD_EVENT_PM_EVENT;
+ }
+ } else {
+ /* All cores are available when no core split is required */
+ *affinity = core_availability_mask;
+ }
+ } else {
+ /* Core split required - divide cores in two non-overlapping groups */
+ u64 low_bitmap, high_bitmap;
+ int n_high_cores = kbdev->gpu_props.num_cores >> 1;
+ KBASE_DEBUG_ASSERT(1 == num_core_groups);
+ KBASE_DEBUG_ASSERT(0 != n_high_cores);
+
+ /* compute the reserved high cores bitmap */
+ high_bitmap = ~0;
+ /* note: this can take a while, optimization desirable */
+ while (n_high_cores != hweight32(high_bitmap & kbdev->shader_present_bitmap))
+ high_bitmap = high_bitmap << 1;
+
+ high_bitmap &= core_availability_mask;
+ low_bitmap = core_availability_mask ^ high_bitmap;
+
+ if (affinity_job_uses_high_cores(kbdev, katom))
+ *affinity = high_bitmap;
+ else
+ *affinity = low_bitmap;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /*
+ * If no cores are currently available in the desired core group(s)
+ * (core availability policy is transitioning) then fail.
+ */
+ if (*affinity == 0)
+ return MALI_FALSE;
+
+ /* Enable core 0 if tiler required */
+ if (core_req & BASE_JD_REQ_T)
+ *affinity = *affinity | 1;
+
+ return MALI_TRUE;
+}
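
To make the core-split branch above concrete, consider hypothetical numbers: an 8-core GPU in a single core group with every core present and available. n_high_cores is 8 >> 1 = 4, so the loop shifts high_bitmap left until only the top four present cores remain set, giving high_bitmap = 0xF0 and low_bitmap = core_availability_mask ^ high_bitmap = 0x0F. Atoms for which affinity_job_uses_high_cores() holds then run on the high half, and everything else runs on the low half.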
+
+STATIC INLINE mali_bool kbase_js_affinity_is_violating(struct kbase_device *kbdev, u64 *affinities)
+{
+ /* This implementation checks whether the two slots involved in Generic thread creation
+ * have intersecting affinity. This is due to micro-architectural issues where a job in
+	 * slot A targeting cores used by slot B could prevent the job in slot B from making
+ * progress until the job in slot A has completed.
+ *
+ * @note It just so happens that this restriction also allows
+ * BASE_HW_ISSUE_8987 to be worked around by placing on job slot 2 the
+ * atoms from ctxs with KBASE_CTX_FLAG_HINT_ONLY_COMPUTE flag set
+ */
+ u64 affinity_set_left;
+ u64 affinity_set_right;
+ u64 intersection;
+
+ KBASE_DEBUG_ASSERT(affinities != NULL);
+
+ affinity_set_left = affinities[1];
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ /* The left set also includes those on the Fragment slot when
+ * we are using the HW workaround for BASE_HW_ISSUE_8987 */
+ affinity_set_left |= affinities[0];
+ }
+
+ affinity_set_right = affinities[2];
+
+ /* A violation occurs when any bit in the left_set is also in the right_set */
+ intersection = affinity_set_left & affinity_set_right;
+
+ return (mali_bool) (intersection != (u64) 0u);
+}
+
+mali_bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, sizeof(js_devdata->runpool_irq.slot_affinities));
+
+ new_affinities[js] |= affinity;
+
+ return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 cores;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) == MALI_FALSE);
+
+ cores = affinity;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ s8 cnt;
+
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] < BASE_JM_SUBMIT_SLOTS);
+
+ cnt = ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+ if (cnt == 1)
+ js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+ cores &= ~bit;
+ }
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 cores;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ cores = affinity;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ s8 cnt;
+
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+ cnt = --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+ if (0 == cnt)
+ js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+ cores &= ~bit;
+ }
+
+}
+
+void kbase_js_affinity_slot_blocked_an_atom(struct kbase_device *kbdev, int js)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ js_devdata->runpool_irq.slots_blocked_on_affinity |= 1u << js;
+}
+
+void kbase_js_affinity_submit_to_blocked_slots(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u16 slots;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running != 0);
+
+ /* Must take a copy because submitting jobs will update this member. */
+ slots = js_devdata->runpool_irq.slots_blocked_on_affinity;
+
+ while (slots) {
+ int bitnum = fls(slots) - 1;
+
+ u16 bit = 1u << bitnum;
+ slots &= ~bit;
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JS_AFFINITY_SUBMIT_TO_BLOCKED, NULL, NULL, 0u, bitnum);
+
+		/* must update this before we submit, in case it's set again */
+ js_devdata->runpool_irq.slots_blocked_on_affinity &= ~bit;
+
+ kbasep_js_try_run_next_job_on_slot_nolock(kbdev, bitnum);
+
+ /* Don't re-read slots_blocked_on_affinity after this - it could loop for a long time */
+ }
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int slot_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, NULL, 0u, slot_nr, (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif /* KBASE_TRACE_ENABLE */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.h
new file mode 100755
index 000000000000..83da812802dc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_affinity.h
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_affinity.h
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_affinity Affinity Manager internal APIs.
+ * @{
+ *
+ */
+
+/**
+ * @brief Decide whether it is possible to submit a job to a particular job slot in the current status
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status. For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param js Job slot number to check for allowance
+ */
+mali_bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, int js);
+
+/**
+ * @brief Compute affinity for a given job.
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns MALI_TRUE if a valid affinity was chosen, MALI_FALSE if
+ * no cores were available.
+ *
+ * @param[out] affinity Affinity bitmap computed
+ * @param kbdev The kbase device structure of the device
+ * @param katom Job chain for which the affinity is to be computed
+ * @param js Job slot to which the job chain is being submitted
+ */
+mali_bool kbase_js_choose_affinity(u64 * const affinity, struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js);
+
+/**
+ * @brief Determine whether a proposed \a affinity on job slot \a js would
+ * cause a violation of affinity restrictions.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+mali_bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, u64 affinity);
+
+/**
+ * @brief Affinity tracking: retain cores used by a slot
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, u64 affinity);
+
+/**
+ * @brief Affinity tracking: release cores used by a slot
+ *
+ * Cores \b must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, u64 affinity);
+
+/**
+ * @brief Register a slot as blocking atoms due to affinity violations
+ *
+ * Once a slot has been registered, we must check after every atom completion
+ * (including those on different slots) to see if the slot can be
+ * unblocked. This is done by calling
+ * kbase_js_affinity_submit_to_blocked_slots(), which will also deregister the
+ * slot if it no longer blocks atoms due to affinity violations.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_slot_blocked_an_atom(struct kbase_device *kbdev, int js);
+
+/**
+ * @brief Submit to job slots that have registered that an atom was blocked on
+ * the slot previously due to affinity violations.
+ *
+ * This submits to all slots registered by
+ * kbase_js_affinity_slot_blocked_an_atom(). If submission succeeded, then the
+ * slot is deregistered as having blocked atoms due to affinity
+ * violations. Otherwise it stays registered, and the next atom to complete
+ * must attempt to submit to the blocked slots again.
+ *
+ * This must only be called whilst the GPU is powered - for example, when
+ * kbdev->js_data.nr_user_contexts_running > 0.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_mutex
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_submit_to_blocked_slots(struct kbase_device *kbdev);
+
+/**
+ * @brief Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else /* KBASE_TRACE_ENABLE */
+static INLINE void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+ /** @} *//* end group kbase_js_affinity */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+
+#endif /* _KBASE_JS_AFFINITY_H_ */
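Taken together, these APIs imply a submission-path sequence roughly like the following minimal sketch (the caller holds kbasep_js_device_data::runpool_irq::lock throughout; this illustrates the documented contract and is not actual driver code):

	u64 affinity;

	if (kbase_js_can_run_job_on_slot_no_lock(kbdev, js) == MALI_FALSE)
		return;		/* policy forbids using this slot right now */

	if (kbase_js_choose_affinity(&affinity, kbdev, katom, js) == MALI_FALSE)
		return;		/* no cores currently available */

	if (kbase_js_affinity_would_violate(kbdev, js, affinity) != MALI_FALSE) {
		/* Register the slot so it is retried when another atom completes */
		kbase_js_affinity_slot_blocked_an_atom(kbdev, js);
		return;
	}

	kbase_js_affinity_retain_slot_cores(kbdev, js, affinity);
	/* ... submit the atom to slot 'js' ... */

	/* Later, as soon as the atom is dequeued from the slot's submit slots: */
	kbase_js_affinity_release_slot_cores(kbdev, js, affinity);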
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100755
index 000000000000..aa13bdb7fda3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,329 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+STATIC mali_bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool runpool_state_changed = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE);
+
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != MALI_FALSE) {
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+ /* First refcount indicates a state change */
+ runpool_state_changed = MALI_TRUE;
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+STATIC mali_bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool runpool_state_changed = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != MALI_FALSE);
+
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != MALI_FALSE) {
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+ /* Last de-refcount indicates a state change */
+ runpool_state_changed = MALI_TRUE;
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+STATIC mali_bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool runpool_state_changed = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+ if (js_kctx_info->ctx.is_scheduled != MALI_FALSE && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+		/* Only ref-count the attribute on the runpool the first time this context sees this attribute */
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+STATIC mali_bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool runpool_state_changed = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+ if (js_kctx_info->ctx.is_scheduled != MALI_FALSE && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+ }
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release_attr() needs to check it too */
+ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+ return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ mali_bool runpool_state_changed = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != MALI_FALSE) {
+ /* This context never submits, so don't track any scheduling attributes */
+ return;
+ }
+
+ /* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != MALI_FALSE) {
+ /* Compute context */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ }
+ /* NOTE: Whether this is a non-compute context depends on the jobs being
+ * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */
+
+ /* ... More attributes can be added here ... */
+
+ /* The context should not have been scheduled yet, so ASSERT if this caused
+ * runpool state changes (note that other threads *can't* affect the value
+ * of runpool_state_changed, due to how it's calculated) */
+ KBASE_DEBUG_ASSERT(runpool_state_changed == MALI_FALSE);
+ CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ mali_bool runpool_state_changed;
+ int i;
+
+ /* Retain any existing attributes */
+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != MALI_FALSE) {
+ /* The context is being scheduled in, so update the runpool with the new attributes */
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+ /* We don't need to know about state changed, because retaining a
+ * context occurs on scheduling it, and that itself will also try
+ * to run new atoms */
+ CSTD_UNUSED(runpool_state_changed);
+ }
+ }
+}
+
+mali_bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ mali_bool runpool_state_changed = MALI_FALSE;
+ int i;
+
+ /* Release any existing attributes */
+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != MALI_FALSE) {
+ /* The context is being scheduled out, so update the runpool on the removed attributes */
+ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ mali_bool runpool_state_changed = MALI_FALSE;
+ base_jd_core_req core_req;
+
+ KBASE_DEBUG_ASSERT(katom);
+ core_req = katom->core_req;
+
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ else
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+ /* Atom that can run on slot1 or slot2, and can use all cores */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+ }
+
+ /* We don't need to know about state changed, because retaining an
+ * atom occurs on adding it, and that itself will also try to run
+ * new atoms */
+ CSTD_UNUSED(runpool_state_changed);
+}
+
+mali_bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ mali_bool runpool_state_changed = MALI_FALSE;
+ base_jd_core_req core_req;
+
+ KBASE_DEBUG_ASSERT(katom_retained_state);
+ core_req = katom_retained_state->core_req;
+
+ /* No-op for invalid atoms */
+ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == MALI_FALSE)
+ return MALI_FALSE;
+
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+#if KBASE_PM_EN
+ unsigned long flags;
+ int device_nr = (core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ? katom_retained_state->device_nr : 0;
+ KBASE_DEBUG_ASSERT(device_nr < 2);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbasep_pm_record_job_status(kbdev);
+ kbdev->pm.metrics.active_cl_ctx[device_nr]--;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+#endif
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ } else {
+#if KBASE_PM_EN
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbasep_pm_record_job_status(kbdev);
+ kbdev->pm.metrics.active_gl_ctx--;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+#endif
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+ }
+
+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+ /* Atom that can run on slot1 or slot2, and can use all cores */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+ }
+
+ return runpool_state_changed;
+}
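The MALI_TRUE return values above are intended to be consumed by the scheduler core roughly as in this minimal sketch (the caller holds the jsctx mutex and, since the context is scheduled, the runpool_irq lock; illustration only, not actual driver code):

	mali_bool state_changed;

	state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
							     &katom_retained_state);
	if (state_changed != MALI_FALSE) {
		/* The runpool's attribute state changed, so more jobs may now be
		 * submittable; per the comments above, the caller should arrange for
		 * kbasep_js_try_run_next_job_nolock() or similar to run later. */
	}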
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100755
index 000000000000..6a57538298d0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+mali_bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return MALI_TRUE indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return MALI_FALSE indicates no change in ctx attributes state of the runpool.
+ */
+mali_bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static INLINE s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+
+ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static INLINE mali_bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+ return (mali_bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static INLINE mali_bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+ return (mali_bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_CTX_ATTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100755
index 000000000000..e9572ba754a9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_defs.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+ /** Context will not submit any jobs */
+ KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+ /** Set if the context uses an address space and should be kept scheduled in */
+ KBASE_CTX_FLAG_PRIVILEGED = (1u << 1),
+
+ /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. Non-mutable after creation flags set */
+ KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2)
+
+ /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+ base_jd_core_req core_req;
+ kbase_context_flags ctx_req;
+ u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+#ifdef KBASE_JS_POLICY_AVAILABLE_FCFS
+ struct kbasep_js_policy_fcfs fcfs;
+#endif
+#ifdef KBASE_JS_POLICY_AVAILABLE_CFS
+ struct kbasep_js_policy_cfs cfs;
+#endif
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+#ifdef KBASE_JS_POLICY_AVAILABLE_FCFS
+ struct kbasep_js_policy_fcfs_ctx fcfs;
+#endif
+#ifdef KBASE_JS_POLICY_AVAILABLE_CFS
+ struct kbasep_js_policy_cfs_ctx cfs;
+#endif
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+#ifdef KBASE_JS_POLICY_AVAILABLE_FCFS
+ struct kbasep_js_policy_fcfs_job fcfs;
+#endif
+#ifdef KBASE_JS_POLICY_AVAILABLE_CFS
+ struct kbasep_js_policy_cfs_job cfs;
+#endif
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accommodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ *
+ * Examples of use:
+ * - Finding out when there is a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE
+ * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool
+ */
+enum kbasep_js_ctx_attr {
+ /** Attribute indicating a context that contains Compute jobs. That is,
+ * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type
+ * @ref BASE_JD_REQ_ONLY_COMPUTE
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ */
+ KBASEP_JS_CTX_ATTR_COMPUTE,
+
+ /** Attribute indicating a context that contains Non-Compute jobs. That is,
+ * the context has some jobs that are \b not of type @ref
+ * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+ * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+ * workarounds in use in the Job Scheduling Policy.
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ */
+ KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+ /** Attribute indicating that a context contains compute-job atoms that
+ * aren't restricted to a coherent group, and can run on all cores.
+ *
+ * Specifically, this is when the atom's \a core_req satisfy:
+ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) // uses slot 1 or slot 2
+ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+ *
+ * Such atoms could be blocked from running if one of the coherent groups
+ * is being used by another job slot, so tracking this context attribute
+ * allows us to prevent such situations.
+ *
+ * @note This doesn't take into account the 1-coregroup case, where all
+ * compute atoms would effectively be able to run on 'all cores', but
+ * contexts will still not always get marked with this attribute. Instead,
+ * it is the caller's responsibility to take into account the number of
+ * coregroups when interpreting this attribute.
+ *
+ * @note Whilst Tiler atoms are normally combined with
+ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+ * enough to handle anyway.
+ */
+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+ /** Must be the last in the enum */
+ KBASEP_JS_CTX_ATTR_COUNT
+};
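As an example of the 'mix of compute and non-compute contexts' use mentioned above, a hypothetical helper (not part of the driver) built on the inline accessors from mali_kbase_js_ctx_attr.h might look like:

	/* MALI_TRUE if the runpool currently holds both compute and non-compute
	 * contexts. Caller must hold the runpool_irq spinlock. */
	static INLINE mali_bool kbasep_js_runpool_has_ctx_mix(struct kbase_device *kbdev)
	{
		return (mali_bool)(kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE) &&
				   kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_NON_COMPUTE));
	}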
+
+enum {
+ /** Bit indicating that new atom should be started because this atom completed */
+ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+ /** Bit indicating that the atom was evicted from the JS_NEXT registers */
+ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpool_irq::lock
+ * must be held whilst accessing this data (including reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+ /**
+ * Ref count of whether this AS is busy, and must not be scheduled out
+ *
+ * When jobs are running this is always positive. However, it can still be
+ * positive when no jobs are running. If all you need is a heuristic to
+ * tell you whether jobs might be running, this should be sufficient.
+ */
+ int as_busy_refcount;
+
+ /** Pointer to the current context on this address space, or NULL for no context */
+ struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not roll over (which would be undefined behavior), and so
+ * the compiler is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+ /** Sub-structure to collect together Job Scheduling data used in IRQ context */
+ struct runpool_irq {
+ /**
+ * Lock for accessing Job Scheduling data used in IRQ context
+ *
+ * This lock must be held whenever this data is accessed (read, or
+ * write). Even for read-only access, memory barriers would be needed.
+ * In any case, it is likely that decisions based on only reading must
+ * also be atomic with respect to data held here and elsewhere in the
+ * Job Scheduler.
+ *
+ * This lock must also be held for accessing:
+ * - kbase_context::as_nr
+ * - kbase_device::jm_slots
+ * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+ * the policy in question for more information)
+ * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+ * the policy in question for more information)
+ */
+ spinlock_t lock;
+
+ /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+ * When bit 'N' is set in this, it indicates whether the context bound to address space
+ * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+ *
+ * It is placed here because it's much more memory efficient than having a mali_bool8 in
+ * struct kbasep_js_per_as_data to store this flag */
+ u16 submit_allowed;
+
+ /** Context Attributes:
+ * Each is large enough to hold a refcount of the number of contexts
+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+ *
+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+ * the refcount. Hence, it's not worthwhile reducing this to
+ * bit-manipulation on u32s to save space (where in contrast, 4 bit
+ * sub-fields would be easy to do and would save space).
+ *
+ * Whilst this must not become negative, the sign bit is used for:
+ * - error detection in debug builds
+ * - Optimization: it is undefined for a signed int to overflow, and so
+ * the compiler can optimize for that never happening (thus, no masking
+ * is required on updating the variable) */
+ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+ /** Data that is unique for each AS */
+ struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+ /*
+ * Affinity management and tracking
+ */
+ /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+ * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+ /** Bitvector indicating which slots \em might have atoms blocked on
+ * them because otherwise they'd violate affinity restrictions */
+ u16 slots_blocked_on_affinity;
+ /** Refcount for each core owned by each slot. Used to generate the
+ * slot_affinities array of bitvectors
+ *
+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+ * because it is refcounted only when a job is definitely about to be
+ * submitted to a slot, and is de-refcounted immediately after a job
+ * finishes */
+ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+ } runpool_irq;
+
+ /**
+ * Run Pool mutex, for managing contexts within the runpool.
+ * Unless otherwise specified, you must hold this lock whilst accessing any
+ * members that follow
+ *
+ * In addition, this is used to access:
+ * - the kbasep_js_kctx_info::runpool substructure
+ */
+ struct mutex runpool_mutex;
+
+ /**
+ * Queue Lock, used to access the Policy's queue of contexts independently
+ * of the Run Pool.
+ *
+ * Of course, you don't need the Run Pool lock to access this.
+ */
+ struct mutex queue_mutex;
+
+ u16 as_free; /**< Bitpattern of free Address Spaces */
+
+ /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+ s8 nr_user_contexts_running;
+ /** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+ s8 nr_all_contexts_running;
+
+ /**
+ * Policy-specific information.
+ *
+ * Refer to the structure defined by the current policy to determine which
+ * locks must be held when accessing this.
+ */
+ union kbasep_js_policy policy;
+
+	/** Core Requirements to match up with base_jd_atom's core_req member
+ * @note This is a write-once member, and so no locking is required to read */
+ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+ u32 scheduling_tick_ns; /**< Value for KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS */
+ u32 soft_stop_ticks; /**< Value for KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS */
+ u32 soft_stop_ticks_cl; /**< Value for KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL */
+ u32 hard_stop_ticks_ss; /**< Value for KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS */
+ u32 hard_stop_ticks_cl; /**< Value for KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL */
+ u32 hard_stop_ticks_nss; /**< Value for KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS */
+ u32 gpu_reset_ticks_ss; /**< Value for KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS */
+ u32 gpu_reset_ticks_cl; /**< Value for KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL */
+ u32 gpu_reset_ticks_nss; /**< Value for KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS */
+ u32 ctx_timeslice_ns; /**< Value for KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS */
+ u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+ /** List of suspended soft jobs */
+ struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+ /* Support soft-stop on a single context */
+ mali_bool softstop_always;
+#endif /* CONFIG_MALI_DEBUG */
+	/** The initialized-flag is placed at the end, to avoid cache-pollution (we should
+ * only be using this during init/term paths).
+ * @note This is a write-once member, and so no locking is required to read */
+ int init_status;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+ /**
+ * Runpool substructure. This must only be accessed whilst the Run Pool
+ * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+ *
+ * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+ * held for certain sub-members.
+ *
+ * @note some of the members could be moved into struct kbasep_js_device_data for
+ * improved d-cache/tlb efficiency.
+ */
+ struct {
+ union kbasep_js_policy_ctx_info policy_ctx; /**< Policy-specific context */
+ } runpool;
+
+ /**
+ * Job Scheduler Context information sub-structure. These members are
+ * accessed regardless of whether the context is:
+ * - In the Policy's Run Pool
+ * - In the Policy's Queue
+ * - Not queued nor in the Run Pool.
+ *
+ * You must obtain the jsctx_mutex before accessing any other members of
+ * this substructure.
+ *
+ * You may not access any of these members from IRQ context.
+ */
+ struct {
+ struct mutex jsctx_mutex; /**< Job Scheduler Context lock */
+
+ /** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+ * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs */
+ u32 nr_jobs;
+
+ /** Context Attributes:
+ * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. */
+ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+ kbase_context_flags flags;
+ /* NOTE: Unify the following flags into kbase_context_flags */
+ /**
+ * Is the context scheduled on the Run Pool?
+ *
+ * This is only ever updated whilst the jsctx_mutex is held.
+ */
+ mali_bool is_scheduled;
+ /**
+ * Wait queue to wait for is_scheduled state changes.
+		 */
+ wait_queue_head_t is_scheduled_wait;
+
+ mali_bool is_dying; /**< Is the context in the process of being evicted? */
+ } ctx;
+
+	/* The initialized-flag is placed at the end, to avoid cache-pollution (we should
+ * only be using this during init/term paths) */
+ int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+ /** Event code - to determine whether the atom has finished */
+ enum base_jd_event_code event_code;
+ /** core requirements */
+ base_jd_core_req core_req;
+ /** Job Slot to retry submitting to if submission from IRQ handler failed */
+ int retry_submit_on_slot;
+ /* Core group atom was executed on */
+ u32 device_nr;
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbasep_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbasep_js_atom_retained_state.
+ *
+ * @see kbasep_js_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+#endif /* _KBASE_JS_DEFS_H_ */
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
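The 'invalid' core_req marker defined above is what kbasep_js_atom_retained_state_is_valid() (referenced from mali_kbase_js_ctx_attr.c earlier in this patch) is expected to test. A minimal sketch of such a predicate, for illustration only and not the driver's implementation:

	static INLINE mali_bool kbasep_js_atom_retained_state_is_valid_sketch(
		const struct kbasep_js_atom_retained_state *state)
	{
		/* An atom's retained state is valid unless its core_req was set to
		 * the 'invalid' marker value. */
		return (mali_bool)(state->core_req !=
				   KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
	}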
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100755
index 000000000000..6c777a92000d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,767 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by RT-priority, and then by
+ * time. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies both to a context as a whole, and to the jobs within a
+ * context. The jobs specify a priority in the base_jd_atom::prio member, which
+ * is relative to that of the context. A positive setting indicates a reduction
+ * in priority, whereas a negative setting indicates a boost in priority. Of
+ * course, the boost in priority should only be honoured when the originating
+ * process has sufficient privileges, and should be ignored for unprivileged
+ * processes. The meaning of the combined priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could arrange that an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only the high-priority jobs are run, and that the context is scheduled out
+ * once only low priority jobs remain. This ensures that the low priority jobs
+ * within the context do not gain from the priority boost, yet they still get
+ * scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ * - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ * - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ * - If Completed: job is just removed from the system
+ * - If Hard-stop or failure: job is removed from the system
+ * - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ * - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ * - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ * - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ * - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ * - kbasep_js_policy_enqueue_ctx() on old_ctx
+ * - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ * - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ * - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ * - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ * - if this returned MALI_TRUE, submit the job to the completed slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * MALI_FALSE, or the job slot has a job queued on both the HEAD and NEXT registers.
+ * - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * IRQ. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priority context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ * - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, there is a high likelihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * performance considerations:
+ * - kbasep_js_policy_log_job_result():
+ * - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ * - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ * - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ * - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ * - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as a job is made 'ready to run', it must be registered with the Job
+ * Scheduler Policy:
+ * - 'Ready to run' means it has satisfied its dependencies in the
+ * Kernel-side Job Dispatch system.
+ * - Call kbasep_js_policy_enqueue_job()
+ * - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ * - Call kbasep_js_policy_enqueue_ctx()
+ * - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ * - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ * - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ * - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ * - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ * - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ * - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ * - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ * - if we got one, submit it to the job slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * MALI_FALSE, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take a comparatively long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run than GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high priority jobs. In general,
+ * (no. ASs) - (no. JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ * - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ * - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ * - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ * - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ * - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ * - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ * - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context is submitting CS jobs. Then, the context not running jobs will
+ * be one of the high priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ * - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ * - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but any further high priority contexts could be
+ * delayed (by up to one timeslice).
+ * - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ * - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ * - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
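+ *
+ * A minimal, illustrative sketch of the check that
+ * kbasep_js_policy_enqueue_ctx() could make for such a context
+ * (pick_lowest_priority_runpool_ctx() and request_schedule_out() are
+ * hypothetical helpers standing in for policy-internal and Job Scheduler Core
+ * code respectively):
+ * @code
+ * struct kbase_context *victim = pick_lowest_priority_runpool_ctx(js_policy);
+ *
+ * if (victim != NULL &&
+ *     kbasep_js_policy_ctx_has_priority(js_policy, victim, new_kctx) != MALI_FALSE) {
+ * 	// new_kctx outranks the victim: schedule the victim out so that
+ * 	// new_kctx can be scheduled in (and can then Soft-Stop jobs)
+ * 	request_schedule_out(victim);
+ * }
+ * @endcode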
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting of the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, an 'init' step separate from dequeue/requeue is needed, so
+ * that information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+mali_error kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+mali_error kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return MALI_TRUE if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return MALI_FALSE if no contexts were available.
+ */
+mali_bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, the Run Pool, nor the Context
+ * itself is locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return MALI_TRUE if the context was evicted from the Policy Queue
+ * @return MALI_FALSE if the context was not found in the Policy Queue
+ */
+mali_bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guaranteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == MALI_FALSE), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback, mali_bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarantees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guaranteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be MALI_TRUE.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guaranteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+mali_bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has a higher priority than the current context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+mali_bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+ /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, OS priority etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g user-side relative priority)
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
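+ *
+ * A minimal sketch of that call order (illustrative only - the real call
+ * sites live in the Job Scheduler Core and take the locks each function
+ * documents):
+ * @code
+ * if (kbasep_js_policy_init_job(js_policy, kctx, katom) != MALI_ERROR_NONE)
+ * 	return MALI_ERROR_FUNCTION_FAILED;
+ *
+ * // ... later, under the appropriate locks:
+ * kbasep_js_policy_register_job(js_policy, kctx, katom);
+ * kbasep_js_policy_enqueue_job(js_policy, katom);
+ * @endcode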
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return MALI_ERROR_NONE if initialization was correct.
+ */
+mali_error kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registered on the Policy once, and must be
+ * deregistered on the Policy on completion (whether that completion was a
+ * success or a failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources; at most it updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return MALI_TRUE if a job was available, and *katom_ptr points to
+ * the atom dequeued.
+ * @return MALI_FALSE if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+mali_bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This is because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, it
+ * could apply a large penalty once the number of failing jobs exceeds some
+ * threshold.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy job scheduler policy
+ * @param katom job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+ /** @} *//* end group kbase_js_policy_job */
+
+ /** @} *//* end group kbase_js_policy */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100755
index 000000000000..78ec5f1fb551
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,1482 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/** Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE 40
+#define WEIGHT_0_NICE (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define LOOKUP_VARIANT_MASK ((1u<<KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS) - 1u)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX (19)
+
+/** Core requirements that all the variants support */
+#define JS_CORE_REQ_ALL_OTHERS \
+ (BASE_JD_REQ_CF | BASE_JD_REQ_V | BASE_JD_REQ_PERMON | BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER)
+
+/** Context requirements that all the variants support */
+
+/* In HW issue 8987 workaround, restrict Compute-only contexts and Compute jobs onto job slot[2],
+ * which will ensure their affinity does not intersect GLES jobs */
+#define JS_CTX_REQ_ALL_OTHERS_8987 \
+ (KBASE_CTX_FLAG_PRIVILEGED)
+#define JS_CORE_REQ_COMPUTE_SLOT_8987 \
+ (BASE_JD_REQ_CS)
+#define JS_CORE_REQ_ONLY_COMPUTE_SLOT_8987 \
+ (BASE_JD_REQ_ONLY_COMPUTE)
+
+/* Otherwise, compute-only contexts/compute jobs can use any job slot */
+#define JS_CTX_REQ_ALL_OTHERS \
+ (KBASE_CTX_FLAG_PRIVILEGED | KBASE_CTX_FLAG_HINT_ONLY_COMPUTE)
+#define JS_CORE_REQ_COMPUTE_SLOT \
+ (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE)
+
+/* core_req variants are ordered by least restrictive first, so that our
+ * algorithm in cached_variant_idx_init picks the least restrictive variant for
+ * each job. Note that the coherent_group requirement is added to all CS variants as the
+ * selection of job-slot does not depend on the coherency requirement. */
+static const struct kbasep_atom_req core_req_variants[] = {
+ {
+ /* 0: Fragment variant */
+ (JS_CORE_REQ_ALL_OTHERS | BASE_JD_REQ_FS | BASE_JD_REQ_FS_AFBC |
+ BASE_JD_REQ_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS),
+ 0},
+ {
+ /* 1: Compute variant, can use all coregroups */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT),
+ (JS_CTX_REQ_ALL_OTHERS),
+ 0},
+ {
+ /* 2: Compute variant, uses only coherent coregroups */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT | BASE_JD_REQ_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS),
+ 0},
+ {
+ /* 3: Compute variant, might only use coherent coregroup, and must use tiling */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT | BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_T),
+ (JS_CTX_REQ_ALL_OTHERS),
+ 0},
+
+ {
+ /* 4: Unused */
+ 0,
+ 0,
+ 0},
+
+ {
+ /* 5: Compute variant for specific-coherent-group targeting CoreGroup 0 */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT | BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS),
+ 0 /* device_nr */
+ },
+ {
+ /* 6: Compute variant for specific-coherent-group targeting CoreGroup 1 */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT | BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS),
+ 1 /* device_nr */
+ },
+
+ /* Unused core_req variants, to bring the total up to a power of 2 */
+ {
+ /* 7 */
+ 0,
+ 0,
+ 0},
+};
+
+static const struct kbasep_atom_req core_req_variants_8987[] = {
+ {
+ /* 0: Fragment variant */
+ (JS_CORE_REQ_ALL_OTHERS | BASE_JD_REQ_FS | BASE_JD_REQ_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS_8987),
+ 0},
+ {
+ /* 1: Compute variant, can use all coregroups */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT_8987),
+ (JS_CTX_REQ_ALL_OTHERS_8987),
+ 0},
+ {
+ /* 2: Compute variant, uses only coherent coregroups */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT_8987 | BASE_JD_REQ_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS_8987),
+ 0},
+ {
+ /* 3: Compute variant, might only use coherent coregroup, and must use tiling */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_COMPUTE_SLOT_8987 | BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_T),
+ (JS_CTX_REQ_ALL_OTHERS_8987),
+ 0},
+
+ {
+ /* 4: Variant guaranteed to support Compute contexts/atoms
+ *
+ * In the case of a context that's specified as 'Only Compute', it'll
+ * not allow Tiler or Fragment atoms, and so those get rejected */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_ONLY_COMPUTE_SLOT_8987 | BASE_JD_REQ_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS_8987 | KBASE_CTX_FLAG_HINT_ONLY_COMPUTE),
+ 0},
+
+ {
+ /* 5: Compute variant for specific-coherent-group targeting CoreGroup 0
+ * Specifically, this only allows 'Only Compute' contexts/atoms */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_ONLY_COMPUTE_SLOT_8987 | BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS_8987 | KBASE_CTX_FLAG_HINT_ONLY_COMPUTE),
+ 0 /* device_nr */
+ },
+ {
+ /* 6: Compute variant for specific-coherent-group targeting CoreGroup 1
+ * Specifically, this only allows 'Only Compute' contexts/atoms */
+ (JS_CORE_REQ_ALL_OTHERS | JS_CORE_REQ_ONLY_COMPUTE_SLOT_8987 | BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP),
+ (JS_CTX_REQ_ALL_OTHERS_8987 | KBASE_CTX_FLAG_HINT_ONLY_COMPUTE),
+ 1 /* device_nr */
+ },
+ /* Unused core_req variants, to bring the total up to a power of 2 */
+ {
+ /* 7 */
+ 0,
+ 0,
+ 0},
+};
+
+#define CORE_REQ_VARIANT_FRAGMENT 0
+#define CORE_REQ_VARIANT_COMPUTE_ALL_CORES 1
+#define CORE_REQ_VARIANT_COMPUTE_ONLY_COHERENT_GROUP 2
+#define CORE_REQ_VARIANT_COMPUTE_OR_TILING 3
+#define CORE_REQ_VARIANT_COMPUTE_SPECIFIC_COHERENT_0 5
+#define CORE_REQ_VARIANT_COMPUTE_SPECIFIC_COHERENT_1 6
+
+#define CORE_REQ_VARIANT_ONLY_COMPUTE_8987 4
+#define CORE_REQ_VARIANT_ONLY_COMPUTE_8987_SPECIFIC_COHERENT_0 5
+#define CORE_REQ_VARIANT_ONLY_COMPUTE_8987_SPECIFIC_COHERENT_1 6
+
+#define NUM_CORE_REQ_VARIANTS NELEMS(core_req_variants)
+#define NUM_CORE_REQ_VARIANTS_8987 NELEMS(core_req_variants_8987)
+
+/** Mappings between job slot and variant lists for Soft-Stoppable State */
+static const u32 variants_supported_ss_state[] = {
+ /* js[0] uses Fragment only */
+ (1u << CORE_REQ_VARIANT_FRAGMENT),
+
+ /* js[1] uses: Compute-all-cores, Compute-only-coherent, Compute-or-Tiling,
+ * compute-specific-coregroup-0 */
+ (1u << CORE_REQ_VARIANT_COMPUTE_ALL_CORES)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_ONLY_COHERENT_GROUP)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_OR_TILING)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_SPECIFIC_COHERENT_0),
+
+ /* js[2] uses: Compute-only-coherent, compute-specific-coregroup-1 */
+ (1u << CORE_REQ_VARIANT_COMPUTE_ONLY_COHERENT_GROUP)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_SPECIFIC_COHERENT_1)
+};
+
+/** Mappings between job slot and variant lists for Soft-Stoppable State, when
+ * we have atoms that can use all the cores (KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES)
+ * and there's more than one coregroup */
+static const u32 variants_supported_ss_allcore_state[] = {
+ /* js[0] uses Fragment only */
+ (1u << CORE_REQ_VARIANT_FRAGMENT),
+
+ /* js[1] uses: Compute-all-cores, Compute-only-coherent, Compute-or-Tiling,
+ * compute-specific-coregroup-0, compute-specific-coregroup-1 */
+ (1u << CORE_REQ_VARIANT_COMPUTE_ALL_CORES)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_ONLY_COHERENT_GROUP)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_OR_TILING)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_SPECIFIC_COHERENT_0)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_SPECIFIC_COHERENT_1),
+
+ /* js[2] not used */
+ 0
+};
+
+/** Mappings between job slot and variant lists for Soft-Stoppable State for
+ * BASE_HW_ISSUE_8987
+ *
+ * @note There is no 'allcores' variant of this, because this HW issue forces all
+ * atoms with BASE_JD_REQ_SPECIFIC_COHERENT_GROUP to use slot 2 anyway -
+ * hence regardless of whether a specific coregroup is targeted, those atoms
+ * still make progress. */
+static const u32 variants_supported_ss_state_8987[] = {
+ /* js[0] uses Fragment only */
+ (1u << CORE_REQ_VARIANT_FRAGMENT),
+
+ /* js[1] uses: Compute-all-cores, Compute-only-coherent, Compute-or-Tiling */
+ (1u << CORE_REQ_VARIANT_COMPUTE_ALL_CORES)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_ONLY_COHERENT_GROUP)
+ | (1u << CORE_REQ_VARIANT_COMPUTE_OR_TILING),
+
+ /* js[2] uses: All Only-compute atoms (including those targeting a
+ * specific coregroup), and nothing else. This is because their affinity
+ * must not intersect with non-only-compute atoms.
+ *
+ * As a side effect, this causes the 'device_nr' for atoms targeting a
+ * specific coregroup to be ignored */
+ (1u << CORE_REQ_VARIANT_ONLY_COMPUTE_8987)
+ | (1u << CORE_REQ_VARIANT_ONLY_COMPUTE_8987_SPECIFIC_COHERENT_0)
+ | (1u << CORE_REQ_VARIANT_ONLY_COMPUTE_8987_SPECIFIC_COHERENT_1)
+};
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued nor scheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+ KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+ KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+ KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+ KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: midgard/scripts/gen_cfs_weight_of_prio.c */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+ /* -20 */ 11, 14, 18, 23,
+ /* -16 */ 29, 36, 45, 56,
+ /* -12 */ 70, 88, 110, 137,
+ /* -8 */ 171, 214, 268, 335,
+ /* -4 */ 419, 524, 655, 819,
+ /* 0 */ 1024, 1280, 1600, 2000,
+ /* 4 */ 2500, 3125, 3906, 4883,
+ /* 8 */ 6104, 7630, 9538, 11923,
+ /* 12 */ 14904, 18630, 23288, 29110,
+ /* 16 */ 36388, 45485, 56856, 71070
+};
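+
+/* Each entry above is approximately 1024 * 1.25^nice for nice levels -20..19
+ * (e.g. nice 0 -> 1024, nice 4 -> 2500, nice -4 -> 419): a
+ * WEIGHT_FIXEDPOINT_SHIFT fixed-point weight that grows by ~25% per nice
+ * level. Illustrative note only; the table above is the reference data. */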
+
+/**
+ * @note There is nothing to stop the priority of the ctx containing \a
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+STATIC u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+ u64 time_delta_us;
+ int priority;
+
+ priority = ctx_info->process_priority + ctx_info->bag_priority;
+
+ /* Adjust runtime_us using priority weight if required */
+ if (priority != 0 && time_us != 0) {
+ int clamped_priority;
+
+ /* Clamp values to min..max weights */
+ if (priority > PROCESS_PRIORITY_MAX)
+ clamped_priority = PROCESS_PRIORITY_MAX;
+ else if (priority < PROCESS_PRIORITY_MIN)
+ clamped_priority = PROCESS_PRIORITY_MIN;
+ else
+ clamped_priority = priority;
+
+ /* Fixed point multiplication */
+ time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+ /* Remove fraction */
+ time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+ /* Make sure the time always increases */
+ if (0 == time_delta_us)
+ time_delta_us++;
+ } else {
+ time_delta_us = time_us;
+ }
+
+ return time_delta_us;
+}
+
+#if KBASE_TRACE_ENABLE
+STATIC int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+
+STATIC INLINE int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return refcnt;
+}
+#else /* KBASE_TRACE_ENABLE */
+STATIC int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+
+STATIC INLINE int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_DEBUG
+STATIC void kbasep_js_debug_check(struct kbasep_js_policy_cfs *policy_info, struct kbase_context *kctx, kbasep_js_check check_flag)
+{
+ /* This function uses the ternary operator and non-explicit comparisons,
+ * because it makes for much shorter, easier to read code */
+
+ if (check_flag & KBASEP_JS_CHECKFLAG_QUEUED) {
+ mali_bool is_queued;
+ mali_bool expect_queued;
+
+ is_queued = (kbasep_list_member_of(
+ &policy_info->ctx_queue_head,
+ &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list)) ?
+ MALI_TRUE : MALI_FALSE;
+
+ if (!is_queued)
+ is_queued = (kbasep_list_member_of(&policy_info->ctx_rt_queue_head, &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list)) ? MALI_TRUE : MALI_FALSE;
+
+ expect_queued = (check_flag & KBASEP_JS_CHECKFLAG_IS_QUEUED) ? MALI_TRUE : MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT_MSG(expect_queued == is_queued, "Expected context %p to be %s but it was %s\n", kctx, (expect_queued) ? "queued" : "not queued", (is_queued) ? "queued" : "not queued");
+
+ }
+
+ if (check_flag & KBASEP_JS_CHECKFLAG_SCHEDULED) {
+ mali_bool is_scheduled;
+ mali_bool expect_scheduled;
+
+ is_scheduled = (kbasep_list_member_of(
+ &policy_info->scheduled_ctxs_head,
+ &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list)) ?
+ MALI_TRUE : MALI_FALSE;
+
+ expect_scheduled = (check_flag & KBASEP_JS_CHECKFLAG_IS_SCHEDULED) ? MALI_TRUE : MALI_FALSE;
+ KBASE_DEBUG_ASSERT_MSG(expect_scheduled == is_scheduled, "Expected context %p to be %s but it was %s\n", kctx, (expect_scheduled) ? "scheduled" : "not scheduled", (is_scheduled) ? "scheduled" : "not scheduled");
+ }
+
+}
+#else /* CONFIG_MALI_DEBUG */
+STATIC void kbasep_js_debug_check(struct kbasep_js_policy_cfs *policy_info, struct kbase_context *kctx, kbasep_js_check check_flag)
+{
+ CSTD_UNUSED(policy_info);
+ CSTD_UNUSED(kctx);
+ CSTD_UNUSED(check_flag);
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+STATIC INLINE void set_slot_to_variant_lookup(u32 *bit_array, u32 slot_idx, u32 variants_supported)
+{
+ u32 overall_bit_idx = slot_idx * KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS;
+ u32 word_idx = overall_bit_idx / 32;
+ u32 bit_idx = overall_bit_idx % 32;
+
+ KBASE_DEBUG_ASSERT(slot_idx < BASE_JM_MAX_NR_SLOTS);
+ KBASE_DEBUG_ASSERT((variants_supported & ~LOOKUP_VARIANT_MASK) == 0);
+
+ bit_array[word_idx] |= variants_supported << bit_idx;
+}
+
+STATIC INLINE u32 get_slot_to_variant_lookup(u32 *bit_array, u32 slot_idx)
+{
+ u32 overall_bit_idx = slot_idx * KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS;
+ u32 word_idx = overall_bit_idx / 32;
+ u32 bit_idx = overall_bit_idx % 32;
+
+ u32 res;
+
+ KBASE_DEBUG_ASSERT(slot_idx < BASE_JM_MAX_NR_SLOTS);
+
+ res = bit_array[word_idx] >> bit_idx;
+ res &= LOOKUP_VARIANT_MASK;
+
+ return res;
+}
+
+/* Check the core_req_variants: make sure that every job slot is satisfied by
+ * one of the variants. This checks that cached_variant_idx_init will produce a
+ * valid result for jobs that make maximum use of the job slots.
+ *
+ * @note The checks are limited to the job slots - this does not check that
+ * every context requirement is covered (because some are intentionally not
+ * supported, such as KBASE_CTX_FLAG_SUBMIT_DISABLED) */
+#ifdef CONFIG_MALI_DEBUG
+STATIC void debug_check_core_req_variants(struct kbase_device *kbdev, struct kbasep_js_policy_cfs *policy_info)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u32 i;
+ int j;
+
+ js_devdata = &kbdev->js_data;
+
+ for (j = 0; j < kbdev->gpu_props.num_job_slots; ++j) {
+ base_jd_core_req job_core_req;
+ mali_bool found = MALI_FALSE;
+
+ job_core_req = js_devdata->js_reqs[j];
+ for (i = 0; i < policy_info->num_core_req_variants; ++i) {
+ base_jd_core_req var_core_req;
+
+ var_core_req = policy_info->core_req_variants[i].core_req;
+
+ if ((var_core_req & job_core_req) == job_core_req) {
+ found = MALI_TRUE;
+ break;
+ }
+ }
+
+ /* Early-out on any failure */
+ KBASE_DEBUG_ASSERT_MSG(found != MALI_FALSE, "Job slot %d features 0x%x not matched by core_req_variants. " "Rework core_req_variants and variants_supported_<...>_state[] to match\n", j, job_core_req);
+ }
+}
+#endif
+
+STATIC void build_core_req_variants(struct kbase_device *kbdev, struct kbasep_js_policy_cfs *policy_info)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(policy_info != NULL);
+ CSTD_UNUSED(kbdev);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ KBASE_DEBUG_ASSERT(NUM_CORE_REQ_VARIANTS_8987 <= KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS);
+
+ /* Assume a static set of variants */
+ memcpy(policy_info->core_req_variants, core_req_variants_8987, sizeof(core_req_variants_8987));
+
+ policy_info->num_core_req_variants = NUM_CORE_REQ_VARIANTS_8987;
+ } else {
+ KBASE_DEBUG_ASSERT(NUM_CORE_REQ_VARIANTS <= KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS);
+
+ /* Assume a static set of variants */
+ memcpy(policy_info->core_req_variants, core_req_variants, sizeof(core_req_variants));
+
+ policy_info->num_core_req_variants = NUM_CORE_REQ_VARIANTS;
+ }
+
+ KBASE_DEBUG_CODE(debug_check_core_req_variants(kbdev, policy_info));
+}
+
+STATIC void build_slot_lookups(struct kbase_device *kbdev, struct kbasep_js_policy_cfs *policy_info)
+{
+ u8 i;
+ const u32 *variants_supported_ss_for_this_hw = variants_supported_ss_state;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(policy_info != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->gpu_props.num_job_slots <= NELEMS(variants_supported_ss_state));
+ KBASE_DEBUG_ASSERT(kbdev->gpu_props.num_job_slots <= NELEMS(variants_supported_ss_allcore_state));
+ KBASE_DEBUG_ASSERT(kbdev->gpu_props.num_job_slots <= NELEMS(variants_supported_ss_state_8987));
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ variants_supported_ss_for_this_hw = variants_supported_ss_state_8987;
+
+ /* Given the static set of variants, provide a static set of lookups */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+ set_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_state, i, variants_supported_ss_for_this_hw[i]);
+
+ set_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_allcore_state, i, variants_supported_ss_allcore_state[i]);
+ }
+
+}
+
+STATIC mali_error cached_variant_idx_init(const struct kbasep_js_policy_cfs *policy_info, const struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+ struct kbasep_js_policy_cfs_job *job_info;
+ u32 i;
+ base_jd_core_req job_core_req;
+ u32 job_device_nr;
+ kbase_context_flags ctx_flags;
+ const struct kbasep_js_kctx_info *js_kctx_info;
+ const struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(policy_info != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+
+ kbdev = container_of(policy_info, const struct kbase_device, js_data.policy.cfs);
+ job_info = &atom->sched_info.cfs;
+ job_core_req = atom->core_req;
+ job_device_nr = atom->device_nr;
+ js_kctx_info = &kctx->jctx.sched_info;
+ ctx_flags = js_kctx_info->ctx.flags;
+
+ /* Initial check for atoms targeting a specific coregroup */
+ if ((job_core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) != MALI_FALSE && job_device_nr >= kbdev->gpu_props.num_core_groups) {
+ /* device_nr exceeds the number of coregroups - not allowed by
+ * @ref struct base_jd_atom_v2 API contract */
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ /* Pick a core_req variant that matches us. Since they're ordered by least
+ * restrictive first, it picks the least restrictive variant */
+ for (i = 0; i < policy_info->num_core_req_variants; ++i) {
+ base_jd_core_req var_core_req;
+ kbase_context_flags var_ctx_req;
+ u32 var_device_nr;
+ var_core_req = policy_info->core_req_variants[i].core_req;
+ var_ctx_req = policy_info->core_req_variants[i].ctx_req;
+ var_device_nr = policy_info->core_req_variants[i].device_nr;
+
+ if ((var_core_req & job_core_req) == job_core_req && (var_ctx_req & ctx_flags) == ctx_flags && ((var_core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) == MALI_FALSE || var_device_nr == job_device_nr)) {
+ job_info->cached_variant_idx = i;
+ return MALI_ERROR_NONE;
+ }
+ }
+
+ /* Could not find a matching requirement, this should only be caused by an
+ * attempt to attack the driver. */
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+STATIC mali_bool dequeue_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ u32 variants_supported,
+ struct kbase_jd_atom ** const katom_ptr,
+ int job_slot_idx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom_ptr != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ policy_info = &js_devdata->policy.cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ /* Only submit jobs from contexts that are allowed */
+ if (kbasep_js_is_submit_allowed(js_devdata, kctx) != MALI_FALSE) {
+ /* Check each variant in turn */
+ while (variants_supported != 0) {
+ long variant_idx;
+ struct list_head *job_list;
+
+ variant_idx = ffs(variants_supported) - 1;
+ job_list = &ctx_info->job_list_head[variant_idx];
+
+ if (!list_empty(job_list)) {
+ /* Found a context with a matching job */
+ {
+ struct kbase_jd_atom *front_atom =
+ list_entry(job_list->next, struct kbase_jd_atom, sched_info.cfs.list);
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JS_POLICY_DEQUEUE_JOB, front_atom->kctx, front_atom, front_atom->jc, job_slot_idx);
+ }
+ *katom_ptr = list_entry(job_list->next, struct kbase_jd_atom, sched_info.cfs.list);
+ list_del(job_list->next);
+
+ (*katom_ptr)->sched_info.cfs.ticks = 0;
+
+ /* Put this context at the back of the Run Pool */
+ list_del(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list);
+ list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, &policy_info->scheduled_ctxs_head);
+
+ return MALI_TRUE;
+ }
+
+ variants_supported &= ~(1u << variant_idx);
+ }
+ /* All variants checked by here */
+ }
+
+ /* The context does not have a matching job */
+
+ return MALI_FALSE;
+}
+
+/**
+ * The caller must hold the runpool_irq spinlock when calling this
+ */
+STATIC INLINE mali_bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ s8 nr_running_ctxs;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* nr_user_contexts_running is updated with the runpool_mutex. However, the
+ * locking in the caller gives us a barrier that ensures nr_user_contexts_running is
+ * up-to-date for reading */
+ nr_running_ctxs = js_devdata->nr_user_contexts_running;
+
+#ifdef CONFIG_MALI_DEBUG
+ if (js_devdata->softstop_always && nr_running_ctxs > 0) {
+ /* Debug support for allowing soft-stop on a single context */
+ return MALI_TRUE;
+ }
+#endif /* CONFIG_MALI_DEBUG */
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+ /* Timeouts would have to be 4x longer (due to micro-architectural design)
+ * to support OpenCL conformance tests, so only run the timer when there's:
+ * - 2 or more CL contexts, or
+ * - 1 or more GLES contexts
+ *
+ * NOTE: A context that has both Compute and Non-Compute jobs will be
+ * treated as an OpenCL context (hence, we don't check
+ * KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+ */
+ {
+ s8 nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE);
+ s8 nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs;
+
+ return (mali_bool) (nr_compute_ctxs >= 2 || nr_noncompute_ctxs > 0);
+ }
+ } else {
+ /* Run the timer callback whenever you have at least 1 context */
+ return (mali_bool) (nr_running_ctxs > 0);
+ }
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs *policy_info;
+ int s;
+ mali_bool reset_needed = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(timer != NULL);
+
+ policy_info = container_of(timer, struct kbasep_js_policy_cfs, scheduling_timer);
+ kbdev = container_of(policy_info, struct kbase_device, js_data.policy.cfs);
+ js_devdata = &kbdev->js_data;
+
+ /* Loop through the slots */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+ struct kbase_jm_slot *slot = &kbdev->jm_slots[s];
+ struct kbase_jd_atom *atom = NULL;
+
+ if (kbasep_jm_nr_jobs_submitted(slot) > 0) {
+ atom = kbasep_jm_peek_idx_submit_slot(slot, 0);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+
+ if (kbasep_jm_is_dummy_workaround_job(kbdev, atom) != MALI_FALSE) {
+ /* Prevent further use of the atom - never cause a soft-stop, hard-stop, or a GPU reset due to it. */
+ atom = NULL;
+ }
+ }
+
+ if (atom != NULL) {
+ /* The current version of the model doesn't support Soft-Stop */
+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+ u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+ u32 soft_stop_ticks, hard_stop_ticks, gpu_reset_ticks;
+ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ soft_stop_ticks = js_devdata->soft_stop_ticks_cl;
+ hard_stop_ticks = js_devdata->hard_stop_ticks_cl;
+ gpu_reset_ticks = js_devdata->gpu_reset_ticks_cl;
+ } else {
+ soft_stop_ticks = js_devdata->soft_stop_ticks;
+ hard_stop_ticks = js_devdata->hard_stop_ticks_ss;
+ gpu_reset_ticks = js_devdata->gpu_reset_ticks_ss;
+ }
+
+ /* Job is Soft-Stoppable */
+ if (ticks == soft_stop_ticks) {
+ int disjoint_threshold =
+ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+ u32 softstop_flags = 0u;
+ /* Job has been scheduled for at least js_devdata->soft_stop_ticks ticks.
+ * Soft stop the slot so we can run other jobs.
+ */
+ dev_dbg(kbdev->dev, "Soft-stop");
+
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ /* nr_user_contexts_running is updated with the runpool_mutex,
+ * but we can't take that here.
+ *
+ * However, if it's about to be increased then the new context
+ * can't run any jobs until they take the runpool_irq lock, so
+ * it's OK to observe the older value.
+ *
+ * Similarly, if it's about to be decreased, the last job from
+ * another context has already finished, so it's not too bad
+ * that we observe the older value and register a disjoint
+ * event when we try soft-stopping */
+ if (js_devdata->nr_user_contexts_running >= disjoint_threshold)
+ softstop_flags |= JS_COMMAND_SW_CAUSES_DISJOINT;
+ kbase_job_slot_softstop_swflags(kbdev,
+ s, atom, softstop_flags);
+#endif
+ } else if (ticks == hard_stop_ticks) {
+ /* Job has been scheduled for at least js_devdata->hard_stop_ticks_ss ticks.
+ * It should have been soft-stopped by now. Hard stop the slot.
+ */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", (unsigned long)ticks, (unsigned long)(js_devdata->scheduling_tick_ns / 1000000u));
+ kbase_job_slot_hardstop(atom->kctx, s, atom);
+#endif
+ } else if (ticks == gpu_reset_ticks) {
+ /* Job has been scheduled for at least js_devdata->gpu_reset_ticks_ss ticks.
+ * It should have left the GPU by now. Signal that the GPU needs to be reset.
+ */
+ reset_needed = MALI_TRUE;
+ }
+#else /* !CINSTR_DUMPING_ENABLED */
+ /* NOTE: During CINSTR_DUMPING_ENABLED, we use the alternate timeouts, which
+ * make the hard-stop and GPU reset timeouts much longer. We also ensure that
+ * we don't soft-stop at all. */
+ if (ticks == js_devdata->soft_stop_ticks) {
+ /* Job has been scheduled for at least js_devdata->soft_stop_ticks.
+ * We do not soft-stop during CINSTR_DUMPING_ENABLED, however.
+ */
+ dev_dbg(kbdev->dev, "Soft-stop");
+ } else if (ticks == js_devdata->hard_stop_ticks_nss) {
+ /* Job has been scheduled for at least js_devdata->hard_stop_ticks_nss ticks.
+ * Hard stop the slot.
+ */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", (unsigned long)ticks, (unsigned long)(js_devdata->scheduling_tick_ns / 1000000u));
+ kbase_job_slot_hardstop(atom->kctx, s, atom);
+#endif
+ } else if (ticks == js_devdata->gpu_reset_ticks_nss) {
+ /* Job has been scheduled for at least js_devdata->gpu_reset_ticks_nss ticks.
+ * It should have left the GPU by now. Signal that the GPU needs to be reset.
+ */
+ reset_needed = MALI_TRUE;
+ }
+#endif /* !CINSTR_DUMPING_ENABLED */
+ }
+ }
+ }
+#if KBASE_GPU_RESET_EN
+ if (reset_needed) {
+ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS/NSS timeout hit). Issuing GPU soft-reset to resolve.");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ kbase_reset_gpu_locked(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* the timer is re-issued if there are contexts in the run-pool */
+
+ if (timer_callback_should_run(kbdev) != MALI_FALSE) {
+ hrtimer_start(&policy_info->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_tick_ns), HRTIMER_MODE_REL);
+ } else {
+ KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_END, NULL, NULL, 0u, 0u);
+ /* timer_running state is updated by kbasep_js_policy_runpool_timers_sync() */
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * Non-private functions
+ */
+
+mali_error kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs *policy_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+ policy_info = &js_devdata->policy.cfs;
+
+ INIT_LIST_HEAD(&policy_info->ctx_queue_head);
+ INIT_LIST_HEAD(&policy_info->scheduled_ctxs_head);
+ INIT_LIST_HEAD(&policy_info->ctx_rt_queue_head);
+
+ atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+ atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+ hrtimer_init(&policy_info->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ policy_info->scheduling_timer.function = timer_callback;
+
+ policy_info->timer_running = MALI_FALSE;
+ policy_info->head_runtime_us = 0;
+
+ /* Build up the core_req variants */
+ build_core_req_variants(kbdev, policy_info);
+ /* Build the slot to variant lookups */
+ build_slot_lookups(kbdev, policy_info);
+
+ return MALI_ERROR_NONE;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ policy_info = &js_policy->cfs;
+
+ /* ASSERT that there are no contexts queued */
+ KBASE_DEBUG_ASSERT(list_empty(&policy_info->ctx_queue_head));
+ KBASE_DEBUG_ASSERT(KBASEP_JS_RUNTIME_EMPTY == atomic64_read(&policy_info->least_runtime_us));
+
+ /* ASSERT that there are no contexts scheduled */
+ KBASE_DEBUG_ASSERT(list_empty(&policy_info->scheduled_ctxs_head));
+
+ /* ASSERT that there are no contexts queued */
+ KBASE_DEBUG_ASSERT(list_empty(&policy_info->ctx_rt_queue_head));
+ KBASE_DEBUG_ASSERT(KBASEP_JS_RUNTIME_EMPTY == atomic64_read(&policy_info->rt_least_runtime_us));
+
+ hrtimer_cancel(&policy_info->scheduling_timer);
+}
+
+mali_error kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ u32 i;
+ int policy;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ policy_info = &kbdev->js_data.policy.cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ for (i = 0; i < policy_info->num_core_req_variants; ++i)
+ INIT_LIST_HEAD(&ctx_info->job_list_head[i]);
+
+ policy = current->policy;
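+ /* Illustrative note: the branches below map the task's scheduling
+ * parameters onto a nice-style value in the range -20..19. SCHED_FIFO and
+ * SCHED_RR tasks land in -20..-1 (higher rt_priority giving a more negative
+ * value), while other tasks recover their nice value from static_prio. */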
+ if (policy == SCHED_FIFO || policy == SCHED_RR) {
+ ctx_info->process_rt_policy = MALI_TRUE;
+ ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+ } else {
+ ctx_info->process_rt_policy = MALI_FALSE;
+ ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+ }
+
+ ctx_info->bag_total_priority = 0;
+ ctx_info->bag_total_nr_atoms = 0;
+
+ /* Initial runtime (relative to least-run context runtime)
+ *
+ * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+ * queue mutex to issue memory barriers - it also ensures future updates to
+ * head_runtime_us occur strictly after this context is initialized */
+ mutex_lock(&js_devdata->queue_mutex);
+
+ /* No need to hold the runpool_irq.lock here, because we're initializing
+ * the value, and the context is definitely not being updated in the
+ * runpool at this point. The queue_mutex ensures the memory barrier. */
+ ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ return MALI_ERROR_NONE;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ u32 i;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ {
+ struct kbase_device *kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+ }
+
+ /* ASSERT that no jobs are present */
+ for (i = 0; i < policy_info->num_core_req_variants; ++i)
+ KBASE_DEBUG_ASSERT(list_empty(&ctx_info->job_list_head[i]));
+
+ /* No work to do */
+}
+
+/*
+ * Context Management
+ */
+
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbase_context *head_ctx;
+ struct kbase_context *list_kctx = NULL;
+ struct kbasep_js_device_data *js_devdata;
+ struct list_head *queue_head;
+ struct list_head *pos;
+ struct kbase_device *kbdev;
+ atomic64_t *least_runtime_us;
+ u64 head_runtime;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_ENQUEUE_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ /* ASSERT about scheduled-ness/queued-ness */
+ kbasep_js_debug_check(policy_info, kctx, KBASEP_JS_CHECK_NOTQUEUED);
+
+ /* Clamp the runtime to prevent DoS attacks through "stored-up" runtime */
+ if (policy_info->head_runtime_us > ctx_info->runtime_us + (u64) js_devdata->cfs_ctx_runtime_min_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u)) {
+ /* No need to hold the runpool_irq.lock here, because we're essentially
+ * initializing the value, and the context is definitely not being updated in the
+ * runpool at this point. The queue_mutex held by the caller ensures the memory
+ * barrier. */
+ ctx_info->runtime_us = policy_info->head_runtime_us - (u64) js_devdata->cfs_ctx_runtime_min_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u);
+ }
+
+ /* Find the position where the context should be enqueued */
+ if (ctx_info->process_rt_policy) {
+ queue_head = &policy_info->ctx_rt_queue_head;
+ least_runtime_us = &policy_info->rt_least_runtime_us;
+ } else {
+ queue_head = &policy_info->ctx_queue_head;
+ least_runtime_us = &policy_info->least_runtime_us;
+ }
+
+ if (list_empty(queue_head)) {
+ list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, queue_head);
+ } else {
+ list_for_each(pos, queue_head) {
+ struct kbasep_js_policy_cfs_ctx *list_ctx_info;
+
+ list_kctx = list_entry(pos, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+ list_ctx_info = &list_kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0)
+ break;
+
+ if ((list_ctx_info->runtime_us > ctx_info->runtime_us) && ((list_kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) == 0))
+ break;
+ }
+
+ /* Add the context to the queue */
+ list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, &list_kctx->jctx.sched_info.runpool.policy_ctx.cfs.list);
+ }
+
+ /* Ensure least_runtime_us is up to date */
+ head_ctx = list_entry(queue_head->next, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+ head_runtime = head_ctx->jctx.sched_info.runpool.policy_ctx.cfs.runtime_us;
+ atomic64_set(least_runtime_us, head_runtime);
+}
+
+mali_bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbase_context *head_ctx;
+ struct list_head *queue_head;
+ atomic64_t *least_runtime_us;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx_ptr != NULL);
+
+ policy_info = &js_policy->cfs;
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+
+ /* attempt to dequeue from the 'realtime' queue first */
+ if (list_empty(&policy_info->ctx_rt_queue_head)) {
+ if (list_empty(&policy_info->ctx_queue_head)) {
+ /* Nothing to dequeue */
+ return MALI_FALSE;
+ } else {
+ queue_head = &policy_info->ctx_queue_head;
+ least_runtime_us = &policy_info->least_runtime_us;
+ }
+ } else {
+ queue_head = &policy_info->ctx_rt_queue_head;
+ least_runtime_us = &policy_info->rt_least_runtime_us;
+ }
+
+ /* Contexts are dequeued from the front of the queue */
+ *kctx_ptr = list_entry(queue_head->next, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+ /* If dequeuing will empty the list, then set least_runtime_us prior to deletion */
+ if (queue_head->next->next == queue_head)
+ atomic64_set(least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+ list_del(queue_head->next);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_DEQUEUE_HEAD_CTX, *kctx_ptr, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, *kctx_ptr));
+
+ /* Update the head runtime */
+ if (!list_empty(queue_head)) {
+ u64 head_runtime;
+
+ head_ctx = list_entry(queue_head->next, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+
+ /* No need to hold the runpool_irq.lock here for reading - the
+ * context is definitely not being updated in the runpool at this
+ * point. The queue_mutex held by the caller ensures the memory barrier. */
+ head_runtime = head_ctx->jctx.sched_info.runpool.policy_ctx.cfs.runtime_us;
+
+ if (head_runtime > policy_info->head_runtime_us)
+ policy_info->head_runtime_us = head_runtime;
+
+ atomic64_set(least_runtime_us, head_runtime);
+ }
+
+ return MALI_TRUE;
+}
+
+mali_bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ mali_bool is_present;
+ struct list_head *queue_head;
+ atomic64_t *least_runtime_us;
+ struct list_head *qhead;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+
+ if (ctx_info->process_rt_policy) {
+ queue_head = &policy_info->ctx_rt_queue_head;
+ least_runtime_us = &policy_info->rt_least_runtime_us;
+ } else {
+ queue_head = &policy_info->ctx_queue_head;
+ least_runtime_us = &policy_info->least_runtime_us;
+ }
+
+ qhead = queue_head;
+
+ is_present = kbasep_list_member_of(qhead, &kctx->jctx.sched_info.runpool.policy_ctx.cfs.list);
+
+ KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, JS_POLICY_TRY_EVICT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx), is_present);
+
+ if (is_present != MALI_FALSE) {
+ struct kbase_context *head_ctx;
+
+ qhead = queue_head;
+
+ /* If dequeuing will empty the list, then set least_runtime_us prior to deletion */
+ if (queue_head->next->next == queue_head)
+ atomic64_set(least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+ /* Remove the context */
+ list_del(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list);
+
+ qhead = queue_head;
+ /* Update the head runtime */
+ if (!list_empty(qhead)) {
+ u64 head_runtime;
+
+ head_ctx = list_entry(qhead->next, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+
+ /* No need to hold the runpool_irq.lock here for reading - the
+ * context is definitely not being updated in the runpool at this
+ * point. The queue_mutex held by the caller ensures the memory barrier. */
+ head_runtime = head_ctx->jctx.sched_info.runpool.policy_ctx.cfs.runtime_us;
+
+ if (head_runtime > policy_info->head_runtime_us)
+ policy_info->head_runtime_us = head_runtime;
+
+ atomic64_set(least_runtime_us, head_runtime);
+ }
+ }
+
+ return is_present;
+}
+
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback, mali_bool detach_jobs)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbase_device *kbdev;
+ u32 i;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ /* Invoke the callback on the jobs of each variant in turn */
+ for (i = 0; i < policy_info->num_core_req_variants; ++i) {
+ struct list_head *job_list;
+ struct kbase_jd_atom *atom;
+ struct kbase_jd_atom *tmp_iter;
+ job_list = &ctx_info->job_list_head[i];
+ /* Invoke callback on all kbase_jd_atoms in this list, optionally
+ * removing them from the list */
+ list_for_each_entry_safe(atom, tmp_iter, job_list, sched_info.cfs.list) {
+ if (detach_jobs)
+ list_del(&atom->sched_info.cfs.list);
+ callback(kbdev, atom);
+ }
+ }
+
+}
+
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ js_devdata = container_of(js_policy, struct kbasep_js_device_data, policy);
+
+ kbdev = kctx->kbdev;
+
+ {
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_RUNPOOL_ADD_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx));
+ }
+
+ /* ASSERT about scheduled-ness/queued-ness */
+ kbasep_js_debug_check(policy_info, kctx, KBASEP_JS_CHECK_NOTSCHEDULED);
+
+ /* All enqueued contexts go to the back of the runpool */
+ list_add_tail(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list, &policy_info->scheduled_ctxs_head);
+
+ if (timer_callback_should_run(kbdev) != MALI_FALSE && policy_info->timer_running == MALI_FALSE) {
+ hrtimer_start(&policy_info->scheduling_timer, HR_TIMER_DELAY_NSEC(js_devdata->scheduling_tick_ns), HRTIMER_MODE_REL);
+
+ KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u);
+ policy_info->timer_running = MALI_TRUE;
+ }
+}
+
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+
+ {
+ struct kbase_device *kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_RUNPOOL_REMOVE_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx));
+ }
+
+ /* ASSERT about scheduled-ness/queued-ness */
+ kbasep_js_debug_check(policy_info, kctx, KBASEP_JS_CHECK_SCHEDULED);
+
+ /* No searching or significant list maintenance required to remove this context */
+ list_del(&kctx->jctx.sched_info.runpool.policy_ctx.cfs.list);
+}
+
+mali_bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbasep_js_device_data *js_devdata;
+ u64 least_runtime_us;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+ js_devdata = container_of(js_policy, struct kbasep_js_device_data, policy);
+
+ if (ctx_info->process_rt_policy)
+ least_runtime_us = atomic64_read(&policy_info->rt_least_runtime_us);
+ else
+ least_runtime_us = atomic64_read(&policy_info->least_runtime_us);
+
+ if (KBASEP_JS_RUNTIME_EMPTY == least_runtime_us) {
+ /* Queue is empty */
+ return MALI_FALSE;
+ }
+
+ if ((least_runtime_us + priority_weight(ctx_info, (u64) (js_devdata->ctx_timeslice_ns / 1000u)))
+ < ctx_info->runtime_us) {
+ /* The context is scheduled out if it's not the least-run context anymore.
+ * The "real" head runtime is used instead of the cached runtime so the current
+ * context is not scheduled out when there are fewer contexts than address spaces.
+ */
+ return MALI_TRUE;
+ }
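+ /* Worked example (illustrative values only): if least_runtime_us is
+  * 1,000,000 and ctx_timeslice_ns is 50,000,000 (an unweighted slice of
+  * 50,000 us), and assuming priority_weight() leaves that slice unchanged,
+  * the context is scheduled out once its own runtime_us exceeds 1,050,000 us,
+  * i.e. once it is more than one weighted timeslice ahead of the least-run
+  * queued context. */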
+
+ return MALI_FALSE;
+}
+
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy)
+{
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+
+ policy_info = &js_policy->cfs;
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ js_devdata = &kbdev->js_data;
+
+ if (!timer_callback_should_run(kbdev)) {
+ unsigned long flags;
+
+ /* If the timer is running now, synchronize with it by
+ * locking/unlocking its spinlock, to ensure it's not using an old value
+ * from timer_callback_should_run() */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* From now on, the return value of timer_callback_should_run() will also
+ * cause the timer not to requeue itself. Its return value cannot
+ * change, because it depends on variables updated with the
+ * runpool_mutex held, which the caller of this must also hold */
+ hrtimer_cancel(&policy_info->scheduling_timer);
+
+ policy_info->timer_running = MALI_FALSE;
+ }
+}
+
+/*
+ * Job Chain Management
+ */
+
+mali_error kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ const struct kbasep_js_policy_cfs *policy_info;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+
+ /* Determine the job's index into the job list head; this returns an error if
+ * the atom is malformed, so the failure is reported back to the caller. */
+ return cached_variant_idx_init(policy_info, kctx, katom);
+}
+
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ /* Adjust context priority to include the new job */
+ ctx_info->bag_total_nr_atoms++;
+ ctx_info->bag_total_priority += katom->nice_prio;
+
+ /* Get average priority and convert to NICE range -20..19 */
+ if (ctx_info->bag_total_nr_atoms)
+ ctx_info->bag_priority = (ctx_info->bag_total_priority / ctx_info->bag_total_nr_atoms) - 20;
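+ /* Worked example (illustrative values, assuming katom->nice_prio is in the
+  * 0..39 range): three atoms with nice_prio 10, 20 and 30 give
+  * bag_total_priority = 60, so bag_priority = (60 / 3) - 20 = 0, i.e. the
+  * default NICE level for the whole bag. */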
+}
+
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ CSTD_UNUSED(js_policy);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ /* Adjust context priority to no longer include removed job */
+ KBASE_DEBUG_ASSERT(ctx_info->bag_total_nr_atoms > 0);
+ ctx_info->bag_total_nr_atoms--;
+ ctx_info->bag_total_priority -= katom->nice_prio;
+ KBASE_DEBUG_ASSERT(ctx_info->bag_total_priority >= 0);
+
+ /* Get average priority and convert to NICE range -20..19 */
+ if (ctx_info->bag_total_nr_atoms)
+ ctx_info->bag_priority = (ctx_info->bag_total_priority / ctx_info->bag_total_nr_atoms) - 20;
+}
+KBASE_EXPORT_TEST_API(kbasep_js_policy_deregister_job)
+
+mali_bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev,
+ int job_slot_idx,
+ struct kbase_jd_atom ** const katom_ptr)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbase_context *kctx;
+ u32 variants_supported;
+ struct list_head *pos;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom_ptr != NULL);
+ KBASE_DEBUG_ASSERT(job_slot_idx < BASE_JM_MAX_NR_SLOTS);
+
+ js_devdata = &kbdev->js_data;
+ policy_info = &js_devdata->policy.cfs;
+
+ /* Get the variants for this slot */
+ if (kbdev->gpu_props.num_core_groups > 1 && kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) != MALI_FALSE) {
+ /* SS-allcore state, and there's more than one coregroup */
+ variants_supported = get_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_allcore_state, job_slot_idx);
+ } else {
+ /* SS-state */
+ variants_supported = get_slot_to_variant_lookup(policy_info->slot_to_variant_lookup_ss_state, job_slot_idx);
+ }
+
+ /* On the first pass through the runpool we consider the realtime priority jobs */
+ list_for_each(pos, &policy_info->scheduled_ctxs_head) {
+ kctx = list_entry(pos, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+ if (kctx->jctx.sched_info.runpool.policy_ctx.cfs.process_rt_policy) {
+ if (dequeue_job(kbdev, kctx, variants_supported, katom_ptr, job_slot_idx)) {
+ /* Realtime policy job matched */
+ return MALI_TRUE;
+ }
+ }
+ }
+
+ /* On the second pass through the runpool we consider the non-realtime priority jobs */
+ list_for_each(pos, &policy_info->scheduled_ctxs_head) {
+ kctx = list_entry(pos, struct kbase_context, jctx.sched_info.runpool.policy_ctx.cfs.list);
+ if (kctx->jctx.sched_info.runpool.policy_ctx.cfs.process_rt_policy == MALI_FALSE) {
+ if (dequeue_job(kbdev, kctx, variants_supported, katom_ptr, job_slot_idx)) {
+ /* Non-realtime policy job matched */
+ return MALI_TRUE;
+ }
+ }
+ }
+
+ /* By this point, no contexts had a matching job */
+ return MALI_FALSE;
+}
+
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom)
+{
+ struct kbasep_js_policy_cfs_job *job_info;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbase_context *parent_ctx;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ parent_ctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+ job_info = &katom->sched_info.cfs;
+ ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ {
+ struct kbase_device *kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ KBASE_TRACE_ADD(kbdev, JS_POLICY_ENQUEUE_JOB, katom->kctx, katom, katom->jc, 0);
+ }
+ list_add_tail(&katom->sched_info.cfs.list, &ctx_info->job_list_head[job_info->cached_variant_idx]);
+}
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbase_context *parent_ctx;
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ CSTD_UNUSED(js_policy);
+
+ parent_ctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+ ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
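+ /* priority_weight() (defined elsewhere in this driver, not shown here)
+  * scales the raw GPU time spent by a weight derived from the context's bag
+  * priority, so contexts at different NICE levels accrue virtual runtime at
+  * different rates and are rotated through the runpool accordingly. */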
+
+ katom->time_spent_us += time_spent_us;
+}
+
+mali_bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+ struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+ struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+ KBASE_DEBUG_ASSERT(current_ctx != NULL);
+ KBASE_DEBUG_ASSERT(new_ctx != NULL);
+ CSTD_UNUSED(js_policy);
+
+ current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+ new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ if ((current_ctx_info->process_rt_policy == MALI_FALSE) && (new_ctx_info->process_rt_policy == MALI_TRUE))
+ return MALI_TRUE;
+
+ if ((current_ctx_info->process_rt_policy == new_ctx_info->process_rt_policy) && (current_ctx_info->bag_priority > new_ctx_info->bag_priority))
+ return MALI_TRUE;
+
+ return MALI_FALSE;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100755
index 000000000000..099f25799b4b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,167 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy_cfs.h
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASE_JS_POLICY_AVAILABLE_CFS
+
+/** @addtogroup base_api
+ * @{ */
+/** @addtogroup base_kbase_api
+ * @{ */
+/** @addtogroup kbase_js_policy
+ * @{ */
+
+/**
+ * Internally, this policy keeps a few queues for different variants
+ * of core requirements, which are used to decide how to schedule onto the
+ * different job slots.
+ *
+ * Must be a power of 2 to keep the lookup math simple
+ */
+#define KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS_LOG2 3
+#define KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS (1u << KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS_LOG2)
+
+/** Bits needed in the lookup to support all slots */
+#define KBASEP_JS_VARIANT_LOOKUP_BITS_NEEDED (BASE_JM_MAX_NR_SLOTS * KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS)
+/** Number of u32s needed in the lookup array to support all slots */
+#define KBASEP_JS_VARIANT_LOOKUP_WORDS_NEEDED ((KBASEP_JS_VARIANT_LOOKUP_BITS_NEEDED + 31) / 32)
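+/* Sizing example: with KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS = 8 variants and,
+ * illustratively, BASE_JM_MAX_NR_SLOTS = 16 job slots, the lookup needs
+ * 16 * 8 = 128 bits, i.e. (128 + 31) / 32 = 4 u32 words per lookup array. */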
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+typedef struct kbasep_js_policy_cfs {
+ /** List of all contexts in the context queue. Hold
+ * kbasep_js_device_data::queue_mutex whilst accessing. */
+ struct list_head ctx_queue_head;
+
+ /** List of all contexts in the realtime (priority) context queue */
+ struct list_head ctx_rt_queue_head;
+
+ /** List of scheduled contexts. Hold kbasep_js_device_data::runpool_irq::lock
+ * (a spinlock) whilst accessing */
+ struct list_head scheduled_ctxs_head;
+
+ /** Number of valid elements in the core_req_variants member, and the
+ * kbasep_js_policy_cfs_ctx::job_list_head array */
+ u32 num_core_req_variants;
+
+ /** Variants of the core requirements */
+ struct kbasep_atom_req core_req_variants[KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS];
+
+ /* Per-job-slot lookups of which core_req_variants match that slot */
+ u32 slot_to_variant_lookup_ss_state[KBASEP_JS_VARIANT_LOOKUP_WORDS_NEEDED];
+ u32 slot_to_variant_lookup_ss_allcore_state[KBASEP_JS_VARIANT_LOOKUP_WORDS_NEEDED];
+
+ /* The timer tick used for rescheduling jobs */
+ struct hrtimer scheduling_timer;
+
+ /* Is the timer running?
+ *
+ * The kbasep_js_device_data::runpool_mutex must be held whilst modifying this. */
+ mali_bool timer_running;
+
+ /* Number of us the least-run context has been running for
+ *
+ * The kbasep_js_device_data::queue_mutex must be held whilst updating this
+ * Reads are possible without this mutex, but an older value might be read
+ * if no memory barriers are issued beforehand. */
+ u64 head_runtime_us;
+
+ /* Number of us the least-run context in the context queue has been running for.
+ * -1 if context queue is empty. */
+ atomic64_t least_runtime_us;
+
+ /* Number of us the least-run context in the realtime (priority) context queue
+ * has been running for. -1 if realtime context queue is empty. */
+ atomic64_t rt_least_runtime_us;
+} kbasep_js_policy_cfs;
+
+/**
+ * This policy contains a single linked list of all contexts.
+ */
+typedef struct kbasep_js_policy_cfs_ctx {
+ /** Link implementing the Policy's Queue, and Currently Scheduled list */
+ struct list_head list;
+
+ /** Job lists for use when in the Run Pool - only
+ * kbasep_js_policy_cfs::num_core_req_variants of them are used. We still need to track
+ * the jobs when we're not in the runpool, so this member is accessed from
+ * outside the policy queue (for the first job), inside the policy queue,
+ * and inside the runpool.
+ *
+ * If the context is in the runpool, then this must only be accessed with
+ * kbasep_js_device_data::runpool_irq::lock held
+ *
+ * Jobs are still added to this list even when the context is not in the
+ * runpool. In that case, the kbasep_js_kctx_info::ctx::jsctx_mutex must be
+ * held before accessing this. */
+ struct list_head job_list_head[KBASEP_JS_MAX_NR_CORE_REQ_VARIANTS];
+
+ /** Number of us this context has been running for
+ *
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held
+ * whilst updating this. Initializing will occur on context init and
+ * context enqueue (which can only occur in one thread at a time), but
+ * multi-thread access only occurs while the context is in the runpool.
+ *
+ * Reads are possible without this spinlock, but an older value might be read
+ * if no memory barriers are issued beforehand */
+ u64 runtime_us;
+
+ /* The calling process's scheduling policy is a realtime scheduler, so the priority queue is used.
+ * Non-mutable after ctx init */
+ mali_bool process_rt_policy;
+ /* Calling process NICE priority */
+ int process_priority;
+ /* Average NICE priority of all atoms in bag:
+ * Hold the kbasep_js_kctx_info::ctx::jsctx_mutex when accessing */
+ int bag_priority;
+ /* Total NICE priority of all atoms in bag
+ * Hold the kbasep_js_kctx_info::ctx::jsctx_mutex when accessing */
+ int bag_total_priority;
+ /* Total number of atoms in the bag
+ * Hold the kbasep_js_kctx_info::ctx::jsctx_mutex when accessing */
+ int bag_total_nr_atoms;
+
+} kbasep_js_policy_cfs_ctx;
+
+/**
+ * In this policy, each Job is part of at most one of the per_corereq lists
+ */
+typedef struct kbasep_js_policy_cfs_job {
+ struct list_head list; /**< Link implementing the Run Pool list/Jobs owned by the ctx */
+ u32 cached_variant_idx; /**< Cached index of the list this should be entered into on re-queue */
+
+ /** Number of ticks that this job has been executing for
+ *
+ * To access this, the kbasep_js_device_data::runpool_irq::lock must be held */
+ u32 ticks;
+} kbasep_js_policy_cfs_job;
+
+ /** @} *//* end group kbase_js_policy */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100755
index 000000000000..0d024f279761
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if defined(MALI_KERNEL_TEST_API)
+#if (1 == MALI_KERNEL_TEST_API)
+#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func);
+#else
+#define KBASE_EXPORT_TEST_API(func)
+#endif
+#else
+#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func);
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100755
index 000000000000..435d6bec55d1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,1310 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_gator.h>
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+STATIC int kbase_region_tracker_match_zone(struct kbase_va_region *reg1, struct kbase_va_region *reg2)
+{
+ return ((reg1->flags & KBASE_REG_ZONE_MASK) == (reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone)
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+ u64 start_pfn = new_reg->start_pfn;
+ struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+ struct rb_node *parent = NULL;
+
+ /* Find the right place in the tree using tree search */
+ while (*link) {
+ struct kbase_va_region *old_reg;
+
+ parent = *link;
+ old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+ /* RBTree requires no duplicate entries. */
+ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+ if (old_reg->start_pfn > start_pfn)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Put the new node there, and rebalance tree */
+ rb_link_node(&(new_reg->rblink), parent, link);
+ rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+ struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ u64 end_pfn = start_pfn + nr_pages;
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ u64 tmp_start_pfn, tmp_end_pfn;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ tmp_start_pfn = reg->start_pfn;
+ tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+ /* If start is lower than this, go left. */
+ if (start_pfn < tmp_start_pfn)
+ rbnode = rbnode->rb_left;
+ /* If end is higher than this, then go right. */
+ else if (end_pfn > tmp_end_pfn)
+ rbnode = rbnode->rb_right;
+ else /* Enclosing */
+ return reg;
+ }
+
+ return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, mali_addr64 gpu_addr)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ u64 tmp_start_pfn, tmp_end_pfn;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ tmp_start_pfn = reg->start_pfn;
+ tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+ /* If start is lower than this, go left. */
+ if (gpu_pfn < tmp_start_pfn)
+ rbnode = rbnode->rb_left;
+ /* If end is higher than this, then go right. */
+ else if (gpu_pfn >= tmp_end_pfn)
+ rbnode = rbnode->rb_right;
+ else /* Enclosing */
+ return reg;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address)
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, mali_addr64 gpu_addr)
+{
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if (reg->start_pfn > gpu_pfn)
+ rbnode = rbnode->rb_left;
+ else if (reg->start_pfn < gpu_pfn)
+ rbnode = rbnode->rb_right;
+ else if (gpu_pfn == reg->start_pfn)
+ return reg;
+ else
+ rbnode = NULL;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address)
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ /* Note that this search is a linear search, as we do not have a target
+ address in mind, so it does not benefit from the rbtree search */
+ rbnode = rb_first(&(kctx->reg_rbtree));
+ while (rbnode) {
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if ((reg->nr_pages >= nr_pages) &&
+ (reg->flags & KBASE_REG_FREE) &&
+ kbase_region_tracker_match_zone(reg, reg_reqs)) {
+ /* Check alignment */
+ u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
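+ /* Round-up example (illustrative values): with align = 16 pages and
+  * reg->start_pfn = 0x1003, start_pfn becomes (0x1003 + 0xF) & ~0xF =
+  * 0x1010, the first suitably aligned PFN inside the free region. */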
+
+ if ((start_pfn >= reg->start_pfn) &&
+ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+ return reg;
+ }
+ rbnode = rb_next(rbnode);
+ }
+
+ return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions. It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+STATIC mali_error kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct rb_node *rbprev;
+ struct kbase_va_region *prev = NULL;
+ struct rb_node *rbnext;
+ struct kbase_va_region *next = NULL;
+
+ int merged_front = 0;
+ int merged_back = 0;
+ mali_error err = MALI_ERROR_NONE;
+
+ /* Try to merge with the previous block first */
+ rbprev = rb_prev(&(reg->rblink));
+ if (rbprev) {
+ prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+ if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+ /* We're compatible with the previous VMA, merge with it */
+ prev->nr_pages += reg->nr_pages;
+ rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+ reg = prev;
+ merged_front = 1;
+ }
+ }
+
+ /* Try to merge with the next block second */
+ /* Note we do the lookup here as the tree may have been rebalanced. */
+ rbnext = rb_next(&(reg->rblink));
+ if (rbnext) {
+ /* We're compatible with the next VMA, merge with it */
+ next = rb_entry(rbnext, struct kbase_va_region, rblink);
+ if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+ next->start_pfn = reg->start_pfn;
+ next->nr_pages += reg->nr_pages;
+ rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+ merged_back = 1;
+ if (merged_front) {
+ /* We already merged with prev, free it */
+ kbase_free_alloced_region(reg);
+ }
+ }
+ }
+
+ /* If we failed to merge then we need to add a new block */
+ if (!(merged_front || merged_back)) {
+ /*
+ * We didn't merge anything. Add a new free
+ * placeholder and remove the original one.
+ */
+ struct kbase_va_region *free_reg;
+
+ free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+ if (!free_reg) {
+ err = MALI_ERROR_OUT_OF_MEMORY;
+ goto out;
+ }
+
+ rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+ }
+
+ out:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region)
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static mali_error kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+ mali_error err = MALI_ERROR_NONE;
+
+ /* Must be a free region */
+ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+ /* start_pfn should be contained within at_reg */
+ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+ /* at least nr_pages from start_pfn should be contained within at_reg */
+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+ new_reg->start_pfn = start_pfn;
+ new_reg->nr_pages = nr_pages;
+
+ /* The new region covers the old one exactly, so swap it in and delete the old one. */
+ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+ kbase_free_alloced_region(at_reg);
+ }
+ /* New region replaces the start of the old one, so insert before. */
+ else if (at_reg->start_pfn == start_pfn) {
+ at_reg->start_pfn += nr_pages;
+ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+ at_reg->nr_pages -= nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_reg);
+ }
+ /* New region replaces the end of the old one, so insert after. */
+ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+ at_reg->nr_pages -= nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_reg);
+ }
+ /* New region splits the old one, so insert and create new */
+ else {
+ struct kbase_va_region *new_front_reg;
+
+ new_front_reg = kbase_alloc_free_region(kctx,
+ at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
+
+ if (new_front_reg) {
+ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+ at_reg->start_pfn = start_pfn + nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_front_reg);
+ kbase_region_tracker_insert(kctx, new_reg);
+ } else {
+ err = MALI_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+mali_error kbase_add_va_region(struct kbase_context *kctx,
+ struct kbase_va_region *reg, mali_addr64 addr,
+ size_t nr_pages, size_t align)
+{
+ struct kbase_va_region *tmp;
+ u64 gpu_pfn = addr >> PAGE_SHIFT;
+ mali_error err = MALI_ERROR_NONE;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (!align)
+ align = 1;
+
+ /* must be a power of 2 */
+ KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+ /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+ if (gpu_pfn) {
+ struct device *dev = kctx->kbdev->dev;
+
+ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+ tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+ if (!tmp) {
+ dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+ err = MALI_ERROR_OUT_OF_GPU_MEMORY;
+ goto exit;
+ }
+
+ if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+ (!(tmp->flags & KBASE_REG_FREE))) {
+ dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+ dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+ err = MALI_ERROR_OUT_OF_GPU_MEMORY;
+ goto exit;
+ }
+
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+ if (err) {
+ dev_warn(dev, "Failed to insert va region");
+ err = MALI_ERROR_OUT_OF_GPU_MEMORY;
+ goto exit;
+ }
+
+ goto exit;
+ }
+
+ /* Path 2: Map any free address which meets the requirements. */
+ {
+ u64 start_pfn;
+
+ tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align);
+ if (!tmp) {
+ err = MALI_ERROR_OUT_OF_GPU_MEMORY;
+ goto exit;
+ }
+ start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1);
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages);
+ }
+
+ exit:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region)
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg)
+{
+ kctx->reg_rbtree = RB_ROOT;
+ kbase_region_tracker_insert(kctx, same_va_reg);
+
+ /* exec_reg and custom_va_reg don't always exist */
+ if (exec_reg && custom_va_reg) {
+ kbase_region_tracker_insert(kctx, exec_reg);
+ kbase_region_tracker_insert(kctx, custom_va_reg);
+ }
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ do {
+ rbnode = rb_first(&(kctx->reg_rbtree));
+ if (rbnode) {
+ rb_erase(rbnode, &(kctx->reg_rbtree));
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ kbase_free_alloced_region(reg);
+ }
+ } while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+mali_error kbase_region_tracker_init(struct kbase_context *kctx)
+{
+ struct kbase_va_region *same_va_reg;
+ struct kbase_va_region *exec_reg = NULL;
+ struct kbase_va_region *custom_va_reg = NULL;
+ size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+ u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+
+#if defined(CONFIG_ARM64)
+ same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+ same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+ if (kctx->is_compat)
+ same_va_bits = 32;
+ else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+ same_va_bits = 33;
+#endif
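+ /* Illustrative sizing: for a 32-bit compat client with 4 KiB pages,
+  * same_va_bits = 32 gives a SAME_VA region of (1 << 20) - 2 pages starting
+  * at PFN 1, i.e. just under 4 GiB of GPU virtual address space with the
+  * first and last pages left out. */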
+
+ if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ /* all have SAME_VA */
+ same_va_reg = kbase_alloc_free_region(kctx, 1,
+ (1ULL << (same_va_bits - PAGE_SHIFT)) - 2,
+ KBASE_REG_ZONE_SAME_VA);
+
+ if (!same_va_reg)
+ return MALI_ERROR_OUT_OF_MEMORY;
+
+#ifdef CONFIG_64BIT
+ /* only 32-bit clients have the other two zones */
+ if (kctx->is_compat) {
+#endif
+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+ kbase_free_alloced_region(same_va_reg);
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ /* If the current size of TMEM is out of range of the
+ * virtual address space addressable by the MMU then
+ * we should shrink it to fit
+ */
+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+ exec_reg = kbase_alloc_free_region(kctx,
+ KBASE_REG_ZONE_EXEC_BASE,
+ KBASE_REG_ZONE_EXEC_SIZE,
+ KBASE_REG_ZONE_EXEC);
+
+ if (!exec_reg) {
+ kbase_free_alloced_region(same_va_reg);
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+
+ custom_va_reg = kbase_alloc_free_region(kctx,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+ if (!custom_va_reg) {
+ kbase_free_alloced_region(same_va_reg);
+ kbase_free_alloced_region(exec_reg);
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+#ifdef CONFIG_64BIT
+ }
+#endif
+
+ kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+ return MALI_ERROR_NONE;
+}
+
+mali_error kbase_mem_init(struct kbase_device *kbdev)
+{
+ struct kbasep_mem_device *memdev;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ memdev = &kbdev->memdev;
+
+ /* Initialize memory usage */
+ atomic_set(&memdev->used_pages, 0);
+
+ /* nothing to do, zero-inited when struct kbase_device was created */
+ return MALI_ERROR_NONE;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+ struct kbasep_mem_device *memdev;
+ int pages;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ memdev = &kbdev->memdev;
+
+ pages = atomic_read(&memdev->used_pages);
+ if (pages != 0)
+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term)
+
+/**
+ * @brief Wait for GPU write flush - only in use for BASE_HW_ISSUE_6367
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush its write buffer.
+ * @note If GPU resets occur then the counters are reset to zero and the delay may not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+ u32 base_count = 0;
+
+ /* A suspend won't happen here, because we're in a syscall from a userspace thread */
+ kbase_pm_context_active(kctx->kbdev);
+ kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+ while (MALI_TRUE) {
+ u32 new_count;
+
+ new_count = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ /* First time around, just store the count. */
+ if (base_count == 0) {
+ base_count = new_count;
+ continue;
+ }
+
+ /* No need to handle wrapping, unsigned maths works for this. */
+ if ((new_count - base_count) > 1000)
+ break;
+ }
+
+ kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+ kbase_pm_context_idle(kctx->kbdev);
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+ struct kbase_va_region *new_reg;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* zone argument should only contain zone related region flags */
+ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (UINT64_MAX / PAGE_SIZE)); /* 64-bit address range is the max */
+
+ new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+ if (!new_reg) {
+ dev_warn(kctx->kbdev->dev, "kzalloc failed");
+ return NULL;
+ }
+
+ new_reg->alloc = NULL; /* no alloc bound yet */
+ new_reg->kctx = kctx;
+ new_reg->flags = zone | KBASE_REG_FREE;
+
+ new_reg->flags |= KBASE_REG_GROWABLE;
+
+ /* Set up default MEMATTR usage */
+ new_reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+
+ new_reg->start_pfn = start_pfn;
+ new_reg->nr_pages = nr_pages;
+
+ return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region)
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ if (!(reg->flags & KBASE_REG_FREE)) {
+ kbase_mem_phy_alloc_put(reg->alloc);
+ /* To detect use-after-free in debug builds */
+ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+ }
+ kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region)
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbase_as *as;
+ struct kbase_mmu_setup *current_setup;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the runpool_irq lock */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ kbdev = kctx->kbdev;
+ as = &kbdev->as[kctx->as_nr];
+ current_setup = &as->current_setup;
+
+ /* Use GPU implementation-defined caching policy. */
+ current_setup->memattr = kctx->mem_attrs;
+
+ current_setup->transtab = (u64) kctx->pgd & ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_ADDR_SPACE_MASK);
+ current_setup->transtab |= AS_TRANSTAB_READ_INNER |
+ AS_TRANSTAB_ADRMODE_TABLE;
+
+ /* Apply the address space setting */
+ kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_update)
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbase_as *as;
+ struct kbase_mmu_setup *current_setup;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the runpool_irq lock */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ kbdev = kctx->kbdev;
+ as = &kbdev->as[kctx->as_nr];
+ current_setup = &as->current_setup;
+
+ current_setup->transtab = 0ULL;
+
+ /* Apply the address space setting */
+ kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_disable)
+
+mali_error kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align)
+{
+ mali_error err;
+ size_t i = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+
+ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+ if (MALI_ERROR_NONE != err)
+ return err;
+
+ if (reg->alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ u64 stride;
+
+ stride = reg->alloc->imported.alias.stride;
+ KBASE_DEBUG_ASSERT(reg->alloc->imported.alias.aliased);
+ for (i = 0; i < reg->alloc->imported.alias.nents; i++) {
+ if (reg->alloc->imported.alias.aliased[i].alloc) {
+ err = kbase_mmu_insert_pages(kctx,
+ reg->start_pfn + (i * stride),
+ reg->alloc->imported.alias.aliased[i].alloc->pages + reg->alloc->imported.alias.aliased[i].offset,
+ reg->alloc->imported.alias.aliased[i].length,
+ reg->flags);
+ if (MALI_ERROR_NONE != err)
+ goto bad_insert;
+
+ kbase_mem_phy_alloc_gpu_mapped(reg->alloc->imported.alias.aliased[i].alloc);
+ } else {
+ err = kbase_mmu_insert_single_page(kctx,
+ reg->start_pfn + i * stride,
+ kctx->aliasing_sink_page,
+ reg->alloc->imported.alias.aliased[i].length,
+ (reg->flags & ~KBASE_REG_MEMATTR_MASK) | KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC)
+ );
+ if (MALI_ERROR_NONE != err)
+ goto bad_insert;
+ }
+ }
+ } else {
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+ kbase_get_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags);
+ if (MALI_ERROR_NONE != err)
+ goto bad_insert;
+ kbase_mem_phy_alloc_gpu_mapped(reg->alloc);
+ }
+
+ return err;
+
+bad_insert:
+ if (reg->alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ u64 stride;
+
+ stride = reg->alloc->imported.alias.stride;
+ KBASE_DEBUG_ASSERT(reg->alloc->imported.alias.aliased);
+ while (i--)
+ if (reg->alloc->imported.alias.aliased[i].alloc) {
+ kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->alloc->imported.alias.aliased[i].length);
+ kbase_mem_phy_alloc_gpu_unmapped(reg->alloc->imported.alias.aliased[i].alloc);
+ }
+ }
+
+ kbase_remove_va_region(kctx, reg);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap)
+
+mali_error kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ mali_error err;
+
+ if (reg->start_pfn == 0)
+ return MALI_ERROR_NONE;
+
+ if (reg->alloc && reg->alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ size_t i;
+
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+ KBASE_DEBUG_ASSERT(reg->alloc->imported.alias.aliased);
+ for (i = 0; i < reg->alloc->imported.alias.nents; i++)
+ if (reg->alloc->imported.alias.aliased[i].alloc)
+ kbase_mem_phy_alloc_gpu_unmapped(reg->alloc->imported.alias.aliased[i].alloc);
+ } else {
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+ kbase_mem_phy_alloc_gpu_unmapped(reg->alloc);
+ }
+
+ if (MALI_ERROR_NONE != err)
+ return err;
+
+ err = kbase_remove_va_region(kctx, reg);
+ return err;
+}
+
+STATIC struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+ struct kbase_cpu_mapping *map;
+ struct list_head *pos;
+
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT(reg->alloc);
+
+ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+ return NULL;
+
+ list_for_each(pos, &reg->alloc->mappings) {
+ map = list_entry(pos, kbase_cpu_mapping, mappings_list);
+ if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+ return map;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region)
+
+mali_error kbasep_find_enclosing_cpu_mapping_offset(
+ struct kbase_context *kctx, mali_addr64 gpu_addr,
+ unsigned long uaddr, size_t size, mali_size64 *offset)
+{
+ struct kbase_cpu_mapping *map = NULL;
+ const struct kbase_va_region *reg;
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (reg && !(reg->flags & KBASE_REG_FREE)) {
+ map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+ size);
+ if (map) {
+ *offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+ (map->page_off << PAGE_SHIFT);
+ err = MALI_ERROR_NONE;
+ }
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset)
+
+void kbase_sync_single(struct kbase_context *kctx,
+ phys_addr_t pa, size_t size, kbase_sync_kmem_fn sync_fn)
+{
+ struct page *p = pfn_to_page(PFN_DOWN(pa));
+ off_t offset = pa & ~PAGE_MASK;
+ dma_addr_t dma_addr;
+
+ BUG_ON(!p);
+ BUG_ON(offset + size > PAGE_SIZE);
+
+ dma_addr = kbase_dma_addr(p) + offset;
+
+ sync_fn(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
+}
+
+static mali_error kbase_do_syncset(struct kbase_context *kctx, struct base_syncset *set, kbase_sync_kmem_fn sync_fn)
+{
+ mali_error err = MALI_ERROR_NONE;
+ struct basep_syncset *sset = &set->basep_sset;
+ struct kbase_va_region *reg;
+ struct kbase_cpu_mapping *map;
+ unsigned long start;
+ size_t size;
+ phys_addr_t *pa;
+ u64 page_off, page_count;
+ u64 i;
+ unsigned int offset;
+
+ kbase_os_mem_map_lock(kctx);
+ kbase_gpu_vm_lock(kctx);
+
+ /* find the region where the virtual address is contained */
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, sset->mem_handle);
+ if (!reg) {
+ dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", sset->mem_handle);
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out_unlock;
+ }
+
+ if (!(reg->flags & KBASE_REG_CPU_CACHED))
+ goto out_unlock;
+
+ start = (uintptr_t)sset->user_addr;
+ size = (size_t)sset->size;
+
+ map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+ if (!map) {
+ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", start, sset->mem_handle);
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out_unlock;
+ }
+
+ offset = start & (PAGE_SIZE - 1);
+ page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
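+ /* Worked example (illustrative values): a sync starting at offset 0x100
+  * into a page with size = 0x2100 and PAGE_SIZE = 0x1000 gives offset = 0x100
+  * and page_count = (0x2100 + 0x100 + 0xFFF) >> 12 = 3: the first page syncs
+  * 0xF00 bytes, the middle page 0x1000 bytes and the last page 0x200 bytes,
+  * together covering the full 0x2100-byte range. */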
+ pa = kbase_get_phy_pages(reg);
+
+ /* Sync first page */
+ if (pa[page_off]) {
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+ kbase_sync_single(kctx, pa[page_off] + offset, sz, sync_fn);
+ }
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ /* we grow upwards, so bail on first non-present page */
+ if (!pa[page_off + i])
+ break;
+
+ kbase_sync_single(kctx, pa[page_off + i], PAGE_SIZE, sync_fn);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1 && pa[page_off + page_count - 1]) {
+ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+ kbase_sync_single(kctx, pa[page_off + page_count - 1], sz,
+ sync_fn);
+ }
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_os_mem_map_unlock(kctx);
+ return err;
+}
+
+mali_error kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+ mali_error err = MALI_ERROR_FUNCTION_FAILED;
+ struct basep_syncset *sset;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != syncset);
+
+ sset = &syncset->basep_sset;
+
+ switch (sset->type) {
+ case BASE_SYNCSET_OP_MSYNC:
+ err = kbase_do_syncset(kctx, syncset, dma_sync_single_for_device);
+ break;
+
+ case BASE_SYNCSET_OP_CSYNC:
+ err = kbase_do_syncset(kctx, syncset, dma_sync_single_for_cpu);
+ break;
+
+ default:
+ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+ break;
+ }
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now)
+
+/* vm lock must be held */
+mali_error kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ mali_error err;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ BUG_ON(!mutex_is_locked(&kctx->reg_lock));
+ err = kbase_gpu_munmap(kctx, reg);
+ if (err) {
+ dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+ goto out;
+ }
+
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+ /* Wait for GPU to flush write buffer before freeing physical pages */
+ kbase_wait_write_flush(kctx);
+ }
+
+ /* This will also free the physical pages */
+ kbase_free_alloced_region(reg);
+
+ out:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region)
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+mali_error kbase_mem_free(struct kbase_context *kctx, mali_addr64 gpu_addr)
+{
+ mali_error err = MALI_ERROR_NONE;
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ if (0 == gpu_addr) {
+ dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ kbase_gpu_vm_lock(kctx);
+
+ if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+ reg = kctx->pending_regions[cookie];
+ if (!reg) {
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out_unlock;
+ }
+
+ /* ask to unlink the cookie as we'll free it */
+
+ kctx->pending_regions[cookie] = NULL;
+ kctx->cookies |= (1UL << cookie);
+
+ kbase_free_alloced_region(reg);
+ } else {
+ /* A real GPU va */
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE)) {
+ dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+ gpu_addr);
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out_unlock;
+ }
+
+ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+ /* SAME_VA must be freed through munmap */
+ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+ gpu_addr);
+ err = MALI_ERROR_FUNCTION_FAILED;
+ goto out_unlock;
+ }
+
+ err = kbase_mem_free_region(kctx, reg);
+ }
+
+ out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free)
+
+void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+ /* all memory is now growable */
+ reg->flags |= KBASE_REG_GROWABLE;
+
+ if (flags & BASE_MEM_GROW_ON_GPF)
+ reg->flags |= KBASE_REG_PF_GROW;
+
+ if (flags & BASE_MEM_PROT_CPU_WR)
+ reg->flags |= KBASE_REG_CPU_WR;
+
+ if (flags & BASE_MEM_PROT_CPU_RD)
+ reg->flags |= KBASE_REG_CPU_RD;
+
+ if (flags & BASE_MEM_PROT_GPU_WR)
+ reg->flags |= KBASE_REG_GPU_WR;
+
+ if (flags & BASE_MEM_PROT_GPU_RD)
+ reg->flags |= KBASE_REG_GPU_RD;
+
+ if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+ reg->flags |= KBASE_REG_GPU_NX;
+
+ if (flags & BASE_MEM_COHERENT_LOCAL)
+ reg->flags |= KBASE_REG_SHARE_IN;
+ else if (flags & BASE_MEM_COHERENT_SYSTEM)
+ reg->flags |= KBASE_REG_SHARE_BOTH;
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags)
+
+int kbase_alloc_phy_pages_helper(
+ struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_requested)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+ if (nr_pages_requested == 0)
+ goto done; /*nothing to do*/
+
+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ /* Increase mm counters before we allocate pages so that this
+ * allocation is visible to the OOM killer */
+ kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+ if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&alloc->imported.kctx->osalloc, nr_pages_requested, alloc->pages + alloc->nents))
+ goto no_alloc;
+
+ alloc->nents += nr_pages_requested;
+done:
+ return 0;
+
+no_alloc:
+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+ struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_to_free)
+{
+ mali_bool syncback;
+ phys_addr_t *start_free;
+
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+ /* early out if nothing to do */
+ if (0 == nr_pages_to_free)
+ return 0;
+
+ start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+ syncback = (alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED) ? MALI_TRUE : MALI_FALSE;
+
+ kbase_mem_allocator_free(&alloc->imported.kctx->osalloc,
+ nr_pages_to_free,
+ start_free,
+ syncback);
+
+ alloc->nents -= nr_pages_to_free;
+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
+ kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+ switch (alloc->type) {
+ case KBASE_MEM_TYPE_NATIVE: {
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ kbase_free_phy_pages_helper(alloc, alloc->nents);
+ break;
+ }
+ case KBASE_MEM_TYPE_ALIAS: {
+ /* just call put on the underlying phy allocs */
+ size_t i;
+ struct kbase_aliased *aliased;
+
+ aliased = alloc->imported.alias.aliased;
+ if (aliased) {
+ for (i = 0; i < alloc->imported.alias.nents; i++)
+ if (aliased[i].alloc)
+ kbase_mem_phy_alloc_put(aliased[i].alloc);
+ vfree(aliased);
+ }
+ break;
+ }
+ case KBASE_MEM_TYPE_RAW:
+ /* raw pages, external cleanup */
+ break;
+#ifdef CONFIG_UMP
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ ump_dd_release(alloc->imported.ump_handle);
+ break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case KBASE_MEM_TYPE_IMPORTED_UMM:
+ dma_buf_detach(alloc->imported.umm.dma_buf,
+ alloc->imported.umm.dma_attachment);
+ dma_buf_put(alloc->imported.umm.dma_buf);
+ break;
+#endif
+ case KBASE_MEM_TYPE_TB:{
+ void *tb;
+
+ tb = alloc->imported.kctx->jctx.tb;
+ kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+ vfree(tb);
+ break;
+ }
+ default:
+		WARN(1, "Unexpected free of type %d\n", alloc->type);
+ break;
+ }
+
+ /* Free based on allocation type */
+ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+ vfree(alloc);
+ else
+ kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT(vsize > 0);
+
+ /* validate user provided arguments */
+ if (size > vsize || vsize > reg->nr_pages)
+ goto out_term;
+
+	/* Prevent vsize*sizeof from wrapping around.
+	 * For instance, with 8-byte physical addresses a vsize of 2**29+1
+	 * wraps a 32-bit size_t, so we would allocate only 8 bytes and the
+	 * alloc would not fail despite being far too small.
+	 */
+ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->alloc->pages)))
+ goto out_term;
+
+ KBASE_DEBUG_ASSERT(0 != vsize);
+
+ if (MALI_ERROR_NONE != kbase_alloc_phy_pages_helper(reg->alloc, size))
+ goto out_term;
+
+ return 0;
+
+ out_term:
+ return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages)
+
+mali_bool kbase_check_alloc_flags(unsigned long flags)
+{
+ /* Only known flags should be set. */
+ if (flags & ~((1ul << BASE_MEM_FLAGS_NR_INPUT_BITS) - 1))
+ return MALI_FALSE;
+
+ /* At least one flag should be set */
+ if (flags == 0)
+ return MALI_FALSE;
+
+ /* Either the GPU or CPU must be reading from the allocated memory */
+ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+ return MALI_FALSE;
+
+ /* Either the GPU or CPU must be writing to the allocated memory */
+ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+ return MALI_FALSE;
+
+ /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+ return MALI_FALSE;
+
+	/* GPU should have at least read or write access, otherwise there is
+	 * no reason for allocating. */
+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+ return MALI_FALSE;
+
+ return MALI_TRUE;
+}
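+
+/*
+ * Illustrative combinations (non-normative, derived only from the checks in
+ * kbase_check_alloc_flags() above):
+ *
+ *   BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+ *   BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR         -> accepted
+ *
+ *   BASE_MEM_PROT_GPU_EX | BASE_MEM_PROT_GPU_WR |
+ *   BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_CPU_WR         -> rejected
+ *   (executable GPU memory may not also be GPU writable)
+ *
+ *   BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR         -> rejected
+ *   (the GPU needs at least read or write access)
+ */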
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock)
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100755
index 000000000000..f27ea147957e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,695 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <malisw/mali_malisw.h>
+#include <linux/kref.h>
+
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif /* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#include "mali_kbase_gator.h"
+#endif /*CONFIG_MALI_GATOR_SUPPORT*/
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by
+ * 8 pages. The MMU reads in 8 page table entries from memory at a time; if
+ * more than one page fault occurs within the same 8 pages and the page tables
+ * are updated accordingly, the MMU does not re-read those entries from memory
+ * for the subsequent updates and so generates duplicate page faults, because
+ * the page table information it is using is no longer valid. */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+typedef struct kbase_cpu_mapping {
+ struct list_head mappings_list;
+ struct kbase_mem_phy_alloc *alloc;
+ struct kbase_context *kctx;
+ struct kbase_va_region *region;
+ pgoff_t page_off;
+ int count;
+ unsigned long vm_start;
+ unsigned long vm_end;
+} kbase_cpu_mapping;
+
+enum kbase_memory_type {
+ KBASE_MEM_TYPE_NATIVE,
+ KBASE_MEM_TYPE_IMPORTED_UMP,
+ KBASE_MEM_TYPE_IMPORTED_UMM,
+ KBASE_MEM_TYPE_ALIAS,
+ KBASE_MEM_TYPE_TB,
+ KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+ u64 offset; /* in pages */
+ u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+ */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc
+{
+ struct kref kref; /* number of users of this alloc */
+ atomic_t gpu_mappings;
+ size_t nents; /* 0..N */
+ phys_addr_t * pages; /* N elements, only 0..nents are valid */
+
+ /* kbase_cpu_mappings */
+ struct list_head mappings;
+
+ /* type of buffer */
+ enum kbase_memory_type type;
+
+ unsigned long properties;
+
+ /* member in union valid based on @a type */
+ union {
+#ifdef CONFIG_UMP
+ ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+ struct {
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *dma_attachment;
+ unsigned int current_mapping_usage_count;
+ struct sg_table *sgt;
+ } umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+ struct {
+ mali_size64 stride;
+ size_t nents;
+ struct kbase_aliased *aliased;
+ } alias;
+ /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+ struct kbase_context *kctx;
+ } imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ /* we only track mappings of NATIVE buffers */
+ if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+ atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ /* we only track mappings of NATIVE buffers */
+ if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+ if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+ pr_err("Mismatched %s:\n", __func__);
+ dump_stack();
+ }
+}
+
+void kbase_mem_kref_free(struct kref * kref);
+
+mali_error kbase_mem_init(struct kbase_device * kbdev);
+void kbase_mem_halt(struct kbase_device * kbdev);
+void kbase_mem_term(struct kbase_device * kbdev);
+
+static inline struct kbase_mem_phy_alloc * kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc * alloc)
+{
+ kref_get(&alloc->kref);
+ return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc * kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc * alloc)
+{
+ kref_put(&alloc->kref, kbase_mem_kref_free);
+ return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+typedef struct kbase_va_region {
+ struct rb_node rblink;
+ struct list_head link;
+
+ struct kbase_context *kctx; /* Backlink to base context */
+
+ u64 start_pfn; /* The PFN in GPU space */
+ size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED (1ul << 5)
+
+#define KBASE_REG_GROWABLE (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK (3ul << 11)
+#define KBASE_REG_ZONE(x) (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) /* Dedicated 16MB region for shader code */
+#define KBASE_REG_ZONE_EXEC_BASE ((1ULL << 32) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
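+
+/*
+ * Worked example of the resulting 32-bit client layout, assuming 4 KiB pages
+ * (PAGE_SHIFT == 12); the numbers follow directly from the macros above:
+ *   EXEC zone:      starts at PFN 0x100000 (4 GiB), 0x1000 pages (16 MiB)
+ *   CUSTOM_VA zone: starts at PFN 0x101000 (4 GiB + 16 MiB) and extends up
+ *                   to the 2^44-byte limit unless shrunk at init time
+ */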
+
+ unsigned long flags;
+
+ size_t extent; /* nr of pages alloc'd on PF */
+
+ struct kbase_mem_phy_alloc * alloc; /* the one alloc object we mmap to the GPU and CPU when mapping this region */
+
+ /* non-NULL if this memory object is a kds_resource */
+ struct kds_resource *kds_res;
+
+} kbase_va_region;
+
+/* Common functions */
+static INLINE phys_addr_t *kbase_get_phy_pages(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(reg->alloc);
+
+ return reg->alloc->pages;
+}
+
+static INLINE size_t kbase_reg_current_backed_size(struct kbase_va_region * reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ /* if no alloc object the backed size naturally is 0 */
+ if (reg->alloc)
+ return reg->alloc->nents;
+ else
+ return 0;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static INLINE struct kbase_mem_phy_alloc * kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+ struct kbase_mem_phy_alloc *alloc;
+ const size_t alloc_size =
+ sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+
+ /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */
+ if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+ / sizeof(*alloc->pages)))
+ return ERR_PTR(-ENOMEM);
+
+ /* Allocate based on the size to reduce internal fragmentation of vmem */
+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+ alloc = vzalloc(alloc_size);
+ else
+ alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+ if (!alloc)
+ return ERR_PTR(-ENOMEM);
+
+ /* Store allocation method */
+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+ kref_init(&alloc->kref);
+ atomic_set(&alloc->gpu_mappings, 0);
+ alloc->nents = 0;
+ alloc->pages = (void*)(alloc + 1);
+ INIT_LIST_HEAD(&alloc->mappings);
+ alloc->type = type;
+
+ return alloc;
+}
+
+static INLINE int kbase_reg_prepare_native(struct kbase_va_region * reg, struct kbase_context * kctx)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(!reg->alloc);
+ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+ reg->alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE);
+ if (IS_ERR(reg->alloc))
+ return PTR_ERR(reg->alloc);
+ else if (!reg->alloc)
+ return -ENOMEM;
+ reg->alloc->imported.kctx = kctx;
+ reg->flags &= ~KBASE_REG_FREE;
+ return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+ int new_val = atomic_add_return(num_pages, used_pages);
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+ return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+ int new_val = atomic_sub_return(num_pages, used_pages);
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+ return new_val;
+}
+
+/**
+ * @brief Initialize low-level memory access for a kbase device
+ *
+ * Performs any low-level setup needed for a kbase device to access memory on
+ * the device.
+ *
+ * @param kbdev kbase device to initialize memory access for
+ * @return 0 on success, Linux error code on failure
+ */
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev);
+
+
+/**
+ * @brief Terminate low-level memory access for a kbase device
+ *
+ * Perform any low-level cleanup needed to clean
+ * after @ref kbase_mem_lowlevel_init
+ *
+ * @param kbdev kbase device to clean up for
+ */
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize an OS based memory allocator.
+ *
+ * Initializes an allocator.
+ * Must be called before any allocation is attempted.
+ * \a kbase_mem_allocator_alloc and \a kbase_mem_allocator_free are used
+ * to allocate and free memory.
+ * \a kbase_mem_allocator_term must be called to clean up the allocator.
+ * All memory obtained via \a kbase_mem_allocator_alloc must have been freed
+ * with \a kbase_mem_allocator_free before \a kbase_mem_allocator_term is called.
+ *
+ * @param allocator Allocator object to initialize
+ * @param max_size Maximum number of pages to keep on the freelist.
+ * @param kbdev The kbase device this allocator is used with
+ * @return MALI_ERROR_NONE on success, an error code indicating what failed on
+ * error.
+ */
+mali_error kbase_mem_allocator_init(struct kbase_mem_allocator *allocator,
+ unsigned int max_size,
+ struct kbase_device *kbdev);
+
+/**
+ * @brief Allocate memory via an OS based memory allocator.
+ *
+ * @param[in] allocator Allocator to obtain the memory from
+ * @param nr_pages Number of pages to allocate
+ * @param[out] pages Pointer to an array where the physical addresses of the allocated pages will be stored
+ * @return MALI_ERROR_NONE if the pages were allocated, an error code indicating what failed on error
+ */
+mali_error kbase_mem_allocator_alloc(struct kbase_mem_allocator * allocator, size_t nr_pages, phys_addr_t *pages);
+
+/**
+ * @brief Free memory obtained for an OS based memory allocator.
+ *
+ * @param[in] allocator Allocator to free the memory back to
+ * @param nr_pages Number of pages to free
+ * @param[in] pages Pointer to an array holding the physical addresses of the pages to free.
+ * @param[in] sync_back MALI_TRUE if the memory should be synced back
+ */
+void kbase_mem_allocator_free(struct kbase_mem_allocator * allocator, size_t nr_pages, phys_addr_t *pages, mali_bool sync_back);
+
+/**
+ * @brief Terminate an OS based memory allocator.
+ *
+ * Frees all cached allocations and cleans up internal state.
+ * All allocated pages must have been freed with \a kbase_mem_allocator_free
+ * before this function is called.
+ *
+ * @param[in] allocator Allocator to terminate
+ */
+void kbase_mem_allocator_term(struct kbase_mem_allocator * allocator);
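+
+/*
+ * Minimal usage sketch for the allocator API above (illustrative only;
+ * assumes a valid struct kbase_device *kbdev and abbreviates error handling):
+ *
+ *	struct kbase_mem_allocator allocator;
+ *	phys_addr_t pages[16];
+ *
+ *	if (MALI_ERROR_NONE != kbase_mem_allocator_init(&allocator, 256, kbdev))
+ *		return -ENOMEM;
+ *	if (MALI_ERROR_NONE == kbase_mem_allocator_alloc(&allocator, 16, pages)) {
+ *		... use the pages ...
+ *		kbase_mem_allocator_free(&allocator, 16, pages, MALI_FALSE);
+ *	}
+ *	kbase_mem_allocator_term(&allocator);
+ */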
+
+
+
+mali_error kbase_region_tracker_init(struct kbase_context *kctx);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, mali_addr64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, mali_addr64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+mali_error kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align);
+
+mali_error kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align);
+mali_bool kbase_check_alloc_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+mali_error kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+mali_error kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t *phys, size_t nr,
+ unsigned long flags);
+mali_error kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t phys, size_t nr,
+ unsigned long flags);
+
+mali_error kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+mali_error kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t* phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+mali_error kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, mali_addr64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+mali_error kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in] kctx The kbase context to dump
+ * @param[in] nr_pages The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
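+
+/*
+ * Sketch of the expected calling pattern for kbase_mmu_dump() (illustrative;
+ * assumes a valid kctx, and the 16-page buffer size is arbitrary):
+ *
+ *	void *dump;
+ *
+ *	kbase_gpu_vm_lock(kctx);
+ *	dump = kbase_mmu_dump(kctx, 16);
+ *	kbase_gpu_vm_unlock(kctx);
+ *	if (dump) {
+ *		... inspect or copy out the dump ...
+ *		vfree(dump);
+ *	}
+ */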
+
+mali_error kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t pa,
+ size_t size, kbase_sync_kmem_fn sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/**
+ * Set attributes for imported tmem region
+ *
+ * This function sets (extends with) requested attributes for given region
+ * of imported external memory
+ *
+ * @param[in] kctx The kbase context which the tmem belongs to
+ * @param[in] gpu_addr The base address of the tmem region
+ * @param[in] attributes The attributes of tmem region to be set
+ *
+ * @return MALI_ERROR_NONE on success. Any other value indicates failure.
+ */
+mali_error kbase_tmem_set_attributes(struct kbase_context *kctx, mali_addr64 gpu_addr, u32 attributes);
+
+/**
+ * Get attributes of imported tmem region
+ *
+ * This function retrieves the attributes of imported external memory
+ *
+ * @param[in] kctx The kbase context which the tmem belongs to
+ * @param[in] gpu_addr The base address of the tmem region
+ * @param[out] attributes The actual attributes of tmem region
+ *
+ * @return MALI_ERROR_NONE on success. Any other value indicates failure.
+ */
+mali_error kbase_tmem_get_attributes(struct kbase_context *kctx, mali_addr64 gpu_addr, u32 * const attributes);
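+
+/*
+ * Illustrative round trip (gpu_addr and attrs_to_set are hypothetical values
+ * supplied by the caller): extend the attributes of an imported region and
+ * read the result back.
+ *
+ *	u32 attrs;
+ *
+ *	if (MALI_ERROR_NONE == kbase_tmem_set_attributes(kctx, gpu_addr, attrs_to_set) &&
+ *	    MALI_ERROR_NONE == kbase_tmem_get_attributes(kctx, gpu_addr, &attrs)) {
+ *		... attrs now reflects the extended attribute set ...
+ *	}
+ */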
+
+/* OS specific functions */
+mali_error kbase_mem_free(struct kbase_context *kctx, mali_addr64 gpu_addr);
+mali_error kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to update the current memory allocation counters for the
+ * current process with the supplied delta.
+ *
+ * @param[in] kctx The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static INLINE void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+ kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static INLINE void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+ kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ * a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx The kernel base context used for the allocation.
+ * @param[in] gpu_addr GPU address of the start of the allocated region
+ * within which to search.
+ * @param[in] uaddr Start of the CPU virtual address range.
+ * @param[in] size Size of the CPU virtual address range (in bytes).
+ * @param[out] offset The offset from the start of the allocation to the
+ * specified CPU virtual address.
+ *
+ * @return MALI_ERROR_NONE if offset was obtained successfully. Error code
+ * otherwise.
+ */
+mali_error kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+ mali_addr64 gpu_addr,
+ unsigned long uaddr,
+ size_t size,
+ mali_size64 *offset);
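+
+/*
+ * Illustrative lookup (gpu_addr and uaddr are hypothetical caller-supplied
+ * values): find the offset of a 256-byte user range within the allocation
+ * mapped at gpu_addr.
+ *
+ *	mali_size64 offset;
+ *
+ *	if (MALI_ERROR_NONE == kbasep_find_enclosing_cpu_mapping_offset(kctx,
+ *			gpu_addr, uaddr, 256, &offset)) {
+ *		... offset is the byte offset of uaddr from the start of the
+ *		    allocation ...
+ *	}
+ */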
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc * alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc * alloc, size_t nr_pages_to_free);
+
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+ SetPagePrivate(p);
+ if (sizeof(dma_addr_t) > sizeof(p->private)) {
+		/* On 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+		 * private field stays the same. So we have to be clever and
+		 * use the fact that we only store DMA addresses of whole
+		 * pages, so the low bits should be zero. */
+ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+ set_page_private(p, dma_addr >> PAGE_SHIFT);
+ } else {
+ set_page_private(p, dma_addr);
+ }
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+ if (sizeof(dma_addr_t) > sizeof(p->private))
+ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+ return (dma_addr_t)page_private(p);
+}
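+
+/*
+ * The pair above round-trips a page's DMA address through page->private,
+ * e.g. (illustrative, for a page already mapped with dma_map_page()):
+ *
+ *	kbase_set_dma_addr(p, dma_addr);
+ *	WARN_ON(kbase_dma_addr(p) != dma_addr);
+ *
+ * On 32-bit ARM with LPAE the address is stored shifted right by PAGE_SHIFT,
+ * which is lossless because only page-aligned addresses are ever stored.
+ */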
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev The @ref kbase_device the fault happened on
+* @param[in] kctx The @ref kbase_context for the faulting address space if
+* one was found.
+* @param[in] as The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_as *as);
+
+#endif /* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c
new file mode 100755
index 000000000000..248eb549f520
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c
@@ -0,0 +1,304 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#include <mali_kbase.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev)
+{
+ return;
+}
+
+static unsigned long kbase_mem_allocator_count(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+ return atomic_read(&allocator->free_list_size);
+}
+
+static unsigned long kbase_mem_allocator_scan(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+ int i;
+ int freed;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+
+ might_sleep();
+
+ mutex_lock(&allocator->free_list_lock);
+ i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan);
+ freed = i;
+
+ atomic_sub(i, &allocator->free_list_size);
+
+ while (i--) {
+ struct page *p;
+
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head,
+ struct page, lru);
+ list_del(&p->lru);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ return atomic_read(&allocator->free_list_size);
+
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_allocator_shrink(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ if (sc->nr_to_scan == 0)
+ return kbase_mem_allocator_count(s, sc);
+ else
+ return kbase_mem_allocator_scan(s, sc);
+}
+#endif
+
+mali_error kbase_mem_allocator_init(struct kbase_mem_allocator *const allocator,
+ unsigned int max_size, struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ INIT_LIST_HEAD(&allocator->free_list_head);
+
+ allocator->kbdev = kbdev;
+
+ mutex_init(&allocator->free_list_lock);
+
+ atomic_set(&allocator->free_list_size, 0);
+
+ allocator->free_list_max_size = max_size;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+ allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink;
+#else
+ allocator->free_list_reclaimer.count_objects =
+ kbase_mem_allocator_count;
+ allocator->free_list_reclaimer.scan_objects = kbase_mem_allocator_scan;
+#endif
+ allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS;
+ /* Kernel versions prior to 3.1 :
+ * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+ allocator->free_list_reclaimer.batch = 0;
+#endif
+
+ register_shrinker(&allocator->free_list_reclaimer);
+
+ return MALI_ERROR_NONE;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_init)
+
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ unregister_shrinker(&allocator->free_list_reclaimer);
+ mutex_lock(&allocator->free_list_lock);
+ while (!list_empty(&allocator->free_list_head)) {
+ struct page *p;
+
+ p = list_first_entry(&allocator->free_list_head, struct page,
+ lru);
+ list_del(&p->lru);
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+ atomic_set(&allocator->free_list_size, 0);
+ mutex_unlock(&allocator->free_list_lock);
+ mutex_destroy(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_term)
+
+mali_error kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+ struct page *p;
+ void *mp;
+ int i;
+ int num_from_free_list;
+ struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+ gfp_t gfp;
+
+ might_sleep();
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ /* take from the free list first */
+ mutex_lock(&allocator->free_list_lock);
+ num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+ atomic_sub(num_from_free_list, &allocator->free_list_size);
+ for (i = 0; i < num_from_free_list; i++)
+ {
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_move(&p->lru, &from_free_list);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ i = 0;
+
+	/* Allocate as many pages as possible from the pool of already allocated pages. */
+ list_for_each_entry(p, &from_free_list, lru) {
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ i++;
+ }
+
+ if (i == nr_pages)
+ return MALI_ERROR_NONE;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ gfp = GFP_USER;
+#else
+ gfp = GFP_HIGHUSER;
+#endif
+
+ if (current->flags & PF_KTHREAD) {
+ /* Don't trigger OOM killer from kernel threads, e.g. when
+ * growing memory on GPU page fault */
+ gfp |= __GFP_NORETRY;
+ }
+
+ /* If not all pages were sourced from the pool, request new ones. */
+ for (; i < nr_pages; i++) {
+ dma_addr_t dma_addr;
+ p = alloc_page(gfp);
+ if (NULL == p)
+ goto err_out_roll_back;
+ mp = kmap(p);
+ if (NULL == mp) {
+ __free_page(p);
+ goto err_out_roll_back;
+ }
+ memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can do cache maintenance */
+ kunmap(p);
+
+ dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) {
+ __free_page(p);
+ goto err_out_roll_back;
+ }
+
+ SetPagePrivate(p);
+ kbase_set_dma_addr(p, dma_addr);
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ BUG_ON(dma_addr != pages[i]);
+ }
+
+ return MALI_ERROR_NONE;
+
+err_out_roll_back:
+ while (i--) {
+ struct page *p;
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+
+ return MALI_ERROR_OUT_OF_MEMORY;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_alloc)
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, mali_bool sync_back)
+{
+ int i = 0;
+ int page_count = 0;
+ int tofree;
+
+ LIST_HEAD(new_free_list_items);
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ might_sleep();
+
+	/* Start by just freeing the overspill.
+	 * As we do this outside of the lock we might spill too many pages
+	 * or get too many on the free list, but max_size is just a ballpark,
+	 * so this is fine provided that tofree doesn't exceed nr_pages.
+	 */
+ tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+ tofree = nr_pages - MIN(tofree, nr_pages);
+ for (; i < tofree; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ pages[i] = (phys_addr_t)0;
+ __free_page(p);
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ /* Sync back the memory to ensure that future cache
+ * invalidations don't trample on memory.
+ */
+ if (sync_back)
+ dma_sync_single_for_cpu(allocator->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ list_add(&p->lru, &new_free_list_items);
+ page_count++;
+ }
+ }
+ mutex_lock(&allocator->free_list_lock);
+ list_splice(&new_free_list_items, &allocator->free_list_head);
+ atomic_add(page_count, &allocator->free_list_size);
+ mutex_unlock(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_free)
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h
new file mode 100755
index 000000000000..cb0e153b13bf
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+/* raw page handling */
+struct kbase_mem_allocator
+{
+ struct kbase_device *kbdev;
+ atomic_t free_list_size;
+ unsigned int free_list_max_size;
+ struct mutex free_list_lock;
+ struct list_head free_list_head;
+ struct shrinker free_list_reclaimer;
+};
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c
new file mode 100755
index 000000000000..8fa93b914302
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c
@@ -0,0 +1,410 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#include <mali_kbase.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/memblock.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+
+
+/* This code does not support having multiple kbase devices, or rmmod/insmod */
+
+static unsigned long kbase_carveout_start_pfn = ~0UL;
+static unsigned long kbase_carveout_end_pfn;
+static LIST_HEAD(kbase_carveout_free_list);
+static DEFINE_MUTEX(kbase_carveout_free_list_lock);
+static unsigned int kbase_carveout_pages;
+static atomic_t kbase_carveout_used_pages;
+static atomic_t kbase_carveout_system_pages;
+
+static struct page *kbase_carveout_get_page(struct kbase_mem_allocator *allocator)
+{
+ struct page *p = NULL;
+ gfp_t gfp;
+
+ mutex_lock(&kbase_carveout_free_list_lock);
+ if (!list_empty(&kbase_carveout_free_list)) {
+ p = list_first_entry(&kbase_carveout_free_list, struct page, lru);
+ list_del(&p->lru);
+ atomic_inc(&kbase_carveout_used_pages);
+ }
+ mutex_unlock(&kbase_carveout_free_list_lock);
+
+ if (!p) {
+ dma_addr_t dma_addr;
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ gfp = GFP_USER;
+#else
+ gfp = GFP_HIGHUSER;
+#endif
+
+ if (current->flags & PF_KTHREAD) {
+ /* Don't trigger OOM killer from kernel threads, e.g.
+ * when growing memory on GPU page fault */
+ gfp |= __GFP_NORETRY;
+ }
+
+ p = alloc_page(gfp);
+ if (!p)
+ goto out;
+
+ dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) {
+ __free_page(p);
+ p = NULL;
+ goto out;
+ }
+
+ kbase_set_dma_addr(p, dma_addr);
+ BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p)));
+ atomic_inc(&kbase_carveout_system_pages);
+ }
+out:
+ return p;
+}
+
+static void kbase_carveout_put_page(struct page *p,
+ struct kbase_mem_allocator *allocator)
+{
+ if (page_to_pfn(p) >= kbase_carveout_start_pfn &&
+ page_to_pfn(p) <= kbase_carveout_end_pfn) {
+ mutex_lock(&kbase_carveout_free_list_lock);
+ list_add(&p->lru, &kbase_carveout_free_list);
+ atomic_dec(&kbase_carveout_used_pages);
+ mutex_unlock(&kbase_carveout_free_list_lock);
+ } else {
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ atomic_dec(&kbase_carveout_system_pages);
+ }
+}
+
+static int kbase_carveout_seq_show(struct seq_file *s, void *data)
+{
+ seq_printf(s, "carveout pages: %u\n", kbase_carveout_pages);
+ seq_printf(s, "used carveout pages: %u\n",
+ atomic_read(&kbase_carveout_used_pages));
+ seq_printf(s, "used system pages: %u\n",
+ atomic_read(&kbase_carveout_system_pages));
+ return 0;
+}
+
+static int kbasep_carveout_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, kbase_carveout_seq_show, NULL);
+}
+
+static const struct file_operations kbase_carveout_debugfs_fops = {
+ .open = kbasep_carveout_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int kbase_carveout_init(struct device *dev)
+{
+ unsigned long pfn;
+ static int once;
+
+ mutex_lock(&kbase_carveout_free_list_lock);
+ BUG_ON(once);
+ once = 1;
+
+ for (pfn = kbase_carveout_start_pfn; pfn <= kbase_carveout_end_pfn; pfn++) {
+ struct page *p = pfn_to_page(pfn);
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev, dma_addr))
+ goto out_rollback;
+
+ kbase_set_dma_addr(p, dma_addr);
+ BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p)));
+
+ list_add_tail(&p->lru, &kbase_carveout_free_list);
+ }
+
+ mutex_unlock(&kbase_carveout_free_list_lock);
+
+ debugfs_create_file("kbase_carveout", S_IRUGO, NULL, NULL,
+ &kbase_carveout_debugfs_fops);
+
+ return 0;
+
+out_rollback:
+ while (!list_empty(&kbase_carveout_free_list)) {
+ struct page *p;
+
+ p = list_first_entry(&kbase_carveout_free_list, struct page, lru);
+ dma_unmap_page(dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ list_del(&p->lru);
+ }
+
+ mutex_unlock(&kbase_carveout_free_list_lock);
+ return -ENOMEM;
+}
+
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+ phys_addr_t mem;
+
+#if defined(CONFIG_ARM) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ACCESSIBLE);
+#else
+ mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+#endif
+ if (mem == 0) {
+		pr_warn("%s: Failed to allocate %llu bytes for kbase carveout\n",
+			__func__, (unsigned long long)size);
+ return -ENOMEM;
+ }
+
+ kbase_carveout_start_pfn = PFN_DOWN(mem);
+ kbase_carveout_end_pfn = PFN_DOWN(mem + size - 1);
+ kbase_carveout_pages = kbase_carveout_end_pfn - kbase_carveout_start_pfn + 1;
+
+ return 0;
+}
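+
+/*
+ * kbase_carveout_mem_reserve() uses memblock, so it must be called from early
+ * platform/board init while memblock allocations are still possible. A
+ * hypothetical call site (the 64 MiB size is only an example):
+ *
+ *	static void __init example_board_reserve(void)
+ *	{
+ *		kbase_carveout_mem_reserve(SZ_64M);
+ *	}
+ */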
+
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev)
+{
+ return kbase_carveout_init(kbdev->dev);
+}
+
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev)
+{
+}
+
+STATIC int kbase_mem_allocator_shrink(struct shrinker *s, struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+ int i;
+ int freed;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+
+ if (sc->nr_to_scan == 0)
+ return atomic_read(&allocator->free_list_size);
+
+ might_sleep();
+
+ mutex_lock(&allocator->free_list_lock);
+ i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan);
+ freed = i;
+
+ atomic_sub(i, &allocator->free_list_size);
+
+ while (i--) {
+ struct page *p;
+
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_del(&p->lru);
+ kbase_carveout_put_page(p, allocator);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ return atomic_read(&allocator->free_list_size);
+}
+
+mali_error kbase_mem_allocator_init(struct kbase_mem_allocator * const allocator,
+ unsigned int max_size, struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ INIT_LIST_HEAD(&allocator->free_list_head);
+
+ allocator->kbdev = kbdev;
+
+ mutex_init(&allocator->free_list_lock);
+
+ atomic_set(&allocator->free_list_size, 0);
+
+ allocator->free_list_max_size = max_size;
+ allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink;
+ allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) /* Kernel versions prior to 3.1 : struct shrinker does not define batch */
+ allocator->free_list_reclaimer.batch = 0;
+#endif
+
+ register_shrinker(&allocator->free_list_reclaimer);
+
+ return MALI_ERROR_NONE;
+}
+
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ unregister_shrinker(&allocator->free_list_reclaimer);
+
+ while (!list_empty(&allocator->free_list_head)) {
+ struct page *p;
+
+ p = list_first_entry(&allocator->free_list_head, struct page,
+ lru);
+ list_del(&p->lru);
+
+ kbase_carveout_put_page(p, allocator);
+ }
+ mutex_destroy(&allocator->free_list_lock);
+}
+
+
+mali_error kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+ struct page *p;
+ void *mp;
+ int i;
+ int num_from_free_list;
+ struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+
+ might_sleep();
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ /* take from the free list first */
+ mutex_lock(&allocator->free_list_lock);
+ num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+ atomic_sub(num_from_free_list, &allocator->free_list_size);
+ for (i = 0; i < num_from_free_list; i++) {
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_move(&p->lru, &from_free_list);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ i = 0;
+
+	/* Allocate as many pages as possible from the pool of already allocated pages. */
+ list_for_each_entry(p, &from_free_list, lru)
+ {
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ i++;
+ }
+
+ if (i == nr_pages)
+ return MALI_ERROR_NONE;
+
+ /* If not all pages were sourced from the pool, request new ones. */
+ for (; i < nr_pages; i++) {
+ p = kbase_carveout_get_page(allocator);
+ if (NULL == p)
+ goto err_out_roll_back;
+
+ mp = kmap(p);
+ if (NULL == mp) {
+ kbase_carveout_put_page(p, allocator);
+ goto err_out_roll_back;
+ }
+ memset(mp, 0x00, PAGE_SIZE); /* instead of __GFP_ZERO, so we can
+ do cache maintenance */
+ dma_sync_single_for_device(allocator->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ kunmap(p);
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ }
+
+ return MALI_ERROR_NONE;
+
+err_out_roll_back:
+ while (i--) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ kbase_carveout_put_page(p, allocator);
+ }
+
+ return MALI_ERROR_OUT_OF_MEMORY;
+}
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, u32 nr_pages, phys_addr_t *pages, mali_bool sync_back)
+{
+ int i = 0;
+ int page_count = 0;
+ int tofree;
+
+ LIST_HEAD(new_free_list_items);
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ might_sleep();
+
+	/* Start by just freeing the overspill.
+	 * As we do this outside of the lock we might spill too many pages
+	 * or get too many on the free list, but max_size is just a ballpark,
+	 * so this is fine provided that tofree doesn't exceed nr_pages.
+	 */
+ tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+ tofree = nr_pages - MIN(tofree, nr_pages);
+ for (; i < tofree; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ kbase_carveout_put_page(p, allocator);
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ /* Sync back the memory to ensure that future cache
+ * invalidations don't trample on memory.
+ */
+ if (sync_back)
+ dma_sync_single_for_cpu(allocator->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ list_add(&p->lru, &new_free_list_items);
+ page_count++;
+ }
+ }
+ mutex_lock(&allocator->free_list_lock);
+ list_splice(&new_free_list_items, &allocator->free_list_head);
+ atomic_add(page_count, &allocator->free_list_size);
+ mutex_unlock(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_free)
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100755
index 000000000000..72a7aea73ec9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,1823 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ #include <linux/dma-attrs.h>
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+ int zone;
+ int gpu_pc_bits;
+ int cpu_va_bits;
+ struct kbase_va_region *reg;
+ struct device *dev;
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(flags);
+ KBASE_DEBUG_ASSERT(gpu_va);
+ KBASE_DEBUG_ASSERT(va_alignment);
+
+ dev = kctx->kbdev->dev;
+ *va_alignment = 0; /* no alignment by default */
+ *gpu_va = 0; /* return 0 on failure */
+
+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ cpu_va_bits = BITS_PER_LONG;
+
+ if (0 == va_pages) {
+ dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+ goto bad_size;
+ }
+
+ if (va_pages > (UINT64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+#if defined(CONFIG_64BIT)
+ if (kctx->is_compat)
+ cpu_va_bits = 32;
+ else
+ /* force SAME_VA if a 64-bit client */
+ *flags |= BASE_MEM_SAME_VA;
+#endif
+
+ if (!kbase_check_alloc_flags(*flags)) {
+ dev_warn(dev,
+ "kbase_mem_alloc called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+
+ /* Limit GPU executable allocs to GPU PC size */
+ if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+ (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+ goto bad_ex_size;
+
+ /* find out which VA zone to use */
+ if (*flags & BASE_MEM_SAME_VA)
+ zone = KBASE_REG_ZONE_SAME_VA;
+ else if (*flags & BASE_MEM_PROT_GPU_EX)
+ zone = KBASE_REG_ZONE_EXEC;
+ else
+ zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+ reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+ if (!reg) {
+ dev_err(dev, "Failed to allocate free region");
+ goto no_region;
+ }
+
+ if (MALI_ERROR_NONE != kbase_reg_prepare_native(reg, kctx)) {
+ dev_err(dev, "Failed to prepare region");
+ goto prepare_failed;
+ }
+
+ kbase_update_region_flags(reg, *flags);
+
+ if (*flags & BASE_MEM_GROW_ON_GPF)
+ reg->extent = extent;
+ else
+ reg->extent = 0;
+
+ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages)) {
+ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+ (unsigned long long)commit_pages,
+ (unsigned long long)va_pages);
+ goto no_mem;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* mmap needed to setup VA? */
+ if (*flags & BASE_MEM_SAME_VA) {
+ /* Bind to a cookie */
+ if (!kctx->cookies) {
+ dev_err(dev, "No cookies available for allocation!");
+ goto no_cookie;
+ }
+ /* return a cookie */
+ *gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << *gpu_va);
+ BUG_ON(kctx->pending_regions[*gpu_va]);
+ kctx->pending_regions[*gpu_va] = reg;
+
+ /* relocate to correct base */
+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ *gpu_va <<= PAGE_SHIFT;
+
+ /* See if we must align memory due to GPU PC bits vs CPU VA */
+ if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+ (cpu_va_bits > gpu_pc_bits)) {
+ *va_alignment = gpu_pc_bits;
+ reg->flags |= KBASE_REG_ALIGNED;
+ }
+ } else /* we control the VA */ {
+ if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0, va_pages, 1)) {
+ dev_warn(dev, "Failed to map memory on GPU");
+ goto no_mmap;
+ }
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+ return reg;
+
+no_mmap:
+no_cookie:
+ kbase_gpu_vm_unlock(kctx);
+no_mem:
+ kbase_mem_phy_alloc_put(reg->alloc);
+prepare_failed:
+ kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+ return NULL;
+}
+
+mali_error kbase_mem_query(struct kbase_context *kctx, mali_addr64 gpu_addr, int query, u64 * const out)
+{
+ struct kbase_va_region *reg;
+ mali_error ret = MALI_ERROR_FUNCTION_FAILED;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(out);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ switch (query) {
+ case KBASE_MEM_QUERY_COMMIT_SIZE:
+ if (reg->alloc->type != KBASE_MEM_TYPE_ALIAS) {
+ *out = kbase_reg_current_backed_size(reg);
+ } else {
+ size_t i;
+ struct kbase_aliased *aliased;
+ *out = 0;
+ aliased = reg->alloc->imported.alias.aliased;
+ for (i = 0; i < reg->alloc->imported.alias.nents; i++)
+ *out += aliased[i].length;
+ }
+ break;
+ case KBASE_MEM_QUERY_VA_SIZE:
+ *out = reg->nr_pages;
+ break;
+ case KBASE_MEM_QUERY_FLAGS:
+ {
+ *out = 0;
+ if (KBASE_REG_CPU_WR & reg->flags)
+ *out |= BASE_MEM_PROT_CPU_WR;
+ if (KBASE_REG_CPU_RD & reg->flags)
+ *out |= BASE_MEM_PROT_CPU_RD;
+ if (KBASE_REG_CPU_CACHED & reg->flags)
+ *out |= BASE_MEM_CACHED_CPU;
+ if (KBASE_REG_GPU_WR & reg->flags)
+ *out |= BASE_MEM_PROT_GPU_WR;
+ if (KBASE_REG_GPU_RD & reg->flags)
+ *out |= BASE_MEM_PROT_GPU_RD;
+ if (!(KBASE_REG_GPU_NX & reg->flags))
+ *out |= BASE_MEM_PROT_GPU_EX;
+ if (KBASE_REG_SHARE_BOTH & reg->flags)
+ *out |= BASE_MEM_COHERENT_SYSTEM;
+ if (KBASE_REG_SHARE_IN & reg->flags)
+ *out |= BASE_MEM_COHERENT_LOCAL;
+ break;
+ }
+ default:
+ *out = 0;
+ goto out_unlock;
+ }
+
+ ret = MALI_ERROR_NONE;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return ret;
+}
+
+mali_error kbase_mem_flags_change(struct kbase_context *kctx, mali_addr64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+ struct kbase_va_region *reg;
+ mali_error ret = MALI_ERROR_FUNCTION_FAILED;
+ unsigned int real_flags = 0;
+ unsigned int prev_flags = 0;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ if (!gpu_addr)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ /* nuke other bits */
+ flags &= mask;
+
+ /* check for only supported flags */
+ if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+ goto out;
+
+ /* mask covers bits we don't support? */
+ if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+ goto out;
+
+ /* convert flags */
+ if (BASE_MEM_COHERENT_SYSTEM & flags)
+ real_flags |= KBASE_REG_SHARE_BOTH;
+ else if (BASE_MEM_COHERENT_LOCAL & flags)
+ real_flags |= KBASE_REG_SHARE_IN;
+
+ /* now we can lock down the context, and find the region */
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ /* limit to imported memory */
+ if ((reg->alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+ (reg->alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+ goto out_unlock;
+
+ /* no change? */
+ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+ ret = MALI_ERROR_NONE;
+ goto out_unlock;
+ }
+
+ /* save for roll back */
+ prev_flags = reg->flags;
+ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+ reg->flags |= real_flags;
+
+ /* Currently supporting only imported memory */
+ switch (reg->alloc->type) {
+#ifdef CONFIG_UMP
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_phy_pages(reg), reg->alloc->nents, reg->flags);
+ break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case KBASE_MEM_TYPE_IMPORTED_UMM:
+ /* Future mappings will use the new flags; the existing mapping will
+ * NOT be updated, as the memory should not be in use by the GPU while
+ * the flags are being updated.
+ */
+ ret = MALI_ERROR_NONE;
+ WARN_ON(reg->alloc->imported.umm.current_mapping_usage_count);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ /* roll back on error, i.e. not UMP */
+ if (ret != MALI_ERROR_NONE)
+ reg->flags = prev_flags;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_TOTAL_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+ ump_dd_handle umph;
+ u64 block_count;
+ const ump_dd_physical_block_64 *block_array;
+ u64 i, j;
+ int page = 0;
+ ump_alloc_flags ump_flags;
+ ump_alloc_flags cpu_flags;
+ ump_alloc_flags gpu_flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(va_pages);
+ KBASE_DEBUG_ASSERT(flags);
+
+ umph = ump_dd_from_secure_id(id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+ goto bad_id;
+
+ ump_flags = ump_dd_allocation_flags_get(umph);
+ cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+ gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+ UMP_DEVICE_MASK;
+
+ *va_pages = ump_dd_size_get_64(umph);
+ *va_pages >>= PAGE_SHIFT;
+
+ if (!*va_pages)
+ goto bad_size;
+
+ if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ if (*flags & BASE_MEM_SAME_VA)
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ else
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+ if (!reg)
+ goto no_region;
+
+ /* we've got pages to map now, and support SAME_VA */
+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+ reg->alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+ if (IS_ERR_OR_NULL(reg->alloc))
+ goto no_alloc_obj;
+
+ reg->alloc->imported.ump_handle = umph;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */
+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */
+
+ /* Override import flags based on UMP flags */
+ *flags &= ~(BASE_MEM_CACHED_CPU);
+ *flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+ *flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+ if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+ reg->flags |= KBASE_REG_CPU_CACHED;
+ *flags |= BASE_MEM_CACHED_CPU;
+ }
+
+ if (cpu_flags & UMP_PROT_CPU_WR) {
+ reg->flags |= KBASE_REG_CPU_WR;
+ *flags |= BASE_MEM_PROT_CPU_WR;
+ }
+
+ if (cpu_flags & UMP_PROT_CPU_RD) {
+ reg->flags |= KBASE_REG_CPU_RD;
+ *flags |= BASE_MEM_PROT_CPU_RD;
+ }
+
+ if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+ reg->flags |= KBASE_REG_GPU_CACHED;
+
+ if (gpu_flags & UMP_PROT_DEVICE_WR) {
+ reg->flags |= KBASE_REG_GPU_WR;
+ *flags |= BASE_MEM_PROT_GPU_WR;
+ }
+
+ if (gpu_flags & UMP_PROT_DEVICE_RD) {
+ reg->flags |= KBASE_REG_GPU_RD;
+ *flags |= BASE_MEM_PROT_GPU_RD;
+ }
+
+ /* ump phys block query */
+ ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+ for (i = 0; i < block_count; i++) {
+ for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+ reg->alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+ page++;
+ }
+ }
+ reg->alloc->nents = *va_pages;
+ reg->extent = 0;
+
+ return reg;
+
+no_alloc_obj:
+ kfree(reg);
+no_region:
+bad_size:
+ ump_dd_release(umph);
+bad_id:
+ return NULL;
+}
+#endif /* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *dma_attachment;
+
+ dma_buf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(dma_buf))
+ goto no_buf;
+
+ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+ if (!dma_attachment)
+ goto no_attachment;
+
+ *va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+ if (!*va_pages)
+ goto bad_size;
+
+ if (*va_pages > (UINT64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ /* ignore SAME_VA */
+ *flags &= ~BASE_MEM_SAME_VA;
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* 64-bit tasks must MMAP anyway, but we don't expose this address to clients */
+ *flags |= BASE_MEM_NEED_MMAP;
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ } else {
+#else
+ if (1) {
+#endif
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ }
+
+ if (!reg)
+ goto no_region;
+
+ reg->alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+ if (IS_ERR_OR_NULL(reg->alloc))
+ goto no_alloc_obj;
+
+ /* No pages to map yet */
+ reg->alloc->nents = 0;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */
+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */
+ reg->flags |= KBASE_REG_GPU_CACHED;
+
+ if (*flags & BASE_MEM_PROT_CPU_WR)
+ reg->flags |= KBASE_REG_CPU_WR;
+
+ if (*flags & BASE_MEM_PROT_CPU_RD)
+ reg->flags |= KBASE_REG_CPU_RD;
+
+ if (*flags & BASE_MEM_PROT_GPU_WR)
+ reg->flags |= KBASE_REG_GPU_WR;
+
+ if (*flags & BASE_MEM_PROT_GPU_RD)
+ reg->flags |= KBASE_REG_GPU_RD;
+
+ /* no read or write permission is given at import time; only at run time do we grant the right permissions */
+
+ reg->alloc->type = BASE_TMEM_IMPORT_TYPE_UMM;
+ reg->alloc->imported.umm.sgt = NULL;
+ reg->alloc->imported.umm.dma_buf = dma_buf;
+ reg->alloc->imported.umm.dma_attachment = dma_attachment;
+ reg->alloc->imported.umm.current_mapping_usage_count = 0;
+ reg->extent = 0;
+
+ return reg;
+
+no_alloc_obj:
+ kfree(reg);
+no_region:
+bad_size:
+ dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+ dma_buf_put(dma_buf);
+no_buf:
+ return NULL;
+}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+ u64 nents, struct base_mem_aliasing_info *ai,
+ u64 *num_pages)
+{
+ struct kbase_va_region *reg;
+ u64 gpu_va;
+ size_t i;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(flags);
+ KBASE_DEBUG_ASSERT(ai);
+ KBASE_DEBUG_ASSERT(num_pages);
+
+ /* mask to only allowed flags */
+ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+ BASE_MEM_HINT_GPU_RD | BASE_MEM_HINT_GPU_WR |
+ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL);
+
+ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mem_alias called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+
+ if (!stride)
+ goto bad_stride;
+
+ if (!nents)
+ goto bad_nents;
+
+ if ((nents * stride) > (UINT64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ /* calculate the number of pages this alias will cover */
+ *num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* 64-bit tasks must MMAP anyway, but we don't expose this address
+ * to clients */
+ *flags |= BASE_MEM_NEED_MMAP;
+ reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
+ } else {
+#else
+ if (1) {
+#endif
+ reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
+ }
+
+ if (!reg)
+ goto no_reg;
+
+ /* zero-sized page array, as we don't need one (and can't support one) */
+ reg->alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+ if (IS_ERR_OR_NULL(reg->alloc))
+ goto no_alloc_obj;
+
+ kbase_update_region_flags(reg, *flags);
+
+ reg->alloc->imported.alias.nents = nents;
+ reg->alloc->imported.alias.stride = stride;
+ reg->alloc->imported.alias.aliased = vzalloc(sizeof(*reg->alloc->imported.alias.aliased) * nents);
+ if (!reg->alloc->imported.alias.aliased)
+ goto no_aliased_array;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* validate and add src handles */
+ for (i = 0; i < nents; i++) {
+ if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+ if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE)
+ goto bad_handle; /* unsupported magic handle */
+ if (!ai[i].length)
+ goto bad_handle; /* must be > 0 */
+ if (ai[i].length > stride)
+ goto bad_handle; /* can't be larger than the
+ stride */
+ reg->alloc->imported.alias.aliased[i].length = ai[i].length;
+ } else {
+ struct kbase_va_region *aliasing_reg;
+ struct kbase_mem_phy_alloc *alloc;
+
+ aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+ /* validate found region */
+ if (!aliasing_reg)
+ goto bad_handle; /* Not found */
+ if (aliasing_reg->flags & KBASE_REG_FREE)
+ goto bad_handle; /* Free region */
+ if (!aliasing_reg->alloc)
+ goto bad_handle; /* No alloc */
+ if (aliasing_reg->alloc->type != KBASE_MEM_TYPE_NATIVE)
+ goto bad_handle; /* Not a native alloc */
+
+ /* check size against stride */
+ if (!ai[i].length)
+ goto bad_handle; /* must be > 0 */
+ if (ai[i].length > stride)
+ goto bad_handle; /* can't be larger than the
+ stride */
+
+ alloc = aliasing_reg->alloc;
+
+ /* check against the alloc's size */
+ if (ai[i].offset > alloc->nents)
+ goto bad_handle; /* beyond end */
+ if (ai[i].offset + ai[i].length > alloc->nents)
+ goto bad_handle; /* beyond end */
+
+ reg->alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+ reg->alloc->imported.alias.aliased[i].length = ai[i].length;
+ reg->alloc->imported.alias.aliased[i].offset = ai[i].offset;
+ }
+ }
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* Bind to a cookie */
+ if (!kctx->cookies) {
+ dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+ goto no_cookie;
+ }
+ /* return a cookie */
+ gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << gpu_va);
+ BUG_ON(kctx->pending_regions[gpu_va]);
+ kctx->pending_regions[gpu_va] = reg;
+
+ /* relocate to correct base */
+ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ gpu_va <<= PAGE_SHIFT;
+ } else /* we control the VA */ {
+#else
+ if (1) {
+#endif
+ if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0,
+ *num_pages, 1)) {
+ dev_warn(kctx->kbdev->dev,
+ "Failed to map memory on GPU");
+ goto no_mmap;
+ }
+ /* return real GPU VA */
+ gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags &= ~KBASE_REG_GROWABLE;
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+ kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+ kbase_mem_phy_alloc_put(reg->alloc);
+no_alloc_obj:
+ kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+ return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, mali_addr64 *gpu_va, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(gpu_va);
+ KBASE_DEBUG_ASSERT(va_pages);
+ KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat)
+ *flags |= BASE_MEM_SAME_VA;
+#endif
+
+ switch (type) {
+#ifdef CONFIG_UMP
+ case BASE_MEM_IMPORT_TYPE_UMP:
+ reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags);
+ break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case BASE_MEM_IMPORT_TYPE_UMM:
+ reg = kbase_mem_from_umm(kctx, handle, va_pages, flags);
+ break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ default:
+ reg = NULL;
+ break;
+ }
+
+ if (!reg)
+ goto no_reg;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* mmap needed to setup VA? */
+ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+ /* Bind to a cookie */
+ if (!kctx->cookies)
+ goto no_cookie;
+ /* return a cookie */
+ *gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << *gpu_va);
+ BUG_ON(kctx->pending_regions[*gpu_va]);
+ kctx->pending_regions[*gpu_va] = reg;
+
+ /* relocate to correct base */
+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ *gpu_va <<= PAGE_SHIFT;
+
+ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
+ /* we control the VA, mmap now to the GPU */
+ if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1))
+ goto no_gpu_va;
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ } else {
+ /* we control the VA, but nothing to mmap yet */
+ if (MALI_ERROR_NONE != kbase_add_va_region(kctx, reg, 0, *va_pages, 1))
+ goto no_gpu_va;
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ /* clear out private flags */
+ *flags &= ((1UL << BASE_MEM_FLAGS_NR_TOTAL_BITS) - 1);
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return 0;
+
+no_gpu_va:
+no_cookie:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mem_phy_alloc_put(reg->alloc);
+ kfree(reg);
+no_reg:
+ *gpu_va = 0;
+ *va_pages = 0;
+ *flags = 0;
+ return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+ const struct vm_operations_struct *vm_ops,
+ unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+ int err = -EINVAL; /* in case end < start */
+
+ while (start < end) {
+ unsigned long local_end;
+
+ vma = find_vma_intersection(mm, start, end);
+ if (!vma)
+ break;
+
+ /* is it ours? */
+ if (vma->vm_ops != vm_ops)
+ goto try_next;
+
+ local_end = vma->vm_end;
+
+ if (end < local_end)
+ local_end = end;
+
+ err = zap_vma_ptes(vma, start, local_end - start);
+ if (unlikely(err))
+ break;
+
+try_next:
+ /* go to next vma, if any */
+ start = vma->vm_end;
+ }
+
+ return err;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, mali_addr64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+ u64 old_pages;
+ u64 delta;
+ int res = -EINVAL;
+ struct kbase_va_region *reg;
+ phys_addr_t *phy_pages;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(failure_reason);
+ KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+ down_read(&current->mm->mmap_sem);
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+ goto out_unlock;
+ }
+
+ KBASE_DEBUG_ASSERT(reg->alloc);
+
+ if (reg->alloc->type != KBASE_MEM_TYPE_NATIVE) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (new_pages > reg->nr_pages) {
+ /* Would overflow the VA region */
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+ goto out_unlock;
+ }
+
+ /* can't be mapped more than once on the GPU */
+ if (atomic_read(&reg->alloc->gpu_mappings) > 1) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (new_pages == reg->alloc->nents) {
+ /* no change */
+ res = 0;
+ goto out_unlock;
+ }
+
+ phy_pages = kbase_get_phy_pages(reg);
+ old_pages = kbase_reg_current_backed_size(reg);
+
+ if (new_pages > old_pages) {
+ /* growing */
+ mali_error err;
+ delta = new_pages - old_pages;
+ /* Allocate some more pages */
+ if (MALI_ERROR_NONE != kbase_alloc_phy_pages_helper(reg->alloc, delta)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, phy_pages + old_pages, delta, reg->flags);
+ if (MALI_ERROR_NONE != err) {
+ kbase_free_phy_pages_helper(reg->alloc, delta);
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+ } else {
+ /* shrinking */
+ struct kbase_cpu_mapping *mapping;
+ mali_error err;
+
+ /* first, unmap from any mappings affected */
+ list_for_each_entry(mapping, &reg->alloc->mappings, mappings_list) {
+ unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT;
+
+ /* is this mapping affected? */
+ if ((mapping->page_off + mapping_size) > new_pages) {
+ unsigned long first_bad = 0;
+ int zap_res;
+
+ if (new_pages > mapping->page_off)
+ first_bad = new_pages - mapping->page_off;
+
+ zap_res = zap_range_nolock(current->mm,
+ &kbase_vm_ops,
+ mapping->vm_start +
+ (first_bad << PAGE_SHIFT),
+ mapping->vm_end);
+ WARN(zap_res,
+ "Failed to zap VA range (0x%lx - 0x%lx);\n",
+ mapping->vm_start +
+ (first_bad << PAGE_SHIFT),
+ mapping->vm_end
+ );
+ }
+ }
+
+ /* Free some pages */
+ delta = old_pages - new_pages;
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, delta);
+ if (MALI_ERROR_NONE != err) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+ /* Wait for GPU to flush write buffer before freeing physical pages */
+ kbase_wait_write_flush(kctx);
+ }
+
+ kbase_free_phy_pages_helper(reg->alloc, delta);
+ }
+
+ res = 0;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ up_read(&current->mm->mmap_sem);
+
+ return res;
+}
+
+STATIC void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+ /* non-atomic as we're under Linux' mm lock */
+ map->count++;
+}
+
+STATIC void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+
+ /* non-atomic as we're under Linux' mm lock */
+ if (--map->count)
+ return;
+
+ KBASE_DEBUG_ASSERT(map->kctx);
+ KBASE_DEBUG_ASSERT(map->alloc);
+
+ kbase_gpu_vm_lock(map->kctx);
+
+ if (map->region) {
+ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA);
+ kbase_mem_free_region(map->kctx, map->region);
+ }
+
+ list_del(&map->mappings_list);
+
+ kbase_gpu_vm_unlock(map->kctx);
+
+ kbase_mem_phy_alloc_put(map->alloc);
+ kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close)
+
+
+STATIC int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+ pgoff_t rel_pgoff;
+ size_t i;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+ KBASE_DEBUG_ASSERT(map->kctx);
+ KBASE_DEBUG_ASSERT(map->alloc);
+
+ /* we don't use vmf->pgoff as it's affected by our mmap, where the
+ * offset is a GPU VA or a cookie rather than a plain file offset */
+ rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+ >> PAGE_SHIFT;
+
+ kbase_gpu_vm_lock(map->kctx);
+ if (map->page_off + rel_pgoff >= map->alloc->nents)
+ goto locked_bad_fault;
+
+ /* insert all valid pages from the fault location */
+ for (i = rel_pgoff;
+ i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+ map->alloc->nents - map->page_off); i++) {
+ int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+ PFN_DOWN(map->alloc->pages[map->page_off + i]));
+ if (ret < 0 && ret != -EBUSY)
+ goto locked_bad_fault;
+ }
+
+ kbase_gpu_vm_unlock(map->kctx);
+ /* we resolved it, nothing for VM to do */
+ return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+ kbase_gpu_vm_unlock(map->kctx);
+ send_sig(SIGSEGV, current, 1);
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct kbase_vm_ops = {
+ .open = kbase_cpu_vm_open,
+ .close = kbase_cpu_vm_close,
+ .fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+ struct kbase_cpu_mapping *map;
+ u64 start_off = vma->vm_pgoff - reg->start_pfn;
+ phys_addr_t *page_array;
+ int err = 0;
+ int i;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+ if (!map) {
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+ * VM_DONTEXPAND - disable mremap on this region
+ * VM_IO - disables paging
+ * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+ * This is needed to support using the dedicated and
+ * the OS based memory backends together.
+ */
+ /*
+ * This will need updating to propagate coherency flags
+ * See MIDBASE-1057
+ */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+ vma->vm_ops = &kbase_vm_ops;
+ vma->vm_private_data = map;
+
+ page_array = kbase_get_phy_pages(reg);
+
+ if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+ /* We can't map vmalloc'd memory uncached.
+ * Other memory will have been returned from
+ * kbase_mem_allocator_alloc which would be
+ * suitable for mapping uncached.
+ */
+ BUG_ON(kaddr);
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ if (!kaddr) {
+ vma->vm_flags |= VM_PFNMAP;
+ for (i = 0; i < nr_pages; i++) {
+ err = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT), page_array[i + start_off] >> PAGE_SHIFT);
+ if (WARN_ON(err))
+ break;
+ }
+ } else {
+ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+ vma->vm_flags |= VM_MIXEDMAP;
+ /* vmalloc remapping is easy... */
+ err = remap_vmalloc_range(vma, kaddr, 0);
+ WARN_ON(err);
+ }
+
+ if (err) {
+ kfree(map);
+ goto out;
+ }
+
+ map->page_off = start_off;
+ map->region = free_on_close ? reg : NULL;
+ map->kctx = reg->kctx;
+ map->vm_start = vma->vm_start + aligned_offset;
+ if (aligned_offset) {
+ KBASE_DEBUG_ASSERT(!start_off);
+ map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+ } else {
+ map->vm_end = vma->vm_end;
+ }
+ map->alloc = kbase_mem_phy_alloc_get(reg->alloc);
+ map->count = 1; /* start with one ref */
+
+ if (reg->flags & KBASE_REG_CPU_CACHED)
+ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+ list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+ return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+ struct kbase_va_region *new_reg;
+ u32 nr_pages;
+ size_t size;
+ int err = 0;
+ u32 *tb;
+ int owns_tb = 1;
+
+ dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+ size = (vma->vm_end - vma->vm_start);
+ nr_pages = size >> PAGE_SHIFT;
+
+ if (!kctx->jctx.tb) {
+ KBASE_DEBUG_ASSERT(0 != size);
+ tb = vmalloc_user(size);
+
+ if (NULL == tb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbase_device_trace_buffer_install(kctx, tb, size);
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+
+ *kaddr = kctx->jctx.tb;
+
+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ if (!new_reg) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_no_region;
+ }
+
+ new_reg->alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+ if (IS_ERR_OR_NULL(new_reg->alloc)) {
+ err = -ENOMEM;
+ new_reg->alloc = NULL;
+ WARN_ON(1);
+ goto out_no_alloc;
+ }
+
+ new_reg->alloc->imported.kctx = kctx;
+ new_reg->flags &= ~KBASE_REG_FREE;
+ new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+ /* alloc now owns the tb */
+ owns_tb = 0;
+
+ if (MALI_ERROR_NONE != kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1)) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_no_va_region;
+ }
+
+ *reg = new_reg;
+
+ /* map read only, noexec */
+ vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+ /* the rest of the flags are added by the cpu_mmap handler */
+
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+ return 0;
+
+out_no_va_region:
+out_no_alloc:
+ kbase_free_alloced_region(new_reg);
+out_no_region:
+ if (owns_tb) {
+ kbase_device_trace_buffer_uninstall(kctx);
+ vfree(tb);
+ }
+out:
+ return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+ struct kbase_va_region *new_reg;
+ void *kaddr;
+ u32 nr_pages;
+ size_t size;
+ int err = 0;
+
+ dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+ size = (vma->vm_end - vma->vm_start);
+ nr_pages = size >> PAGE_SHIFT;
+
+ kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+ if (!kaddr) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ if (!new_reg) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out;
+ }
+
+ new_reg->alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+ if (IS_ERR_OR_NULL(new_reg->alloc)) {
+ err = -ENOMEM;
+ new_reg->alloc = NULL;
+ WARN_ON(1);
+ goto out_no_alloc;
+ }
+
+ new_reg->flags &= ~KBASE_REG_FREE;
+ new_reg->flags |= KBASE_REG_CPU_CACHED;
+ if (MALI_ERROR_NONE != kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1)) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_va_region;
+ }
+
+ *kmap_addr = kaddr;
+ *reg = new_reg;
+
+ dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+ return 0;
+
+out_no_alloc:
+out_va_region:
+ kbase_free_alloced_region(new_reg);
+out:
+ return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+ struct mm_struct *mm = current->mm;
+ (void)kctx;
+ down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+ struct mm_struct *mm = current->mm;
+ (void)kctx;
+ up_read(&mm->mmap_sem);
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx = file->private_data;
+ struct kbase_va_region *reg;
+ void *kaddr = NULL;
+ size_t nr_pages;
+ int err = 0;
+ int free_on_close = 0;
+ struct device *dev = kctx->kbdev->dev;
+ size_t aligned_offset = 0;
+
+ dev_dbg(dev, "kbase_mmap\n");
+ nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+ /* strip away the VM_MAY* flags corresponding to the VM_* flags requested */
+ vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+ if (0 == nr_pages) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!(vma->vm_flags & VM_SHARED)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+ /* The non-mapped tracking helper page */
+ err = kbase_tracking_page_setup(kctx, vma);
+ goto out_unlock;
+ }
+
+ /* if not the MTP, verify that the MTP has been mapped */
+ rcu_read_lock();
+ /* catches both the case where the special page isn't present
+ * and the case where we've forked */
+ if (rcu_dereference(kctx->process_mm) != current->mm) {
+ err = -EINVAL;
+ rcu_read_unlock();
+ goto out_unlock;
+ }
+ rcu_read_unlock();
+
+ switch (vma->vm_pgoff) {
+ case PFN_DOWN(BASE_MEM_INVALID_HANDLE):
+ case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE):
+ /* Illegal handle for direct map */
+ err = -EINVAL;
+ goto out_unlock;
+ case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+ err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+ if (0 != err)
+ goto out_unlock;
+ dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+ /* MMU dump */
+ err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+ if (0 != err)
+ goto out_unlock;
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+ /* SAME_VA stuff, fetch the right region */
+ int gpu_pc_bits;
+ int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ reg = kctx->pending_regions[cookie];
+ if (NULL != reg) {
+ if (reg->flags & KBASE_REG_ALIGNED) {
+ /* nr_pages must be able to hold alignment pages
+ * plus actual pages */
+ if (nr_pages != ((1UL << gpu_pc_bits >>
+ PAGE_SHIFT) +
+ reg->nr_pages)) {
+ /* incorrect mmap size */
+ /* leave the cookie for a potential
+ * later mapping, or to be reclaimed
+ * later when the context is freed */
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ aligned_offset = (vma->vm_start +
+ (1UL << gpu_pc_bits) - 1) &
+ ~((1UL << gpu_pc_bits) - 1);
+ aligned_offset -= vma->vm_start;
+ } else if (reg->nr_pages != nr_pages) {
+ /* incorrect mmap size */
+ /* leave the cookie for a potential later
+ * mapping, or to be reclaimed later when the
+ * context is freed */
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ if ((vma->vm_flags & VM_READ &&
+ !(reg->flags & KBASE_REG_CPU_RD)) ||
+ (vma->vm_flags & VM_WRITE &&
+ !(reg->flags & KBASE_REG_CPU_WR))) {
+ /* VM flags inconsistent with region flags */
+ err = -EPERM;
+ dev_err(dev, "%s:%d inconsistent VM flags\n",
+ __FILE__, __LINE__);
+ goto out_unlock;
+ }
+
+ /* adjust down nr_pages to what we have physically */
+ nr_pages = kbase_reg_current_backed_size(reg);
+
+ if (MALI_ERROR_NONE != kbase_gpu_mmap(kctx, reg,
+ vma->vm_start +
+ aligned_offset,
+ reg->nr_pages,
+ 1)) {
+ dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+ /* Unable to map in GPU space. */
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* no need for the cookie anymore */
+ kctx->pending_regions[cookie] = NULL;
+ kctx->cookies |= (1UL << cookie);
+
+ /*
+ * Overwrite the offset with the
+ * region start_pfn, so we effectively
+ * map from offset 0 in the region.
+ */
+ vma->vm_pgoff = reg->start_pfn;
+
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ }
+
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ default: {
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+ if (reg && !(reg->flags & KBASE_REG_FREE)) {
+ /* will this mapping overflow the size of the region? */
+ if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+ goto overflow;
+
+ if ((vma->vm_flags & VM_READ &&
+ !(reg->flags & KBASE_REG_CPU_RD)) ||
+ (vma->vm_flags & VM_WRITE &&
+ !(reg->flags & KBASE_REG_CPU_WR))) {
+ /* VM flags inconsistent with region flags */
+ err = -EPERM;
+ dev_err(dev, "%s:%d inconsistent VM flags\n",
+ __FILE__, __LINE__);
+ goto out_unlock;
+ }
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (reg->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+ goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+ /* limit what we map to the amount currently backed */
+ if (reg->alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+ if ((vma->vm_pgoff - reg->start_pfn) >= reg->alloc->nents)
+ nr_pages = 0;
+ else
+ nr_pages = reg->alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+ }
+
+ goto map;
+ }
+
+overflow:
+ err = -ENOMEM;
+ goto out_unlock;
+ } /* default */
+ } /* switch */
+map:
+ err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+ /* MMU dump - userspace should now have a reference on
+ * the pages, so we can now free the kernel mapping */
+ vfree(kaddr);
+ }
+ goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+ err = dma_buf_mmap(reg->alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ if (err)
+ dev_err(dev, "mmap failed %d\n", err);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap)
+
+void *kbase_vmap(struct kbase_context *kctx, mali_addr64 gpu_addr, size_t size,
+ struct kbase_vmap_struct *map)
+{
+ struct kbase_va_region *reg;
+ unsigned long page_index;
+ unsigned int offset = gpu_addr & ~PAGE_MASK;
+ size_t page_count = PFN_UP(offset + size);
+ phys_addr_t *page_array;
+ struct page **pages;
+ void *cpu_addr = NULL;
+ pgprot_t prot;
+ size_t i;
+
+ if (!size || !map)
+ return NULL;
+
+ /* check if page_count calculation will wrap */
+ if (size > ((size_t)-1 / PAGE_SIZE))
+ return NULL;
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+ /* check if page_index + page_count will wrap */
+ if (-1UL - page_count < page_index)
+ goto out_unlock;
+
+ if (page_index + page_count > kbase_reg_current_backed_size(reg))
+ goto out_unlock;
+
+ page_array = kbase_get_phy_pages(reg);
+ if (!page_array)
+ goto out_unlock;
+
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_unlock;
+
+ for (i = 0; i < page_count; i++)
+ pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+ prot = PAGE_KERNEL;
+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+ /* Map uncached */
+ prot = pgprot_writecombine(prot);
+ }
+
+ cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+ kfree(pages);
+
+ if (!cpu_addr)
+ goto out_unlock;
+
+ map->gpu_addr = gpu_addr;
+ map->alloc = kbase_mem_phy_alloc_get(reg->alloc);
+ map->pages = &kbase_get_phy_pages(reg)[page_index];
+ map->addr = (void *)((uintptr_t)cpu_addr + offset);
+ map->size = size;
+ map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+
+ if (map->is_cached) {
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t pa = map->pages[0] + offset;
+
+ kbase_sync_single(kctx, pa, sz, dma_sync_single_for_cpu);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ pa = map->pages[i];
+ kbase_sync_single(kctx, pa, PAGE_SIZE, dma_sync_single_for_cpu);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ pa = map->pages[page_count - 1];
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, pa, sz, dma_sync_single_for_cpu);
+ }
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return map->addr;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return NULL;
+}
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+
+ vunmap(addr);
+
+ if (map->is_cached) {
+ off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+ size_t size = map->size;
+ size_t page_count = PFN_UP(offset + size);
+ size_t i;
+
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t pa = map->pages[0] + offset;
+
+ kbase_sync_single(kctx, pa, sz, dma_sync_single_for_device);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ pa = map->pages[i];
+ kbase_sync_single(kctx, pa, PAGE_SIZE, dma_sync_single_for_device);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ pa = map->pages[page_count - 1];
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, pa, sz, dma_sync_single_for_device);
+ }
+ }
+
+ map->gpu_addr = 0;
+ map->alloc = kbase_mem_phy_alloc_put(map->alloc);
+ map->pages = NULL;
+ map->addr = NULL;
+ map->size = 0;
+ map->is_cached = false;
+}
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+ struct mm_struct *mm;
+
+ rcu_read_lock();
+ mm = rcu_dereference(kctx->process_mm);
+ if (mm) {
+ atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_FILEPAGES, pages);
+ spin_unlock(&mm->page_table_lock);
+#endif
+ }
+ rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+ int pages;
+ struct mm_struct *mm;
+
+ spin_lock(&kctx->mm_update_lock);
+ mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+ if (!mm) {
+ spin_unlock(&kctx->mm_update_lock);
+ return;
+ }
+
+ rcu_assign_pointer(kctx->process_mm, NULL);
+ spin_unlock(&kctx->mm_update_lock);
+ synchronize_rcu();
+
+ pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_FILEPAGES, -pages);
+ spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx;
+
+ kctx = vma->vm_private_data;
+ kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+ .close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+ /* check that this is the only tracking page */
+ spin_lock(&kctx->mm_update_lock);
+ if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+ spin_unlock(&kctx->mm_update_lock);
+ return -EFAULT;
+ }
+
+ rcu_assign_pointer(kctx->process_mm, current->mm);
+
+ spin_unlock(&kctx->mm_update_lock);
+
+ /* no real access */
+ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+ vma->vm_ops = &kbase_vm_special_ops;
+ vma->vm_private_data = kctx;
+
+ return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+ int i;
+ int res;
+ void *va;
+ dma_addr_t dma_pa;
+ struct kbase_va_region *reg;
+ phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+#endif
+
+ u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+ u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+ BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(0 != size);
+ KBASE_DEBUG_ASSERT(0 != pages);
+
+ if (size == 0)
+ goto err;
+
+ /* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
+#else
+ va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+ if (!va)
+ goto err;
+
+ /* Store the state so we can free it later. */
+ handle->cpu_va = va;
+ handle->dma_pa = dma_pa;
+ handle->size = size;
+
+
+ reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+ if (!reg)
+ goto no_reg;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ kbase_update_region_flags(reg, flags);
+
+ reg->alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+ if (IS_ERR_OR_NULL(reg->alloc))
+ goto no_alloc;
+
+ page_array = kbase_get_phy_pages(reg);
+
+ for (i = 0; i < pages; i++)
+ page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+ reg->alloc->nents = pages;
+
+ kbase_gpu_vm_lock(kctx);
+ res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+ kbase_gpu_vm_unlock(kctx);
+ if (res)
+ goto no_mmap;
+
+ return va;
+
+no_mmap:
+ kbase_mem_phy_alloc_put(reg->alloc);
+no_alloc:
+ kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+ dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+ struct kbase_va_region *reg;
+ mali_error err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+#endif
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+ KBASE_DEBUG_ASSERT(reg);
+ err = kbase_gpu_munmap(kctx, reg);
+ kbase_gpu_vm_unlock(kctx);
+ KBASE_DEBUG_ASSERT(err == MALI_ERROR_NONE);
+
+ kbase_mem_phy_alloc_put(reg->alloc);
+ kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(kctx->kbdev->dev, handle->size,
+ handle->cpu_va, handle->dma_pa, &attrs);
+#else
+ dma_free_writecombine(kctx->kbdev->dev, handle->size,
+ handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100755
index 000000000000..6bd1d0d86d06
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+typedef struct kbase_hwc_dma_mapping {
+ void *cpu_va;
+ dma_addr_t dma_pa;
+ size_t size;
+} kbase_hwc_dma_mapping;
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+mali_error kbase_mem_query(struct kbase_context *kctx, mali_addr64 gpu_addr, int query, u64 * const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, mali_addr64 *gpu_va, u64 *va_pages, u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+mali_error kbase_mem_flags_change(struct kbase_context *kctx, mali_addr64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, mali_addr64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+struct kbase_vmap_struct {
+ mali_addr64 gpu_addr;
+ struct kbase_mem_phy_alloc *alloc;
+ phys_addr_t *pages;
+ void *addr;
+ size_t size;
+ bool is_cached;
+};
+void *kbase_vmap(struct kbase_context *kctx, mali_addr64 gpu_addr, size_t size,
+ struct kbase_vmap_struct *map);
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
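+
+/* Illustrative usage sketch for kbase_vmap()/kbase_vunmap(): map a small,
+ * committed GPU allocation into kernel space, write through the CPU pointer,
+ * then unmap. This is a hedged example only; kctx and gpu_addr are assumed to
+ * come from the caller and are not defined here.
+ *
+ *	struct kbase_vmap_struct map;
+ *	u32 *cpu_ptr;
+ *
+ *	cpu_ptr = kbase_vmap(kctx, gpu_addr, sizeof(u32), &map);
+ *	if (!cpu_ptr)
+ *		return -ENOMEM;
+ *	*cpu_ptr = 0xdeadbeefu;
+ *	kbase_vunmap(kctx, &map);
+ */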
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx The context used for the allocation/mapping
+ * @param size The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA used by both kernel space and the GPU MMU, or NULL on failure
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx The context used for the allocation/mapping
+ * @param handle An opaque structure filled in by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
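+
+/* Illustrative usage sketch for kbase_va_alloc()/kbase_va_free() (hedged
+ * example; kctx and dump_size are assumed to come from the caller):
+ *
+ *	struct kbase_hwc_dma_mapping handle;
+ *	void *cpu_va;
+ *
+ *	cpu_va = kbase_va_alloc(kctx, dump_size, &handle);
+ *	if (!cpu_va)
+ *		return -ENOMEM;
+ *	... use cpu_va; the same address is also mapped on the GPU ...
+ *	kbase_va_free(kctx, &handle);
+ */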
+
+#endif /* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100755
index 000000000000..2b3985ea4fca
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /**< Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /**< Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /**< Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /**< Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+/**
+ * A pointer to a cache synchronization function, either dma_sync_single_for_cpu
+ * or dma_sync_single_for_device.
+ */
+typedef void (*kbase_sync_kmem_fn) (struct device *, dma_addr_t, size_t size,
+ enum dma_data_direction);
+
+
+#endif /* _KBASE_MEM_LOWLEVEL_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100755
index 000000000000..b7d709e8696d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,137 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/* mem_profile file name max length 22 based on format <int>_<int>\0 */
+#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1)
+
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size)
+{
+ spin_lock(&kctx->mem_profile_lock);
+ kfree(kctx->mem_profile_data);
+ kctx->mem_profile_data = data;
+ kctx->mem_profile_size = size;
+ spin_unlock(&kctx->mem_profile_lock);
+}
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if the data was successfully printed to the debugfs entry file,
+ *         -1 if an error was encountered
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *kctx = sfile->private;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock(&kctx->mem_profile_lock);
+ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+ seq_putc(sfile, '\n');
+ spin_unlock(&kctx->mem_profile_lock);
+
+ return 0;
+}
+
+/*
+ * File operations related to debugfs entry for mem_profile
+ */
+STATIC int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+ .open = kbasep_mem_profile_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+mali_error kbasep_mem_profile_debugfs_add(struct kbase_context *kctx)
+{
+ char name[KBASEP_DEBUGFS_FNAME_SIZE_MAX];
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock_init(&kctx->mem_profile_lock);
+
+ scnprintf(name, KBASEP_DEBUGFS_FNAME_SIZE_MAX, "%d_%d", kctx->pid,
+ kctx->id);
+
+ kctx->mem_dentry = debugfs_create_file(name, S_IRUGO,
+ kctx->kbdev->memory_profile_directory,
+ kctx, &kbasep_mem_profile_debugfs_fops);
+ if (IS_ERR(kctx->mem_dentry))
+ goto error_out;
+
+ return MALI_ERROR_NONE;
+
+error_out:
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock(&kctx->mem_profile_lock);
+ kfree(kctx->mem_profile_data);
+ kctx->mem_profile_data = NULL;
+ spin_unlock(&kctx->mem_profile_lock);
+
+ if (IS_ERR(kctx->mem_dentry))
+ return;
+ debugfs_remove(kctx->mem_dentry);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/**
+ * @brief Stub function for when debugfs is disabled
+ */
+mali_error kbasep_mem_profile_debugfs_add(struct kbase_context *ctx)
+{
+ return MALI_ERROR_NONE;
+}
+
+/**
+ * @brief Stub function for when debugfs is disabled
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *ctx)
+{
+}
+
+/**
+ * @brief Stub function for when debugfs is disabled
+ */
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size)
+{
+ kfree(data);
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100755
index 000000000000..ef908c842475
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Add new entry to Mali memory profile debugfs
+ */
+mali_error kbasep_mem_profile_debugfs_add(struct kbase_context *kctx);
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert data into the debugfs file, so it can be read by userspace
+ *
+ * The function takes ownership of @c data and frees it later, when new data
+ * is inserted.
+ *
+ * @param kctx Context to which the file data should be inserted
+ * @param data NULL-terminated string to be inserted into the mem_profile file,
+ *             without a trailing newline character
+ * @param size Length of @c data
+ */
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size);
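+
+/* Illustrative usage sketch (hedged example; user_buf and len are assumed to
+ * come from a hypothetical ioctl/write handler receiving the profile string
+ * from userspace):
+ *
+ *	char *buf = kmalloc(len + 1, GFP_KERNEL);
+ *
+ *	if (!buf)
+ *		return -ENOMEM;
+ *	if (copy_from_user(buf, user_buf, len)) {
+ *		kfree(buf);
+ *		return -EFAULT;
+ *	}
+ *	buf[len] = '\0';
+ *	kbasep_mem_profile_debugfs_insert(kctx, buf, len);
+ *	... buf is now owned by kctx and must not be freed by the caller ...
+ */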
+
+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100755
index 000000000000..176392115997
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file defining the size of the buffer used to accumulate the histogram report text
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer used to accumulate the histogram report text
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 15) + 56))
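+
+/* For reference, the expression above expands to
+ * 64 + (80 + 56 * 64) * 15 + 56 = 64 + 3664 * 15 + 56 = 55080 bytes.
+ */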
+
+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100755
index 000000000000..8e6c2c93beb5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,1661 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG 1 */
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ * level of translation
+ * - ATE: Address Translation Entry. A 64bit value pointing to
+ * a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+ struct kbase_as *as, const char *reason_str);
+
+
+/* Helper function to perform assignment of page table entries, to ensure the use of
+ * strd, which is required on LPAE systems.
+ */
+
+static inline void page_table_entry_set(struct kbase_device *kbdev, u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+ *pte = phy;
+#elif defined(CONFIG_ARM)
+ /*
+ *
+ * In order to prevent the compiler from keeping cached copies of memory, we have
+ * to explicitly say that we have updated memory.
+ *
+ * Note: We could manually move the data into R0 and R1 by declaring register
+ * variables that are explicitly assigned to registers; the downside of this is
+ * that we would have to assume the CPU endianness. To avoid this we use ldrd to
+ * read the data from memory into R0 and R1, which respects the CPU endianness,
+ * and then use strd to make the 64-bit assignment to the page table entry.
+ *
+ */
+
+ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+ "strd r0, r1, [%[pte]]\n\t"
+ : "=m" (*pte)
+ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+ : "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+ size_t remainder = minimum % multiple;
+ if (remainder == 0)
+ return minimum;
+ else
+ return minimum + multiple - remainder;
+}
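For example, if a fault lands 5 pages beyond the region's currently backed size and the region's extent is 4 pages, make_multiple(5, 4) returns 8, so the region is always grown in whole multiples of its extent.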
+
+static void page_fault_worker(struct work_struct *data)
+{
+ u64 fault_pfn;
+ u32 fault_access;
+ size_t new_pages;
+ size_t fault_rel_pfn;
+ struct kbase_as *faulting_as;
+ int as_no;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_va_region *region;
+ mali_error err;
+
+ faulting_as = container_of(data, struct kbase_as, work_pagefault);
+ fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+ as_no = faulting_as->number;
+
+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+ /* Grab the context that was already refcounted in kbase_mmu_interrupt().
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+ *
+ * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+
+ if (kctx == NULL) {
+ /* Only handle this if not already suspended */
+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ struct kbase_mmu_setup *current_setup = &faulting_as->current_setup;
+
+ /* Address space has no context, terminate the work */
+
+ /* AS transaction begin */
+ mutex_lock(&faulting_as->transaction_mutex);
+
+ /* Switch to unmapped mode */
+ current_setup->transtab &= ~(u64)MMU_TRANSTAB_ADRMODE_MASK;
+ current_setup->transtab |= AS_TRANSTAB_ADRMODE_UNMAPPED;
+
+ /* Apply new address space settings */
+ kbase_mmu_hw_configure(kbdev, faulting_as, kctx);
+
+ mutex_unlock(&faulting_as->transaction_mutex);
+ /* AS transaction end */
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_pm_context_idle(kbdev);
+ }
+ return;
+ }
+
+ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+ kbase_gpu_vm_lock(kctx);
+
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, faulting_as->fault_addr);
+ if (NULL == region || region->flags & KBASE_REG_FREE) {
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory is not mapped on the GPU");
+ goto fault_done;
+ }
+
+ fault_access = faulting_as->fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK;
+ if (((fault_access == AS_FAULTSTATUS_ACCESS_TYPE_READ) &&
+ !(region->flags & KBASE_REG_GPU_RD)) ||
+ ((fault_access == AS_FAULTSTATUS_ACCESS_TYPE_WRITE) &&
+ !(region->flags & KBASE_REG_GPU_WR)) ||
+ ((fault_access == AS_FAULTSTATUS_ACCESS_TYPE_EX) &&
+ (region->flags & KBASE_REG_GPU_NX))) {
+ dev_warn(kbdev->dev, "Access permissions don't match: region->flags=0x%lx", region->flags);
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Access permissions mismatch");
+ goto fault_done;
+ }
+
+ if (!(region->flags & GROWABLE_FLAGS_REQUIRED)) {
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory is not growable");
+ goto fault_done;
+ }
+
+ /* find the size we need to grow it by */
+	/* we know the result fits in a size_t because kbase_region_tracker_find_region_enclosing_address
+	 * validates that the fault address is within a size_t of the start_pfn */
+ fault_rel_pfn = fault_pfn - region->start_pfn;
+
+ if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+ dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+ faulting_as->fault_addr, region->start_pfn,
+ region->start_pfn +
+ kbase_reg_current_backed_size(region));
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ /* [1] in case another page fault occurred while we were
+ * handling the (duplicate) page fault we need to ensure we
+	 * don't lose the other page fault as a result of us clearing
+ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+ * an UNLOCK command that will retry any stalled memory
+ * transaction (which should cause the other page fault to be
+ * raised again).
+ */
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, 0,
+ AS_COMMAND_UNLOCK, 1);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+
+ goto fault_done;
+ }
+
+ new_pages = make_multiple(fault_rel_pfn -
+ kbase_reg_current_backed_size(region) + 1,
+ region->extent);
+
+ /* cap to max vsize */
+ if (new_pages + kbase_reg_current_backed_size(region) >
+ region->nr_pages)
+ new_pages = region->nr_pages -
+ kbase_reg_current_backed_size(region);
+
+ if (0 == new_pages) {
+ /* Duplicate of a fault we've already handled, nothing to do */
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ /* See comment [1] about UNLOCK usage */
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, 0,
+ AS_COMMAND_UNLOCK, 1);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+ goto fault_done;
+ }
+
+ if (MALI_ERROR_NONE == kbase_alloc_phy_pages_helper(region->alloc, new_pages)) {
+ u32 op;
+
+ /* alloc success */
+ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+ /* AS transaction begin */
+ mutex_lock(&faulting_as->transaction_mutex);
+
+ /* set up the new pages */
+ err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
+ if (MALI_ERROR_NONE != err) {
+ /* failed to insert pages, handle as a normal PF */
+ mutex_unlock(&faulting_as->transaction_mutex);
+ kbase_free_phy_pages_helper(region->alloc, new_pages);
+ kbase_gpu_vm_unlock(kctx);
+ /* The locked VA region will be unlocked and the cache invalidated in here */
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Page table update failure");
+ goto fault_done;
+ }
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+ op = AS_COMMAND_FLUSH;
+ else
+ op = AS_COMMAND_FLUSH_PT;
+
+ /* clear MMU interrupt - this needs to be done after updating
+ * the page tables but before issuing a FLUSH command. The
+ * FLUSH cmd has a side effect that it restarts stalled memory
+ * transactions in other address spaces which may cause
+ * another fault to occur. If we didn't clear the interrupt at
+ * this stage a new IRQ might not be raised when the GPU finds
+ * a MMU IRQ is already pending.
+ */
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+ faulting_as->fault_addr >> PAGE_SHIFT,
+ new_pages,
+ op, 1);
+
+ mutex_unlock(&faulting_as->transaction_mutex);
+ /* AS transaction end */
+
+ /* reenable this in the mask */
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+ } else {
+ /* failed to extend, handle as a normal PF */
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Page allocation failure");
+ }
+
+fault_done:
+ /*
+ * By this point, the fault was handled in some way,
+ * so release the ctx refcount
+ */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+ phys_addr_t pgd;
+ u64 *page;
+ int i;
+ struct page *p;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbase_atomic_add_pages(1, &kctx->used_pages);
+ kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+ if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd))
+ goto sub_pages;
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page = kmap(p);
+ if (NULL == page)
+ goto alloc_free;
+
+ kbase_process_page_usage_inc(kctx, 1);
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+ page_table_entry_set(kctx->kbdev, &page[i], ENTRY_IS_INVAL);
+
+ /* Clean the full page */
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_TO_DEVICE);
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ return pgd;
+
+alloc_free:
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, MALI_FALSE);
+sub_pages:
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd)
+
+static phys_addr_t mmu_pte_to_phy_addr(u64 entry)
+{
+ if (!(entry & 1))
+ return 0;
+
+ return entry & ~0xFFF;
+}
+
+static u64 mmu_phyaddr_to_pte(phys_addr_t phy)
+{
+ return (phy & ~0xFFF) | ENTRY_IS_PTE;
+}
+
+static u64 mmu_phyaddr_to_ate(phys_addr_t phy, u64 flags)
+{
+ return (phy & ~0xFFF) | (flags & ENTRY_FLAGS_MASK) | ENTRY_IS_ATE;
+}
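Both encodings above keep the 4 kB-aligned physical address in the upper bits of the entry and a small type/flag field in the low 12 bits, which is why the address helpers mask with ~0xFFF and why validity can be tested on the low bits alone.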
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1 */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+ u64 *page;
+ phys_addr_t target_pgd;
+ struct page *p;
+
+ KBASE_DEBUG_ASSERT(pgd);
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /*
+ * Architecture spec defines level-0 as being the top-most.
+ * This is a bit unfortunate here, but we keep the same convention.
+ */
+ vpfn >>= (3 - level) * 9;
+ vpfn &= 0x1FF;
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page = kmap(p);
+ if (NULL == page) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+ return 0;
+ }
+
+ target_pgd = mmu_pte_to_phy_addr(page[vpfn]);
+
+ if (!target_pgd) {
+ target_pgd = kbase_mmu_alloc_pgd(kctx);
+ if (!target_pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+ kunmap(p);
+ return 0;
+ }
+
+ page_table_entry_set(kctx->kbdev, &page[vpfn],
+ mmu_phyaddr_to_pte(target_pgd));
+
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_TO_DEVICE);
+ /* Rely on the caller to update the address space flags. */
+ }
+
+ kunmap(p);
+ return target_pgd;
+}
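To make the shift above concrete: each level consumes 9 bits of the virtual page frame number (512 entries per table, level 0 being the top-most). A minimal sketch of the per-level index extraction, mirroring the arithmetic in mmu_get_next_pgd() (the helper name is illustrative):

/* Illustrative only: 9-bit table index used at a given level (0 = top-most). */
static unsigned int example_pgd_index(u64 vpfn, int level)
{
	return (vpfn >> ((3 - level) * 9)) & 0x1FF;
}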
+
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+ phys_addr_t pgd;
+ int l;
+
+ pgd = kctx->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+ pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
+ /* Handle failure condition */
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+ return 0;
+ }
+ }
+
+ return pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+ u64 *page;
+ phys_addr_t target_pgd;
+
+ KBASE_DEBUG_ASSERT(pgd);
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /*
+ * Architecture spec defines level-0 as being the top-most.
+ * This is a bit unfortunate here, but we keep the same convention.
+ */
+ vpfn >>= (3 - level) * 9;
+ vpfn &= 0x1FF;
+
+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail */
+ KBASE_DEBUG_ASSERT(NULL != page);
+
+ target_pgd = mmu_pte_to_phy_addr(page[vpfn]);
+ /* As we are recovering from what has already been set up, we should have a target_pgd */
+ KBASE_DEBUG_ASSERT(0 != target_pgd);
+
+ kunmap_atomic(page);
+ return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+ phys_addr_t pgd;
+ int l;
+
+ pgd = kctx->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+ pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+ /* Should never fail */
+ KBASE_DEBUG_ASSERT(0 != pgd);
+ }
+
+ return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+ size_t nr)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+ KBASE_DEBUG_ASSERT(0 != pgd);
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+
+ pgd_page = kmap_atomic(p);
+ KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+ /* Invalidate the entries we added */
+ for (i = 0; i < count; i++)
+ page_table_entry_set(kctx->kbdev, &pgd_page[index + i],
+ ENTRY_IS_INVAL);
+
+ vpfn += count;
+ nr -= count;
+
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE, DMA_TO_DEVICE);
+ kunmap_atomic(pgd_page);
+ }
+}
+
+/**
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 kbase_mmu_get_mmu_flags(unsigned long flags)
+{
+ u64 mmu_flags;
+
+ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+ /* write perm if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+ /* read perm if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+ /* nx if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+ if (flags & KBASE_REG_SHARE_BOTH) {
+ /* inner and outer shareable */
+ mmu_flags |= SHARE_BOTH_BITS;
+ } else if (flags & KBASE_REG_SHARE_IN) {
+ /* inner shareable coherency */
+ mmu_flags |= SHARE_INNER_BITS;
+ }
+
+ return mmu_flags;
+}
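For example, a region mapped with KBASE_REG_GPU_RD | KBASE_REG_GPU_WR and inner-shareable coherency ends up with ENTRY_RD_BIT, ENTRY_WR_BIT and SHARE_INNER_BITS set, plus its memory-attribute index placed in bits 4:2 of the entry.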
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+mali_error kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t phys, size_t nr,
+ unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ u64 pte_entry;
+ /* In case the insert_single_page only partially completes we need to be
+ * able to recover */
+ mali_bool recover_required = MALI_FALSE;
+ u64 recover_vpfn = vpfn;
+ size_t recover_count = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /* the one entry we'll populate everywhere */
+ pte_entry = mmu_phyaddr_to_ate(phys, kbase_mmu_get_mmu_flags(flags));
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ /*
+		 * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * suboptimal. We don't have to re-parse the whole tree
+ * each time (just cache the l0-l2 sequence).
+ * On the other hand, it's only a gain when we map more than
+ * 256 pages at once (on average). Do we really care?
+ */
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mmu_insert_pages: "
+ "mmu_get_bottom_pgd failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mmu_insert_pages: "
+ "kmap failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int ofs = index + i;
+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+ page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
+ pte_entry);
+ }
+
+ vpfn += count;
+ nr -= count;
+
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64),
+ DMA_TO_DEVICE);
+
+
+ kunmap(p);
+ /* We have started modifying the page table.
+ * If further pages need inserting and fail we need to undo what
+ * has already taken place */
+ recover_required = MALI_TRUE;
+ recover_count += count;
+ }
+ return MALI_ERROR_NONE;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+mali_error kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t *phys, size_t nr,
+ unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ u64 mmu_flags = 0;
+ /* In case the insert_pages only partially completes we need to be able
+ * to recover */
+ mali_bool recover_required = MALI_FALSE;
+ u64 recover_vpfn = vpfn;
+ size_t recover_count = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_flags = kbase_mmu_get_mmu_flags(flags);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ /*
+		 * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * suboptimal. We don't have to re-parse the whole tree
+ * each time (just cache the l0-l2 sequence).
+ * On the other hand, it's only a gain when we map more than
+ * 256 pages at once (on average). Do we really care?
+ */
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mmu_insert_pages: "
+ "mmu_get_bottom_pgd failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mmu_insert_pages: "
+ "kmap failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int ofs = index + i;
+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+ page_table_entry_set(kctx->kbdev, &pgd_page[ofs],
+ mmu_phyaddr_to_ate(phys[i],
+ mmu_flags)
+ );
+ }
+
+ phys += count;
+ vpfn += count;
+ nr -= count;
+
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64),
+ DMA_TO_DEVICE);
+
+ kunmap(p);
+ /* We have started modifying the page table. If further pages
+ * need inserting and fail we need to undo what has already
+ * taken place */
+ recover_required = MALI_TRUE;
+ recover_count += count;
+ }
+ return MALI_ERROR_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages)
+
+/**
+ * This function is responsible for validating the MMU PTs and
+ * triggering the required flushes.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ struct kbase_device *kbdev;
+ mali_bool ctx_is_in_runpool;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+
+ /* We must flush if we're currently running jobs. At the very least, we need to retain the
+ * context to ensure it doesn't schedule out whilst we're trying to flush it */
+ ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+ if (ctx_is_in_runpool) {
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ /* Second level check is to try to only do this when jobs are running. The refcount is
+ * a heuristic for this. */
+ if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
+ int ret;
+ u32 op;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->as[kctx->as_nr].transaction_mutex);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+ op = AS_COMMAND_FLUSH;
+ else
+ op = AS_COMMAND_FLUSH_MEM;
+
+ ret = kbase_mmu_hw_do_operation(kbdev,
+ &kbdev->as[kctx->as_nr],
+ kctx, vpfn, nr,
+ op, 0);
+#if KBASE_GPU_RESET_EN
+ if (ret) {
+ /* Flush failed to complete, assume the GPU has hung and perform a reset to recover */
+				dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+
+ mutex_unlock(&kbdev->as[kctx->as_nr].transaction_mutex);
+ /* AS transaction end */
+ }
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+}
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused. Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+mali_error kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ struct kbase_device *kbdev;
+ size_t requested_nr = nr;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (0 == nr) {
+ /* early out if nothing to do */
+ return MALI_ERROR_NONE;
+ }
+
+ kbdev = kctx->kbdev;
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+
+ for (i = 0; i < count; i++) {
+ page_table_entry_set(kctx->kbdev, &pgd_page[index + i], ENTRY_IS_INVAL);
+ }
+
+ vpfn += count;
+ nr -= count;
+
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64),
+ DMA_TO_DEVICE);
+
+ kunmap(p);
+ }
+
+ kbase_mmu_flush(kctx, vpfn, requested_nr);
+ return MALI_ERROR_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages)
+
+/**
+ * Update the entries for the specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is triggered in response to a change of the memory attributes.
+ *
+ * @pre The caller is responsible for validating the memory attributes.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+mali_error kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t* phys, size_t nr, unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64* pgd_page;
+ u64 mmu_flags = 0;
+ size_t requested_nr = nr;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ KBASE_DEBUG_ASSERT(vpfn <= (UINT64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_flags = kbase_mmu_get_mmu_flags(flags);
+
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags "\
+ "on GPU PFN 0x%llx from phys %p, %zu pages",
+ vpfn, phys, nr);
+
+
+ while(nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kmap failure\n");
+ return MALI_ERROR_OUT_OF_MEMORY;
+ }
+
+ for (i = 0; i < count; i++) {
+ page_table_entry_set(kctx->kbdev, &pgd_page[index + i], mmu_phyaddr_to_ate(phys[i], mmu_flags));
+ }
+
+ phys += count;
+ vpfn += count;
+ nr -= count;
+
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64),
+ DMA_TO_DEVICE);
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ }
+
+ kbase_mmu_flush(kctx, vpfn, requested_nr);
+
+ return MALI_ERROR_NONE;
+}
+
+static int mmu_pte_is_valid(u64 pte)
+{
+ return ((pte & 3) == ENTRY_IS_ATE);
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+ u64 *page;
+ int i;
+
+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail. */
+ KBASE_DEBUG_ASSERT(NULL != page);
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ if (mmu_pte_is_valid(page[i]))
+ beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+ }
+ kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+ phys_addr_t target_pgd;
+ u64 *pgd_page;
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail. */
+ KBASE_DEBUG_ASSERT(NULL != pgd_page);
+ /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+ kunmap_atomic(pgd_page);
+ pgd_page = pgd_page_buffer;
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ target_pgd = mmu_pte_to_phy_addr(pgd_page[i]);
+
+ if (target_pgd) {
+ if (level < 2) {
+ mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+ } else {
+ /*
+				 * So target_pgd is a level-3 page.
+ * As a leaf, it is safe to free it.
+ * Unless we have live pages attached to it!
+ */
+ mmu_check_unused(kctx, target_pgd);
+ }
+
+ beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+ if (zap) {
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, MALI_TRUE);
+ kbase_process_page_usage_dec(kctx, 1);
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+ }
+ }
+ }
+}
+
+mali_error kbase_mmu_init(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+	/* Preallocate one page per MMU level (4 levels) for mmu_teardown_level() to use */
+ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+ kctx->mem_attrs = (AS_MEMATTR_IMPL_DEF_CACHE_POLICY <<
+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+ (AS_MEMATTR_FORCE_TO_CACHE_ALL <<
+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+ (AS_MEMATTR_WRITE_ALLOC <<
+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+ 0; /* The other indices are unused for now */
+
+ if (NULL == kctx->mmu_teardown_pages)
+ return MALI_ERROR_OUT_OF_MEMORY;
+
+ return MALI_ERROR_NONE;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+ kfree(kctx->mmu_teardown_pages);
+ kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+
+ beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, MALI_TRUE);
+ kbase_process_page_usage_dec(kctx, 1);
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd)
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+ phys_addr_t target_pgd;
+ u64 *pgd_page;
+ int i;
+ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+ size_t dump_size;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+ return 0;
+ }
+
+ if (*size_left >= size) {
+ /* A modified physical address that contains the page table level */
+ u64 m_pgd = pgd | level;
+
+ /* Put the modified physical address in the output buffer */
+ memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+ *buffer += sizeof(m_pgd);
+
+ /* Followed by the page table itself */
+ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+ *size_left -= size;
+ }
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ if ((pgd_page[i] & ENTRY_IS_PTE) == ENTRY_IS_PTE) {
+ target_pgd = mmu_pte_to_phy_addr(pgd_page[i]);
+
+ dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
+ if (!dump_size) {
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ return 0;
+ }
+ size += dump_size;
+ }
+ }
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+ return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+ void *kaddr;
+ size_t size_left;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (0 == nr_pages) {
+ /* can't find in a 0 sized buffer, early out */
+ return NULL;
+ }
+
+ size_left = nr_pages * PAGE_SIZE;
+
+ KBASE_DEBUG_ASSERT(0 != size_left);
+ kaddr = vmalloc_user(size_left);
+
+ if (kaddr) {
+ u64 end_marker = 0xFFULL;
+ char *buffer = (char *)kaddr;
+
+ size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left);
+ if (!size) {
+ vfree(kaddr);
+ return NULL;
+ }
+
+ /* Add on the size for the end marker */
+ size += sizeof(u64);
+
+ if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+ /* The buffer isn't big enough - free the memory and return failure */
+ vfree(kaddr);
+ return NULL;
+ }
+
+ /* Add the end marker */
+ memcpy(buffer, &end_marker, sizeof(u64));
+ }
+
+ return kaddr;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump)
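The buffer produced above is a flat sequence of records: one u64 holding a table's physical address OR'd with its level, followed by that table's 512 entries, with the whole dump terminated by the 0xFF end marker. A minimal reader-side sketch of walking that layout (the function name is an assumption, not part of the driver):

/* Illustrative walk over a buffer returned by kbase_mmu_dump(). */
static void example_walk_mmu_dump(const u64 *buf)
{
	while (*buf != 0xFFULL) {
		u64 table = *buf & ~0xFFFULL;	/* page table physical address */
		int level = *buf & 0xFFF;	/* page table level */
		const u64 *entries = buf + 1;	/* 512 PTEs/ATEs follow */

		(void)table;
		(void)level;
		(void)entries;
		buf += 1 + 512;			/* advance to the next record */
	}
}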
+
+static void bus_fault_worker(struct work_struct *data)
+{
+ struct kbase_as *faulting_as;
+ int as_no;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+ mali_bool reset_status = MALI_FALSE;
+#endif /* KBASE_GPU_RESET_EN */
+
+ faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+ as_no = faulting_as->number;
+
+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+ /* Grab the context that was already refcounted in kbase_mmu_interrupt().
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+ *
+ * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+ * are evicted from the GPU before the switch.
+ */
+ dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+ reset_status = kbase_prepare_to_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ struct kbase_mmu_setup *current_setup = &faulting_as->current_setup;
+
+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+ /* AS transaction begin */
+ mutex_lock(&kbdev->as[as_no].transaction_mutex);
+
+ /* Set the MMU into unmapped mode */
+ current_setup->transtab &= ~(u64)MMU_TRANSTAB_ADRMODE_MASK;
+ current_setup->transtab |= AS_TRANSTAB_ADRMODE_UNMAPPED;
+
+ /* Apply the new settings */
+ kbase_mmu_hw_configure(kbdev, faulting_as, kctx);
+
+ mutex_unlock(&kbdev->as[as_no].transaction_mutex);
+ /* AS transaction end */
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_BUS);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_BUS);
+
+ kbase_pm_context_idle(kbdev);
+ }
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+ kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+ /* By this point, the fault was handled in some way, so release the ctx refcount */
+ if (kctx != NULL)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+}
+
+const char *kbase_exception_name(u32 exception_code)
+{
+ const char *e;
+
+ switch (exception_code) {
+ /* Non-Fault Status code */
+ case 0x00:
+ e = "NOT_STARTED/IDLE/OK";
+ break;
+ case 0x01:
+ e = "DONE";
+ break;
+ case 0x02:
+ e = "INTERRUPTED";
+ break;
+ case 0x03:
+ e = "STOPPED";
+ break;
+ case 0x04:
+ e = "TERMINATED";
+ break;
+ case 0x08:
+ e = "ACTIVE";
+ break;
+ /* Job exceptions */
+ case 0x40:
+ e = "JOB_CONFIG_FAULT";
+ break;
+ case 0x41:
+ e = "JOB_POWER_FAULT";
+ break;
+ case 0x42:
+ e = "JOB_READ_FAULT";
+ break;
+ case 0x43:
+ e = "JOB_WRITE_FAULT";
+ break;
+ case 0x44:
+ e = "JOB_AFFINITY_FAULT";
+ break;
+ case 0x48:
+ e = "JOB_BUS_FAULT";
+ break;
+ case 0x50:
+ e = "INSTR_INVALID_PC";
+ break;
+ case 0x51:
+ e = "INSTR_INVALID_ENC";
+ break;
+ case 0x52:
+ e = "INSTR_TYPE_MISMATCH";
+ break;
+ case 0x53:
+ e = "INSTR_OPERAND_FAULT";
+ break;
+ case 0x54:
+ e = "INSTR_TLS_FAULT";
+ break;
+ case 0x55:
+ e = "INSTR_BARRIER_FAULT";
+ break;
+ case 0x56:
+ e = "INSTR_ALIGN_FAULT";
+ break;
+ case 0x58:
+ e = "DATA_INVALID_FAULT";
+ break;
+ case 0x59:
+ e = "TILE_RANGE_FAULT";
+ break;
+ case 0x5A:
+ e = "ADDR_RANGE_FAULT";
+ break;
+ case 0x60:
+ e = "OUT_OF_MEMORY";
+ break;
+ /* GPU exceptions */
+ case 0x80:
+ e = "DELAYED_BUS_FAULT";
+ break;
+ case 0x88:
+ e = "SHAREABILITY_FAULT";
+ break;
+ /* MMU exceptions */
+ case 0xC0:
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC4:
+ case 0xC5:
+ case 0xC6:
+ case 0xC7:
+ e = "TRANSLATION_FAULT";
+ break;
+ case 0xC8:
+ e = "PERMISSION_FAULT";
+ break;
+ case 0xD0:
+ case 0xD1:
+ case 0xD2:
+ case 0xD3:
+ case 0xD4:
+ case 0xD5:
+ case 0xD6:
+ case 0xD7:
+ e = "TRANSTAB_BUS_FAULT";
+ break;
+ case 0xD8:
+ e = "ACCESS_FLAG";
+ break;
+ default:
+ e = "UNKNOWN";
+ break;
+ };
+
+ return e;
+}
+
+/**
+ * The caller must ensure it has retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+ struct kbase_as *as, const char *reason_str)
+{
+ unsigned long flags;
+ int exception_type;
+ int access_type;
+ int source_id;
+ int as_no;
+ struct kbase_device *kbdev;
+ struct kbase_mmu_setup *current_setup;
+ struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+ mali_bool reset_status = MALI_FALSE;
+#endif
+ static const char * const access_type_names[] = { "RESERVED", "EXECUTE", "READ", "WRITE" };
+
+ as_no = as->number;
+ kbdev = kctx->kbdev;
+ js_devdata = &kbdev->js_data;
+
+ /* ASSERT that the context won't leave the runpool */
+ KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+ /* decode the fault status */
+ exception_type = as->fault_status & 0xFF;
+ access_type = (as->fault_status >> 8) & 0x3;
+ source_id = (as->fault_status >> 16);
+
+ /* terminal fault, print info about the fault */
+ dev_err(kbdev->dev,
+ "Unhandled Page fault in AS%d at VA 0x%016llX\n"
+ "Reason: %s\n"
+ "raw fault status 0x%X\n"
+ "decoded fault status: %s\n"
+ "exception type 0x%X: %s\n"
+ "access type 0x%X: %s\n"
+ "source id 0x%X\n",
+ as_no, as->fault_addr,
+ reason_str,
+ as->fault_status,
+ (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+ exception_type, kbase_exception_name(exception_type),
+ access_type, access_type_names[access_type],
+ source_id);
+
+ /* hardware counters dump fault handling */
+ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING)) {
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+ if ((as->fault_addr >= kbdev->hwcnt.addr) && (as->fault_addr < (kbdev->hwcnt.addr + (num_core_groups * 2048))))
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT;
+ }
+
+ /* Stop the kctx from submitting more jobs and cause it to be scheduled
+ * out/rescheduled - this will occur on releasing the context's refcount */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+ * context can appear in the job slots from this point on */
+ kbase_job_kill_jobs_from_context(kctx);
+ /* AS transaction begin */
+ mutex_lock(&as->transaction_mutex);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+ * are evicted from the GPU before the switch.
+ */
+ dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+ reset_status = kbase_prepare_to_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+ current_setup = &as->current_setup;
+
+ current_setup->transtab &= ~(u64)MMU_TRANSTAB_ADRMODE_MASK;
+ current_setup->transtab |= AS_TRANSTAB_ADRMODE_UNMAPPED;
+
+ /* Apply the new address space setting */
+ kbase_mmu_hw_configure(kbdev, as, kctx);
+
+ mutex_unlock(&as->transaction_mutex);
+ /* AS transaction end */
+
+ /* Clear down the fault */
+ kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+ kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+ struct kbase_as *as;
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(work);
+ as = container_of(work, struct kbase_as, poke_work);
+ kbdev = container_of(as, struct kbase_device, as[as->number]);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ /* GPU power will already be active by virtue of the caller holding a JS
+ * reference on the address space, and will not release it until this worker
+ * has finished */
+
+ /* Further to the comment above, we know that while this function is running
+ * the AS will not be released as before the atom is released this workqueue
+ * is flushed (in kbase_as_poking_timer_release_atom)
+ */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+ /* AS transaction begin */
+ mutex_lock(&as->transaction_mutex);
+ /* Force a uTLB invalidate */
+ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+ AS_COMMAND_UNLOCK, 0);
+ mutex_unlock(&as->transaction_mutex);
+ /* AS transaction end */
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ if (as->poke_refcount &&
+ !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+ /* Only queue up the timer if we need it, and we're not trying to kill it */
+ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_as *as;
+ int queue_work_ret;
+
+ KBASE_DEBUG_ASSERT(NULL != timer);
+ as = container_of(timer, struct kbase_as, poke_timer);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+ KBASE_DEBUG_ASSERT(queue_work_ret);
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbase_as *as;
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (katom->poking)
+ return;
+
+ katom->poking = 1;
+
+ /* It's safe to work on the as/as_nr without an explicit reference,
+ * because the caller holds the runpool_irq lock, and the atom itself
+ * was also running and had already taken a reference */
+ as = &kbdev->as[kctx->as_nr];
+
+ if (++(as->poke_refcount) == 1) {
+ /* First refcount for poke needed: check if not already in flight */
+ if (!as->poke_state) {
+ /* need to start poking */
+ as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+ queue_work(as->poke_wq, &as->poke_work);
+ }
+ }
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbase_as *as;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ if (!katom->poking)
+ return;
+
+ as = &kbdev->as[kctx->as_nr];
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ if (--(as->poke_refcount) == 0) {
+ as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+ hrtimer_cancel(&as->poke_timer);
+ flush_workqueue(as->poke_wq);
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+ /* Re-check whether it's still needed */
+ if (as->poke_refcount) {
+ int queue_work_ret;
+ /* Poking still needed:
+ * - Another retain will not be starting the timer or queueing work,
+ * because it's still marked as in-flight
+ * - The hrtimer has finished, and has not started a new timer or
+ * queued work because it's been marked as killing
+ *
+ * So whatever happens now, just queue the work again */
+ as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+ KBASE_DEBUG_ASSERT(queue_work_ret);
+ } else {
+ /* It isn't - so mark it as not in flight, and not killing */
+ as->poke_state = 0u;
+
+ /* The poke associated with the atom has now finished. If this is
+			 * also the last atom on the context, then we can guarantee no more
+ * pokes (and thus no more poking register accesses) will occur on
+ * the context until new atoms are run */
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+ katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long flags;
+
+ if (kctx == NULL) {
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW design error?\n",
+ kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+ as->number, as->fault_addr);
+ }
+
+ if (kbase_as_has_bus_fault(as)) {
+ if (kctx) {
+ /*
+ * hw counters dumping in progress, signal the
+ * other thread that it failed
+ */
+ if ((kbdev->hwcnt.kctx == kctx) &&
+ (kbdev->hwcnt.state == KBASE_INSTR_STATE_DUMPING))
+ kbdev->hwcnt.state = KBASE_INSTR_STATE_FAULT;
+
+ /*
+ * Stop the kctx from submitting more jobs and cause it
+ * to be scheduled out/rescheduled when all references
+ * to it are released
+ */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+
+ dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+ as->number, as->fault_addr);
+ }
+
+ /*
+ * We need to switch to UNMAPPED mode - but we do this in a
+ * worker so that we can sleep
+ */
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+ INIT_WORK(&as->work_busfault, bus_fault_worker);
+ queue_work(as->pf_wq, &as->work_busfault);
+ } else {
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+ INIT_WORK(&as->work_pagefault, page_fault_worker);
+ queue_work(as->pf_wq, &as->work_pagefault);
+ }
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100755
index 000000000000..876eae8abd7f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+ KBASE_MMU_FAULT_TYPE_PAGE,
+ KBASE_MMU_FAULT_TYPE_BUS
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details set up in the
+ * @ref kbase_context structure.
+ *
+ * @param[in] kbdev kbase device to configure.
+ * @param[in] as address space to configure.
+ * @param[in] kctx kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+ struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in] kbdev kbase device to issue the MMU operation on.
+ * @param[in] as address space to issue the MMU operation on.
+ * @param[in] kctx kbase context to issue the MMU operation on.
+ * @param[in] vpfn MMU Virtual Page Frame Number to start the
+ * operation on.
+ * @param[in] nr Number of pages to work on.
+ * @param[in] type Operation type (written to ASn_COMMAND).
+ * @param[in] handling_irq Is this operation being called during the handling
+ * of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+ unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in] kbdev kbase device to clear the fault from.
+ * @param[in] as address space to clear the fault from.
+ * @param[in] kctx kbase context to clear the fault from or NULL.
+ * @param[in] type The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Re-enable a fault that has previously been reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU, the
+ * corresponding interrupt is masked. Once the fault has been handled, this
+ * function must be called to re-enable reporting of page faults or bus
+ * errors on the address space.
+ *
+ * @param[in]  kbdev         kbase device on which to re-enable the fault.
+ * @param[in]  as            address space on which to re-enable the fault.
+ * @param[in]  kctx          kbase context on which to re-enable the fault.
+ * @param[in]  type          The type of fault to be re-enabled.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif /* _MALI_KBASE_MMU_HW_H_ */
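For reference, the fault handlers in mali_kbase_mmu.c drive these entry points in a fixed order when growing a faulting region; a condensed sketch of that sequence (page table updates, locking of the VA region and error handling elided):

/* Condensed fault-recovery sequence, as used by page_fault_worker(). */
mutex_lock(&as->transaction_mutex);		/* AS transaction begin */
/* ... page tables updated here ... */
kbase_mmu_hw_clear_fault(kbdev, as, kctx,
		KBASE_MMU_FAULT_TYPE_PAGE);	/* ack the IRQ before the flush */
kbase_mmu_hw_do_operation(kbdev, as, kctx, vpfn, nr,
		AS_COMMAND_FLUSH_PT, 1);	/* flush the updated page tables */
mutex_unlock(&as->transaction_mutex);		/* AS transaction end */
kbase_mmu_hw_enable_fault(kbdev, as, kctx,
		KBASE_MMU_FAULT_TYPE_PAGE);	/* unmask the page fault IRQ */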
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.c
new file mode 100755
index 000000000000..7ce94e922584
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,294 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_hw_direct.h>
+
+#if KBASE_MMU_HW_BACKEND
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+ u32 num_pages)
+{
+ u64 region;
+
+ /* can't lock a zero sized range */
+ KBASE_DEBUG_ASSERT(num_pages);
+
+ region = pfn << PAGE_SHIFT;
+ /*
+ * fls returns (given the ASSERT above):
+ * 1 .. 32
+ *
+ * 10 + fls(num_pages)
+ * results in the range (11 .. 42)
+ */
+
+	/* defensively handle num_pages being zero in case the assert above is compiled out */
+ if (0 == num_pages) {
+ region |= 11;
+ } else {
+ u8 region_width;
+
+ region_width = 10 + fls(num_pages);
+ if (num_pages != (1ul << (region_width - 11))) {
+ /* not pow2, so must go up to the next pow2 */
+ region_width += 1;
+ }
+ KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+ KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+ region |= region_width;
+ }
+
+ return region;
+}
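A quick worked example of the width calculation above: for num_pages = 1000, fls(1000) is 10, so region_width starts at 20; since 1000 is not equal to (1 << 9) the width is bumped to 21, and that value is encoded in the low bits of the returned lock address.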
+
+static int wait_ready(struct kbase_device *kbdev,
+ unsigned int as_nr, struct kbase_context *kctx)
+{
+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+ /* Wait for the MMU status to indicate there is no active command. */
+ while (--max_loops && kbase_reg_read(kbdev,
+ MMU_AS_REG(as_nr, AS_STATUS),
+ kctx) & AS_STATUS_AS_ACTIVE) {
+ ;
+ }
+
+ if (max_loops == 0) {
+ dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+ struct kbase_context *kctx)
+{
+ int status;
+
+ /* write AS_COMMAND when MMU is ready to accept another command */
+ status = wait_ready(kbdev, as_nr, kctx);
+ if (status == 0)
+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, kctx);
+
+ return status;
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+ const int num_as = 16;
+ const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+ const int pf_shift = 0;
+ const unsigned long as_bit_mask = (1UL << num_as) - 1;
+ unsigned long flags;
+ u32 new_mask;
+ u32 tmp;
+
+ /* bus faults */
+ u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+ /* page faults (note: Ignore ASes with both pf and bf) */
+ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ /* remember current mask */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ /* mask interrupts for now */
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+ while (bf_bits | pf_bits) {
+ struct kbase_as *as;
+ int as_no;
+ struct kbase_context *kctx;
+
+ /*
+ * the while logic ensures we have a bit set, no need to check
+ * for not-found here
+ */
+ as_no = ffs(bf_bits | pf_bits) - 1;
+ as = &kbdev->as[as_no];
+
+ /*
+ * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+ * Bus/Page faults _should_ only occur whilst jobs are running,
+ * and a job causing the Bus/Page fault shouldn't complete until
+ * the MMU is updated
+ */
+ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+ /* find faulting address */
+ as->fault_addr = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no, AS_FAULTADDRESS_HI),
+ kctx);
+ as->fault_addr <<= 32;
+ as->fault_addr |= kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no, AS_FAULTADDRESS_LO),
+ kctx);
+
+ /* record the fault status */
+ as->fault_status = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no, AS_FAULTSTATUS),
+ kctx);
+
+ /* find the fault type */
+ as->fault_type = (bf_bits & (1 << as_no)) ?
+ KBASE_MMU_FAULT_TYPE_BUS :
+ KBASE_MMU_FAULT_TYPE_PAGE;
+
+ if (kbase_as_has_bus_fault(as)) {
+ /* Mark bus fault as handled.
+ * Note that a bus fault is processed first in case
+ * where both a bus fault and page fault occur.
+ */
+ bf_bits &= ~(1UL << as_no);
+
+ /* remove the queued BF (and PF) from the mask */
+ new_mask &= ~(MMU_BUS_ERROR(as_no) |
+ MMU_PAGE_FAULT(as_no));
+ } else {
+ /* Mark page fault as handled */
+ pf_bits &= ~(1UL << as_no);
+
+ /* remove the queued PF from the mask */
+ new_mask &= ~MMU_PAGE_FAULT(as_no);
+ }
+
+ /* Process the interrupt for this address space */
+ kbase_mmu_interrupt_process(kbdev, kctx, as);
+ }
+
+ /* reenable interrupts */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ new_mask |= tmp;
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx)
+{
+ struct kbase_mmu_setup *current_setup = &as->current_setup;
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+ current_setup->transtab & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+ (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+ current_setup->memattr & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+ (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+ unsigned int handling_irq)
+{
+ int ret;
+
+ if (op == AS_COMMAND_UNLOCK) {
+ /* Unlock doesn't require a lock first */
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ } else {
+ u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+ /* Lock the region that needs to be updated */
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+ lock_addr & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+ (lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3285) &&
+ handling_irq) {
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR),
+ (1UL << as->number), NULL);
+ write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+ }
+
+ /* Run the MMU operation */
+ write_cmd(kbdev, as->number, op, kctx);
+
+ /* Wait for the flush to complete */
+ ret = wait_ready(kbdev, as->number, kctx);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+			/* Issue an UNLOCK command to ensure that valid page
+			   tables are re-read by the GPU after an update.
+			   The FLUSH command should perform all the actions
+			   necessary, but bus logs show that if multiple page
+			   faults occur within an 8-page region the MMU does
+			   not always re-read the updated page table entries
+			   for later faults, or re-reads them only partially,
+			   and then raises the page fault IRQ again for the
+			   same addresses. The UNLOCK ensures that the MMU
+			   cache is flushed so the updates can be re-read.
+			   As the region is now unlocked, two UNLOCK commands
+			   are needed to flush the MMU/uTLB; see PRLAM-8812.
+			 */
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ }
+ }
+
+ return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+ u32 pf_bf_mask;
+
+ /* Clear the page (and bus fault IRQ as well in case one occurred) */
+ pf_bf_mask = MMU_PAGE_FAULT(as->number);
+ if (type == KBASE_MMU_FAULT_TYPE_BUS)
+ pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+ unsigned long flags;
+ u32 irq_mask;
+
+ /* Enable the page fault IRQ (and bus fault IRQ as well in case one
+ * occurred) */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+ MMU_PAGE_FAULT(as->number);
+
+ if (type == KBASE_MMU_FAULT_TYPE_BUS)
+ irq_mask |= MMU_BUS_ERROR(as->number);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.h
new file mode 100755
index 000000000000..9737dbb9aa2e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for the direct implementation for MMU hardware access
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_direct_page Direct MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_direct_intro_sec Introduction
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ * @ref mali_kbase_mmu_hw_page .
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * @addtogroup mali_kbase_mmu_hw
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw_direct Direct register access to MMU
+ * @{
+ */
+
+/** @brief Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the @ref kbase_device.
+ *
+ * @param[in]  kbdev    The kbase device that raised the MMU interrupt
+ * @param[in] irq_stat Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
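+
+/*
+ * Usage sketch (illustrative only, not part of the original patch): the
+ * device's MMU IRQ handler is expected to read the MMU_IRQ_STATUS register
+ * and pass the raw value straight through, e.g.
+ *
+ *   u32 irq_stat = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+ *
+ *   kbase_mmu_interrupt(kbdev, irq_stat);
+ *
+ * MMU_REG(MMU_IRQ_STATUS) is assumed here by analogy with the MMU_IRQ_MASK
+ * and MMU_IRQ_CLEAR accesses made by the implementation.
+ */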
+
+/** @} *//* end group mali_kbase_mmu_hw_direct */
+/** @} *//* end group mali_kbase_mmu_hw */
+
+#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100755
index 000000000000..c36b93589497
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,142 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT 3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in a struct kbase_io_resources to Linux-specific resources
+ *
+ * Converts the data in a struct kbase_io_resources into an array of Linux resource structures. Note that the function
+ * assumes the linux_resources array holds at least PLATFORM_CONFIG_RESOURCE_COUNT entries.
+ * Resources are placed in a fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in] io_resource Input IO resource data
+ * @param[out] linux_resources Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+ if (!io_resources || !linux_resources) {
+ pr_err("%s: couldn't find proper resources\n", __func__);
+ return;
+ }
+
+ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+ linux_resources[0].start = io_resources->io_memory_region.start;
+ linux_resources[0].end = io_resources->io_memory_region.end;
+ linux_resources[0].flags = IORESOURCE_MEM;
+ linux_resources[1].start = io_resources->job_irq_number;
+ linux_resources[1].end = io_resources->job_irq_number;
+ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+ linux_resources[2].start = io_resources->mmu_irq_number;
+ linux_resources[2].end = io_resources->mmu_irq_number;
+ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+ linux_resources[3].start = io_resources->gpu_irq_number;
+ linux_resources[3].end = io_resources->gpu_irq_number;
+ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
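+
+/*
+ * Sketch (illustrative only): given an io_resources description such as
+ *
+ *   static const struct kbase_io_resources io_resources = {
+ *           .job_irq_number = 68,
+ *           .mmu_irq_number = 69,
+ *           .gpu_irq_number = 70,
+ *           .io_memory_region = { .start = 0x2D000000, .end = 0x2D003FFF },
+ *   };
+ *
+ * the helper above fills linux_resources[0..3] with the memory region, the
+ * job IRQ, the MMU IRQ and the GPU IRQ, in that fixed order. The field
+ * values shown are purely illustrative.
+ */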
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+ struct kbase_platform_config *config;
+ int attribute_count;
+#ifndef CONFIG_OF
+ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+ int err;
+
+ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+ if (config == NULL) {
+ pr_err("%s: couldn't get platform config\n", __func__);
+ return -ENODEV;
+ }
+
+ attribute_count = kbasep_get_config_attribute_count(config->attributes);
+#ifdef CONFIG_MACH_MANTA
+ err = platform_device_add_data(&exynos5_device_g3d, config->attributes, attribute_count * sizeof(config->attributes[0]));
+ if (err)
+ return err;
+#else
+
+ mali_device = platform_device_alloc("mali", 0);
+ if (mali_device == NULL)
+ return -ENOMEM;
+
+#ifndef CONFIG_OF
+ kbasep_config_parse_io_resources(config->io_resources, resources);
+ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+ if (err) {
+ platform_device_put(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+#endif /* CONFIG_OF */
+
+ err = platform_device_add_data(mali_device, config->attributes, attribute_count * sizeof(config->attributes[0]));
+ if (err) {
+ platform_device_unregister(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+
+ err = platform_device_add(mali_device);
+ if (err) {
+ platform_device_unregister(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+#endif /* CONFIG_MACH_MANTA */
+
+ return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+ if (mali_device)
+ platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
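+
+/*
+ * Usage sketch (illustrative only): platform glue that does not use the
+ * device tree would typically register the fake device during its own init
+ * and remove it on exit, e.g.
+ *
+ *   static int __init my_platform_init(void)
+ *   {
+ *           return kbase_platform_fake_register();
+ *   }
+ *
+ *   static void __exit my_platform_exit(void)
+ *   {
+ *           kbase_platform_fake_unregister();
+ *   }
+ *
+ * my_platform_init/my_platform_exit are hypothetical names; the driver
+ * decides where these calls are actually made.
+ */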
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100755
index 000000000000..08d96936ca3d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,454 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+ struct kbase_pm_callback_conf *callbacks;
+
+ callbacks = (struct kbase_pm_callback_conf *)kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS);
+
+ if (callbacks)
+ callbacks->power_on_callback(kbdev);
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+ struct kbase_pm_callback_conf *callbacks;
+
+ callbacks = (struct kbase_pm_callback_conf *)kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS);
+
+ if (callbacks)
+ callbacks->power_off_callback(kbdev);
+}
+
+mali_error kbase_pm_init(struct kbase_device *kbdev)
+{
+ mali_error ret = MALI_ERROR_NONE;
+ struct kbase_pm_callback_conf *callbacks;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_init(&kbdev->pm.lock);
+
+ kbdev->pm.gpu_powered = MALI_FALSE;
+ kbdev->pm.suspending = MALI_FALSE;
+#ifdef CONFIG_MALI_DEBUG
+ kbdev->pm.driver_ready_for_irqs = MALI_FALSE;
+#endif /* CONFIG_MALI_DEBUG */
+ kbdev->pm.gpu_in_desired_state = MALI_TRUE;
+ init_waitqueue_head(&kbdev->pm.gpu_in_desired_state_wait);
+
+ callbacks = (struct kbase_pm_callback_conf *)kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS);
+ if (callbacks) {
+ kbdev->pm.callback_power_on = callbacks->power_on_callback;
+ kbdev->pm.callback_power_off = callbacks->power_off_callback;
+ kbdev->pm.callback_power_suspend =
+ callbacks->power_suspend_callback;
+ kbdev->pm.callback_power_resume =
+ callbacks->power_resume_callback;
+ kbdev->pm.callback_power_runtime_init = callbacks->power_runtime_init_callback;
+ kbdev->pm.callback_power_runtime_term = callbacks->power_runtime_term_callback;
+ kbdev->pm.callback_power_runtime_on = callbacks->power_runtime_on_callback;
+ kbdev->pm.callback_power_runtime_off = callbacks->power_runtime_off_callback;
+ } else {
+ kbdev->pm.callback_power_on = NULL;
+ kbdev->pm.callback_power_off = NULL;
+ kbdev->pm.callback_power_suspend = NULL;
+ kbdev->pm.callback_power_resume = NULL;
+ kbdev->pm.callback_power_runtime_init = NULL;
+ kbdev->pm.callback_power_runtime_term = NULL;
+ kbdev->pm.callback_power_runtime_on = NULL;
+ kbdev->pm.callback_power_runtime_off = NULL;
+ }
+
+ kbdev->pm.platform_dvfs_frequency = (u32) kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_POWER_MANAGEMENT_DVFS_FREQ);
+
+ /* Initialise the metrics subsystem */
+ ret = kbasep_pm_metrics_init(kbdev);
+ if (MALI_ERROR_NONE != ret)
+ return ret;
+
+ init_waitqueue_head(&kbdev->pm.l2_powered_wait);
+ kbdev->pm.l2_powered = 0;
+
+ init_waitqueue_head(&kbdev->pm.reset_done_wait);
+ kbdev->pm.reset_done = MALI_FALSE;
+
+ init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+ kbdev->pm.active_count = 0;
+
+ spin_lock_init(&kbdev->pm.power_change_lock);
+ spin_lock_init(&kbdev->pm.gpu_cycle_counter_requests_lock);
+ spin_lock_init(&kbdev->pm.gpu_powered_lock);
+
+ if (MALI_ERROR_NONE != kbase_pm_ca_init(kbdev))
+ goto workq_fail;
+
+ if (MALI_ERROR_NONE != kbase_pm_policy_init(kbdev))
+ goto pm_policy_fail;
+
+ return MALI_ERROR_NONE;
+
+pm_policy_fail:
+ kbase_pm_ca_term(kbdev);
+workq_fail:
+ kbasep_pm_metrics_term(kbdev);
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_init)
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, mali_bool is_resume)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* Turn clocks and interrupts on - no-op if we haven't done a previous
+ * kbase_pm_clock_off() */
+ kbase_pm_clock_on(kbdev, is_resume);
+
+ /* Update core status as required by the policy */
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+ kbase_pm_update_cores_state(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+ /* NOTE: We don't wait to reach the desired state, since running atoms
+ * will wait for that state to be reached anyway */
+}
+
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, mali_bool is_suspend)
+{
+ unsigned long flags;
+ mali_bool cores_are_available;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ /* Force all cores off */
+ kbdev->pm.desired_shader_state = 0;
+
+ /* Force all cores to be unavailable, in the situation where
+ * transitions are in progress for some cores but not others,
+	 * and kbase_pm_check_transitions_nolock cannot immediately
+ * power off the cores */
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_available_bitmap = 0;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	/* NOTE: We won't wait to reach the cores' desired state, even if we're
+ * powering off the GPU itself too. It's safe to cut the power whilst
+ * they're transitioning to off, because the cores should be idle and all
+ * cache flushes should already have occurred */
+
+ /* Consume any change-state events */
+ kbase_timeline_pm_check_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Disable interrupts and turn the clock off */
+ kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+mali_error kbase_pm_powerup(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ mali_error ret;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* A suspend won't happen during startup/insmod */
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ /* Power up the GPU, don't enable IRQs as we are not ready to receive them. */
+ ret = kbase_pm_init_hw(kbdev, MALI_FALSE);
+ if (ret != MALI_ERROR_NONE) {
+ mutex_unlock(&kbdev->pm.lock);
+ return ret;
+ }
+
+ kbasep_pm_read_present_cores(kbdev);
+
+ kbdev->pm.debug_core_mask = kbdev->shader_present_bitmap;
+
+ /* Pretend the GPU is active to prevent a power policy turning the GPU cores off */
+ kbdev->pm.active_count = 1;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+ /* Ensure cycle counter is off */
+ kbdev->pm.gpu_cycle_counter_requests = 0;
+ spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+
+ /* We are ready to receive IRQ's now as power policy is set up, so enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+ kbdev->pm.driver_ready_for_irqs = MALI_TRUE;
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+#endif
+ kbase_pm_enable_interrupts(kbdev);
+
+ /* Turn on the GPU and any cores needed by the policy */
+ kbase_pm_do_poweron(kbdev, MALI_FALSE);
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Idle the GPU and/or cores, if the policy wants it to */
+ kbase_pm_context_idle(kbdev);
+
+ return MALI_ERROR_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_powerup)
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+ (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+ int c;
+ int old_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to power up. Sometimes the event might be missed because the count is
+	 * read outside of the mutex, but this is necessary to get the trace timing
+	 * correct. */
+ old_count = kbdev->pm.active_count;
+ if (old_count == 0)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+ mutex_lock(&kbdev->pm.lock);
+ if (kbase_pm_is_suspending(kbdev)) {
+ switch (suspend_handler) {
+ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+ if (kbdev->pm.active_count != 0)
+ break;
+ /* FALLTHROUGH */
+ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+ mutex_unlock(&kbdev->pm.lock);
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+ return 1;
+
+ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+ /* FALLTHROUGH */
+ default:
+ KBASE_DEBUG_ASSERT_MSG(MALI_FALSE, "unreachable");
+ break;
+ }
+ }
+ c = ++kbdev->pm.active_count;
+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+ /* Trace the event being handled */
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+ if (c == 1) {
+ /* First context active: Power on the GPU and any cores requested by
+ * the policy */
+ kbase_pm_update_active(kbdev);
+
+ kbasep_pm_record_gpu_active(kbdev);
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active)
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+ int c;
+ int old_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* Trace timeline information about how long it took to handle the decision
+	 * to power down. Sometimes the event might be missed because the count is
+	 * read outside of the mutex, but this is necessary to get the trace timing
+	 * correct. */
+ old_count = kbdev->pm.active_count;
+ if (old_count == 0)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ c = --kbdev->pm.active_count;
+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+ KBASE_DEBUG_ASSERT(c >= 0);
+
+ /* Trace the event being handled */
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+ if (c == 0) {
+ /* Last context has gone idle */
+ kbase_pm_update_active(kbdev);
+
+ kbasep_pm_record_gpu_idle(kbdev);
+
+ /* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+ * waiters must synchronize with us by locking the pm.lock after
+ * waiting */
+ wake_up(&kbdev->pm.zero_active_count_wait);
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle)
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_lock(&kbdev->pm.lock);
+ kbase_pm_cancel_deferred_poweroff(kbdev);
+ kbase_pm_do_poweroff(kbdev, MALI_FALSE);
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_halt)
+
+void kbase_pm_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests == 0);
+
+ /* Free any resources the policy allocated */
+ kbase_pm_policy_term(kbdev);
+ kbase_pm_ca_term(kbdev);
+
+ /* Shut down the metrics subsystem */
+ kbasep_pm_metrics_term(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_term)
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+ int nr_keep_gpu_powered_ctxs;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+ kbdev->pm.suspending = MALI_TRUE;
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* From now on, the active count will drop towards zero. Sometimes, it'll
+ * go up briefly before going down again. However, once it reaches zero it
+ * will stay there - guaranteeing that we've idled all pm references */
+
+ /* Suspend job scheduler and associated components, so that it releases all
+ * the PM active count references */
+ kbasep_js_suspend(kbdev);
+
+ /* Suspend any counter collection that might be happening */
+ kbase_instr_hwcnt_suspend(kbdev);
+
+ /* Cancel the keep_gpu_powered calls */
+ for (nr_keep_gpu_powered_ctxs = atomic_read(&kbdev->keep_gpu_powered_count);
+ nr_keep_gpu_powered_ctxs > 0;
+ --nr_keep_gpu_powered_ctxs) {
+ kbase_pm_context_idle(kbdev);
+ }
+
+ /* Wait for the active count to reach zero. This is not the same as
+ * waiting for a power down, since not all policies power down when this
+ * reaches zero. */
+ wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+ /* NOTE: We synchronize with anything that was just finishing a
+ * kbase_pm_context_idle() call by locking the pm.lock below */
+
+ /* Force power off the GPU and all cores (regardless of policy), only after
+ * the PM active count reaches zero (otherwise, we risk turning it off
+ * prematurely) */
+ mutex_lock(&kbdev->pm.lock);
+ kbase_pm_cancel_deferred_poweroff(kbdev);
+ kbase_pm_do_poweroff(kbdev, MALI_TRUE);
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+ int nr_keep_gpu_powered_ctxs;
+
+ /* MUST happen before any pm_context_active calls occur */
+ mutex_lock(&kbdev->pm.lock);
+ kbdev->pm.suspending = MALI_FALSE;
+ kbase_pm_do_poweron(kbdev, MALI_TRUE);
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Initial active call, to power on the GPU/cores if needed */
+ kbase_pm_context_active(kbdev);
+
+ /* Restore the keep_gpu_powered calls */
+ for (nr_keep_gpu_powered_ctxs = atomic_read(&kbdev->keep_gpu_powered_count);
+ nr_keep_gpu_powered_ctxs > 0;
+ --nr_keep_gpu_powered_ctxs) {
+ kbase_pm_context_active(kbdev);
+ }
+
+ /* Re-enable instrumentation, if it was previously disabled */
+ kbase_instr_hwcnt_resume(kbdev);
+
+ /* Resume any blocked atoms (which may cause contexts to be scheduled in
+ * and dependent atoms to run) */
+ kbase_resume_suspended_soft_jobs(kbdev);
+
+ /* Resume the Job Scheduler and associated components, and start running
+ * atoms */
+ kbasep_js_resume(kbdev);
+
+ /* Matching idle call, to power off the GPU/cores if we didn't actually
+ * need it and the policy doesn't want it on */
+ kbase_pm_context_idle(kbdev);
+}
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100755
index 000000000000..289ecc9f9782
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,888 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/** The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to:
+ * - @ref kbase_pm_get_present_cores
+ * - @ref kbase_pm_get_active_cores
+ * - @ref kbase_pm_get_trans_cores
+ * - @ref kbase_pm_get_ready_cores.
+ *
+ * They specify which type of core should be acted on. These values are set in
+ * a manner that allows @ref core_type_to_reg function to be simpler and more
+ * efficient.
+ */
+enum kbase_pm_core_type {
+ KBASE_PM_CORE_L3 = L3_PRESENT_LO, /**< The L3 cache */
+ KBASE_PM_CORE_L2 = L2_PRESENT_LO, /**< The L2 cache */
+ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, /**< Shader cores */
+ KBASE_PM_CORE_TILER = TILER_PRESENT_LO /**< Tiler cores */
+};
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return MALI_ERROR_NONE if the power management framework was successfully initialized.
+ */
+mali_error kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return MALI_ERROR_NONE if powerup was successful.
+ */
+mali_error kbase_pm_powerup(struct kbase_device *kbdev);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Metrics data collected for use by the power management framework.
+ *
+ */
+struct kbasep_pm_metrics_data {
+ int vsync_hit;
+ int utilisation;
+ int util_gl_share;
+	int util_cl_share[2]; /* 2 is the maximum number of core groups we can have */
+ ktime_t time_period_start;
+ u32 time_busy;
+ u32 time_idle;
+ u32 prev_busy;
+ u32 prev_idle;
+ mali_bool gpu_active;
+ u32 busy_cl[2];
+ u32 busy_gl;
+ u32 active_cl_ctx[2];
+ u32 active_gl_ctx;
+
+ spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ struct hrtimer timer;
+ mali_bool timer_active;
+#endif
+
+ void *platform_data;
+ struct kbase_device *kbdev;
+};
+
+/** Actions for DVFS.
+ *
+ * kbase_pm_get_dvfs_action will return one of these enumerated values to
+ * describe the action that the DVFS system should take.
+ */
+enum kbase_pm_dvfs_action {
+ KBASE_PM_DVFS_NOP, /**< No change in clock frequency is requested */
+ KBASE_PM_DVFS_CLOCK_UP, /**< The clock frequency should be increased if possible */
+ KBASE_PM_DVFS_CLOCK_DOWN /**< The clock frequency should be decreased if possible */
+};
+
+union kbase_pm_policy_data {
+ struct kbasep_pm_policy_always_on always_on;
+ struct kbasep_pm_policy_coarse_demand coarse_demand;
+ struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+ struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+ struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+ struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+ struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/** Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one instance of this structure per device
+ * in the system.
+ */
+struct kbase_pm_device_data {
+ /** The lock protecting Power Management structures accessed
+ * outside of IRQ.
+ *
+ * This lock must also be held whenever the GPU is being powered on or off.
+ */
+ struct mutex lock;
+
+ /** The policy that is currently actively controlling core availability.
+ *
+ * @note: During an IRQ, this can be NULL when the policy is being changed
+ * with kbase_pm_ca_set_policy(). The change is protected under
+ * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+ * context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() will re-issue the policy functions that would've
+ * been done under IRQ.
+ */
+ const struct kbase_pm_ca_policy *ca_current_policy;
+
+ /** The policy that is currently actively controlling the power state.
+ *
+ * @note: During an IRQ, this can be NULL when the policy is being changed
+ * with kbase_pm_set_policy(). The change is protected under
+ * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+ * context must therefore check for NULL. If NULL, then
+ * kbase_pm_set_policy() will re-issue the policy functions that would've
+ * been done under IRQ.
+ */
+ const struct kbase_pm_policy *pm_current_policy;
+
+ /** Private data for current CA policy */
+ union kbase_pm_ca_policy_data ca_policy_data;
+
+ /** Private data for current PM policy */
+ union kbase_pm_policy_data pm_policy_data;
+
+ /** Flag indicating when core availability policy is transitioning cores.
+ * The core availability policy must set this when a change in core availability
+	 * is occurring.
+ *
+ * power_change_lock must be held when accessing this. */
+ mali_bool ca_in_transition;
+
+ /** Waiting for reset and a queue to wait for changes */
+ mali_bool reset_done;
+ wait_queue_head_t reset_done_wait;
+
+ /** Wait queue for whether the l2 cache has been powered as requested */
+ wait_queue_head_t l2_powered_wait;
+ /** State indicating whether all the l2 caches are powered.
+ * Non-zero indicates they're *all* powered
+ * Zero indicates that some (or all) are not powered */
+ int l2_powered;
+
+ /** The reference count of active contexts on this device. */
+ int active_count;
+ /** Flag indicating suspending/suspended */
+ mali_bool suspending;
+ /* Wait queue set when active_count == 0 */
+ wait_queue_head_t zero_active_count_wait;
+
+ /** The reference count of active gpu cycle counter users */
+ int gpu_cycle_counter_requests;
+ /** Lock to protect gpu_cycle_counter_requests */
+ spinlock_t gpu_cycle_counter_requests_lock;
+
+ /** A bit mask identifying the shader cores that the power policy would like to be on.
+ * The current state of the cores may be different, but there should be transitions in progress that will
+	 * eventually achieve this state (assuming that the policy doesn't change its mind in the meantime).
+ */
+ u64 desired_shader_state;
+ /** bit mask indicating which shader cores are currently in a power-on transition */
+ u64 powering_on_shader_state;
+ /** A bit mask identifying the tiler cores that the power policy would like to be on.
+ * @see kbase_pm_device_data:desired_shader_state */
+ u64 desired_tiler_state;
+ /** bit mask indicating which tiler core are currently in a power-on transition */
+ u64 powering_on_tiler_state;
+
+ /** bit mask indicating which l2-caches are currently in a power-on transition */
+ u64 powering_on_l2_state;
+ /** bit mask indicating which l3-caches are currently in a power-on transition */
+ u64 powering_on_l3_state;
+
+ /** Lock protecting the power state of the device.
+ *
+ * This lock must be held when accessing the shader_available_bitmap, tiler_available_bitmap, l2_available_bitmap,
+ * shader_inuse_bitmap and tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition and shader_poweroff_pending
+ * fields of kbase_pm_device_data. It is also held when the hardware power registers are being written to, to ensure
+ * that two threads do not conflict over the power transitions that the hardware should make.
+ */
+ spinlock_t power_change_lock;
+
+ /** This flag is set iff the GPU is powered as requested by the
+ * desired_xxx_state variables */
+ mali_bool gpu_in_desired_state;
+ /* Wait queue set when gpu_in_desired_state != 0 */
+ wait_queue_head_t gpu_in_desired_state_wait;
+
+ /** Set to true when the GPU is powered and register accesses are possible, false otherwise */
+ mali_bool gpu_powered;
+
+ /** A bit mask identifying the available shader cores that are specified via sysfs */
+ u64 debug_core_mask;
+
+ /** Set to true when instrumentation is enabled, false otherwise */
+ mali_bool instr_enabled;
+
+ mali_bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+ /** Debug state indicating whether sufficient initialization of the driver
+ * has occurred to handle IRQs */
+ mali_bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+ /** Spinlock that must be held when:
+ * - writing gpu_powered
+ * - accessing driver_ready_for_irqs (in CONFIG_MALI_DEBUG builds) */
+ spinlock_t gpu_powered_lock;
+
+ /** Time in milliseconds between each dvfs sample */
+
+ u32 platform_dvfs_frequency;
+
+ /** Structure to hold metrics for the GPU */
+
+ struct kbasep_pm_metrics_data metrics;
+
+ /** Set to the number of poweroff timer ticks until the GPU is powered off */
+ int gpu_poweroff_pending;
+
+ /** Set to the number of poweroff timer ticks until shaders are powered off */
+ int shader_poweroff_pending_time;
+
+ /** Timer for powering off GPU */
+ struct hrtimer gpu_poweroff_timer;
+
+ struct workqueue_struct *gpu_poweroff_wq;
+
+ struct work_struct gpu_poweroff_work;
+
+ /** Period of GPU poweroff timer */
+ ktime_t gpu_poweroff_time;
+
+ /** Bit mask of shaders to be powered off on next timer callback */
+ u64 shader_poweroff_pending;
+
+ /** Set to MALI_TRUE if the poweroff timer is currently running, MALI_FALSE otherwise */
+ mali_bool poweroff_timer_needed;
+
+ int poweroff_shader_ticks;
+
+ int poweroff_gpu_ticks;
+
+ /** Callback when the GPU needs to be turned on. See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 1 if GPU state was lost, 0 otherwise
+ */
+ int (*callback_power_on)(struct kbase_device *kbdev);
+
+ /** Callback when the GPU may be turned off. See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_off)(struct kbase_device *kbdev);
+
+ /** Callback when a suspend occurs and the GPU needs to be turned off.
+ * See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_suspend)(struct kbase_device *kbdev);
+
+ /** Callback when a resume occurs and the GPU needs to be turned on.
+ * See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_resume)(struct kbase_device *kbdev);
+
+ /** Callback for initializing the runtime power management.
+ *
+ * @param kbdev The kbase device
+ *
+ * @return MALI_ERROR_NONE on success, else error code
+ */
+ mali_error (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+ /** Callback for terminating the runtime power management.
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+ /** Callback when the GPU needs to be turned on. See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 1 if GPU state was lost, 0 otherwise
+ */
+ int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+
+ /** Callback when the GPU may be turned off. See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+
+};
+
+/** The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/** The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/** Get details of the cores that are present in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of the cores (of a specified type)
+ * present in the GPU device and also a count of the number of cores.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type enumeration)
+ *
+ * @return The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+
+/** Get details of the cores that are currently active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) that
+ * are actively processing work (i.e. turned on *and* busy).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type enumeration)
+ *
+ * @return The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+
+/** Get details of the cores that are currently transitioning between power states.
+ *
+ * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) that
+ * are currently transitioning between power states.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type enumeration)
+ *
+ * @return The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
+
+/** Get details of the cores that are currently powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of the cores (of a specified type) that
+ * are powered and ready for jobs (they may or may not be currently executing jobs).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type enumeration)
+ *
+ * @return The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type);
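+
+/*
+ * Sketch (illustrative only): a power policy could compare the ready shader
+ * cores against the desired set before deciding that work is worth
+ * submitting, e.g.
+ *
+ *   u64 ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+ *
+ *   if ((ready & kbdev->pm.desired_shader_state) ==
+ *                kbdev->pm.desired_shader_state)
+ *           submit_pending_jobs(kbdev);
+ *
+ * submit_pending_jobs() is a hypothetical placeholder for whatever the
+ * caller does next.
+ */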
+
+/** Turn the clock for the device on, and enable device interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on. It should be modified during
+ * integration to perform the necessary actions to ensure that the GPU is fully powered and clocked.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param is_resume MALI_TRUE if clock on due to resume after suspend,
+ * MALI_FALSE otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, mali_bool is_resume);
+
+/** Disable device interrupts, and turn the clock for the device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU off. It should be modified during
+ * integration to perform the necessary actions to turn the clock off (if this is possible in the integration).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param is_suspend MALI_TRUE if clock off due to suspend, MALI_FALSE otherwise
+ */
+void kbase_pm_clock_off(struct kbase_device *kbdev, mali_bool is_suspend);
+
+/** Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/** Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until @ref kbase_pm_enable_interrupts or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/** Initialize the hardware
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported by the driver and performs a reset on
+ * the device so that it is in a known state before the device is used.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param enable_irqs When set to MALI_TRUE, GPU IRQs will be enabled after this call;
+ *                    otherwise they will be left disabled.
+ *
+ * @return MALI_ERROR_NONE if the device is supported and successfully reset.
+ */
+mali_error kbase_pm_init_hw(struct kbase_device *kbdev, mali_bool enable_irqs);
+
+/** The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the RESET_COMPLETED bit is set. It signals to the
+ * power management initialization code that the GPU has been successfully reset.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occurring/has occurred whilst this is
+ * in use. Use kbase_pm_context_active_handle_suspend() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+ /** A suspend is not expected/not possible - this is the same as
+ * kbase_pm_context_active() */
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+ /** If we're suspending, fail and don't increase the active count */
+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+ /** If we're suspending, succeed and allow the active count to increase iff
+ * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+ *
+ * This should only be used when there is a bounded time on the activation
+ * (e.g. guarantee it's going to be idled very soon after) */
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * Returns a status code indicating whether the caller is allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
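+
+/*
+ * Usage sketch (illustrative only): code about to touch the GPU would
+ * normally bracket the work with an active/idle pair, aborting if a suspend
+ * is in progress, e.g.
+ *
+ *   if (kbase_pm_context_active_handle_suspend(kbdev,
+ *                   KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE))
+ *           return -EAGAIN;  (system is suspending - defer the work)
+ *
+ *   (access GPU registers / submit work here)
+ *
+ *   kbase_pm_context_idle(kbdev);
+ *
+ * -EAGAIN is only an example; callers choose how to defer the operation
+ * until after resume.
+ */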
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/** Check if there are any power transitions to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of struct kbase_pm_device_data and the actual status of the
+ * hardware to see if any power transitions can be made at this time to make the hardware state closer to the state
+ * desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns MALI_TRUE when desired_xx_state requests no
+ * cores: of the zero cores desired, none were <em>un</em>available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation).
+ *
+ * The caller must hold kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @return non-zero when all desired cores are available. That is,
+ * it's worthwhile for the caller to submit a job.
+ * @return MALI_FALSE otherwise
+ */
+mali_bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
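+
+/*
+ * Sketch (illustrative only): callers hold power_change_lock around the
+ * _nolock variant, mirroring its use in kbase_pm_do_poweroff():
+ *
+ *   spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ *   cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ *   spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ */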
+
+/** Synchronous and locking variant of kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * @note There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device::pm::power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/** Variant of kbase_pm_update_cores_state() where the caller must hold
+ * kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/** Update the desired state of shader cores from the Power Policy, and begin
+ * any power transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will then
+ * begin power transitions to make the hardware achieve the desired shader core
+ * state.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/** Cancel any pending requests to power off the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/** Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever the information is required.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/** Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return MALI_ERROR_NONE on success, MALI_ERROR_FUNCTION_FAILED on error
+ */
+mali_error kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/** Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an error to call any metrics gathering
+ * function (other than kbasep_pm_metrics_init) after calling this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/** Record state of jobs currently active on GPU.
+ *
+ * This function records the time spent executing jobs, split between GL and
+ * CL contexts and per core group (CL jobs only).
+ *
+ * @param kbdev The kbase device structure for the device
+ * (must be a valid pointer)
+ */
+void kbasep_pm_record_job_status(struct kbase_device *kbdev);
+
+/** Record that the GPU is active.
+ *
+ * This records that the GPU is now active. The previous GPU state must have been idle; the function will assert if
+ * this is not true in a debug build.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_record_gpu_active(struct kbase_device *kbdev);
+
+/** Record that the GPU is idle.
+ *
+ * This records that the GPU is now idle. The previous GPU state must have been active; the function will assert if
+ * this is not true in a debug build.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev);
+
+/** Function to be called by the frame buffer driver to update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether the system is hitting the vsync target
+ * or not. buffer_updated should be true if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but only when the value of buffer_updated
+ * differs from a previous call.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param buffer_updated True if the buffer has been updated on this VSync, false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
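+
+/*
+ * Sketch (illustrative only): a frame buffer driver's vsync handler might
+ * track the last reported value and only call in when it changes, e.g.
+ *
+ *   if (buffer_updated != last_reported) {
+ *           kbase_pm_report_vsync(kbdev, buffer_updated);
+ *           last_reported = buffer_updated;
+ *   }
+ *
+ * last_reported is a hypothetical variable kept by the frame buffer driver.
+ */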
+
+/** Configure the frame buffer device to set the vsync callback.
+ *
+ * This function should do whatever is necessary for this integration to ensure that kbase_pm_report_vsync is
+ * called appropriately.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev);
+
+/** Free any resources that kbase_pm_register_vsync_callback allocated.
+ *
+ * This function should perform any cleanup required from the call to kbase_pm_register_vsync_callback.
+ * No call backs should occur after this function has returned.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev);
+
+/** Determine whether the DVFS system should change the clock speed of the GPU.
+ *
+ * This function should be called regularly by the DVFS system to check whether the clock speed of the GPU needs
+ * updating. It will return one of three enumerated values of kbase_pm_dvfs_action:
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @retval KBASE_PM_DVFS_NOP The clock does not need changing
+ * @retval KBASE_PM_DVFS_CLOCK_UP The clock frequency should be increased if possible.
+ * @retval KBASE_PM_DVFS_CLOCK_DOWN The clock frequency should be decreased if possible.
+ */
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
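+
+/*
+ * Sketch (illustrative only): a platform DVFS loop could poll this function
+ * periodically and adjust the GPU clock accordingly; the clock helpers named
+ * below are hypothetical:
+ *
+ *   switch (kbase_pm_get_dvfs_action(kbdev)) {
+ *   case KBASE_PM_DVFS_CLOCK_UP:
+ *           platform_raise_gpu_clock();
+ *           break;
+ *   case KBASE_PM_DVFS_CLOCK_DOWN:
+ *           platform_lower_gpu_clock();
+ *           break;
+ *   case KBASE_PM_DVFS_NOP:
+ *           break;
+ *   }
+ */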
+
+/** Mark that the GPU cycle counter is needed. If the caller is the first caller,
+ * then the GPU cycle counters will be enabled along with the l2 cache.
+ *
+ * The GPU must be powered when calling this function (i.e. @ref kbase_pm_context_active must have been called).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/** This is a version of the above function (@ref kbase_pm_request_gpu_cycle_counter) suitable for being
+ * called when the l2 cache is known to be on and assured to remain on until the subsequent call to
+ * kbase_pm_release_gpu_cycle_counter, such as when a job is submitted.
+ * It does not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e. @ref kbase_pm_context_active must have been called)
+ * and the l2 cache must be powered on.
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/** Mark that the GPU cycle counter is no longer in use. If the caller is the last
+ * caller, then the GPU cycle counters will be disabled. A request must have been made
+ * before a call to this.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
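+
+/*
+ * Usage sketch (illustrative only): a caller that needs cycle counts keeps
+ * the GPU powered for the duration and pairs the request/release calls, e.g.
+ *
+ *   kbase_pm_context_active(kbdev);
+ *   kbase_pm_request_gpu_cycle_counter(kbdev);
+ *   (read the cycle counter registers here)
+ *   kbase_pm_release_gpu_cycle_counter(kbdev);
+ *   kbase_pm_context_idle(kbdev);
+ */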
+
+/** Enables access to the GPU registers before power management has powered up the GPU
+ * with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read/write() at this stage,
+ * not kbase_reg_read/write().
+ *
+ * This results in the power management callbacks provided in the driver configuration
+ * being called to turn on power and/or clocks to the GPU.
+ * See @ref kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/** Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver configuration
+ * being called to turn off power and/or clocks to the GPU.
+ * See @ref kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
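+
+/* A minimal usage sketch: bracketing early register reads performed during
+ * device probe, before kbase_pm_powerup() has been called. Any access in
+ * this window must use kbase_os_reg_read/write() rather than
+ * kbase_reg_read/write().
+ *
+ *   kbase_pm_register_access_enable(kbdev);
+ *   ... read identification registers with kbase_os_reg_read() ...
+ *   kbase_pm_register_access_disable(kbdev);
+ */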
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
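+
+/* A minimal sketch of how an OS-level power management handler might wire up
+ * the pair above. The handler names and the use of dev_get_drvdata() are
+ * illustrative assumptions; the actual hook-up is platform integration code.
+ *
+ *   static int platform_gpu_suspend(struct device *dev)
+ *   {
+ *       struct kbase_device *kbdev = dev_get_drvdata(dev);
+ *
+ *       kbase_pm_suspend(kbdev);
+ *       return 0;
+ *   }
+ *
+ *   static int platform_gpu_resume(struct device *dev)
+ *   {
+ *       struct kbase_device *kbdev = dev_get_drvdata(dev);
+ *
+ *       kbase_pm_resume(kbdev);
+ *       return 0;
+ *   }
+ */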
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline function */
+
+/**
+ * Check if the power management metrics collection is active.
+ *
+ * Note that this returns whether the power management metrics collection was
+ * active at the time of calling; the collection may have been enabled or
+ * disabled again by the time the call returns.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @return MALI_TRUE if metrics collection was active else MALI_FALSE.
+ */
+mali_bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * Power on the GPU, and any cores that are requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param is_resume MALI_TRUE if power on due to resume after suspend,
+ * MALI_FALSE otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, mali_bool is_resume);
+
+/**
+ * Power off the GPU, and any cores that have been requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param is_suspend MALI_TRUE if power off due to suspend,
+ * MALI_FALSE otherwise
+ */
+void kbase_pm_do_poweroff(struct kbase_device *kbdev, mali_bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+ unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param utilisation The current calculated utilisation by the metrics system.
+ * @param util_gl_share The current calculated gl share of utilisation.
+ * @param util_cl_share The current calculated cl share of utilisation per core group.
+ * @return 0 on failure and non-zero on success.
+ */
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+ u32 util_gl_share, u32 util_cl_share[2]);
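+
+/* A minimal implementation sketch for platform code, assuming hypothetical
+ * helpers platform_request_gpu_freq(), FREQ_MAX and FREQ_MIN for the actual
+ * clock change. A non-zero return value indicates success, as documented
+ * above.
+ *
+ *   int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+ *                                 u32 util_gl_share, u32 util_cl_share[2])
+ *   {
+ *       if (utilisation > 90)
+ *           platform_request_gpu_freq(kbdev, FREQ_MAX);
+ *       else if (utilisation < 30)
+ *           platform_request_gpu_freq(kbdev, FREQ_MIN);
+ *
+ *       return 1;
+ *   }
+ */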
+#endif
+#endif /* _KBASE_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c
new file mode 100755
index 000000000000..4d3878b50937
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.c
@@ -0,0 +1,64 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_always_on.c
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+ return kbdev->shader_present_bitmap;
+}
+
+static mali_bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+ return MALI_TRUE;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/** The @ref struct kbase_pm_policy structure for the "always on" power policy.
+ *
+ * This is the static structure that defines the "always on" power policy's callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+ "always_on", /* name */
+ always_on_init, /* init */
+ always_on_term, /* term */
+ always_on_get_core_mask, /* get_core_mask */
+ always_on_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops)
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h
new file mode 100755
index 000000000000..a3858045bd0c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_always_on.h
@@ -0,0 +1,68 @@
+
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_always_on.h
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * The "Always on" power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - All Shader Cores are powered up, regardless of whether or not they will
+ * be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - All Shader Cores are kept powered, regardless of whether or not they will
+ * be needed.
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are kept powered, regardless of whether or not they will
+ * be needed. The GPU itself is also kept powered, even though it is not
+ * needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is active.
+ */
+typedef struct kbasep_pm_policy_always_on {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+} kbasep_pm_policy_always_on;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.c
new file mode 100755
index 000000000000..9d5fb94c8218
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.c
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca.c
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#if KBASE_PM_EN
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+#if !MALI_CUSTOMER_RELEASE
+extern const struct kbase_pm_ca_policy kbase_pm_ca_random_policy_ops;
+#endif
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+ &kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of entries in the policy_list array.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+mali_error kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.ca_current_policy = policy_list[0];
+
+ kbdev->pm.ca_current_policy->init(kbdev);
+
+ return MALI_ERROR_NONE;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+ if (!list)
+ return POLICY_COUNT;
+
+ *list = policy_list;
+
+ return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies)
+
+const struct kbase_pm_ca_policy *kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return kbdev->pm.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy)
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev, const struct kbase_pm_ca_policy *new_policy)
+{
+ const struct kbase_pm_ca_policy *old_policy;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+ KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+ /* During a policy change we pretend the GPU is active */
+ /* A suspend won't happen here, because we're in a syscall from a userspace thread */
+ kbase_pm_context_active(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Remove the policy to prevent IRQ handlers from working on it */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ old_policy = kbdev->pm.ca_current_policy;
+ kbdev->pm.ca_current_policy = NULL;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (old_policy->term)
+ old_policy->term(kbdev);
+
+ if (new_policy->init)
+ new_policy->init(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.ca_current_policy = new_policy;
+
+ /* If any core power state changes were previously attempted, but couldn't
+ * be made because the policy was changing (current_policy was NULL), then
+ * re-try them here. */
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ kbdev->pm.ca_current_policy->update_core_status(kbdev, kbdev->shader_ready_bitmap, kbdev->shader_transitioning_bitmap);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Now the policy change is finished, we release our fake context active reference */
+ kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy)
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ /* All cores must be enabled when instrumentation is in use */
+ if (kbdev->pm.instr_enabled == MALI_TRUE)
+ return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask;
+
+ if (kbdev->pm.ca_current_policy == NULL)
+ return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask;
+
+ return kbdev->pm.ca_current_policy->get_core_mask(kbdev) & kbdev->pm.debug_core_mask;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask)
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ if (kbdev->pm.ca_current_policy != NULL)
+ kbdev->pm.ca_current_policy->update_core_status(kbdev, cores_ready, cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.instr_enabled = MALI_TRUE;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.instr_enabled = MALI_FALSE;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.h
new file mode 100755
index 000000000000..e073470e0855
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca.h
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca.h
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+enum kbase_pm_ca_policy_id {
+ KBASE_PM_CA_POLICY_ID_FIXED = 1,
+ KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/** Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure which contains function pointers to the
+ * policy's methods.
+ */
+struct kbase_pm_ca_policy {
+ /** The name of this policy */
+ char *name;
+
+ /** Function called when the policy is selected
+ *
+ * This should initialize the kbdev->pm.ca_policy_data structure. It should not attempt
+ * to make any changes to hardware state.
+ *
+ * It is undefined what state the cores are in when the function is called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+ void (*init)(struct kbase_device *kbdev);
+
+ /** Function called when the policy is unselected.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+ void (*term)(struct kbase_device *kbdev);
+
+ /** Function called to get the current shader core availability mask
+ *
+ * When a change in core availability is occurring, the policy must set kbdev->pm.ca_in_transition
+ * to MALI_TRUE. This is to indicate that reporting changes in power state cannot be optimized out,
+ * even if kbdev->pm.desired_shader_state remains unchanged. This must be done by any functions
+ * internal to the Core Availability Policy that change the return value of
+ * kbase_pm_ca_policy::get_core_mask.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return The current core availability mask */
+ u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+ /** Function called to update the current core status
+ *
+ * If none of the cores in core group 0 are ready or transitioning, then the policy must
+ * ensure that the next call to get_core_mask does not return 0 for all cores in core group
+ * 0. It is an error to disable core group 0 through the core availability policy.
+ *
+ * When a change in core availability has finished, the policy must set kbdev->pm.ca_in_transition
+ * to MALI_FALSE. This is to indicate that changes in power state can once again be optimized out
+ * when kbdev->pm.desired_shader_state is unchanged.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param cores_ready The mask of cores currently powered and ready to run jobs
+ * @param cores_transitioning The mask of cores currently transitioning power state */
+ void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning);
+
+ /** Field indicating flags for this policy */
+ kbase_pm_ca_policy_flags flags;
+
+ /** Field indicating an ID for this policy. This is not necessarily the
+ * same as its index in the list returned by kbase_pm_ca_list_policies().
+ * It is used purely for debugging. */
+ enum kbase_pm_ca_policy_id id;
+};
+
+/** Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return MALI_ERROR_NONE if the core availability framework was successfully initialized.
+ */
+mali_error kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/** Terminate core availability framework
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/** Return mask of currently available shader cores
+ * Calls into the core availability policy
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/** Update core availability policy with current core power status
+ * Calls into the core availability policy
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param cores_ready The bit mask of cores ready for job submission
+ * @param cores_transitioning The bit mask of cores that are transitioning power state
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning);
+
+/** Enable override for instrumentation
+ *
+ * This overrides the output of the core availability policy, ensuring that all cores are available
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/** Disable override for instrumentation
+ *
+ * This disables any previously enabled override, and resumes normal policy functionality
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+/** Get the current policy.
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy *kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/** Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param policy The policy to change to (valid pointer returned from @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev, const struct kbase_pm_ca_policy *policy);
+
+/** Retrieve a static list of the available policies.
+ * @param[out] policies An array pointer to take the list of policies. This may be NULL.
+ * The contents of this array must not be modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
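+
+/* A minimal usage sketch: selecting a core availability policy by name using
+ * the two calls above (here the "fixed" policy, which is always built in).
+ *
+ *   const struct kbase_pm_ca_policy * const *list;
+ *   int i;
+ *   int count = kbase_pm_ca_list_policies(&list);
+ *
+ *   for (i = 0; i < count; i++)
+ *       if (strcmp(list[i]->name, "fixed") == 0)
+ *           kbase_pm_ca_set_policy(kbdev, list[i]);
+ */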
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c
new file mode 100755
index 000000000000..d81cdbd42e7c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca_fixed.c
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#if KBASE_PM_EN
+static void fixed_init(struct kbase_device *kbdev)
+{
+ kbdev->pm.ca_in_transition = MALI_FALSE;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+ return kbdev->shader_present_bitmap;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev, u64 cores_ready, u64 cores_transitioning)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(cores_ready);
+ CSTD_UNUSED(cores_transitioning);
+}
+
+/** The @ref struct kbase_pm_ca_policy structure for the fixed core availability policy.
+ *
+ * This is the static structure that defines the fixed core availability policy's callbacks and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+ "fixed", /* name */
+ fixed_init, /* init */
+ fixed_term, /* term */
+ fixed_get_core_mask, /* get_core_mask */
+ fixed_update_core_status, /* update_core_status */
+ 0u, /* flags */
+ KBASE_PM_CA_POLICY_ID_FIXED, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops)
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h
new file mode 100755
index 000000000000..9d95e07a2d52
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca_fixed.h
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is active.
+ */
+typedef struct kbasep_pm_ca_policy_fixed {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+} kbasep_pm_ca_policy_fixed;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c
new file mode 100755
index 000000000000..8093e32e4dcd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.c
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.active_count == 0)
+ return 0;
+
+ return kbdev->shader_present_bitmap;
+}
+
+static mali_bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.active_count == 0)
+ return MALI_FALSE;
+
+ return MALI_TRUE;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/** The @ref struct kbase_pm_policy structure for the coarse demand power policy.
+ *
+ * This is the static structure that defines the coarse demand power policy's callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+ "coarse_demand", /* name */
+ coarse_demand_init, /* init */
+ coarse_demand_term, /* term */
+ coarse_demand_get_core_mask, /* get_core_mask */
+ coarse_demand_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops)
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h
new file mode 100755
index 000000000000..afe3fc93b42d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.h
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - All Shader Cores are powered up, regardless of whether or not they will
+ * be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - All Shader Cores are kept powered, regardless of whether or not they will
+ * be needed.
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is active.
+ */
+typedef struct kbasep_pm_policy_coarse_demand {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+} kbasep_pm_policy_coarse_demand;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.c
new file mode 100755
index 000000000000..0e322924c7ef
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.c
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_demand.c
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+ u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+ if (0 == kbdev->pm.active_count)
+ return 0;
+
+ return desired;
+}
+
+static mali_bool demand_get_core_active(struct kbase_device *kbdev)
+{
+ if (0 == kbdev->pm.active_count)
+ return MALI_FALSE;
+
+ return MALI_TRUE;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/** The @ref struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+ "demand", /* name */
+ demand_init, /* init */
+ demand_term, /* term */
+ demand_get_core_mask, /* get_core_mask */
+ demand_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops)
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.h
new file mode 100755
index 000000000000..8579181d7dd0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_demand.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_demand.h
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - The Shader Cores are not powered up
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - Only those Shader Cores are powered up
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is active.
+ */
+typedef struct kbasep_pm_policy_demand {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+} kbasep_pm_policy_demand;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/mali_kbase_pm_driver.c
new file mode 100755
index 000000000000..32b24b104b48
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_driver.c
@@ -0,0 +1,1013 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_driver.c
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif /* MALI_MOCK_TEST */
+
+/** Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow the @ref core_type_to_reg function,
+ * which decodes this enumeration, to be simpler and more efficient.
+ */
+enum kbasep_pm_action {
+ ACTION_PRESENT = 0,
+ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+/** Decode a core type and action to a register.
+ *
+ * Given a core type (defined by @ref kbase_pm_core_type) and an action (defined by @ref kbasep_pm_action) this
+ * function will return the register offset that will perform the action on the core type. The register returned is
+ * the \c _LO register and an offset must be applied to use the \c _HI register.
+ *
+ * @param core_type The type of core
+ * @param action The type of action
+ *
+ * @return The register offset of the \c _LO register that performs an action of type \c action on a core of type \c
+ * core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type, enum kbasep_pm_action action)
+{
+ return (u32)core_type + (u32)action;
+}
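+
+/* For example, assuming KBASE_PM_CORE_SHADER is defined as SHADER_PRESENT_LO,
+ * core_type_to_reg(KBASE_PM_CORE_SHADER, ACTION_PWRON) evaluates to
+ * SHADER_PRESENT_LO + (SHADER_PWRON_LO - SHADER_PRESENT_LO), i.e.
+ * SHADER_PWRON_LO. */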
+
+/** Invokes an action on a core set
+ *
+ * This function performs the action given by \c action on a set of cores of a type given by \c core_type. It is a
+ * static function used by @ref kbase_pm_transition_core_type
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that the action should be performed on
+ * @param cores A bit mask of cores to perform the action on (low 32 bits)
+ * @param action The action to perform on the cores
+ */
+STATIC void kbase_pm_invoke(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, u64 cores, enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo = cores & 0xFFFFFFFF;
+ u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ if (cores) {
+ if (action == ACTION_PWRON)
+ kbase_trace_mali_pm_power_on(core_type, cores);
+ else if (action == ACTION_PWROFF)
+ kbase_trace_mali_pm_power_off(core_type, cores);
+ }
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+ /* Tracing */
+ if (cores) {
+ if (action == ACTION_PWRON)
+ switch (core_type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL, NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL, 0u, lo);
+ break;
+ default:
+ /* L3 not handled */
+ break;
+ }
+ else if (action == ACTION_PWROFF)
+ switch (core_type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, 0u, lo);
+ break;
+ default:
+ /* L3 not handled */
+ break;
+ }
+ }
+
+ if (lo != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+ if (hi != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/** Get information about a core set
+ *
+ * This function gets information (chosen by \c action) about a set of cores of a type given by \c core_type. It is a
+ * static function used by @ref kbase_pm_get_present_cores, @ref kbase_pm_get_active_cores, @ref
+ * kbase_pm_get_trans_cores and @ref kbase_pm_get_ready_cores.
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that should be queried
+ * @param action The property of the cores to query
+ *
+ * @return A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev, enum kbase_pm_core_type core_type, enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo, hi;
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+
+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+ return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+ kbdev->shader_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_PRESENT);
+ kbdev->tiler_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_TILER, ACTION_PRESENT);
+ kbdev->l2_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_L2, ACTION_PRESENT);
+ kbdev->l3_present_bitmap = kbase_pm_get_state(kbdev, KBASE_PM_CORE_L3, ACTION_PRESENT);
+
+ kbdev->shader_inuse_bitmap = 0;
+ kbdev->shader_needed_bitmap = 0;
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_users_count = 0;
+ kbdev->l2_available_bitmap = 0;
+ kbdev->tiler_needed_cnt = 0;
+ kbdev->tiler_inuse_cnt = 0;
+
+ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores)
+
+/** Get the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ switch (type) {
+ case KBASE_PM_CORE_L3:
+ return kbdev->l3_present_bitmap;
+ case KBASE_PM_CORE_L2:
+ return kbdev->l2_present_bitmap;
+ case KBASE_PM_CORE_SHADER:
+ return kbdev->shader_present_bitmap;
+ case KBASE_PM_CORE_TILER:
+ return kbdev->tiler_present_bitmap;
+ }
+ KBASE_DEBUG_ASSERT(0);
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores)
+
+/** Get the cores that are "active" (busy processing work)
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores)
+
+/** Get the cores that are transitioning between power states
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores)
+/** Get the cores that are powered on
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type)
+{
+ u64 result;
+
+ result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+ switch (type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u, (u32) result);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u, (u32) result);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u, (u32) result);
+ break;
+ default:
+ /* NB: L3 not currently traced */
+ break;
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores)
+
+/** Perform power transitions for a particular core type.
+ *
+ * This function will perform any available power transitions to make the actual hardware state closer to the desired
+ * state. If a core is currently transitioning then changes to the power state of that core cannot be made until the
+ * transition has finished. Cores which are not present in the hardware are ignored if they are specified in the
+ * desired_state bitmask, however the return value will always be 0 in this case.
+ *
+ * @param kbdev The kbase device
+ * @param type The core type to perform transitions for
+ * @param desired_state A bit mask of the desired state of the cores
+ * @param in_use A bit mask of the cores that are currently running jobs.
+ * These cores have to be kept powered up because there are jobs
+ * running (or about to run) on them.
+ * @param[out] available Receives a bit mask of the cores that the job scheduler can use to submit jobs to.
+ * May be NULL if this is not needed.
+ * @param[in,out] powering_on Bit mask to update with cores that are transitioning to a power-on state.
+ *
+ * @return MALI_TRUE if the desired state has been reached, MALI_FALSE otherwise
+ */
+STATIC mali_bool kbase_pm_transition_core_type(struct kbase_device *kbdev, enum kbase_pm_core_type type, u64 desired_state,
+ u64 in_use, u64 * const available, u64 *powering_on)
+{
+ u64 present;
+ u64 ready;
+ u64 trans;
+ u64 powerup;
+ u64 powerdown;
+ u64 powering_on_trans;
+ u64 desired_state_in_use;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ /* Get current state */
+ present = kbase_pm_get_present_cores(kbdev, type);
+ trans = kbase_pm_get_trans_cores(kbdev, type);
+ ready = kbase_pm_get_ready_cores(kbdev, type);
+ /* mask off ready from trans in case transitions finished between the register reads */
+ trans &= ~ready;
+
+ powering_on_trans = trans & *powering_on;
+ *powering_on = powering_on_trans;
+
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ /* Update desired state to include the in-use cores. These have to be kept powered up because there are jobs
+ * running or about to run on these cores
+ */
+ desired_state_in_use = desired_state | in_use;
+
+ /* Update state of whether l2 caches are powered */
+ if (type == KBASE_PM_CORE_L2) {
+ if ((ready == present) && (desired_state_in_use == ready) && (trans == 0)) {
+ /* All are ready, none will be turned off, and none are transitioning */
+ kbdev->pm.l2_powered = 1;
+ if (kbdev->l2_users_count > 0) {
+ /* Notify any registered l2 cache users (optimized out when no users waiting) */
+ wake_up(&kbdev->pm.l2_powered_wait);
+ }
+ } else {
+ kbdev->pm.l2_powered = 0;
+ }
+ }
+
+ if (desired_state_in_use == ready && (trans == 0))
+ return MALI_TRUE;
+
+ /* Restrict the cores to those that are actually present */
+ powerup = desired_state_in_use & present;
+ powerdown = (~desired_state_in_use) & present;
+
+ /* Restrict to cores that are not already in the desired state */
+ powerup &= ~ready;
+ powerdown &= ready;
+
+ /* Don't transition any cores that are already transitioning, except for
+ * Mali cores that support the following case:
+ *
+ * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+ * a core that is currently transitioning to power off, then this is
+ * remembered and the shader core is automatically powered up again once
+ * the original transition completes. Once the automatic power on is
+ * complete any job scheduled on the shader core should start.
+ */
+ powerdown &= ~trans;
+
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+ if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+ trans = powering_on_trans; /* for exception cases, only mask off cores in power on transitions */
+
+ powerup &= ~trans;
+
+ /* Perform transitions if any */
+ kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+ kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+ /* Recalculate cores transitioning on, and re-evaluate our state */
+ powering_on_trans |= powerup;
+ *powering_on = powering_on_trans;
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ return MALI_FALSE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type)
+
+/** Determine which caches should be on for a particular core state.
+ *
+ * This function takes a bit mask of the present caches and the cores (or caches) that are attached to the caches that
+ * will be powered. It then computes which caches should be turned on to allow the cores requested to be powered up.
+ *
+ * @param present The bit mask of present caches
+ * @param cores_powered A bit mask of cores (or L2 caches) that are desired to be powered
+ *
+ * @return A bit mask of the caches that should be turned on
+ */
+STATIC u64 get_desired_cache_status(u64 present, u64 cores_powered)
+{
+ u64 desired = 0;
+
+ while (present) {
+ /* Find out which is the highest set bit */
+ u64 bit = fls64(present) - 1;
+ u64 bit_mask = 1ull << bit;
+ /* Create a mask which has all bits from 'bit' upwards set */
+
+ u64 mask = ~(bit_mask - 1);
+
+ /* If there are any cores powered at this bit or above (that haven't previously been processed) then we need
+ * this cache on */
+ if (cores_powered & mask)
+ desired |= bit_mask;
+
+ /* Remove bits from cores_powered and present */
+ cores_powered &= ~mask;
+ present &= ~bit_mask;
+ }
+
+ return desired;
+}
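+
+/* Worked example: with present = 0x11 (caches at bits 0 and 4) and
+ * cores_powered = 0xf0, the first iteration (bit 4) finds powered cores at or
+ * above bit 4, so bit 4 is set in the result and bits 4 and above are cleared
+ * from cores_powered; the second iteration (bit 0) finds nothing left
+ * powered, so the function returns 0x10 - only the cache feeding the powered
+ * cores is requested on. */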
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status)
+
+mali_bool MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+ mali_bool cores_are_available = MALI_FALSE;
+ mali_bool in_desired_state = MALI_TRUE;
+ u64 desired_l2_state;
+ u64 desired_l3_state;
+ u64 cores_powered;
+ u64 tiler_available_bitmap;
+ u64 shader_available_bitmap;
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+ u64 l2_available_bitmap;
+ u64 prev_l2_available_bitmap;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ spin_lock(&kbdev->pm.gpu_powered_lock);
+ if (kbdev->pm.gpu_powered == MALI_FALSE) {
+ spin_unlock(&kbdev->pm.gpu_powered_lock);
+ if (kbdev->pm.desired_shader_state == 0 && kbdev->pm.desired_tiler_state == 0)
+ return MALI_TRUE;
+ return MALI_FALSE;
+ }
+
+ /* Trace that a change-state is being requested, and that it took
+ * (effectively) no time to start it. This is useful for counting how many
+ * state changes occurred, in a way that's backwards-compatible with
+ * processing the trace data */
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+ /* If any cores are already powered then we must keep the caches on */
+ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+ cores_powered |= kbdev->pm.desired_shader_state;
+
+ /* If there are l2 cache users registered, keep all l2s powered even if all other cores are off. */
+ if (kbdev->l2_users_count > 0)
+ cores_powered |= kbdev->l2_present_bitmap;
+
+ desired_l2_state = get_desired_cache_status(kbdev->l2_present_bitmap, cores_powered);
+
+ /* If any l2 cache is on, then enable l2 #0, for use by job manager */
+ if (0 != desired_l2_state) {
+ desired_l2_state |= 1;
+ /* Also enable tiler if l2 cache is powered */
+ kbdev->pm.desired_tiler_state = kbdev->tiler_present_bitmap;
+ } else {
+ kbdev->pm.desired_tiler_state = 0;
+ }
+
+ desired_l3_state = get_desired_cache_status(kbdev->l3_present_bitmap, desired_l2_state);
+ prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+ in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_L3, desired_l3_state, 0, NULL, &kbdev->pm.powering_on_l3_state);
+ in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_L2, desired_l2_state, 0, &l2_available_bitmap, &kbdev->pm.powering_on_l2_state);
+
+ if (kbdev->l2_available_bitmap != l2_available_bitmap) {
+ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+ }
+
+ kbdev->l2_available_bitmap = l2_available_bitmap;
+
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_TILER, kbdev->pm.desired_tiler_state, 0, &tiler_available_bitmap, &kbdev->pm.powering_on_tiler_state);
+ in_desired_state &= kbase_pm_transition_core_type(kbdev, KBASE_PM_CORE_SHADER, kbdev->pm.desired_shader_state, kbdev->shader_inuse_bitmap, &shader_available_bitmap, &kbdev->pm.powering_on_shader_state);
+
+ if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32) shader_available_bitmap);
+ KBASE_TIMELINE_POWER_SHADER(kbdev, shader_available_bitmap);
+ }
+
+ kbdev->shader_available_bitmap = shader_available_bitmap;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, NULL, 0u, (u32) tiler_available_bitmap);
+ KBASE_TIMELINE_POWER_TILER(kbdev, tiler_available_bitmap);
+ }
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+ } else if ((l2_available_bitmap & kbdev->tiler_present_bitmap) != kbdev->tiler_present_bitmap) {
+ tiler_available_bitmap = 0;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ KBASE_TIMELINE_POWER_TILER(kbdev, tiler_available_bitmap);
+ }
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+ }
+
+ /* State updated for slow-path waiters */
+ kbdev->pm.gpu_in_desired_state = in_desired_state;
+
+ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+ /* Determine whether the cores are now available (even if the set of
+ * available cores is empty). Note that they can be available even if we've
+ * not finished transitioning to the desired state */
+ if ((kbdev->shader_available_bitmap & kbdev->pm.desired_shader_state) == kbdev->pm.desired_shader_state &&
+ (kbdev->tiler_available_bitmap & kbdev->pm.desired_tiler_state) == kbdev->pm.desired_tiler_state) {
+ cores_are_available = MALI_TRUE;
+
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u, (u32)(kbdev->shader_available_bitmap & kbdev->pm.desired_shader_state));
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u, (u32)(kbdev->tiler_available_bitmap & kbdev->pm.desired_tiler_state));
+
+ /* Log timelining information about handling events that power up
+ * cores, to match up with either immediate submission (because the
+ * cores are already available) or the PM IRQ */
+ if (!in_desired_state)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ }
+
+ if (in_desired_state) {
+ KBASE_DEBUG_ASSERT(cores_are_available);
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L3, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L3));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER, kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER));
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, kbdev->pm.gpu_in_desired_state, (u32)kbdev->pm.desired_shader_state);
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u, (u32)kbdev->pm.desired_tiler_state);
+
+ /* Log timelining information for synchronous waiters */
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Wake slow-path waiters. Job scheduler does not use this. */
+ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+ wake_up(&kbdev->pm.gpu_in_desired_state_wait);
+ }
+
+ spin_unlock(&kbdev->pm.gpu_powered_lock);
+
+ /* kbase_pm_ca_update_core_status can cause one-level recursion into
+ * this function, so it must only be called once all changes to kbdev
+ * have been committed, and after the gpu_powered_lock has been
+ * dropped. */
+ if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+ kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+ kbdev->shader_ready_bitmap = shader_ready_bitmap;
+ kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap;
+
+ kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, shader_transitioning_bitmap);
+ }
+
+ /* The core availability policy is not allowed to keep core group 0 turned off (unless it was changing the l2 power state) */
+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) & kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+ (prev_l2_available_bitmap == desired_l2_state) &&
+ !(kbase_pm_ca_get_core_mask(kbdev) & kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+ BUG();
+
+ /* The core availability policy is allowed to keep core group 1 off,
+ * but all jobs specifically targeting CG1 must fail */
+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) & kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+ !(kbase_pm_ca_get_core_mask(kbdev) & kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+ kbdev->pm.cg1_disabled = MALI_TRUE;
+ else
+ kbdev->pm.cg1_disabled = MALI_FALSE;
+
+ return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock)
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ mali_bool cores_are_available;
+ /* Force the transition to be checked and reported - the cores may be
+ * 'available' (for job submission) but not fully powered up. */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* Wait for cores */
+ wait_event(kbdev->pm.gpu_in_desired_state_wait, kbdev->pm.gpu_in_desired_state);
+
+ /* Log timelining information that a change in state has completed */
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync)
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Clear all interrupts,
+ * and unmask them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts)
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Mask all interrupts,
+ * and clear them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts)
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, mali_bool is_resume)
+{
+ mali_bool reset_required = is_resume;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ if (kbdev->pm.gpu_powered) {
+ /* Already turned on */
+ KBASE_DEBUG_ASSERT(!is_resume);
+ return;
+ }
+
+ KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+ if (is_resume && kbdev->pm.callback_power_resume) {
+ kbdev->pm.callback_power_resume(kbdev);
+ } else if (kbdev->pm.callback_power_on) {
+ kbdev->pm.callback_power_on(kbdev);
+		/* If the platform properly preserves GPU state across power-off, the
+		 * return value of the callback_power_on function may be used to decide
+		 * whether the GPU needs to be reset on power up. Currently we are
+		 * conservative and always reset the GPU. */
+ reset_required = MALI_TRUE;
+ }
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+ kbdev->pm.gpu_powered = MALI_TRUE;
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (reset_required) {
+ /* GPU state was lost, reset GPU to ensure it is in a
+ * consistent state */
+ kbase_pm_init_hw(kbdev, MALI_TRUE);
+ }
+
+ /* Lastly, enable the interrupts */
+ kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on)
+
+void kbase_pm_clock_off(struct kbase_device *kbdev, mali_bool is_suspend)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* ASSERT that the cores should now be unavailable. No lock needed. */
+ KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+ if (!kbdev->pm.gpu_powered) {
+ /* Already turned off */
+ if (is_suspend && kbdev->pm.callback_power_suspend)
+ kbdev->pm.callback_power_suspend(kbdev);
+ return;
+ }
+
+ KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+ /* Disable interrupts. This also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ /* Ensure that any IRQ handlers have finished */
+ kbase_synchronize_irqs(kbdev);
+
+ /* The GPU power may be turned off from this point */
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+ kbdev->pm.gpu_powered = MALI_FALSE;
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+
+ if (is_suspend && kbdev->pm.callback_power_suspend)
+ kbdev->pm.callback_power_suspend(kbdev);
+ else if (kbdev->pm.callback_power_off)
+ kbdev->pm.callback_power_off(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off)
+
+struct kbasep_reset_timeout_data {
+ struct hrtimer timer;
+ mali_bool timed_out;
+ struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ kbdev->pm.reset_done = MALI_TRUE;
+ wake_up(&kbdev->pm.reset_done_wait);
+}
+
+/**
+ * Wait for the RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ */
+STATIC void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ wait_event(kbdev->pm.reset_done_wait, (kbdev->pm.reset_done));
+ kbdev->pm.reset_done = MALI_FALSE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done)
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+ struct kbasep_reset_timeout_data *rtdata = container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+ rtdata->timed_out = 1;
+
+ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset hasn't completed */
+ kbase_pm_reset_done(rtdata->kbdev);
+
+ return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues(struct kbase_device *kbdev)
+{
+ u32 value = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE (see PRLAM-8443),
+	 * and due to MIDGLES-3539 (see PRLAM-11035). */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+ value |= SC_LS_PAUSEBUFFER_DISABLE;
+
+ /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+ value |= SC_SDC_DISABLE_OQ_DISCARD;
+
+ /* Enable alternative hardware counter selection if configured. */
+ if (DEFAULT_ALTERNATIVE_HWC)
+ value |= SC_ALT_COUNTERS;
+
+ /* Use software control of forward pixel kill when needed. See MIDEUR-174. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_2121))
+ value |= SC_OVERRIDE_FWD_PIXEL_KILL;
+
+ /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+ value |= SC_ENABLE_TEXGRD_FLAGS;
+
+ if (value != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), value, NULL);
+
+ /* Set tiler clock gate override if required */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) {
+ value = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG), NULL);
+ value |= TC_CLOCK_GATE_OVERRIDE;
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), value, NULL);
+ }
+
+ /* Limit the GPU bus bandwidth if the platform needs this. */
+ value = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+ /* Limit read ID width for AXI */
+ value &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+ value |= (DEFAULT_ARID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+ /* Limit write ID width for AXI */
+ value &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+ value |= (DEFAULT_AWID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), value, NULL);
+}
+
+mali_error kbase_pm_init_hw(struct kbase_device *kbdev, mali_bool enable_irqs)
+{
+ unsigned long flags;
+ struct kbasep_reset_timeout_data rtdata;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* Ensure the clock is on before attempting to access the hardware */
+ if (!kbdev->pm.gpu_powered) {
+ if (kbdev->pm.callback_power_on)
+ kbdev->pm.callback_power_on(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.gpu_powered_lock, flags);
+ kbdev->pm.gpu_powered = MALI_TRUE;
+ spin_unlock_irqrestore(&kbdev->pm.gpu_powered_lock, flags);
+ }
+
+ /* Ensure interrupts are off to begin with, this also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+
+ /* Prepare for the soft-reset */
+ kbdev->pm.reset_done = MALI_FALSE;
+
+ /* The cores should be made unavailable due to the reset */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ if (kbdev->shader_available_bitmap != 0u)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, NULL, 0u, (u32)0u);
+ if (kbdev->tiler_available_bitmap != 0u)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, NULL, 0u, (u32)0u);
+ kbdev->shader_available_bitmap = 0u;
+ kbdev->tiler_available_bitmap = 0u;
+ kbdev->l2_available_bitmap = 0u;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* Soft reset the GPU */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET, NULL);
+
+ /* Unmask the reset complete interrupt only */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, NULL);
+
+ /* Initialize a structure for tracking the status of the reset */
+ rtdata.kbdev = kbdev;
+ rtdata.timed_out = 0;
+
+ /* Create a timer to use as a timeout on the reset */
+ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rtdata.timer.function = kbasep_reset_timeout;
+
+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL);
+
+ /* Wait for the RESET_COMPLETED interrupt to be raised */
+ kbase_pm_wait_for_reset(kbdev);
+
+ if (rtdata.timed_out == 0) {
+ /* GPU has been reset */
+ hrtimer_cancel(&rtdata.timer);
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ /* No interrupt has been received - check if the RAWSTAT register says the reset has completed */
+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & RESET_COMPLETED) {
+ /* The interrupt is set in the RAWSTAT; this suggests that the interrupts are not getting to the CPU */
+ dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+ /* If interrupts aren't working we can't continue. */
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ /* The GPU doesn't seem to be responding to the reset so try a hard reset */
+ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", RESET_TIMEOUT);
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET, NULL);
+
+ /* Restart the timer to wait for the hard reset to complete */
+ rtdata.timed_out = 0;
+
+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), HRTIMER_MODE_REL);
+
+ /* Wait for the RESET_COMPLETED interrupt to be raised */
+ kbase_pm_wait_for_reset(kbdev);
+
+ if (rtdata.timed_out == 0) {
+ /* GPU has been reset */
+ hrtimer_cancel(&rtdata.timer);
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ destroy_hrtimer_on_stack(&rtdata.timer);
+
+ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", RESET_TIMEOUT);
+
+ /* The GPU still hasn't reset, give up */
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ out:
+
+	/* If the cycle counter was in use, re-enable it. enable_irqs will only be false when called from kbase_pm_powerup. */
+ if (kbdev->pm.gpu_cycle_counter_requests && enable_irqs) {
+ /* enable interrupts as the L2 may have to be powered on */
+ kbase_pm_enable_interrupts(kbdev);
+ kbase_pm_request_l2_caches(kbdev);
+
+ /* Re-enable the counters if we need to */
+ spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+ if (kbdev->pm.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+ kbase_pm_disable_interrupts(kbdev);
+ }
+
+ if (enable_irqs)
+ kbase_pm_enable_interrupts(kbdev);
+
+ kbase_pm_hw_issues(kbdev);
+
+ return MALI_ERROR_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_init_hw)
+
+/** Increase the count of cycle counter users and turn the cycle counters on if they were previously off.
+ *
+ * This function is designed to be called by @ref kbase_pm_request_gpu_cycle_counter or
+ * @ref kbase_pm_request_gpu_cycle_counter_l2_is_on only.
+ *
+ * When this function is called the L2 cache must be on and the L2 cache users count must
+ * have been incremented by a call to @ref kbase_pm_request_l2_caches or
+ * @ref kbase_pm_request_l2_caches_l2_is_on.
+ *
+ * @param kbdev The kbase device structure of the device
+ */
+static void kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+
+ ++kbdev->pm.gpu_cycle_counter_requests;
+
+ if (1 == kbdev->pm.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests < INT_MAX);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter)
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests < INT_MAX);
+
+ kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on)
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_cycle_counter_requests > 0);
+
+ --kbdev->pm.gpu_cycle_counter_requests;
+
+ if (0 == kbdev->pm.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.gpu_cycle_counter_requests_lock, flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter)
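+
+/* Usage sketch (illustrative only, an assumption about a typical caller rather
+ * than code from this file): a caller that wants to sample the GPU cycle count
+ * would bracket its register reads with the request/release pair above, e.g.
+ *
+ *   u32 lo, hi;
+ *
+ *   kbase_pm_request_gpu_cycle_counter(kbdev);
+ *   lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ *   hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
+ *   kbase_pm_release_gpu_cycle_counter(kbdev);
+ *
+ * CYCLE_COUNT_LO/HI are assumed here to be the cycle counter offsets from the
+ * register map; the GPU must remain powered for the duration of the reads.
+ */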
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c
new file mode 100755
index 000000000000..1d8374f72d5d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics.c
@@ -0,0 +1,410 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics.c
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#if KBASE_PM_EN
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION 70
+#define KBASE_PM_VSYNC_MAX_UTILISATION 90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns.
+ * This gives a maximum period between samples of 2^(32+8)/100 ns, i.e. slightly
+ * under 11s (the factor of 100 leaves headroom so that the later
+ * 100 * time_busy percentage calculation stays within a u32).
+ * Exceeding this will cause overflow. */
+#define KBASE_PM_TIME_SHIFT 8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+ unsigned long flags;
+ enum kbase_pm_dvfs_action action;
+ struct kbasep_pm_metrics_data *metrics;
+
+ KBASE_DEBUG_ASSERT(timer != NULL);
+
+ metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+ action = kbase_pm_get_dvfs_action(metrics->kbdev);
+
+ spin_lock_irqsave(&metrics->lock, flags);
+
+ if (metrics->timer_active)
+ hrtimer_start(timer,
+ HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.platform_dvfs_frequency),
+ HRTIMER_MODE_REL);
+
+ spin_unlock_irqrestore(&metrics->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+mali_error kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.metrics.kbdev = kbdev;
+ kbdev->pm.metrics.vsync_hit = 0;
+ kbdev->pm.metrics.utilisation = 0;
+ kbdev->pm.metrics.util_cl_share[0] = 0;
+ kbdev->pm.metrics.util_cl_share[1] = 0;
+ kbdev->pm.metrics.util_gl_share = 0;
+
+ kbdev->pm.metrics.time_period_start = ktime_get();
+ kbdev->pm.metrics.time_busy = 0;
+ kbdev->pm.metrics.time_idle = 0;
+ kbdev->pm.metrics.prev_busy = 0;
+ kbdev->pm.metrics.prev_idle = 0;
+ kbdev->pm.metrics.gpu_active = MALI_TRUE;
+ kbdev->pm.metrics.active_cl_ctx[0] = 0;
+ kbdev->pm.metrics.active_cl_ctx[1] = 0;
+ kbdev->pm.metrics.active_gl_ctx = 0;
+ kbdev->pm.metrics.busy_cl[0] = 0;
+ kbdev->pm.metrics.busy_cl[1] = 0;
+ kbdev->pm.metrics.busy_gl = 0;
+
+ spin_lock_init(&kbdev->pm.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ kbdev->pm.metrics.timer_active = MALI_TRUE;
+ hrtimer_init(&kbdev->pm.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbdev->pm.metrics.timer.function = dvfs_callback;
+
+ hrtimer_start(&kbdev->pm.metrics.timer, HR_TIMER_DELAY_MSEC(kbdev->pm.platform_dvfs_frequency), HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+ kbase_pm_register_vsync_callback(kbdev);
+
+ return MALI_ERROR_NONE;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init)
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ unsigned long flags;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbdev->pm.metrics.timer_active = MALI_FALSE;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+
+ hrtimer_cancel(&kbdev->pm.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+ kbase_pm_unregister_vsync_callback(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term)
+
+/* The caller must hold kbdev->pm.metrics.lock before calling this function. */
+void kbasep_pm_record_job_status(struct kbase_device *kbdev)
+{
+ ktime_t now;
+ ktime_t diff;
+ u32 ns_time;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ now = ktime_get();
+ diff = ktime_sub(now, kbdev->pm.metrics.time_period_start);
+
+ ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.metrics.time_busy += ns_time;
+ kbdev->pm.metrics.busy_gl += ns_time * kbdev->pm.metrics.active_gl_ctx;
+ kbdev->pm.metrics.busy_cl[0] += ns_time * kbdev->pm.metrics.active_cl_ctx[0];
+ kbdev->pm.metrics.busy_cl[1] += ns_time * kbdev->pm.metrics.active_cl_ctx[1];
+ kbdev->pm.metrics.time_period_start = now;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_record_job_status)
+
+void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.metrics.gpu_active == MALI_TRUE);
+
+ kbdev->pm.metrics.gpu_active = MALI_FALSE;
+
+ kbasep_pm_record_job_status(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_record_gpu_idle)
+
+void kbasep_pm_record_gpu_active(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ ktime_t now;
+ ktime_t diff;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.metrics.gpu_active == MALI_FALSE);
+
+ kbdev->pm.metrics.gpu_active = MALI_TRUE;
+
+ now = ktime_get();
+ diff = ktime_sub(now, kbdev->pm.metrics.time_period_start);
+
+ kbdev->pm.metrics.time_idle += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.metrics.time_period_start = now;
+
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_record_gpu_active)
+
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated)
+{
+ unsigned long flags;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbdev->pm.metrics.vsync_hit = buffer_updated;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_report_vsync)
+
+/* The caller must hold kbdev->pm.metrics.lock before calling this function. */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ktime_t now)
+{
+ ktime_t diff;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ diff = ktime_sub(now, kbdev->pm.metrics.time_period_start);
+
+ if (kbdev->pm.metrics.gpu_active) {
+ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.metrics.time_busy += ns_time;
+ kbdev->pm.metrics.busy_cl[0] += ns_time * kbdev->pm.metrics.active_cl_ctx[0];
+ kbdev->pm.metrics.busy_cl[1] += ns_time * kbdev->pm.metrics.active_cl_ctx[1];
+ kbdev->pm.metrics.busy_gl += ns_time * kbdev->pm.metrics.active_gl_ctx;
+ } else {
+ kbdev->pm.metrics.time_idle += (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ }
+}
+
+/* Caller needs to hold kbdev->pm.metrics.lock before calling this function. */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev, ktime_t now)
+{
+ /* Store previous value */
+ kbdev->pm.metrics.prev_idle = kbdev->pm.metrics.time_idle;
+ kbdev->pm.metrics.prev_busy = kbdev->pm.metrics.time_busy;
+
+ /* Reset current values */
+ kbdev->pm.metrics.time_period_start = now;
+ kbdev->pm.metrics.time_idle = 0;
+ kbdev->pm.metrics.time_busy = 0;
+ kbdev->pm.metrics.busy_cl[0] = 0;
+ kbdev->pm.metrics.busy_cl[1] = 0;
+ kbdev->pm.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+ unsigned long *total_out, unsigned long *busy_out)
+{
+ ktime_t now = ktime_get();
+ unsigned long flags, busy, total;
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+ busy = kbdev->pm.metrics.time_busy;
+ total = busy + kbdev->pm.metrics.time_idle;
+
+ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+ * 100ms) */
+ if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+ total += kbdev->pm.metrics.prev_idle +
+ kbdev->pm.metrics.prev_busy;
+ busy += kbdev->pm.metrics.prev_busy;
+ }
+
+ *total_out = total;
+ *busy_out = busy;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+}
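+
+/* Illustrative sketch (an assumption about a typical caller, not code from
+ * this driver): a governor polling utilisation could convert the busy/total
+ * pair returned above into a percentage along these lines:
+ *
+ *   unsigned long total, busy, load_pct;
+ *
+ *   kbase_pm_get_dvfs_utilisation(kbdev, &total, &busy);
+ *   if (total != 0)
+ *       load_pct = (100 * busy) / total;
+ *   else
+ *       load_pct = 0;
+ *
+ * Both values are in units of (1 << KBASE_PM_TIME_SHIFT) ns, so the ratio is
+ * unit-free.
+ */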
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* The caller must hold kbdev->pm.metrics.lock before calling this function. */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, int *util_gl_share, int util_cl_share[2])
+{
+ int utilisation;
+ int busy;
+ ktime_t now = ktime_get();
+
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+ if (kbdev->pm.metrics.time_idle + kbdev->pm.metrics.time_busy == 0) {
+ /* No data - so we return NOP */
+ utilisation = -1;
+ if (util_gl_share)
+ *util_gl_share = -1;
+ if (util_cl_share) {
+ util_cl_share[0] = -1;
+ util_cl_share[1] = -1;
+ }
+ goto out;
+ }
+
+ utilisation = (100 * kbdev->pm.metrics.time_busy) /
+ (kbdev->pm.metrics.time_idle +
+ kbdev->pm.metrics.time_busy);
+
+ busy = kbdev->pm.metrics.busy_gl +
+ kbdev->pm.metrics.busy_cl[0] +
+ kbdev->pm.metrics.busy_cl[1];
+
+ if (busy != 0) {
+ if (util_gl_share)
+ *util_gl_share =
+ (100 * kbdev->pm.metrics.busy_gl) / busy;
+ if (util_cl_share) {
+ util_cl_share[0] =
+ (100 * kbdev->pm.metrics.busy_cl[0]) / busy;
+ util_cl_share[1] =
+ (100 * kbdev->pm.metrics.busy_cl[1]) / busy;
+ }
+ } else {
+ if (util_gl_share)
+ *util_gl_share = -1;
+ if (util_cl_share) {
+ util_cl_share[0] = -1;
+ util_cl_share[1] = -1;
+ }
+ }
+
+out:
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+ return utilisation;
+}
+
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ int utilisation, util_gl_share;
+ int util_cl_share[2];
+ enum kbase_pm_dvfs_action action;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+
+ utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, util_cl_share);
+
+ if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || util_cl_share[1] < 0) {
+ action = KBASE_PM_DVFS_NOP;
+ utilisation = 0;
+ util_gl_share = 0;
+ util_cl_share[0] = 0;
+ util_cl_share[1] = 0;
+ goto out;
+ }
+
+ if (kbdev->pm.metrics.vsync_hit) {
+ /* VSync is being met */
+ if (utilisation < KBASE_PM_VSYNC_MIN_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_DOWN;
+ else if (utilisation > KBASE_PM_VSYNC_MAX_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_UP;
+ else
+ action = KBASE_PM_DVFS_NOP;
+ } else {
+ /* VSync is being missed */
+ if (utilisation < KBASE_PM_NO_VSYNC_MIN_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_DOWN;
+ else if (utilisation > KBASE_PM_NO_VSYNC_MAX_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_UP;
+ else
+ action = KBASE_PM_DVFS_NOP;
+ }
+
+ kbdev->pm.metrics.utilisation = utilisation;
+ kbdev->pm.metrics.util_cl_share[0] = util_cl_share[0];
+ kbdev->pm.metrics.util_cl_share[1] = util_cl_share[1];
+ kbdev->pm.metrics.util_gl_share = util_gl_share;
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+#endif /*CONFIG_MALI_MIDGARD_DVFS */
+ kbdev->pm.metrics.time_idle = 0;
+ kbdev->pm.metrics.time_busy = 0;
+ kbdev->pm.metrics.busy_cl[0] = 0;
+ kbdev->pm.metrics.busy_cl[1] = 0;
+ kbdev->pm.metrics.busy_gl = 0;
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+
+ return action;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_action)
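+
+/* Illustrative sketch (hypothetical platform glue, not part of this file):
+ * platform DVFS code that polls kbase_pm_get_dvfs_action() would typically map
+ * the returned action onto its own frequency table, e.g.
+ *
+ *   switch (kbase_pm_get_dvfs_action(kbdev)) {
+ *   case KBASE_PM_DVFS_CLOCK_UP:
+ *       platform_raise_gpu_freq();    // hypothetical helper
+ *       break;
+ *   case KBASE_PM_DVFS_CLOCK_DOWN:
+ *       platform_lower_gpu_freq();    // hypothetical helper
+ *       break;
+ *   case KBASE_PM_DVFS_NOP:
+ *   default:
+ *       break;
+ *   }
+ *
+ * Note that when CONFIG_MALI_MIDGARD_DVFS is enabled this function already
+ * forwards the utilisation figures to kbase_platform_dvfs_event().
+ */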
+
+mali_bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+ mali_bool isactive;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.metrics.lock, flags);
+ isactive = (kbdev->pm.metrics.timer_active == MALI_TRUE);
+ spin_unlock_irqrestore(&kbdev->pm.metrics.lock, flags);
+
+ return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active)
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c
new file mode 100755
index 000000000000..7823d824f5a3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_metrics_dummy.c
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics_dummy.c
+ * Dummy Metrics for power management.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ /* no VSync metrics will be available */
+ kbdev->pm.metrics.platform_data = NULL;
+}
+
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+}
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.c
new file mode 100755
index 000000000000..06f033c59523
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.c
@@ -0,0 +1,822 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_policy.c
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+
+#if KBASE_PM_EN
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#if !MALI_CUSTOMER_RELEASE
+extern const struct kbase_pm_policy kbase_pm_fast_start_policy_ops;
+extern const struct kbase_pm_policy kbase_pm_demand_always_powered_policy_ops;
+#endif
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_demand_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_demand_always_powered_policy_ops,
+ &kbase_pm_fast_start_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+ &kbase_pm_demand_policy_ops,
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_demand_always_powered_policy_ops,
+ &kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of entries in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+ KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path and is not
+	 * expected to be hit often. We can detect whether we need more
+	 * instrumentation by a difference between PM_CHECKTRANS events and
+	 * PM_SEND/HANDLE_EVENT. */
+
+ /* Must be the last */
+ KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+ KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+ KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
+
+ /* These two must be last */
+ KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER|KBASE_PM_CHANGE_STATE_SHADER),
+ KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT][KBASE_PM_CHANGE_STATE_COUNT] = {
+ /* kbase_pm_request_cores */
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+ /* kbase_pm_release_cores */
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER|KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+STATIC INLINE void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+ enum kbase_pm_func_id func_id,
+ kbase_pm_change_state state)
+{
+ int trace_code;
+
+ KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+ KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == state);
+
+ trace_code = kbase_pm_change_state_trace_code[func_id][state];
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+STATIC INLINE void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+ enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+static enum hrtimer_restart kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+ struct kbase_device *kbdev;
+
+ kbdev = container_of(timer, struct kbase_device, pm.gpu_poweroff_timer);
+
+ /* It is safe for this call to do nothing if the work item is already queued.
+	 * The worker function will read the most up-to-date state of kbdev->pm.gpu_poweroff_pending
+ * under lock.
+ *
+ * If a state change occurs while the worker function is processing, this
+ * call will succeed as a work item can be requeued once it has started
+ * processing.
+ */
+ if (kbdev->pm.gpu_poweroff_pending)
+ queue_work(kbdev->pm.gpu_poweroff_wq, &kbdev->pm.gpu_poweroff_work);
+
+ if (kbdev->pm.shader_poweroff_pending) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ if (kbdev->pm.shader_poweroff_pending) {
+ kbdev->pm.shader_poweroff_pending_time--;
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.shader_poweroff_pending_time >= 0);
+
+ if (kbdev->pm.shader_poweroff_pending_time == 0) {
+ u64 prev_shader_state = kbdev->pm.desired_shader_state;
+
+ kbdev->pm.desired_shader_state &= ~kbdev->pm.shader_poweroff_pending;
+ kbdev->pm.shader_poweroff_pending = 0;
+
+ if (prev_shader_state != kbdev->pm.desired_shader_state ||
+ kbdev->pm.ca_in_transition != MALI_FALSE) {
+ mali_bool cores_are_available;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ }
+
+ if (kbdev->pm.poweroff_timer_needed) {
+ hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+ return HRTIMER_RESTART;
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ mali_bool do_poweroff = MALI_FALSE;
+
+ kbdev = container_of(data, struct kbase_device, pm.gpu_poweroff_work);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ if (kbdev->pm.gpu_poweroff_pending == 0) {
+ mutex_unlock(&kbdev->pm.lock);
+ return;
+ }
+
+ kbdev->pm.gpu_poweroff_pending--;
+
+ if (kbdev->pm.gpu_poweroff_pending > 0) {
+ mutex_unlock(&kbdev->pm.lock);
+ return;
+ }
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.gpu_poweroff_pending == 0);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ /* Only power off the GPU if a request is still pending */
+ if (kbdev->pm.pm_current_policy->get_core_active(kbdev) == MALI_FALSE)
+ do_poweroff = MALI_TRUE;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (do_poweroff != MALI_FALSE) {
+ kbdev->pm.poweroff_timer_needed = MALI_FALSE;
+ /* Power off the GPU */
+ kbase_pm_do_poweroff(kbdev, MALI_FALSE);
+ hrtimer_cancel(&kbdev->pm.gpu_poweroff_timer);
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+mali_error kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.gpu_poweroff_wq = alloc_workqueue("kbase_pm_do_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (NULL == kbdev->pm.gpu_poweroff_wq)
+ return MALI_ERROR_OUT_OF_MEMORY;
+ INIT_WORK(&kbdev->pm.gpu_poweroff_work, kbasep_pm_do_gpu_poweroff_wq);
+
+ hrtimer_init(&kbdev->pm.gpu_poweroff_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbdev->pm.gpu_poweroff_timer.function = kbasep_pm_do_gpu_poweroff_callback;
+
+ kbdev->pm.pm_current_policy = policy_list[0];
+
+ kbdev->pm.pm_current_policy->init(kbdev);
+
+ kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PM_GPU_POWEROFF_TICK_NS));
+
+ kbdev->pm.poweroff_shader_ticks = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_SHADER);
+ kbdev->pm.poweroff_gpu_ticks = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_PM_POWEROFF_TICK_GPU);
+
+ return MALI_ERROR_NONE;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.pm_current_policy->term(kbdev);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ kbdev->pm.poweroff_timer_needed = MALI_FALSE;
+ hrtimer_cancel(&kbdev->pm.gpu_poweroff_timer);
+
+ /* If wq is already running but is held off by pm.lock, make sure it has no effect */
+ kbdev->pm.gpu_poweroff_pending = 0;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->pm.shader_poweroff_pending = 0;
+ kbdev->pm.shader_poweroff_pending_time = 0;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ mali_bool active;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* pm_current_policy will never be NULL while pm.lock is held */
+ KBASE_DEBUG_ASSERT(kbdev->pm.pm_current_policy);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ active = kbdev->pm.pm_current_policy->get_core_active(kbdev);
+
+ if (active != MALI_FALSE) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (kbdev->pm.gpu_poweroff_pending) {
+ /* Cancel any pending power off request */
+ kbdev->pm.gpu_poweroff_pending = 0;
+
+ /* If a request was pending then the GPU was still powered, so no need to continue */
+ return;
+ }
+
+ if (!kbdev->pm.poweroff_timer_needed && !kbdev->pm.gpu_powered) {
+ kbdev->pm.poweroff_timer_needed = MALI_TRUE;
+ hrtimer_start(&kbdev->pm.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, HRTIMER_MODE_REL);
+ }
+
+ /* Power on the GPU and any cores requested by the policy */
+ kbase_pm_do_poweron(kbdev, MALI_FALSE);
+ } else {
+ /* It is an error for the power policy to power off the GPU
+ * when there are contexts active */
+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+
+ if (kbdev->pm.shader_poweroff_pending) {
+ kbdev->pm.shader_poweroff_pending = 0;
+ kbdev->pm.shader_poweroff_pending_time = 0;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+
+ /* Request power off */
+ if (kbdev->pm.gpu_powered) {
+ kbdev->pm.gpu_poweroff_pending = kbdev->pm.poweroff_gpu_ticks;
+ if (!kbdev->pm.poweroff_timer_needed) {
+			if (!kbdev->pm.poweroff_timer_needed) {
+				/* Start the timer if it is not already running (e.g. if the power
+				 * policy has been changed from always_on to something else). This
+				 * ensures the GPU is actually powered off. */
+ kbdev->pm.poweroff_timer_needed = MALI_TRUE;
+ hrtimer_start(&kbdev->pm.gpu_poweroff_timer, kbdev->pm.gpu_poweroff_time, HRTIMER_MODE_REL);
+ }
+ }
+ }
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+ u64 desired_bitmap;
+ mali_bool cores_are_available;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ if (kbdev->pm.pm_current_policy == NULL)
+ return;
+
+ desired_bitmap = kbdev->pm.pm_current_policy->get_core_mask(kbdev);
+ desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+ /* Enable core 0 if tiler required, regardless of core availability */
+ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+ desired_bitmap |= 1;
+
+ if (kbdev->pm.desired_shader_state != desired_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, (u32)desired_bitmap);
+
+ /* Are any cores being powered on? */
+ if (~kbdev->pm.desired_shader_state & desired_bitmap ||
+ kbdev->pm.ca_in_transition != MALI_FALSE) {
+ /* Check if we are powering off any cores before updating shader state */
+ if (kbdev->pm.desired_shader_state & ~desired_bitmap) {
+ /* Start timer to power off cores */
+ kbdev->pm.shader_poweroff_pending |= (kbdev->pm.desired_shader_state & ~desired_bitmap);
+ kbdev->pm.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks;
+ }
+
+ kbdev->pm.desired_shader_state = desired_bitmap;
+
+ /* If any cores are being powered on, transition immediately */
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ } else if (kbdev->pm.desired_shader_state & ~desired_bitmap) {
+ /* Start timer to power off cores */
+ kbdev->pm.shader_poweroff_pending |= (kbdev->pm.desired_shader_state & ~desired_bitmap);
+ kbdev->pm.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks;
+ } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && kbdev->pm.poweroff_timer_needed) {
+ /* If power policy is keeping cores on despite there being no
+ * active contexts then disable poweroff timer as it isn't
+ * required.
+ * Only reset poweroff_timer_needed if we're not in the middle
+ * of the power off callback */
+ kbdev->pm.poweroff_timer_needed = MALI_FALSE;
+ hrtimer_try_to_cancel(&kbdev->pm.gpu_poweroff_timer);
+ }
+
+ /* Ensure timer does not power off wanted cores and make sure to power off unwanted cores */
+ if (kbdev->pm.shader_poweroff_pending != 0) {
+ kbdev->pm.shader_poweroff_pending &= ~(kbdev->pm.desired_shader_state & desired_bitmap);
+ if (kbdev->pm.shader_poweroff_pending == 0)
+ kbdev->pm.shader_poweroff_pending_time = 0;
+ }
+
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+ if (!list)
+ return POLICY_COUNT;
+
+ *list = policy_list;
+
+ return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies)
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return kbdev->pm.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy)
+
+void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *new_policy)
+{
+ const struct kbase_pm_policy *old_policy;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+ KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+ /* During a policy change we pretend the GPU is active */
+ /* A suspend won't happen here, because we're in a syscall from a userspace thread */
+ kbase_pm_context_active(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Remove the policy to prevent IRQ handlers from working on it */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ old_policy = kbdev->pm.pm_current_policy;
+ kbdev->pm.pm_current_policy = NULL;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, old_policy->id);
+ if (old_policy->term)
+ old_policy->term(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, new_policy->id);
+ if (new_policy->init)
+ new_policy->init(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.pm_current_policy = new_policy;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* If any core power state changes were previously attempted, but couldn't
+ * be made because the policy was changing (current_policy was NULL), then
+ * re-try them here. */
+ kbase_pm_update_active(kbdev);
+ kbase_pm_update_cores_state(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Now the policy change is finished, we release our fake context active reference */
+ kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy)
+
+/** Check whether a state change has finished, and trace it as completed */
+STATIC void kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+ if ((kbdev->shader_available_bitmap & kbdev->pm.desired_shader_state) == kbdev->pm.desired_shader_state &&
+ (kbdev->tiler_available_bitmap & kbdev->pm.desired_tiler_state) == kbdev->pm.desired_tiler_state)
+ kbase_timeline_pm_check_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ u64 cores;
+
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ cores = shader_cores;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+
+ /* It should be almost impossible for this to overflow. It would require 2^32 atoms
+ * to request a particular core, which would require 2^24 contexts to submit. This
+ * would require an amount of memory that is impossible on a 32-bit system and
+ * extremely unlikely on a 64-bit system. */
+ int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+ if (1 == cnt) {
+ kbdev->shader_needed_bitmap |= bit;
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ cores &= ~bit;
+ }
+
+ if (tiler_required != MALI_FALSE) {
+ ++kbdev->tiler_needed_cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+
+ /* For tiler jobs, we must make sure that core 0 is not turned off if it's already on.
+ * However, it's safe for core 0 to be left off and turned on later whilst a tiler job
+ * is running. Hence, we don't need to update the cores state immediately. Also,
+ * attempts to turn off cores will always check the tiler_needed/inuse state first anyway.
+ *
+ * Finally, kbase_js_choose_affinity() ensures core 0 is always requested for tiler jobs
+ * anyway. Hence when there's only a tiler job in the system, this will still cause
+ * kbase_pm_update_cores_state_nolock() to be called.
+ *
+ * Note that we still need to keep track of tiler_needed/inuse_cnt, to ensure that
+ * kbase_pm_update_cores_state_nolock() can override the core availability policy and
+ * force core 0 to be powered when a tiler job is in the system. */
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_START, change_gpu_state);
+ kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_REQUEST_CORES_END, change_gpu_state);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores)
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_needed_cnt[bitnum];
+
+ if (0 == cnt) {
+ kbdev->shader_needed_bitmap &= ~bit;
+
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required != MALI_FALSE) {
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+ --kbdev->tiler_needed_cnt;
+
+ /* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
+ * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
+ * when the last tiler job unrequests cores: kbase_js_choose_affinity() ensures core 0
+ * was originally requested for tiler jobs. Hence when there's only a tiler job in the
+ * system, this will still cause kbase_pm_update_cores_state_nolock() to be called. */
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ /* Trace that any state change effectively completes immediately -
+ * no-one will wait on the state change */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores)
+
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ u64 prev_shader_needed; /* Just for tracing */
+ u64 prev_shader_inuse; /* Just for tracing */
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ prev_shader_needed = kbdev->shader_needed_bitmap;
+ prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then power
+	 * management is not attempting to power those cores (most likely
+	 * due to core availability policy) and a new job affinity must be
+	 * chosen */
+ if ((kbdev->pm.desired_shader_state & shader_cores) != shader_cores) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ return KBASE_NEW_AFFINITY;
+ }
+
+ if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+ (tiler_required != MALI_FALSE && !kbdev->tiler_available_bitmap)) {
+ /* Trace ongoing core transition */
+ kbase_timeline_pm_l2_transition_start(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ return KBASE_CORES_NOT_READY;
+ }
+
+ /* If we started to trace a state change, then trace it has being finished
+ * by now, at the very latest */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ /* Trace core transition done */
+ kbase_timeline_pm_l2_transition_done(kbdev);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_needed_cnt[bitnum];
+
+ if (0 == cnt)
+ kbdev->shader_needed_bitmap &= ~bit;
+
+ /* shader_inuse_cnt should not overflow because there can only be a
+ * very limited number of jobs on the h/w at one time */
+
+ kbdev->shader_inuse_cnt[bitnum]++;
+ kbdev->shader_inuse_bitmap |= bit;
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required != MALI_FALSE) {
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+ --kbdev->tiler_needed_cnt;
+
+ kbdev->tiler_inuse_cnt++;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+ }
+
+ if (prev_shader_needed != kbdev->shader_needed_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL, NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores)
+
+void kbase_pm_release_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+ if (0 == cnt) {
+ kbdev->shader_inuse_bitmap &= ~bit;
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required != MALI_FALSE) {
+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+ --kbdev->tiler_inuse_cnt;
+
+ /* Whilst tiler jobs must not allow core 0 to be turned off, we don't need to make an
+ * extra call to kbase_pm_update_cores_state_nolock() to ensure core 0 is turned off
+ * when the last tiler job finishes: kbase_js_choose_affinity() ensures core 0 was
+ * originally requested for tiler jobs. Hence when there's only a tiler job in the
+ * system, this will still cause kbase_pm_update_cores_state_nolock() to be called */
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+ kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_START, change_gpu_state);
+ kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev, KBASE_PM_FUNC_ID_RELEASE_CORES_END, change_gpu_state);
+
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores)
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores)
+{
+ kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+
+ kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync)
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 prior_l2_users_count;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ prior_l2_users_count = kbdev->l2_users_count++;
+
+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+	/* If the GPU is reset while the L2 is on, the L2 will be off but
+	 * prior_l2_users_count will be > 0; l2_available_bitmap will have been
+	 * set to 0 by kbase_pm_init_hw. */
+ if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+ kbase_pm_check_transitions_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ wait_event(kbdev->pm.l2_powered_wait, kbdev->pm.l2_powered == 1);
+
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches)
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->l2_users_count++;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on)
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+ --kbdev->l2_users_count;
+
+ if (!kbdev->l2_users_count) {
+ kbase_pm_check_transitions_nolock(kbdev);
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches)
+#endif /* KBASE_PM_EN */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.h
new file mode 100755
index 000000000000..4a8cc44b6308
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm_policy.h
@@ -0,0 +1,277 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_policy.h
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/** List of policy IDs */
+enum kbase_pm_policy_id {
+ KBASE_PM_POLICY_ID_DEMAND = 1,
+ KBASE_PM_POLICY_ID_ALWAYS_ON,
+ KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+ KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+ KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/** Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which contains function pointers to the
+ * policy's methods.
+ */
+typedef struct kbase_pm_policy {
+ /** The name of this policy */
+ char *name;
+
+ /** Function called when the policy is selected
+ *
+ * This should initialize the kbdev->pm.pm_policy_data structure. It should not attempt
+ * to make any changes to hardware state.
+ *
+ * It is undefined what state the cores are in when the function is called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+ void (*init)(struct kbase_device *kbdev);
+
+ /** Function called when the policy is unselected.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+ void (*term)(struct kbase_device *kbdev);
+
+ /** Function called to get the current shader core mask
+ *
+ * The returned mask should meet or exceed (kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return The mask of shader cores to be powered */
+ u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+ /** Function called to get the current overall GPU power state
+ *
+ * This function should consider the state of kbdev->pm.active_count. If this count is greater than 0 then
+ * there is at least one active context on the device and the GPU should be powered. If it is equal to 0
+ * then there are no active contexts and the GPU could be powered off if desired.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return MALI_TRUE if the GPU should be powered, MALI_FALSE otherwise */
+ mali_bool (*get_core_active) (struct kbase_device *kbdev);
+
+ /** Field indicating flags for this policy */
+ kbase_pm_policy_flags flags;
+
+ /** Field indicating an ID for this policy. This is not necessarily the
+ * same as its index in the list returned by kbase_pm_list_policies().
+ * It is used purely for debugging. */
+ enum kbase_pm_policy_id id;
+} kbase_pm_policy;
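+
+/* Illustrative sketch (not part of the original sources): a minimal policy
+ * instance would be declared roughly as follows, where the my_policy_*
+ * callbacks are hypothetical implementations of the hooks documented above.
+ *
+ *   static const kbase_pm_policy my_policy = {
+ *           .name = "my_policy",
+ *           .init = my_policy_init,
+ *           .term = my_policy_term,
+ *           .get_core_mask = my_policy_get_core_mask,
+ *           .get_core_active = my_policy_get_core_active,
+ *           .flags = 0u,
+ *           .id = KBASE_PM_POLICY_ID_DEMAND,
+ *   };
+ */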
+
+/** Initialize power policy framework
+ *
+ * Must be called before calling any other policy function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return MALI_ERROR_NONE if the power policy framework was successfully initialized.
+ */
+mali_error kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/** Terminate power policy framework
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/** Update the active power state of the GPU
+ * Calls into the current power policy
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/** Update the desired core state of the GPU
+ * Calls into the current power policy
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+/** Get the current policy.
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/** Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param policy The policy to change to (valid pointer returned from @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *policy);
+
+/** Retrieve a static list of the available policies.
+ * @param[out] policies An array pointer to take the list of policies. This may be NULL.
+ * The contents of this array must not be modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
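+
+/* Illustrative sketch (not part of the original sources): selecting a policy
+ * by name could look roughly like this; the policy name "demand" is an
+ * assumption and error handling is omitted.
+ *
+ *   const struct kbase_pm_policy *const *policies;
+ *   int count = kbase_pm_list_policies(&policies);
+ *   int i;
+ *
+ *   for (i = 0; i < count; i++) {
+ *           if (!strcmp(policies[i]->name, "demand")) {
+ *                   kbase_pm_set_policy(kbdev, policies[i]);
+ *                   break;
+ *           }
+ *   }
+ */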
+
+
+enum kbase_pm_cores_ready {
+ KBASE_CORES_NOT_READY = 0,
+ KBASE_NEW_AFFINITY = 1,
+ KBASE_CORES_READY = 2
+};
+
+
+/** Synchronous variant of kbase_pm_request_cores()
+ *
+ * When this function returns, the @a shader_cores will be in the READY state.
+ *
+ * This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ *
+ * @param kbdev The kbase device structure for the device
+ * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise
+ * @param shader_cores A bitmask of shader cores which are necessary for the job
+ */
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores);
+
+/** Mark one or more cores as being required for jobs to be submitted.
+ *
+ * This function is called by the job scheduler to mark one or more cores
+ * as being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to @ref kbase_pm_register_inuse_cores or
+ * @ref kbase_pm_unrequest_cores should be made to drop the 'needed' reference on the cores.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will begin
+ * immediately, but they might not complete (and the cores might not become
+ * available) until a Power Management IRQ is received.
+ *
+ * @param kbdev The kbase device structure for the device
+ * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise
+ * @param shader_cores A bitmask of shader cores which are necessary for the job
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores);
+
+/** Unmark one or more cores as being required for jobs to be submitted.
+ *
+ * This function undoes the effect of @ref kbase_pm_request_cores. It should be used when a job is not
+ * going to be submitted to the hardware (e.g. the job is cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will begin
+ * immediately, but they might not complete until a Power Management IRQ is
+ * received.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ *
+ * @param kbdev The kbase device structure for the device
+ * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise
+ * @param shader_cores A bitmask of shader cores (as given to @ref kbase_pm_request_cores)
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores);
+
+/** Register a set of cores as in use by a job.
+ *
+ * This function should be called after @ref kbase_pm_request_cores when the job is about to be submitted to
+ * the hardware. It will check that the necessary cores are available and if so update the 'needed' and 'inuse'
+ * bitmasks to reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will return KBASE_CORES_NOT_READY and have no effect.
+ *
+ * @param kbdev The kbase device structure for the device
+ * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise
+ * @param shader_cores A bitmask of shader cores (as given to @ref kbase_pm_request_cores)
+ *
+ * @return KBASE_CORES_READY if the job can be submitted to the hardware, or another enum kbase_pm_cores_ready value if the job is not yet ready to run.
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores);
+
+/** Release cores after a job has run.
+ *
+ * This function should be called when a job has finished running on the hardware. A call to @ref
+ * kbase_pm_register_inuse_cores must have previously occurred. The reference counts of the specified cores will be
+ * decremented which may cause the bitmask of 'inuse' cores to be reduced. The power policy may then turn off any
+ * cores which are no longer 'inuse'.
+ *
+ * @param kbdev The kbase device structure for the device
+ * @param tiler_required MALI_TRUE if the tiler is required, MALI_FALSE otherwise
+ * @param shader_cores A bitmask of shader cores (as given to @ref kbase_pm_register_inuse_cores)
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev, mali_bool tiler_required, u64 shader_cores);
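+
+/* Illustrative sketch (not part of the original sources): the reference-counted
+ * request/register/release sequence around running a job might look roughly
+ * like this; the surrounding scheduler logic is omitted.
+ *
+ *   kbase_pm_request_cores(kbdev, MALI_TRUE, shader_cores);
+ *
+ *   if (kbase_pm_register_inuse_cores(kbdev, MALI_TRUE, shader_cores) ==
+ *                   KBASE_CORES_READY) {
+ *           // submit the job; once it has finished on the hardware:
+ *           kbase_pm_release_cores(kbdev, MALI_TRUE, shader_cores);
+ *   } else {
+ *           // not ready yet - drop the 'needed' reference (or retry later)
+ *           kbase_pm_unrequest_cores(kbdev, MALI_TRUE, shader_cores);
+ *   }
+ */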
+
+/** Request the use of l2 caches for all core groups: power them up, wait for the
+ * power-up to complete, and prevent the power manager from powering down the l2 caches.
+ *
+ * This tells the power management framework that the caches should be powered up, and they
+ * should remain powered, irrespective of the usage of shader cores. This does not
+ * return until the l2 caches are powered up.
+ *
+ * The caller must call @ref kbase_pm_release_l2_caches when it has finished, to
+ * allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/** Increment the count of l2 users, but do not attempt to power on the l2.
+ *
+ * It is the caller's responsibility to ensure that the l2 is already powered up
+ * and to eventually call @ref kbase_pm_release_l2_caches.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/** Release the use of l2 caches for all core groups and allow the power manager to
+ * power them down when necessary.
+ *
+ * This tells the power management framework that the caches can be powered down when necessary, with respect
+ * to the usage of shader cores.
+ *
+ * The caller must have called @ref kbase_pm_request_l2_caches prior to a call to this.
+ *
+ * This should only be used when power management is active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
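+
+/* Illustrative sketch (not part of the original sources): a caller that needs
+ * the l2 caches powered across some maintenance step might pair the calls as
+ * follows; do_l2_maintenance() is a hypothetical helper.
+ *
+ *   kbase_pm_request_l2_caches(kbdev);   // blocks until the l2 is powered up
+ *   do_l2_maintenance(kbdev);
+ *   kbase_pm_release_l2_caches(kbdev);   // normal l2 power management resumes
+ */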
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100755
index 000000000000..6f9db71f7a06
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Gator profiling control interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
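+
+/* Illustrative sketch (not part of the original sources): gator/Streamline is
+ * expected to drive this interface with calls of the following form; the
+ * values shown are arbitrary examples.
+ *
+ *   _mali_profiling_control(SW_COUNTER_ENABLE, 1);
+ *   _mali_profiling_control(FBDUMP_CONTROL_ENABLE, 1);
+ *   _mali_profiling_control(FBDUMP_CONTROL_RATE, 5);
+ */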
+
+#endif /* _KBASE_PROFILING_GATOR_API_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100755
index 000000000000..e47d865601c8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1284 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_MASK 0xfe
+#define JOB_TYPE_NULL (1 << 1)
+#define JOB_TYPE_VERTEX (5 << 1)
+#define JOB_TYPE_TILER (7 << 1)
+#define JOB_TYPE_FUSED (8 << 1)
+#define JOB_TYPE_FRAGMENT (9 << 1)
+
+#define JOB_FLAG_DESC_SIZE (1 << 0)
+#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX 0xffff
+
+#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST (0x03)
+
+struct job_head {
+ u32 status;
+ u32 not_complete_index;
+ u64 fault_addr;
+ u16 flags;
+ u16 index;
+ u16 dependencies[2];
+ union {
+ u64 _64;
+ u32 _32;
+ } next;
+ u32 x[2];
+ union {
+ u64 _64;
+ u32 _32;
+ } fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+ struct job_head *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+ dev_dbg(kctx->kbdev->dev, "addr = %p\n"
+ "status = %x\n"
+ "not_complete_index = %x\n"
+ "fault_addr = %llx\n"
+ "flags = %x\n"
+ "index = %x\n"
+ "dependencies = %x,%x\n",
+ job, job->status, job->not_complete_index,
+ job->fault_addr, job->flags, job->index,
+ job->dependencies[0],
+ job->dependencies[1]);
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ dev_dbg(kctx->kbdev->dev, "next = %llx\n",
+ job->next._64);
+ else
+ dev_dbg(kctx->kbdev->dev, "next = %x\n",
+ job->next._32);
+#endif
+}
+
+struct kbasep_map_struct {
+ mali_addr64 gpu_addr;
+ struct kbase_mem_phy_alloc *alloc;
+ struct page **pages;
+ void *addr;
+ size_t size;
+ mali_bool is_cached;
+};
+
+static void *kbasep_map(struct kbase_context *kctx, mali_addr64 gpu_addr,
+ size_t size, struct kbasep_map_struct *map)
+{
+ struct kbase_va_region *region;
+ unsigned long page_index;
+ unsigned int offset = gpu_addr & ~PAGE_MASK;
+ size_t page_count = PFN_UP(offset + size);
+ phys_addr_t *page_array;
+ struct page **pages;
+ void *cpu_addr = NULL;
+ pgprot_t prot;
+ size_t i;
+
+ if (!size || !map)
+ return NULL;
+
+ /* check if page_count calculation will wrap */
+ if (size > ((size_t)-1 / PAGE_SIZE))
+ return NULL;
+
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (!region || (region->flags & KBASE_REG_FREE))
+ return NULL;
+
+ page_index = (gpu_addr >> PAGE_SHIFT) - region->start_pfn;
+
+ /* check if page_index + page_count will wrap */
+ if (-1UL - page_count < page_index)
+ return NULL;
+
+ if (page_index + page_count > kbase_reg_current_backed_size(region))
+ return NULL;
+
+ page_array = kbase_get_phy_pages(region);
+ if (!page_array)
+ return NULL;
+
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+
+ for (i = 0; i < page_count; i++)
+ pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+ prot = PAGE_KERNEL;
+ if (!(region->flags & KBASE_REG_CPU_CACHED)) {
+ /* Map uncached */
+ prot = pgprot_writecombine(prot);
+ }
+
+ cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+ if (!cpu_addr)
+ goto vmap_failed;
+
+ map->gpu_addr = gpu_addr;
+ map->alloc = kbase_mem_phy_alloc_get(region->alloc);
+ map->pages = pages;
+ map->addr = (void *)((uintptr_t)cpu_addr + offset);
+ map->size = size;
+ map->is_cached = (region->flags & KBASE_REG_CPU_CACHED) != 0;
+
+ if (map->is_cached) {
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t pa = page_to_phys(map->pages[0]) + offset;
+
+ kbase_sync_single(kctx, pa, sz, dma_sync_single_for_cpu);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ pa = page_to_phys(map->pages[i]);
+ kbase_sync_single(kctx, pa, PAGE_SIZE,
+ dma_sync_single_for_cpu);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ pa = page_to_phys(map->pages[page_count - 1]);
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, pa, sz,
+ dma_sync_single_for_cpu);
+ }
+ }
+
+ return map->addr;
+
+vmap_failed:
+ kfree(pages);
+
+ return NULL;
+}
+
+static void kbasep_unmap(struct kbase_context *kctx,
+ struct kbasep_map_struct *map)
+{
+ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+
+ vunmap(addr);
+
+ if (map->is_cached) {
+ off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+ size_t size = map->size;
+ size_t page_count = PFN_UP(offset + size);
+ size_t i;
+
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t pa = page_to_phys(map->pages[0]) + offset;
+
+ kbase_sync_single(kctx, pa, sz, dma_sync_single_for_device);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ pa = page_to_phys(map->pages[i]);
+ kbase_sync_single(kctx, pa, PAGE_SIZE,
+ dma_sync_single_for_device);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ pa = page_to_phys(map->pages[page_count - 1]);
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, pa, sz,
+ dma_sync_single_for_device);
+ }
+ }
+
+ kfree(map->pages);
+
+ map->gpu_addr = 0;
+ map->alloc = kbase_mem_phy_alloc_put(map->alloc);
+ map->pages = NULL;
+ map->addr = NULL;
+ map->size = 0;
+ map->is_cached = MALI_FALSE;
+}
+
+static mali_error kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+ mali_addr64 fbd_address, mali_addr64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight)
+{
+ struct {
+ u32 padding_1[1];
+ u32 flags;
+ u64 padding_2[2];
+ u64 heap_free_address;
+ u32 padding[8];
+ u32 weights[FBD_HIERARCHY_WEIGHTS];
+ } *fbd_tiler;
+ struct kbasep_map_struct map;
+
+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+ fbd_tiler = kbasep_map(kctx, fbd_address + SFBD_TILER_OFFSET,
+ sizeof(*fbd_tiler), &map);
+ if (!fbd_tiler) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev,
+ "FBD tiler:\n"
+ "flags = %x\n"
+ "heap_free_address = %llx\n",
+ fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+ if (hierarchy_mask) {
+ u32 weights[HIERARCHY_WEIGHTS];
+ u16 old_hierarchy_mask = fbd_tiler->flags &
+ FBD_HIERARCHY_MASK_MASK;
+ int i, j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (old_hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+ weights[i] = fbd_tiler->weights[j++];
+ } else {
+ weights[i] = default_weight;
+ }
+ }
+
+
+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n",
+ old_hierarchy_mask, hierarchy_mask);
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+ i, weights[i]);
+
+ j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+ dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+ i, weights[i], j);
+
+ fbd_tiler->weights[j++] = weights[i];
+ }
+ }
+
+ for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+ fbd_tiler->weights[j] = 0;
+
+ fbd_tiler->flags = hierarchy_mask | (1 << 16);
+ }
+
+ fbd_tiler->heap_free_address = tiler_heap_free;
+
+ dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+ fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+ kbasep_unmap(kctx, &map);
+
+ return MALI_ERROR_NONE;
+}
+
+static mali_error kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+ mali_addr64 fbd_address, mali_addr64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight)
+{
+ struct kbasep_map_struct map;
+ struct {
+ u32 padding_0;
+ u32 flags;
+ u64 padding_1[2];
+ u64 heap_free_address;
+ u64 padding_2;
+ u32 weights[FBD_HIERARCHY_WEIGHTS];
+ } *fbd_tiler;
+
+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+ fbd_tiler = kbasep_map(kctx, fbd_address + MFBD_TILER_OFFSET,
+ sizeof(*fbd_tiler), &map);
+ if (!fbd_tiler) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_reset_fbd: failed to map fbd\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+ "flags = %x\n"
+ "heap_free_address = %llx\n",
+ fbd_tiler->flags,
+ fbd_tiler->heap_free_address);
+#endif
+ if (hierarchy_mask) {
+ u32 weights[HIERARCHY_WEIGHTS];
+ u16 old_hierarchy_mask = (fbd_tiler->flags) &
+ FBD_HIERARCHY_MASK_MASK;
+ int i, j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (old_hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+ weights[i] = fbd_tiler->weights[j++];
+ } else {
+ weights[i] = default_weight;
+ }
+ }
+
+
+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n",
+ old_hierarchy_mask, hierarchy_mask);
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+ i, weights[i]);
+
+ j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+ dev_dbg(kctx->kbdev->dev,
+ " Writing hierarchy level %02d (%08x) to %d\n",
+ i, weights[i], j);
+
+ fbd_tiler->weights[j++] = weights[i];
+ }
+ }
+
+ for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+ fbd_tiler->weights[j] = 0;
+
+ fbd_tiler->flags = hierarchy_mask | (1 << 16);
+ }
+
+ fbd_tiler->heap_free_address = tiler_heap_free;
+
+ kbasep_unmap(kctx, &map);
+
+ return MALI_ERROR_NONE;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] job_header Address of job header to reset.
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] job_64 MALI_TRUE if this job is using 64-bit
+ * descriptors
+ *
+ * @return MALI_ERROR_NONE on success, error code on failure
+ */
+static mali_error kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+ mali_addr64 job_header, mali_addr64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight, mali_bool job_64)
+{
+ struct kbasep_map_struct map;
+ mali_addr64 fbd_address;
+
+ if (job_64) {
+ u64 *job_ext;
+
+ job_ext = kbasep_map(kctx,
+ job_header + JOB_HEADER_64_FBD_OFFSET,
+ sizeof(*job_ext), &map);
+
+ if (!job_ext) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ fbd_address = *job_ext;
+
+ kbasep_unmap(kctx, &map);
+ } else {
+ u32 *job_ext;
+
+ job_ext = kbasep_map(kctx,
+ job_header + JOB_HEADER_32_FBD_OFFSET,
+ sizeof(*job_ext), &map);
+
+ if (!job_ext) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ fbd_address = *job_ext;
+
+ kbasep_unmap(kctx, &map);
+ }
+
+ if (fbd_address & FBD_TYPE) {
+ return kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight);
+ } else {
+ return kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight);
+ }
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ * the tiler_heap_free parameter, and set the hierarchy level mask to the
+ * hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx Context pointer
+ * @param[in,out] job_header Address of job header to reset. Set to address
+ * of next job header on exit.
+ * @param[in] prev_jc Previous job chain to link to, if this job is
+ * the last in the chain.
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] first_in_chain MALI_TRUE if this job is the first in the chain
+ * @param[in] fragment_chain MALI_TRUE if this job is in the fragment chain
+ *
+ * @return MALI_ERROR_NONE on success, error code on failure
+ */
+static mali_error kbasep_replay_reset_job(struct kbase_context *kctx,
+ mali_addr64 *job_header, mali_addr64 prev_jc,
+ mali_addr64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ mali_bool first_in_chain, mali_bool fragment_chain)
+{
+ struct job_head *job;
+ mali_addr64 new_job_header;
+ struct kbasep_map_struct map;
+
+ job = kbasep_map(kctx, *job_header, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_parse_jc: failed to map jc\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ dump_job_head(kctx, "Job header:", job);
+
+ if (job->status == JOB_NOT_STARTED && !fragment_chain) {
+ dev_err(kctx->kbdev->dev, "Job already not started\n");
+ goto out_unmap;
+ }
+ job->status = JOB_NOT_STARTED;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX)
+ job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) {
+ dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+ goto out_unmap;
+ }
+
+ if (first_in_chain)
+ job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER;
+
+ if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+ dev_err(kctx->kbdev->dev,
+ "Job indicies/dependencies out of valid range\n");
+ goto out_unmap;
+ }
+
+ if (job->dependencies[0])
+ job->dependencies[0] += hw_job_id_offset;
+ if (job->dependencies[1])
+ job->dependencies[1] += hw_job_id_offset;
+
+ job->index += hw_job_id_offset;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE) {
+ new_job_header = job->next._64;
+ if (!job->next._64)
+ job->next._64 = prev_jc;
+ } else {
+ new_job_header = job->next._32;
+ if (!job->next._32)
+ job->next._32 = prev_jc;
+ }
+ dump_job_head(kctx, "Updated to:", job);
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) {
+ mali_bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0;
+
+ if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, job_64) != MALI_ERROR_NONE)
+ goto out_unmap;
+
+ } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) {
+ u64 fbd_address;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ fbd_address = job->fragment_fbd._64;
+ else
+ fbd_address = (u64)job->fragment_fbd._32;
+
+ if (fbd_address & FBD_TYPE) {
+ if (kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != MALI_ERROR_NONE)
+ goto out_unmap;
+ } else {
+ if (kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != MALI_ERROR_NONE)
+ goto out_unmap;
+ }
+ }
+
+ kbasep_unmap(kctx, &map);
+
+ *job_header = new_job_header;
+
+ return MALI_ERROR_NONE;
+
+out_unmap:
+ kbasep_unmap(kctx, &map);
+ return MALI_ERROR_FUNCTION_FAILED;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain start address
+ * @param[out] hw_job_id Highest job ID in chain
+ *
+ * @return MALI_ERROR_NONE on success, error code on failure
+ */
+static mali_error kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+ mali_addr64 jc, u16 *hw_job_id)
+{
+ while (jc) {
+ struct job_head *job;
+ struct kbasep_map_struct map;
+
+ dev_dbg(kctx->kbdev->dev,
+ "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+ job = kbasep_map(kctx, jc, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ if (job->index > *hw_job_id)
+ *hw_job_id = job->index;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ jc = job->next._64;
+ else
+ jc = job->next._32;
+
+ kbasep_unmap(kctx, &map);
+ }
+
+ return MALI_ERROR_NONE;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have a status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain to be processed
+ * @param[in] prev_jc Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] fragment_chain MALI_TRUE if this chain is the fragment chain
+ *
+ * @return MALI_ERROR_NONE on success, error code otherwise
+ */
+static mali_error kbasep_replay_parse_jc(struct kbase_context *kctx,
+ mali_addr64 jc, mali_addr64 prev_jc,
+ mali_addr64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ mali_bool fragment_chain)
+{
+ mali_bool first_in_chain = MALI_TRUE;
+ int nr_jobs = 0;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+ jc, hw_job_id_offset);
+
+ while (jc) {
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+ if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, hw_job_id_offset,
+ first_in_chain, fragment_chain) != MALI_ERROR_NONE)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ first_in_chain = MALI_FALSE;
+
+ nr_jobs++;
+ if (fragment_chain &&
+ nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+ dev_err(kctx->kbdev->dev,
+ "Exceeded maximum number of jobs in fragment chain\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ }
+
+ return MALI_ERROR_NONE;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom The atom to be reset
+ * @param[in] dep_atom The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+ struct kbase_jd_atom *dep_atom)
+{
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+ list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ int i;
+
+ for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+ if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+ jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+ dev_dbg(kctx->kbdev->dev,
+ "kbasep_allocate_katom: Allocated atom %d\n",
+ i);
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] atom_id ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+ atom_id);
+
+ while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+ list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+ while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+ list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+ jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+ struct base_jd_atom_v2 *atom,
+ int atom_nr,
+ int prio)
+{
+ atom->nr_extres = 0;
+ atom->extres_list.value = NULL;
+ atom->device_nr = 0;
+ /* Convert priority back from NICE range */
+ atom->prio = ((prio << 16) / ((20 << 16) / 128)) - 128;
+ atom->atom_number = atom_nr;
+
+ base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+ base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+ atom->udata.blob[0] = 0;
+ atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ * replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom Atom to use for tiler jobs
+ * @param[out] f_atom Atom to use for fragment jobs
+ * @param[in] prio Priority of new atom (inherited from replay soft
+ * job)
+ * @return MALI_ERROR_NONE on success, error code on failure
+ */
+static mali_error kbasep_replay_create_atoms(struct kbase_context *kctx,
+ struct base_jd_atom_v2 *t_atom,
+ struct base_jd_atom_v2 *f_atom, int prio)
+{
+ int t_atom_nr, f_atom_nr;
+
+ t_atom_nr = kbasep_allocate_katom(kctx);
+ if (t_atom_nr < 0) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ f_atom_nr = kbasep_allocate_katom(kctx);
+ if (f_atom_nr < 0) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+ kbasep_release_katom(kctx, t_atom_nr);
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+ kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+ kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+ base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+ return MALI_ERROR_NONE;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+ mali_addr64 next;
+
+ dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+ next = payload->tiler_jc_list;
+
+ while (next) {
+ struct kbasep_map_struct map;
+ base_jd_replay_jc *jc_struct;
+
+ jc_struct = kbasep_map(kctx, next, sizeof(*jc_struct), &map);
+
+ if (!jc_struct)
+ return;
+
+ dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+ jc_struct, jc_struct->jc, jc_struct->next);
+
+ next = jc_struct->next;
+
+ kbasep_unmap(kctx, &map);
+ }
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] replay_atom Replay soft job atom
+ * @param[in] t_atom Atom to use for tiler jobs
+ * @param[in] f_atom Atom to use for fragment jobs
+ * @return MALI_ERROR_NONE on success, error code on failure
+ */
+static mali_error kbasep_replay_parse_payload(struct kbase_context *kctx,
+ struct kbase_jd_atom *replay_atom,
+ struct base_jd_atom_v2 *t_atom,
+ struct base_jd_atom_v2 *f_atom)
+{
+ base_jd_replay_payload *payload;
+ mali_addr64 next;
+ mali_addr64 prev_jc = 0;
+ u16 hw_job_id_offset = 0;
+ mali_error ret = MALI_ERROR_FUNCTION_FAILED;
+ struct kbasep_map_struct map;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+ replay_atom->jc, sizeof(payload));
+
+ kbase_gpu_vm_lock(kctx);
+
+ payload = kbasep_map(kctx, replay_atom->jc, sizeof(*payload), &map);
+
+ if (!payload) {
+ kbase_gpu_vm_unlock(kctx);
+ dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+ dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+ "tiler_jc_list = %llx\n"
+ "fragment_jc = %llx\n"
+ "tiler_heap_free = %llx\n"
+ "fragment_hierarchy_mask = %x\n"
+ "tiler_hierarchy_mask = %x\n"
+ "hierarchy_default_weight = %x\n"
+ "tiler_core_req = %x\n"
+ "fragment_core_req = %x\n",
+ payload->tiler_jc_list,
+ payload->fragment_jc,
+ payload->tiler_heap_free,
+ payload->fragment_hierarchy_mask,
+ payload->tiler_hierarchy_mask,
+ payload->hierarchy_default_weight,
+ payload->tiler_core_req,
+ payload->fragment_core_req);
+ payload_dump(kctx, payload);
+#endif
+
+ t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+ f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+ /* Sanity check core requirements */
+ if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+ ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T ||
+ (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+ ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS ||
+ t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+ f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+ dev_err(kctx->kbdev->dev, "Invalid core requirements\n");
+ goto out;
+ }
+
+ /* Process tiler job chains */
+ next = payload->tiler_jc_list;
+ if (!next) {
+ dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+ goto out;
+ }
+
+ while (next) {
+ base_jd_replay_jc *jc_struct;
+ struct kbasep_map_struct jc_map;
+ mali_addr64 jc;
+
+ jc_struct = kbasep_map(kctx, next, sizeof(*jc_struct), &jc_map);
+
+ if (!jc_struct) {
+ dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+ goto out;
+ }
+
+ jc = jc_struct->jc;
+ next = jc_struct->next;
+ if (next)
+ jc_struct->jc = 0;
+
+ kbasep_unmap(kctx, &jc_map);
+
+ if (jc) {
+ u16 max_hw_job_id = 0;
+
+ if (kbasep_replay_find_hw_job_id(kctx, jc,
+ &max_hw_job_id) != MALI_ERROR_NONE)
+ goto out;
+
+ if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+ payload->tiler_heap_free,
+ payload->tiler_hierarchy_mask,
+ payload->hierarchy_default_weight,
+ hw_job_id_offset, MALI_FALSE) !=
+ MALI_ERROR_NONE) {
+ goto out;
+ }
+
+ hw_job_id_offset += max_hw_job_id;
+
+ prev_jc = jc;
+ }
+ }
+ t_atom->jc = prev_jc;
+
+ /* Process fragment job chain */
+ f_atom->jc = payload->fragment_jc;
+ if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+ payload->tiler_heap_free,
+ payload->fragment_hierarchy_mask,
+ payload->hierarchy_default_weight, 0,
+ MALI_TRUE) != MALI_ERROR_NONE) {
+ goto out;
+ }
+
+ if (!t_atom->jc || !f_atom->jc) {
+ dev_err(kctx->kbdev->dev, "Invalid payload\n");
+ goto out;
+ }
+
+ dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+ t_atom->jc, f_atom->jc);
+ ret = MALI_ERROR_NONE;
+
+out:
+ kbasep_unmap(kctx, &map);
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_context *kctx;
+ struct kbase_jd_context *jctx;
+ bool need_to_try_schedule_context = false;
+
+ struct base_jd_atom_v2 t_atom, f_atom;
+ struct kbase_jd_atom *t_katom, *f_katom;
+
+ katom = container_of(data, struct kbase_jd_atom, work);
+ kctx = katom->kctx;
+ jctx = &kctx->jctx;
+
+ mutex_lock(&jctx->lock);
+
+ if (kbasep_replay_create_atoms(kctx, &t_atom, &f_atom,
+ katom->nice_prio) != MALI_ERROR_NONE) {
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ t_katom = &jctx->atoms[t_atom.atom_number];
+ f_katom = &jctx->atoms[f_atom.atom_number];
+
+ if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) !=
+ MALI_ERROR_NONE) {
+ kbasep_release_katom(kctx, t_atom.atom_number);
+ kbasep_release_katom(kctx, f_atom.atom_number);
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ kbasep_replay_reset_softjob(katom, f_katom);
+
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+ if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+ kbasep_release_katom(kctx, f_atom.atom_number);
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+ if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ need_to_try_schedule_context |= jd_done_nolock(katom);
+
+ if (need_to_try_schedule_context)
+ kbasep_js_try_schedule_head_ctx(kctx->kbdev);
+ mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This reads the job payload, checks the fault type and source, and then decides
+ * whether a replay is required.
+ *
+ * @param[in] katom The atom to be processed
+ * @return true if a replay is required, false otherwise.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct device *dev = kctx->kbdev->dev;
+ base_jd_replay_payload *payload;
+ mali_addr64 job_header;
+ mali_addr64 job_loop_detect;
+ struct job_head *job;
+ struct kbasep_map_struct job_map;
+ struct kbasep_map_struct map;
+ bool err = false;
+
+ /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+ * if force_replay is enabled.
+ */
+ if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+ return false;
+ } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+ return true;
+ } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+ katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+ return true;
+ } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+ /* No replay for faults of type other than
+ * BASE_JD_EVENT_DATA_INVALID_FAULT.
+ */
+ return false;
+ }
+
+ /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+ * to find out whether the source of exception is POLYGON_LIST. Replay
+ * is required if the source of fault is POLYGON_LIST.
+ */
+ kbase_gpu_vm_lock(kctx);
+
+ payload = kbasep_map(kctx, katom->jc, sizeof(*payload), &map);
+ if (!payload) {
+ kbase_gpu_vm_unlock(kctx);
+ dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+ return false;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+ dev_dbg(dev, "\nPayload structure:\n"
+ "fragment_jc = 0x%llx\n"
+ "fragment_hierarchy_mask = 0x%x\n"
+ "fragment_core_req = 0x%x\n",
+ payload->fragment_jc,
+ payload->fragment_hierarchy_mask,
+ payload->fragment_core_req);
+#endif
+ /* Process fragment job chain */
+ job_header = (mali_addr64) payload->fragment_jc;
+ job_loop_detect = job_header;
+ while (job_header) {
+ job = kbasep_map(kctx, job_header, sizeof(*job), &job_map);
+ if (!job) {
+ dev_err(dev, "failed to map jc\n");
+ /* unmap payload */
+ kbasep_unmap(kctx, &map);
+ kbase_gpu_vm_unlock(kctx);
+ return false;
+ }
+
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(dev, "\njob_head structure:\n"
+ "Source ID:0x%x Access:0x%x Exception:0x%x\n"
+ "at job addr = %p\n"
+ "not_complete_index = 0x%x\n"
+ "fault_addr = 0x%llx\n"
+ "flags = 0x%x\n"
+ "index = 0x%x\n"
+ "dependencies = 0x%x,0x%x\n",
+ JOB_SOURCE_ID(job->status),
+ ((job->status >> 8) & 0x3),
+ (job->status & 0xFF),
+ job,
+ job->not_complete_index,
+ job->fault_addr,
+ job->flags,
+ job->index,
+ job->dependencies[0],
+ job->dependencies[1]);
+#endif
+
+ /* Replay only when the polygon list reader caused the
+ * DATA_INVALID_FAULT */
+ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+ (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) {
+ err = true;
+ kbasep_unmap(kctx, &job_map);
+ break;
+ }
+
+ /* Move on to next fragment job in the list */
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ job_header = job->next._64;
+ else
+ job_header = job->next._32;
+
+ kbasep_unmap(kctx, &job_map);
+
+ /* Job chain loop detected */
+ if (job_header == job_loop_detect)
+ break;
+ }
+
+ /* unmap payload */
+ kbasep_unmap(kctx, &map);
+ kbase_gpu_vm_unlock(kctx);
+
+ return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom The atom to be processed
+ * @return false if the atom has completed
+ * true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_jd_context *jctx = &kctx->jctx;
+
+ if (katom->event_code == BASE_JD_EVENT_DONE) {
+ dev_dbg(kctx->kbdev->dev, "Previous job succeeded - not replaying\n");
+
+ if (katom->retry_count)
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ return false;
+ }
+
+ if (jctx->sched_info.ctx.is_dying) {
+ dev_dbg(kctx->kbdev->dev, "Not replaying; context is dying\n");
+
+ if (katom->retry_count)
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ return false;
+ }
+
+ /* Check job exception type and source before replaying. */
+ if (false == kbase_replay_fault_check(katom)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Replay cancelled on event %x\n", katom->event_code);
+ /* katom->event_code is already set to the failure code of the
+ * previous job.
+ */
+ return false;
+ }
+
+ dev_warn(kctx->kbdev->dev, "Replaying jobs retry=%d\n",
+ katom->retry_count);
+
+ katom->retry_count++;
+
+ if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+ dev_err(kctx->kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ /* katom->event_code is already set to the failure code of the
+ previous job */
+ return false;
+ }
+
+ /* only enter the disjoint state once for the duration of the replay */
+ if (katom->retry_count == 1)
+ kbase_disjoint_state_up(kctx->kbdev);
+
+ INIT_WORK(&katom->work, kbase_replay_process_worker);
+ queue_work(kctx->event_workq, &katom->work);
+
+ return true;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.c b/drivers/gpu/arm/midgard/mali_kbase_security.c
new file mode 100755
index 000000000000..e7916374ffef
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_security.c
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_security.c
+ * Base kernel security capability API
+ */
+
+#include <mali_kbase.h>
+
+static inline mali_bool kbasep_am_i_root(void)
+{
+#if KBASE_HWCNT_DUMP_BYPASS_ROOT
+ return MALI_TRUE;
+#else
+ /* Check if root */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+ if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
+ return MALI_TRUE;
+#else
+ if (current_euid() == 0)
+ return MALI_TRUE;
+#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/
+ return MALI_FALSE;
+#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/
+}
+
+/**
+ * kbase_security_has_capability - see mali_kbase_security.h for description.
+ */
+
+mali_bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags)
+{
+ /* Assume failure */
+ mali_bool access_allowed = MALI_FALSE;
+ mali_bool audit = (KBASE_SEC_FLAG_AUDIT & flags) ? MALI_TRUE : MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ CSTD_UNUSED(kctx);
+
+ /* Detect unsupported flags */
+ KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0);
+
+ /* Determine if access is allowed for the given cap */
+ switch (cap) {
+ case KBASE_SEC_MODIFY_PRIORITY:
+ case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT:
+ /* Access is granted only if the caller is privileged */
+ access_allowed = kbasep_am_i_root();
+ break;
+ }
+
+ /* Report problem if requested */
+ if (MALI_FALSE == access_allowed) {
+ if (MALI_FALSE != audit)
+ dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx);
+ }
+
+ return access_allowed;
+}
+
+KBASE_EXPORT_TEST_API(kbase_security_has_capability)
diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.h b/drivers/gpu/arm/midgard/mali_kbase_security.h
new file mode 100755
index 000000000000..c8d4e53ae3b0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_security.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_security.h
+ * Base kernel security capability APIs
+ */
+
+#ifndef _KBASE_SECURITY_H_
+#define _KBASE_SECURITY_H_
+
+/* Security flags */
+#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */
+#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */
+#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */
+
+/* List of unique capabilities that have security access privileges */
+enum kbase_security_capability {
+ /* Instrumentation Counters access privilege */
+ KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1,
+ KBASE_SEC_MODIFY_PRIORITY
+ /* Add additional access privileges here */
+};
+
+/**
+ * kbase_security_has_capability - determine whether a task has a particular effective capability
+ * @param[in] kctx The task context.
+ * @param[in] cap The capability to check for.
+ * @param[in] flags Additional configuration information, such as whether to
+ * write an audit message or not.
+ * @return MALI_TRUE if success (capability is allowed), MALI_FALSE otherwise.
+ */
+
+mali_bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags);
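+
+/* Illustrative sketch (not part of the original sources): a caller gating
+ * hardware counter collection on this capability might check it as follows,
+ * requesting an audit message on failure.
+ *
+ *   if (!kbase_security_has_capability(kctx,
+ *                   KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
+ *                   KBASE_SEC_FLAG_AUDIT))
+ *           return MALI_ERROR_FUNCTION_FAILED;
+ */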
+
+#endif /* _KBASE_SECURITY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100755
index 000000000000..f762996cfdd4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,442 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK ((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+ struct kbase_va_region *reg;
+ phys_addr_t addr = 0;
+ u64 pfn;
+ u32 offset;
+ char *page;
+ struct timespec ts;
+ struct base_dump_cpu_gpu_counters data;
+ u64 system_time;
+ u64 cycle_counter;
+ mali_addr64 jc = katom->jc;
+ struct kbase_context *kctx = katom->kctx;
+ int pm_active_err;
+
+ u32 hi1, hi2;
+
+ memset(&data, 0, sizeof(data));
+
+ /* Take the PM active reference as late as possible - otherwise, it could
+ * delay suspend until we process the atom (which may be at the end of a
+ * long chain of dependencies) */
+ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+ if (pm_active_err) {
+ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+ /* We're suspended - queue this on the list of suspended jobs
+ * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */
+ mutex_lock(&js_devdata->runpool_mutex);
+ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ return pm_active_err;
+ }
+
+ kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */
+ do {
+ hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
+ cycle_counter = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI), NULL);
+ cycle_counter |= (((u64) hi1) << 32);
+ } while (hi1 != hi2);
+
+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled correctly */
+ do {
+ hi1 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
+ system_time = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+ hi2 = kbase_reg_read(kctx->kbdev, GPU_CONTROL_REG(TIMESTAMP_HI), NULL);
+ system_time |= (((u64) hi1) << 32);
+ } while (hi1 != hi2);
+
+ /* Record the CPU's idea of current time */
+ getrawmonotonic(&ts);
+
+ kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+
+ kbase_pm_context_idle(kctx->kbdev);
+
+ data.sec = ts.tv_sec;
+ data.usec = ts.tv_nsec / 1000;
+ data.system_time = system_time;
+ data.cycle_counter = cycle_counter;
+
+ pfn = jc >> PAGE_SHIFT;
+ offset = jc & ~PAGE_MASK;
+
+ /* Assume this atom will be cancelled until we know otherwise */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ if (offset > 0x1000 - sizeof(data)) {
+ /* Wouldn't fit in the page */
+ return 0;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
+ if (reg &&
+ (reg->flags & KBASE_REG_GPU_WR) &&
+ reg->alloc && reg->alloc->pages)
+ addr = reg->alloc->pages[pfn - reg->start_pfn];
+
+ kbase_gpu_vm_unlock(kctx);
+ if (!addr)
+ return 0;
+
+ page = kmap(pfn_to_page(PFN_DOWN(addr)));
+ if (!page)
+ return 0;
+
+ dma_sync_single_for_cpu(katom->kctx->kbdev->dev,
+ kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+ offset, sizeof(data),
+ DMA_BIDIRECTIONAL);
+ memcpy(page + offset, &data, sizeof(data));
+ dma_sync_single_for_device(katom->kctx->kbdev->dev,
+ kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+ offset, sizeof(data),
+ DMA_BIDIRECTIONAL);
+ kunmap(pfn_to_page(PFN_DOWN(addr)));
+
+ /* Atom was fine - mark it as done */
+ katom->event_code = BASE_JD_EVENT_DONE;
+
+ return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited)
+ *
+ * @param katom The atom to complete
+ */
+static void complete_soft_job(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ mutex_lock(&kctx->jctx.lock);
+ list_del(&katom->dep_item[0]);
+ kbase_finish_soft_job(katom);
+ if (jd_done_nolock(katom))
+ kbasep_js_try_schedule_head_ctx(kctx->kbdev);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+ struct sync_pt *pt;
+ struct sync_timeline *timeline;
+
+ if (!list_is_singular(&katom->fence->pt_list_head)) {
+ /* Not exactly one item in the list - so it didn't (directly) come from us */
+ return BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+ pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+ timeline = pt->parent;
+
+ if (!kbase_sync_timeline_is_ours(timeline)) {
+ /* Fence has a sync_pt which isn't ours! */
+ return BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+ kbase_sync_signal_pt(pt, result);
+
+ sync_timeline_signal(timeline);
+
+ return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_context *kctx;
+
+ katom = container_of(data, struct kbase_jd_atom, work);
+ kctx = katom->kctx;
+
+ complete_soft_job(katom);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+ struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(NULL != katom);
+
+ kctx = katom->kctx;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ /* Propagate the fence status to the atom.
+ * If negative then cancel this atom and its dependencies.
+ */
+ if (fence->status < 0)
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+ *
+ * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+ * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+ */
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, kbase_fence_wait_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+ int ret;
+
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+ sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+ ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+ if (ret == 1) {
+ /* Already signalled */
+ return 0;
+ } else if (ret < 0) {
+ goto cancel_atom;
+ }
+ return 1;
+
+ cancel_atom:
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	/* We should cause the dependent jobs in the bag to be failed;
+	 * to do this we schedule the work queue to complete this job */
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, kbase_fence_wait_worker);
+ queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+ return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+ if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+ /* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+ return;
+ }
+
+ /* Wait was cancelled - zap the atoms */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ kbase_finish_soft_job(katom);
+
+ if (jd_done_nolock(katom))
+ kbasep_js_try_schedule_head_ctx(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ KBASE_DEBUG_ASSERT(katom->fence != NULL);
+ katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+ /* Release the reference as we don't need it any more */
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ return kbase_fence_wait(katom);
+#endif /* CONFIG_SYNC */
+ case BASE_JD_REQ_SOFT_REPLAY:
+ return kbase_replay_process(katom);
+ }
+
+ /* Atom is complete */
+ return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ kbase_fence_cancel_wait(katom);
+ break;
+#endif
+ default:
+ /* This soft-job doesn't support cancellation! */
+ KBASE_DEBUG_ASSERT(0);
+ }
+}
+
+mali_error kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ {
+ if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ break;
+#ifdef CONFIG_SYNC
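+	/* FENCE_TRIGGER: userspace passes a base_fence holding the stream
+	 * (timeline) fd in jc; a new fence is created on that stream, a
+	 * reference is kept for the later trigger, and the new fence fd is
+	 * written back to userspace.
+	 * FENCE_WAIT: userspace passes a base_fence holding an existing fence
+	 * fd; we only take a reference so that we can wait on it. */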
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ {
+ struct base_fence fence;
+ int fd;
+
+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ fd = kbase_stream_create_fence(fence.basep.stream_fd);
+ if (fd < 0)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ katom->fence = sync_fence_fdget(fd);
+
+ if (katom->fence == NULL) {
+			/* The only way the fence can be NULL is if userspace closed it for us,
+			 * so we don't need to clean it up */
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ fence.basep.fd = fd;
+ if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+ katom->fence = NULL;
+ sys_close(fd);
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ }
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ {
+ struct base_fence fence;
+
+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ /* Get a reference to the fence object */
+ katom->fence = sync_fence_fdget(fence.basep.fd);
+ if (katom->fence == NULL)
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ break;
+#endif /* CONFIG_SYNC */
+ case BASE_JD_REQ_SOFT_REPLAY:
+ break;
+ default:
+ /* Unsupported soft-job */
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+ return MALI_ERROR_NONE;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ /* Nothing to do */
+ break;
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ if (katom->fence) {
+ /* The fence has not yet been signalled, so we do it now */
+ kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ }
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ /* Release the reference to the fence object */
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ break;
+#endif /* CONFIG_SYNC */
+ }
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+ LIST_HEAD(local_suspended_soft_jobs);
+ struct kbase_jd_atom *tmp_iter;
+ struct kbase_jd_atom *katom_iter;
+ struct kbasep_js_device_data *js_devdata;
+ mali_bool resched = MALI_FALSE;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ js_devdata = &kbdev->js_data;
+
+ /* Move out the entire list */
+ mutex_lock(&js_devdata->runpool_mutex);
+ list_splice_init(&js_devdata->suspended_soft_jobs_list, &local_suspended_soft_jobs);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Each atom must be detached from the list and run separately - it could
+	 * be re-added to the old list, but this is unlikely */
+ list_for_each_entry_safe(katom_iter, tmp_iter, &local_suspended_soft_jobs, dep_item[1])
+ {
+ struct kbase_context *kctx = katom_iter->kctx;
+
+ mutex_lock(&kctx->jctx.lock);
+
+ /* Remove from the global list */
+ list_del(&katom_iter->dep_item[1]);
+ /* Remove from the context's list of waiting soft jobs */
+ list_del(&katom_iter->dep_item[0]);
+
+ if (kbase_process_soft_job(katom_iter) == 0) {
+ kbase_finish_soft_job(katom_iter);
+ resched |= jd_done_nolock(katom_iter);
+ } else {
+ /* The job has not completed */
+ KBASE_DEBUG_ASSERT((katom_iter->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ != BASE_JD_REQ_SOFT_REPLAY);
+ list_add_tail(&katom_iter->dep_item[0], &kctx->waiting_soft_jobs);
+ }
+
+ mutex_unlock(&kctx->jctx.lock);
+ }
+
+ if (resched)
+ kbasep_js_try_schedule_head_ctx(kbdev);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.c b/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100755
index 000000000000..13b194732d50
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,189 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+struct seq_file;
+#include "sync.h"
+#include <mali_kbase.h>
+
+struct mali_sync_timeline {
+ struct sync_timeline timeline;
+ atomic_t counter;
+ atomic_t signalled;
+};
+
+struct mali_sync_pt {
+ struct sync_pt pt;
+ int order;
+ int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+ return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+ return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_pt *new_mpt;
+ struct sync_pt *new_pt = sync_pt_create(pt->parent, sizeof(struct mali_sync_pt));
+
+ if (!new_pt)
+ return NULL;
+
+ new_mpt = to_mali_sync_pt(new_pt);
+ new_mpt->order = mpt->order;
+ new_mpt->result = mpt->result;
+
+ return new_pt;
+}
+
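+/* A sync_pt is considered signalled once the timeline's 'signalled' counter
+ * has caught up with the point's 'order' value.  A negative 'result' stored
+ * on the point is reported as an error status instead of a plain signal. */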
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(pt->parent);
+ int result = mpt->result;
+
+ int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+ if (diff >= 0)
+ return result < 0 ? result : 1;
+ else
+ return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+ struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+ struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+ int diff = ma->order - mb->order;
+
+ if (diff < 0)
+ return -1;
+ else if (diff == 0)
+ return 0;
+ else
+ return 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+ int size)
+{
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+ snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+ snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+ .driver_name = "Mali",
+ .dup = timeline_dup,
+ .has_signaled = timeline_has_signaled,
+ .compare = timeline_compare,
+ .timeline_value_str = timeline_value_str,
+ .pt_value_str = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+ return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+ struct sync_timeline *tl;
+ struct mali_sync_timeline *mtl;
+
+ tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+ if (!tl)
+ return NULL;
+
+ /* Set the counter in our private struct */
+ mtl = to_mali_sync_timeline(tl);
+ atomic_set(&mtl->counter, 0);
+ atomic_set(&mtl->signalled, 0);
+
+ return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+ struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+ struct mali_sync_pt *mpt;
+
+ if (!pt)
+ return NULL;
+
+ mpt = to_mali_sync_pt(pt);
+ mpt->order = atomic_inc_return(&mtl->counter);
+ mpt->result = 0;
+
+ return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(pt->parent);
+ int signalled;
+ int diff;
+
+ mpt->result = result;
+
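+	/* Advance the timeline's 'signalled' counter to this point's order,
+	 * retrying if another signaller moved it concurrently. */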
+ do {
+ signalled = atomic_read(&mtl->signalled);
+
+ diff = signalled - mpt->order;
+
+ if (diff > 0) {
+ /* The timeline is already at or ahead of this point.
+ * This should not happen unless userspace has been
+ * signalling fences out of order, so warn but don't
+ * violate the sync_pt API.
+ * The warning is only in debug builds to prevent
+ * a malicious user being able to spam dmesg.
+ */
+#ifdef CONFIG_MALI_DEBUG
+ pr_err("Fences were triggered in a different order to allocation!");
+#endif /* CONFIG_MALI_DEBUG */
+ return;
+ }
+ } while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif /* CONFIG_SYNC */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100755
index 000000000000..b9f295146c7c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+#include <malisw/mali_malisw.h>
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+mali_error kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early;
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+mali_error kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds, and attempts to signal out-of-order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
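+/* Typical usage (illustrative sketch only - see kbase_stream_create_fence()
+ * and kbase_fence_trigger() for the real call sites):
+ *
+ *   struct sync_timeline *tl = kbase_sync_timeline_alloc("timeline");
+ *   struct sync_pt *pt = kbase_sync_pt_alloc(tl);
+ *   struct sync_fence *fence = sync_fence_create("mali_fence", pt);
+ *   ...
+ *   kbase_sync_signal_pt(pt, 0);   (any non-negative result == success)
+ *   sync_timeline_signal(tl);      (wakes anyone waiting on the fence)
+ */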
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100755
index 000000000000..c823226e151a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+#include <mali_base_kernel_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+ struct sync_timeline *tl;
+
+ tl = (struct sync_timeline *)file->private_data;
+ BUG_ON(!tl);
+ sync_timeline_destroy(tl);
+ return 0;
+}
+
+static const struct file_operations stream_fops = {
+ .owner = THIS_MODULE,
+ .release = kbase_stream_close,
+};
+
+mali_error kbase_stream_create(const char *name, int *const out_fd)
+{
+ struct sync_timeline *tl;
+
+ BUG_ON(!out_fd);
+
+ tl = kbase_sync_timeline_alloc(name);
+ if (!tl)
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+ if (*out_fd < 0) {
+ sync_timeline_destroy(tl);
+ return MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ return MALI_ERROR_NONE;
+ }
+}
+
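+/* Given a stream (timeline) fd created by kbase_stream_create(), allocate a
+ * new sync_pt on that timeline, wrap it in a fence and return a new fd
+ * referring to the fence.  Ownership of the fd passes to the caller; a
+ * negative errno-style value is returned on failure. */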
+int kbase_stream_create_fence(int tl_fd)
+{
+ struct sync_timeline *tl;
+ struct sync_pt *pt;
+ struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ struct files_struct *files;
+ struct fdtable *fdt;
+#endif
+ int fd;
+ struct file *tl_file;
+
+ tl_file = fget(tl_fd);
+ if (tl_file == NULL)
+ return -EBADF;
+
+ if (tl_file->f_op != &stream_fops) {
+ fd = -EBADF;
+ goto out;
+ }
+
+ tl = tl_file->private_data;
+
+ pt = kbase_sync_pt_alloc(tl);
+ if (!pt) {
+ fd = -EFAULT;
+ goto out;
+ }
+
+ fence = sync_fence_create("mali_fence", pt);
+ if (!fence) {
+ sync_pt_free(pt);
+ fd = -EFAULT;
+ goto out;
+ }
+
+ /* from here the fence owns the sync_pt */
+
+ /* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ sync_fence_put(fence);
+ goto out;
+ }
+#else
+ fd = get_unused_fd();
+ if (fd < 0) {
+ sync_fence_put(fence);
+ goto out;
+ }
+
+ files = current->files;
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ __set_close_on_exec(fd, fdt);
+#else
+ FD_SET(fd, fdt->close_on_exec);
+#endif
+ spin_unlock(&files->file_lock);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+ /* bind fence to the new fd */
+ sync_fence_install(fence, fd);
+
+ out:
+ fput(tl_file);
+
+ return fd;
+}
+
+mali_error kbase_fence_validate(int fd)
+{
+ struct sync_fence *fence;
+
+ fence = sync_fence_fdget(fd);
+ if (NULL != fence) {
+ sync_fence_put(fence);
+ return MALI_ERROR_NONE;
+ } else {
+ return MALI_ERROR_FUNCTION_FAILED;
+ }
+}
+
+#endif /* CONFIG_SYNC */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100755
index 000000000000..e492ff2ca481
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code identifiers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #undef KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ * - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+ /* no info_val, no gpu_addr, no atom */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+ /* no info_val, no gpu_addr, no atom */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+ /* info_val == GPU_IRQ_STATUS register */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+ /* info_val == bits cleared */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+ /* info_val == GPU_IRQ_STATUS register */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+ /* GPU addr==dump address */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+ /* info_val==irq rawstat at start */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+ /* info_val==jobs processed */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job is found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+ /* info_val==exit code; gpu_addr==chain gpuaddr */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+ /* gpu_addr is as follows:
+ * - If JS_STATUS active after soft-stop, val==gpu addr written to
+ * JS_HEAD on submit
+ * - otherwise gpu_addr==0 */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+ /* gpu_addr==JS_TAIL read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+ /* info_val == is_scheduled */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+ /* info_val == is_scheduled */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+ /* info_val == nr jobs submitted */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+ /* gpu_addr==JS_HEAD_NEXT last written */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+ /* gpu_addr==0, info_val==0, uatom==0 */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+ /* gpu_addr==last value written/would be written to JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+ /* kctx is the one being evicted, info_val == kctx to put in */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+ /* info_val == lower 32 bits of rechecked affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+ /* info_val == lower 32 bits of rechecked affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+ /* info_val == the ctx attribute now on ctx */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+ /* info_val == the ctx attribute now on runpool */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+ /* info_val == the ctx attribute now off ctx */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+ /* info_val == the ctx attribute now off runpool */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+ /* info_val == whether it was evicted */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+ /* gpu_addr==JS_HEAD to write if the job were run */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+ /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+ /* info_val == policy number, or -1 for "Already changing" */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+ /* info_val == policy number */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+ /* info_val == policy number */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+ KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100755
index 000000000000..49f5035a913e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,231 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+struct kbase_trace_timeline_desc
+{
+ char *enum_str;
+ char *desc;
+ char *format;
+ char *format_desc;
+};
+
+struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] =
+{
+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+ #include "mali_kbase_trace_timeline_defs.h"
+ #undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+STATIC void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+ if (*pos >= KBASE_NR_TRACE_CODES)
+ return NULL;
+
+ return &kbase_trace_timeline_desc_table[*pos];
+}
+
+STATIC void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+STATIC void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+ (*pos)++;
+
+ if (*pos == KBASE_NR_TRACE_CODES)
+ return NULL;
+
+ return &kbase_trace_timeline_desc_table[*pos];
+}
+
+STATIC int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+ struct kbase_trace_timeline_desc *trace_desc = data;
+
+ seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+ return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+ .start = kbasep_trace_timeline_seq_start,
+ .next = kbasep_trace_timeline_seq_next,
+ .stop = kbasep_trace_timeline_seq_stop,
+ .show = kbasep_trace_timeline_seq_show,
+};
+
+STATIC int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+ .open = kbasep_trace_timeline_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+mali_error kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+ kbdev->timeline.dentry = debugfs_create_file("mali_timeline_defs",
+ S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+ &kbasep_trace_timeline_debugfs_fops);
+ if (IS_ERR(kbdev->timeline.dentry))
+ return MALI_ERROR_FUNCTION_FAILED;
+
+ return MALI_ERROR_NONE;
+}
+
+void kbasep_trace_timeline_debugfs_term(struct kbase_device *kbdev)
+{
+ debugfs_remove(kbdev->timeline.dentry);
+}
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+ } else {
+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+ KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+ }
+ ++kbdev->timeline.slot_atoms_submitted[js];
+
+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+ } else {
+ /* Job finished in JS_HEAD */
+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+ KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+ /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+ if (kbdev->timeline.slot_atoms_submitted[js] > 1) {
+ /* Tag events with next_katom's kctx */
+ struct kbase_jm_slot *slot = &kbdev->jm_slots[js];
+ struct kbase_jd_atom *next_katom;
+ struct kbase_context *next_kctx;
+ KBASE_DEBUG_ASSERT(kbasep_jm_nr_jobs_submitted(slot) > 0);
+
+ /* Peek the next atom - note that the atom in JS_HEAD will already
+ * have been dequeued */
+ next_katom = kbasep_jm_peek_idx_submit_slot(slot, 0);
+ next_kctx = next_katom->kctx;
+ KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+ KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+ KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+ }
+ }
+
+ --kbdev->timeline.slot_atoms_submitted[js];
+
+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
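+/* Each PM event type carries a non-zero UID so that a 'send' can later be
+ * matched with the 'handle' that consumes it.  Producers and consumers race
+ * for the per-event slot with atomic_cmpxchg(); a loser simply reports UID 0. */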
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+ int uid = 0;
+ int old_uid;
+
+	/* If a producer already exists for the event, try to use its UID (multiple producers) */
+ uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+ old_uid = uid;
+
+ /* Get a new non-zero UID if we don't have one yet */
+ while (!uid)
+ uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+ /* Try to use this UID */
+ if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+ /* If it changed, raced with another producer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+ if (uid != 0) {
+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+ /* If it changed, raced with another consumer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+ }
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+ /* If it changed, raced with another consumer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+ /* Simply log the start of the transition */
+ kbdev->timeline.l2_transitioning = MALI_TRUE;
+ KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+ /* Simply log the end of the transition */
+ if (MALI_FALSE != kbdev->timeline.l2_transitioning)
+ {
+ kbdev->timeline.l2_transitioning = MALI_FALSE;
+ KBASE_TIMELINE_POWERED_L2(kbdev);
+ }
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100755
index 000000000000..63cacff66e1c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,380 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+typedef enum
+{
+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+ #include "mali_kbase_trace_timeline_defs.h"
+ #undef KBASE_TIMELINE_TRACE_CODE
+} kbase_trace_timeline_code;
+
+/** Initialize Timeline DebugFS entries */
+mali_error kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+/** Terminate Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_term(struct kbase_device *kbdev);
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
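+/* Each KBASE_TIMELINE_* macro below samples the current time with
+ * getnstimeofday() and emits the corresponding mali_timeline tracepoint.
+ * When CONFIG_MALI_TRACE_TIMELINE is disabled they all compile to CSTD_NOP(). */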
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+ * the process of being returned to user) */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \
+ (int)kctx->timeline.owner_tgid, \
+ count); \
+ } while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \
+ CTX_FLOW_ATOM_READY, \
+ (int)kctx->timeline.owner_tgid, \
+ atom_id); \
+ } while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint from the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this one.
+ *
+ * This is more useful because it lets us calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_ACTIVE, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_NEXT, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_HEAD, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_STOPPING, \
+ (kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+ js, count); \
+ } while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_ACTIVE, active); \
+ } while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_TILER_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_SHADER_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_L2_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace state of L2 cache */
+#define KBASE_TIMELINE_POWERING_L2(kbdev) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_GPU_POWER_L2_POWERING, \
+ 1); \
+ } while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_GPU_POWER_L2_ACTIVE, \
+ 1); \
+ } while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_PM_SEND_EVENT, \
+ event_type, pm_event_id); \
+ } while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_PM_HANDLE_EVENT, \
+ event_type, pm_event_id); \
+ } while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
+ HW_START_GPU_JOB_CHAIN_SW_APPROX, \
+ (int)kctx->timeline.owner_tgid, \
+ js, _consumerof_atom_number); \
+ } while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
+ HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \
+ (int)kctx->timeline.owner_tgid, \
+ js, _producerof_atom_number_completed); \
+ } while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certain caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \
+ trace_code, \
+ 1); \
+ } while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \
+ do \
+ { \
+ struct timespec ts; \
+ getnstimeofday(&ts); \
+ trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \
+ count); \
+ } while (0)
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom is done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+ enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static INLINE void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static INLINE void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static INLINE void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static INLINE void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static INLINE void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static INLINE void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+
+}
+
+static INLINE void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+
+}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif /* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100755
index 000000000000..04d414cd0db9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ * displayed, and is split up into various parts, separated by underscores:
+ * - 'SW' and 'HW' as the first part will be used to determine whether a
+ * timeline is to do with Software or Hardware - effectively, separate
+ * 'channels' for Software and Hardware
+ * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ * signify related pairs of events - these are optional.
+ * - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ * - 'SW_ENTER_FOO'
+ * - 'SW_LEAVE_FOO'
+ * - 'SW_FLOW_BAR_1'
+ * - 'SW_FLOW_BAR_2'
+ * - 'HW_START_BAZ'
+ * - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ * - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ *   on that instance.
+ * - underscore-prefixed parameters will be hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ * (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context; that
+ *   would be confusing - there are really only 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ *   context; that would be confusing - all shader cores (whether there are 4, 8,
+ *   etc.) are shared in the system.
+ */
+
+ /*
+ * CTX events
+ */
+ /* Separate timelines for each context 'instance'*/
+ KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+ /*
+ * SW Events
+ */
+ /* Separate timelines for each slot 'instance' */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+ /* Shader and overall power is shared - can't have separate instances of
+ * it, just tagging with the context */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"),
+
+ /* SW Power event messaging. _event_type is one from the kbase_pm_event enum */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+ /* SW L2 power events */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"),
+
+ /*
+ * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+ /*
+ * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+ */
+ /* kbase_pm_request_cores */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+ /* kbase_pm_release_cores */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+ /*
+ * END: SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+
+ /*
+ * HW Events
+ */
+ KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain start (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_consumerof_atom_number_ready"),
+ KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain stop (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100755
index 000000000000..0e2fecf33d77
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,477 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include <malisw/mali_malisw.h>
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT)
+#define SUPPORT_MALI_ERROR_INJECT
+#elif defined(MALI_ERROR_INJECT)
+#if MALI_ERROR_INJECT
+#define SUPPORT_MALI_ERROR_INJECT
+#endif
+#endif
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+#define BASE_UK_VERSION_MAJOR 8
+#define BASE_UK_VERSION_MINOR 0
+
+struct kbase_uk_mem_alloc {
+ union uk_header header;
+ /* IN */
+ u64 va_pages;
+ u64 commit_pages;
+ u64 extent;
+ /* IN/OUT */
+ u64 flags;
+ /* OUT */
+ u64 gpu_va;
+ u16 va_alignment;
+ u8 padding[6];
+};
+
+struct kbase_uk_mem_free {
+ union uk_header header;
+ /* IN */
+ mali_addr64 gpu_addr;
+ /* OUT */
+};
+
+struct kbase_uk_mem_alias {
+ union uk_header header;
+ /* IN/OUT */
+ u64 flags;
+ /* IN */
+ u64 stride;
+ u64 nents;
+ union kbase_pointer ai;
+ /* OUT */
+ u64 gpu_va;
+ u64 va_pages;
+};
+
+struct kbase_uk_mem_import {
+ union uk_header header;
+ /* IN */
+ union kbase_pointer phandle;
+ u32 type;
+ u32 padding;
+ /* IN/OUT */
+ u64 flags;
+ /* OUT */
+ mali_addr64 gpu_va;
+ u64 va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+ union uk_header header;
+ /* IN */
+ mali_addr64 gpu_va;
+ u64 flags;
+ u64 mask;
+};
+
+struct kbase_uk_job_submit {
+ union uk_header header;
+ /* IN */
+ union kbase_pointer addr;
+ u32 nr_atoms;
+ u32 stride; /* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+ /* OUT */
+};
+
+struct kbase_uk_post_term {
+ union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+ union uk_header header;
+
+ /* IN */
+ struct base_syncset sset;
+
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+ union uk_header header;
+
+ /* IN */
+ mali_addr64 dump_buffer;
+ u32 jm_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 l3_cache_bm;
+ u32 mmu_l2_bm;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_dump {
+ union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+ union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+ union uk_header header;
+ /* IN */
+ s32 fd;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_stream_create {
+ union uk_header header;
+ /* IN */
+ char name[32];
+ /* OUT */
+ s32 fd;
+ u32 padding;
+};
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+/**
+ * This structure is kept for backward compatibility reasons.
+ * It shall be removed as soon as the KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removing KBASE_FUNC_CPU_PROPS_REG_DUMP is part of moving
+ * the function for reading CPU properties from base to osu.
+ */
+#define BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN F_BIT_0
+struct base_cpu_id_props {
+ /**
+ * CPU ID
+ */
+ u32 id;
+
+ /**
+ * CPU Part number
+ */
+ u16 part;
+ /**
+ * ASCII code of implementer trademark
+ */
+ u8 implementer;
+
+ /**
+ * CPU Variant
+ */
+ u8 variant;
+ /**
+ * CPU Architecture
+ */
+ u8 arch;
+
+ /**
+ * CPU revision
+ */
+ u8 rev;
+
+ /**
+ * Validity of the CPU ID: 0 = invalid, 1 = valid.
+ * Set to 1 only if ALL the cpu_id props are valid.
+ */
+ u8 valid;
+
+ u8 padding[1];
+};
+
+/**
+ * This structure is kept for backward compatibility reasons.
+ * It shall be removed as soon as the KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removing KBASE_FUNC_CPU_PROPS_REG_DUMP is part of moving
+ * the function for reading CPU properties from base to osu.
+ */
+struct base_cpu_props {
+ u32 nr_cores; /**< Number of CPU cores */
+
+ /**
+ * CPU page size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+ */
+ u32 cpu_page_size_log2;
+
+ /**
+ * CPU L1 Data cache line size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_L1_DCACHE_LINE_SIZE_LOG2.
+ */
+ u32 cpu_l1_dcache_line_size_log2;
+
+ /**
+ * CPU L1 Data cache size, in bytes. The compile-time equivalent is
+ * @ref OSU_CONFIG_CPU_L1_DCACHE_SIZE.
+ *
+ * This CPU Property is mainly provided to implement OpenCL's
+ * clGetDeviceInfo(), which allows the CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
+ * hint to be queried.
+ */
+ u32 cpu_l1_dcache_size;
+
+ /**
+ * CPU Property Flags bitpattern.
+ *
+ * This is a combination of bits as specified by the macros prefixed with
+ * 'BASE_CPU_PROPERTY_FLAG_'.
+ */
+ u32 cpu_flags;
+
+ /**
+ * Maximum clock speed in MHz.
+ * @usecase 'Maximum' CPU Clock Speed information is required by OpenCL's
+ * clGetDeviceInfo() function for the CL_DEVICE_MAX_CLOCK_FREQUENCY hint.
+ */
+ u32 max_cpu_clock_speed_mhz;
+
+ /**
+ * @brief Total memory, in bytes.
+ *
+ * This is the theoretical maximum memory available to the CPU. It is
+ * unlikely that a client will be able to allocate all of this memory for
+ * their own purposes, but this at least provides an upper bound on the
+ * memory available to the CPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL CPU devices.
+ */
+ u64 available_memory_size;
+
+ /**
+ * CPU ID detailed info
+ */
+ struct base_cpu_id_props cpu_id;
+
+ u32 padding;
+};
+
+/**
+ * This structure is kept for backward compatibility reasons.
+ * It shall be removed as soon as the KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removing KBASE_FUNC_CPU_PROPS_REG_DUMP is part of moving
+ * the function for reading CPU properties from base to osu.
+ */
+struct kbase_uk_cpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct base_cpu_props props;
+ /* OUT */
+};
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+struct kbase_uk_gpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct mali_base_gpu_props props;
+ /* OUT */
+};
+
+struct kbase_uk_mem_query {
+ union uk_header header;
+ /* IN */
+ mali_addr64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE 1
+#define KBASE_MEM_QUERY_VA_SIZE 2
+#define KBASE_MEM_QUERY_FLAGS 3
+ u64 query;
+ /* OUT */
+ u64 value;
+};
+
+struct kbase_uk_mem_commit {
+ union uk_header header;
+ /* IN */
+ mali_addr64 gpu_addr;
+ u64 pages;
+ /* OUT */
+ u32 result_subcode;
+ u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+ union uk_header header;
+ /* IN */
+ mali_addr64 gpu_addr;
+ u64 cpu_addr;
+ u64 size;
+ /* OUT */
+ mali_size64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+ union uk_header header;
+ /* OUT */
+ char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+ u32 version_string_size;
+ u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+ union uk_header header;
+ /* OUT */
+ u32 counter;
+ u32 padding;
+};
+
+struct kbase_uk_set_flags {
+ union uk_header header;
+ /* IN */
+ u32 create_flags;
+ u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+typedef struct kbase_exported_test_data {
+ mali_addr64 test_addr[TEST_ADDR_COUNT]; /**< memory address */
+ u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */
+ union kbase_pointer kctx; /**< base context created by process */
+ union kbase_pointer mm; /**< pointer to process address space */
+ u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+ u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+} kbase_exported_test_data;
+
+struct kbase_uk_set_test_data {
+ union uk_header header;
+ /* IN */
+ struct kbase_exported_test_data test_data;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_error_params params;
+};
+#endif /* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_model_control_params params;
+};
+#endif /* SUPPORT_MALI_NO_MALI */
+
+#define KBASE_MAXIMUM_EXT_RESOURCES 255
+
+struct kbase_uk_ext_buff_kds_data {
+ union uk_header header;
+ union kbase_pointer external_resource;
+ union kbase_pointer file_descriptor;
+ u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */
+ u32 padding;
+};
+
+struct kbase_uk_keep_gpu_powered {
+ union uk_header header;
+ u32 enabled;
+ u32 padding;
+};
+
+struct kbase_uk_profiling_controls {
+ union uk_header header;
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+ union uk_header header;
+ u32 len;
+ union kbase_pointer buf;
+};
+
+enum kbase_uk_function_id {
+ KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+ KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+ KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+ KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+ KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+ KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+ KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+ KBASE_FUNC_SYNC = (UK_FUNC_ID + 8),
+
+ KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+ KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+ KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+ KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE = (UK_FUNC_ID + 13),
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+ KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+ KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+ KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+ KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17),
+ KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+ KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+ KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+ KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+ KBASE_FUNC_KEEP_GPU_POWERED = (UK_FUNC_ID + 22),
+
+ KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+ KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+ KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+ KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+ /* to be used only for testing
+ * purposes, otherwise these controls
+ * are set through gator API */
+
+ KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+ KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+ KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29)
+
+};
+
+#endif /* _KBASE_UKU_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100755
index 000000000000..d159872306c5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+mali_bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+ struct list_head *pos = base->next;
+
+ while (pos != base) {
+ if (pos == entry)
+ return MALI_TRUE;
+
+ pos = pos->next;
+ }
+ return MALI_FALSE;
+}
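For illustration only, a minimal sketch of how kbasep_list_member_of() might be used to guard a list removal; the wrapper and list names below are hypothetical and are not part of this driver:

	/* Hypothetical helper: remove 'entry' only if it is still linked into
	 * 'head'. Relies on kbasep_list_member_of() walking the whole list. */
	static void example_remove_if_member(struct list_head *head,
					     struct list_head *entry)
	{
		if (kbasep_list_member_of(head, entry))
			list_del(entry);
	}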
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100755
index 000000000000..59988163fd2e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base The head of the list to be tested
+ * @param entry The list entry to be tested
+ *
+ * @return MALI_TRUE if entry is a member of base
+ * MALI_FALSE otherwise
+ */
+mali_bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif /* _KBASE_UTILITY_H */
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100755
index 000000000000..20250592dd63
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+ TP_PROTO(int jobslot, unsigned int info_val),
+ TP_ARGS(jobslot, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, jobslot)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->jobslot = jobslot;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+ TP_PROTO(int jobslot, unsigned int info_val), \
+ TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
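For each name passed to DEFINE_MALI_SLOT_EVENT() above, the kernel tracepoint machinery generates a trace_mali_<name>() call with the prototype of mali_slot_template. A minimal sketch of emitting one of these events (the wrapper name is hypothetical):

	/* Hypothetical call site: emit the JM_SUBMIT slot event defined above. */
	static inline void example_emit_jm_submit(int jobslot, unsigned int info_val)
	{
		trace_mali_JM_SUBMIT(jobslot, info_val);
	}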
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+ TP_PROTO(int refcount, unsigned int info_val),
+ TP_ARGS(refcount, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, refcount)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->refcount = refcount;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+ TP_PROTO(int refcount, unsigned int info_val), \
+ TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+ TP_PROTO(int gpu_addr, unsigned int info_val),
+ TP_ARGS(gpu_addr, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, gpu_addr)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->gpu_addr = gpu_addr;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+ TP_PROTO(int gpu_addr, unsigned int info_val), \
+ TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100755
index 000000000000..537610b49d36
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,129 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event, TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid, unsigned char job_id), TP_ARGS(event_id, tgid, pid, job_id), TP_STRUCT__entry(__field(unsigned int, event_id)
+ __field(unsigned int, tgid)
+ __field(unsigned int, pid)
+ __field(unsigned char, job_id)
+ ), TP_fast_assign(__entry->event_id = event_id; __entry->tgid = tgid; __entry->pid = pid; __entry->job_id = job_id;), TP_printk("event=%u tgid=%u pid=%u job_id=%u", __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+ );
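A sketch of how this tracepoint could be emitted from the driver; the helper name is hypothetical, and the composition of event_id via GATOR_MAKE_EVENT() is defined elsewhere and not shown here:

	/* Hypothetical helper: forward a pre-composed event_id to the tracepoint. */
	static inline void example_report_job_slot_event(unsigned int event_id,
							 unsigned int tgid,
							 unsigned int pid,
							 unsigned char job_id)
	{
		trace_mali_job_slots_event(event_id, tgid, pid, job_id);
	}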
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache, l3 cache)
+ * @value: 64-bit bitmask reporting the power status of the cores (1 = ON, 0 = OFF)
+ */
+TRACE_EVENT(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+ );
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache, l3 cache)
+ * @value: 64-bit bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on, TP_PROTO(unsigned int event_id, unsigned long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+ );
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache, l3 cache)
+ * @value: 64-bit bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off, TP_PROTO(unsigned int event_id, unsigned long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+ );
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker();
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(int, event_id)
+ __field(unsigned long, value)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+ );
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space();
+ * it reports that a certain MMU address space is now in use.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use, TP_PROTO(int event_id), TP_ARGS(event_id), TP_STRUCT__entry(__field(int, event_id)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event=%d", __entry->event_id)
+ );
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal();
+ * it reports that a certain MMU address space has now been released.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released, TP_PROTO(int event_id), TP_ARGS(event_id), TP_STRUCT__entry(__field(int, event_id)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event=%d", __entry->event_id)
+ );
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ * and by kbase_atomic_sub_pages();
+ * it reports that the total number of allocated pages has changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change, TP_PROTO(long long int event_id), TP_ARGS(event_id), TP_STRUCT__entry(__field(long long int, event_id)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event=%lld", __entry->event_id)
+ );
+
+/**
+ * mali_sw_counter - not currently used
+ * @event_id: counter id
+ */
+TRACE_EVENT(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value), TP_ARGS(event_id, value), TP_STRUCT__entry(__field(int, event_id)
+ __field(long long, value)
+ ), TP_fast_assign(__entry->event_id = event_id;), TP_printk("event %d = %lld", __entry->event_id, __entry->value)
+ );
+
+#endif /* _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100755
index 000000000000..a6231183a9a5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,527 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
+#define L3_FEATURES 0x008 /* (RO) Level 3 cache features */
+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
+#define MEM_FEATURES 0x010 /* (RO) Memory system features */
+#define MMU_FEATURES 0x014 /* (RO) MMU features */
+#define AS_PRESENT 0x018 /* (RO) Address space slots present */
+#define JS_PRESENT 0x01C /* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
+#define GPU_IRQ_MASK 0x028 /* (RW) */
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended for use with SOFT_RESET
+ commands, which may take time. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down
+ and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND 0x030 /* (WO) */
+#define GPU_STATUS 0x034 /* (RO) */
+
+#define GROUPS_L2_COHERENT (1 << 0) /* Core groups are L2 coherent */
+#define GROUPS_L3_COHERENT (1 << 1) /* Core groups are L3 coherent */
+
+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY 0x050 /* (WO) Power manager key register */
+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_L3_CACHE_EN 0x078 /* (RW) Performance counter enable flags for L3 cache */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
+
+#define L3_PRESENT_LO 0x130 /* (RO) Level 3 cache present bitmap, low word */
+#define L3_PRESENT_HI 0x134 /* (RO) Level 3 cache present bitmap, high word */
+
+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
+
+#define L3_READY_LO 0x170 /* (RO) Level 3 cache ready bitmap, low word */
+#define L3_READY_HI 0x174 /* (RO) Level 3 cache ready bitmap, high word */
+
+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+
+#define L3_PWRON_LO 0x1B0 /* (WO) Level 3 cache power on bitmap, low word */
+#define L3_PWRON_HI 0x1B4 /* (WO) Level 3 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
+
+#define L3_PWROFF_LO 0x1F0 /* (WO) Level 3 cache power off bitmap, low word */
+#define L3_PWROFF_HI 0x1F4 /* (WO) Level 3 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define L3_PWRTRANS_LO 0x230 /* (RO) Level 3 cache power transition bitmap, low word */
+#define L3_PWRTRANS_HI 0x234 /* (RO) Level 3 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
+
+#define L3_PWRACTIVE_LO 0x270 /* (RO) Level 3 cache active bitmap, low word */
+#define L3_PWRACTIVE_HI 0x274 /* (RO) Level 3 cache active bitmap, high word */
+
+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE 0x1000
+
+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+
+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
+
+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
+
+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
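As a worked example of how the macros above compose an absolute register offset (the numbers follow directly from the defines; the variable name is illustrative only):

	/* JOB_SLOT_REG(2, JS_COMMAND_NEXT)
	 *   = JOB_CONTROL_BASE + JOB_SLOT0 + (2 << 7) + JS_COMMAND_NEXT
	 *   = 0x1000 + 0x800 + 0x100 + 0x60
	 *   = 0x1960 */
	u32 js2_command_next = JOB_SLOT_REG(2, JS_COMMAND_NEXT);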
+
+#define MEMORY_MANAGEMENT_BASE 0x2000
+#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
+
+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. These values also apply to the
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK and MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS 16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n) (1UL << (n))
+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
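For illustration, a sketch of decoding an MMU interrupt status word with the two macros above; page fault flags occupy the low 16 bits and bus error flags the next 16 bits, indexed by address space number. The helper names and the 'status' parameter are hypothetical:

	/* Hypothetical decode helpers for a value read from MMU_IRQ_STATUS. */
	static inline int example_as_page_faulted(u32 status, int as_nr)
	{
		return (status & MMU_PAGE_FAULT(as_nr)) != 0;
	}

	static inline int example_as_bus_errored(u32 status, int as_nr)
	{
		return (status & MMU_BUS_ERROR(as_nr)) != 0;
	}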
+
+/*
+ * Begin MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_ADDR_SPACE_MASK 0xfffff000
+#define AS_TRANSTAB_ADRMODE_UNMAPPED (0u << 0)
+#define AS_TRANSTAB_ADRMODE_IDENTITY (1u << 1)
+#define AS_TRANSTAB_ADRMODE_TABLE (3u << 0)
+#define AS_TRANSTAB_READ_INNER (1u << 2)
+#define AS_TRANSTAB_SHARE_OUTER (1u << 4)
+
+#define MMU_TRANSTAB_ADRMODE_MASK 0x00000003
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8)
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP 0x00 /* NOP Operation */
+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs
+ (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1 caches, then
+ flush all L2 caches, then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status rather than a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE 0x00
+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE 0x40
+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with code 0x57 is defined in the spec. */
+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE 0x60
+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */
+#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x48
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x4F
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC 0x4D
+/* symbol for default MEMATTR to use */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc inner */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT 0
+#define GPU_ID_VERSION_MINOR_SHIFT 4
+#define GPU_ID_VERSION_MAJOR_SHIFT 12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16
+#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X 0x6956
+#define GPU_ID_PI_T62X 0x0620
+#define GPU_ID_PI_T76X 0x0750
+#define GPU_ID_PI_T72X 0x0720
+#ifdef MALI_INCLUDE_TFRX
+#define GPU_ID_PI_TFRX 0x0880
+#endif /* MALI_INCLUDE_TFRX */
+#define GPU_ID_PI_T86X 0x0860
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0 0x1
+#define GPU_ID_S_EAC 0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+ (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+ ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \
+ ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \
+ ((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+/* End GPU_ID register */
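For illustration, a sketch of composing a GPU_ID value with GPU_ID_MAKE() and extracting the product ID back out with the version masks above; the example values are arbitrary and the helper name is hypothetical:

	/* Hypothetical example: build a T60X major 0 / minor 1 (status 15DEV0)
	 * GPU_ID and pull the product ID field back out of it. */
	static inline u32 example_gpu_id_product(void)
	{
		u32 gpu_id = GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, GPU_ID_S_15DEV0);

		return (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
				GPU_ID_VERSION_PRODUCT_ID_SHIFT; /* == GPU_ID_PI_T60X */
	}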
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB (1u << 4)
+#define JS_FEATURE_VERTEX_JOB (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
+#define JS_FEATURE_TILER_JOB (1u << 7)
+#define JS_FEATURE_FUSED_JOB (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED 0
+#define IMPLEMENTATION_SILICON 1
+#define IMPLEMENTATION_FPGA 2
+#define IMPLEMENTATION_MODEL 3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT 256
+#define THREAD_MWS_DEFAULT 256
+#define THREAD_MBS_DEFAULT 256
+#define THREAD_MR_DEFAULT 1024
+#define THREAD_MTQ_DEFAULT 4
+#define THREAD_MTGS_DEFAULT 10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6)
+#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16)
+#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100755
index 000000000000..83a29cd304fb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int tgid,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ tgid,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, tgid)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->tgid = tgid;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int atom_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ atom_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, atom_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->atom_id = atom_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->atom_id,
+ __entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int active),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ active),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, active)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->active = active;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int state),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ state),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->state = state;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int pm_event_type,
+ unsigned int pm_event_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ pm_event_type,
+ pm_event_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, pm_event_type)
+ __field(unsigned int, pm_event_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->pm_event_type = pm_event_type;
+ __entry->pm_event_id = pm_event_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int atom_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ atom_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, atom_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->atom_id = atom_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int trans_code,
+ int trans_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ trans_code,
+ trans_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, trans_code)
+ __field(int, trans_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->trans_code = trans_code;
+ __entry->trans_id = trans_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100755
index 000000000000..5d9e0dd218ec
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,143 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <malisw/mali_stdtypes.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS-independent API to user-side code (UKU) which routes function calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS-specific device name, e.g. "gpu_base" -> "GPU0:". Pass this
+ * identifier to the uku_open() function to select a UKK client.
+ *
+ * When a new UKK client driver is created, a new identifier needs to be added to the uk_client_id
+ * enumeration, and the uku_open() implementation for the various OS ports needs to be updated to
+ * provide a mapping from the identifier to the OS-specific device name.
+ *
+ */
+enum uk_client_id {
+ /**
+ * Value used to identify the Base driver UK client.
+ */
+ UK_CLIENT_MALI_T600_BASE,
+
+ /** The number of uk clients supported. This must be the last member of the enum */
+ UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+ UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */
+ /**
+ * Each UK client numbers the functions they provide starting from
+ * number UK_FUNC_ID. This number is then eventually assigned to the
+ * id field of the union uk_header structure when preparing to make a
+ * UK call. See your UK client for a list of their function numbers.
+ */
+ UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and has executed the requested UK function, it uses
+ * the same header to store the result of the function in the form of a
+ * mali_error return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on both 32-bit and 64-bit kernels, and the structure has the same size on
+ * either. The uk_kernel_size_type type should be defined
+ * accordingly in the OS-specific mali_uk_os.h header file.
+ */
+union uk_header {
+ /**
+ * 32-bit number identifying the UK function to be called.
+ * Also see uk_func.
+ */
+ u32 id;
+ /**
+ * The mali_error return code returned by the called UK function.
+ * See the specification of the particular UK function you are
+ * calling for the meaning of the error codes returned. All
+ * UK functions return MALI_ERROR_NONE on success.
+ */
+ u32 ret;
+ /*
+ * Used to ensure 64-bit alignment of this union. Do not remove.
+ * This field is used for padding and does not need to be initialized.
+ */
+ u64 sizer;
+};
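+
+/*
+ * Illustrative use of the header above. This is a sketch only: "my_uk_args",
+ * "MY_FUNC_NUMBER" and the payload member are placeholder names, and the
+ * dispatch step depends on the OS-specific UK mechanism.
+ *
+ *	struct my_uk_args {
+ *		union uk_header header;
+ *		u32 payload;
+ *	} args;
+ *
+ *	args.header.id = UK_FUNC_ID + MY_FUNC_NUMBER;
+ *	args.payload = 0;
+ *	... dispatch the call via the OS-specific UK mechanism ...
+ *	if (args.header.ret != MALI_ERROR_NONE)
+ *		... handle the error reported by the UK function ...
+ */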
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+ union uk_header header;
+ /**< UK call header */
+ u16 major;
+ /**< This field carries the user-side major version on input and the kernel-side major version on output */
+ u16 minor;
+ /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+ u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _UK_H_ */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h
new file mode 100755
index 000000000000..9226d977f7a5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @addtogroup malisw
+ * @{
+ */
+
+/* ============================================================================
+ Description
+============================================================================ */
+/**
+ * @defgroup arm_cstd_coding_standard ARM C standard types and constants
+ * The common files are a set of standard headers which are used by all parts
+ * of this development, describing types and generic constants.
+ *
+ * Files in group:
+ * - arm_cstd.h
+ * - arm_cstd_compilers.h
+ * - arm_cstd_types.h
+ * - arm_cstd_types_rvct.h
+ * - arm_cstd_types_gcc.h
+ * - arm_cstd_types_msvc.h
+ * - arm_cstd_pack_push.h
+ * - arm_cstd_pack_pop.h
+ */
+
+/**
+ * @addtogroup arm_cstd_coding_standard
+ * @{
+ */
+
+#ifndef _ARM_CSTD_
+#define _ARM_CSTD_
+
+/* ============================================================================
+ Import standard C99 types
+============================================================================ */
+#include "arm_cstd_compilers.h"
+#include "arm_cstd_types.h"
+
+/* ============================================================================
+ Min and Max Values
+============================================================================ */
+#if !defined(INT8_MAX)
+ #define INT8_MAX ((int8_t) 0x7F)
+#endif
+#if !defined(INT8_MIN)
+ #define INT8_MIN (-INT8_MAX - 1)
+#endif
+
+#if !defined(INT16_MAX)
+ #define INT16_MAX ((int16_t)0x7FFF)
+#endif
+#if !defined(INT16_MIN)
+ #define INT16_MIN (-INT16_MAX - 1)
+#endif
+
+#if !defined(INT32_MAX)
+ #define INT32_MAX ((int32_t)0x7FFFFFFF)
+#endif
+#if !defined(INT32_MIN)
+ #define INT32_MIN (-INT32_MAX - 1)
+#endif
+
+#if !defined(INT64_MAX)
+ #define INT64_MAX ((int64_t)0x7FFFFFFFFFFFFFFFLL)
+#endif
+#if !defined(INT64_MIN)
+ #define INT64_MIN (-INT64_MAX - 1)
+#endif
+
+#if !defined(UINT8_MAX)
+ #define UINT8_MAX ((uint8_t) 0xFF)
+#endif
+
+#if !defined(UINT16_MAX)
+ #define UINT16_MAX ((uint16_t)0xFFFF)
+#endif
+
+#if !defined(UINT32_MAX)
+ #define UINT32_MAX ((uint32_t)0xFFFFFFFF)
+#endif
+
+#if !defined(UINT64_MAX)
+ #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
+#endif
+
+/* fallbacks if limits.h wasn't available */
+#if !defined(UCHAR_MAX)
+ #define UCHAR_MAX ((unsigned char)~0U)
+#endif
+
+#if !defined(SCHAR_MAX)
+ #define SCHAR_MAX ((signed char)(UCHAR_MAX >> 1))
+#endif
+#if !defined(SCHAR_MIN)
+ #define SCHAR_MIN ((signed char)(-SCHAR_MAX - 1))
+#endif
+
+#if !defined(USHRT_MAX)
+ #define USHRT_MAX ((unsigned short)~0U)
+#endif
+
+#if !defined(SHRT_MAX)
+ #define SHRT_MAX ((signed short)(USHRT_MAX >> 1))
+#endif
+#if !defined(SHRT_MIN)
+ #define SHRT_MIN ((signed short)(-SHRT_MAX - 1))
+#endif
+
+#if !defined(UINT_MAX)
+ #define UINT_MAX ((unsigned int)~0U)
+#endif
+
+#if !defined(INT_MAX)
+ #define INT_MAX ((signed int)(UINT_MAX >> 1))
+#endif
+#if !defined(INT_MIN)
+ #define INT_MIN ((signed int)(-INT_MAX - 1))
+#endif
+
+#if !defined(ULONG_MAX)
+ #define ULONG_MAX ((unsigned long)~0UL)
+#endif
+
+#if !defined(LONG_MAX)
+ #define LONG_MAX ((signed long)(ULONG_MAX >> 1))
+#endif
+#if !defined(LONG_MIN)
+ #define LONG_MIN ((signed long)(-LONG_MAX - 1))
+#endif
+
+#if !defined(ULLONG_MAX)
+ #define ULLONG_MAX ((unsigned long long)~0ULL)
+#endif
+
+#if !defined(LLONG_MAX)
+ #define LLONG_MAX ((signed long long)(ULLONG_MAX >> 1))
+#endif
+#if !defined(LLONG_MIN)
+ #define LLONG_MIN ((signed long long)(-LLONG_MAX - 1))
+#endif
+
+#if !defined(SIZE_MAX)
+ #if 1 == CSTD_CPU_32BIT
+ #define SIZE_MAX UINT32_MAX
+ #elif 1 == CSTD_CPU_64BIT
+ #define SIZE_MAX UINT64_MAX
+ #endif
+#endif
+
+/* ============================================================================
+ Keywords
+============================================================================ */
+/* Portable keywords. */
+
+#if !defined(CONST)
+/**
+ * @hideinitializer
+ * Variable is a C @c const, which can be made non-const for testing purposes.
+ */
+ #define CONST const
+#endif
+
+#if !defined(STATIC)
+/**
+ * @hideinitializer
+ * Variable is a C @c static, which can be made non-static for testing
+ * purposes.
+ */
+ #define STATIC static
+#endif
+
+/**
+ * Specifies a function as being exported outside of a logical module.
+ */
+#define PUBLIC
+
+/**
+ * @def PROTECTED
+ * Specifies a function which is internal to a logical module, but which
+ * should not be used outside of that module. This cannot be enforced by the
+ * compiler, as a module is typically more than one translation unit.
+ */
+#define PROTECTED
+
+/**
+ * Specifies a function as being internal to a translation unit. Private
+ * functions would typically be declared as STATIC, unless they are being
+ * exported for unit test purposes.
+ */
+#define PRIVATE STATIC
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE void [module]_compile_time_assertions( void )
+ * {
+ * COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT( expr ) \
+ do { switch(0){case 0: case (expr):;} } while( FALSE )
+
+/**
+ * @hideinitializer
+ * @deprecated Preferred form is @c CSTD_UNUSED
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * must support API backwards compatibility.
+ */
+#define UNUSED( x ) ((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * must support API backwards compatibility.
+ */
+#define CSTD_UNUSED( x ) ((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile-time macros turn a function-like macro into a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP( ... ) ((void)#__VA_ARGS__)
+
+/**
+ * @hideinitializer
+ * Function-like macro for converting a pointer into a u64 for storing in
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define CSTD_PTR_TO_U64( x ) ((uint64_t)((uintptr_t)(x)))
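+
+/*
+ * Illustrative usage of CSTD_PTR_TO_U64 (a sketch; "shared_desc" and
+ * "cpu_ptr" are placeholder names for a GPU-visible structure and a CPU
+ * pointer):
+ *
+ *	shared_desc->cpu_address = CSTD_PTR_TO_U64( cpu_ptr );
+ */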
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1( x ) #x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2( x ) CSTD_STR1( x )
+
+/**
+ * @hideinitializer
+ * Utility function for stripping the first character off a string.
+ */
+static INLINE char* arm_cstd_strstrip( char * string )
+{
+ return ++string;
+}
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro where the macro
+ * itself may not have a value. Parameter @c a should be set to any single
+ * character which is then stripped by the macro via an inline function. This
+ * should only be used via the @c CSTD_STR2N macro; for printing a single
+ * macro only the @c CSTD_STR1 macro is a better alternative.
+ *
+ * This macro requires run-time code to handle the case where the macro has
+ * no value (you can't concat empty strings in the preprocessor).
+ */
+#define CSTD_STR1N( a, x ) arm_cstd_strstrip( CSTD_STR1( a##x ) )
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a two level macro where the macro itself
+ * may not have a value.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2N( MY_MACRO )
+ * > "32"
+ *
+ * #define MY_MACRO
+ * CSTD_STR2N( MY_MACRO )
+ * > ""
+ * @endcode
+ */
+#define CSTD_STR2N( x ) CSTD_STR1N( _, x )
+
+/* ============================================================================
+ Validate portability constructs
+============================================================================ */
+static INLINE void arm_cstd_compile_time_assertions( void )
+{
+ CSTD_COMPILE_TIME_ASSERT( sizeof(uint8_t) == 1 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(int8_t) == 1 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(uint16_t) == 2 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(int16_t) == 2 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(uint32_t) == 4 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(int32_t) == 4 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(uint64_t) == 8 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(int64_t) == 8 );
+ CSTD_COMPILE_TIME_ASSERT( sizeof(intptr_t) == sizeof(uintptr_t) );
+
+ CSTD_COMPILE_TIME_ASSERT( 1 == TRUE );
+ CSTD_COMPILE_TIME_ASSERT( 0 == FALSE );
+
+#if 1 == CSTD_CPU_32BIT
+ CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == 4 );
+#elif 1 == CSTD_CPU_64BIT
+ CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == 8 );
+#endif
+
+}
+
+/* ============================================================================
+ Useful function-like macro
+============================================================================ */
+/**
+ * @brief Return the lesser of two values.
+ * As a macro it may evaluate its arguments more than once.
+ * @see CSTD_MAX
+ */
+#define CSTD_MIN( x, y ) ((x) < (y) ? (x) : (y))
+
+/**
+ * @brief Return the greater of two values.
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as CSTD_MIN it is guaranteed to return
+ * the one that CSTD_MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define CSTD_MAX( x, y ) ((x) < (y) ? (y) : (x))
+
+/**
+ * @brief Clamp value @c x to within @c min and @c max inclusive.
+ */
+#define CSTD_CLAMP( x, min, max ) ((x)<(min) ? (min):((x)>(max) ? (max):(x)))
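+
+/*
+ * Because CSTD_MIN, CSTD_MAX and CSTD_CLAMP are macros, arguments with side
+ * effects may be evaluated more than once. An illustrative hazard
+ * ("read_index" and "limit" are placeholder names):
+ *
+ *	next = CSTD_MIN( read_index++, limit );   // read_index may advance twice
+ *
+ * Pass plain values, or evaluate expressions with side effects beforehand.
+ */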
+
+/**
+ * Flag a cast as a reinterpretation, usually of a pointer type.
+ */
+#define CSTD_REINTERPRET_CAST(type) (type)
+
+/**
+ * Flag a cast as casting away const, usually of a pointer type.
+ */
+#define CSTD_CONST_CAST(type) (type)
+
+/**
+ * Flag a cast as a (potentially complex) value conversion, usually of a
+ * numerical type.
+ */
+#define CSTD_STATIC_CAST(type) (type)
+
+/* ============================================================================
+ Useful bit constants
+============================================================================ */
+/**
+ * @cond arm_cstd_utilities
+ */
+
+/* Common bit constant values, useful in embedded programming. */
+#define F_BIT_0 ((uint32_t)0x00000001)
+#define F_BIT_1 ((uint32_t)0x00000002)
+#define F_BIT_2 ((uint32_t)0x00000004)
+#define F_BIT_3 ((uint32_t)0x00000008)
+#define F_BIT_4 ((uint32_t)0x00000010)
+#define F_BIT_5 ((uint32_t)0x00000020)
+#define F_BIT_6 ((uint32_t)0x00000040)
+#define F_BIT_7 ((uint32_t)0x00000080)
+#define F_BIT_8 ((uint32_t)0x00000100)
+#define F_BIT_9 ((uint32_t)0x00000200)
+#define F_BIT_10 ((uint32_t)0x00000400)
+#define F_BIT_11 ((uint32_t)0x00000800)
+#define F_BIT_12 ((uint32_t)0x00001000)
+#define F_BIT_13 ((uint32_t)0x00002000)
+#define F_BIT_14 ((uint32_t)0x00004000)
+#define F_BIT_15 ((uint32_t)0x00008000)
+#define F_BIT_16 ((uint32_t)0x00010000)
+#define F_BIT_17 ((uint32_t)0x00020000)
+#define F_BIT_18 ((uint32_t)0x00040000)
+#define F_BIT_19 ((uint32_t)0x00080000)
+#define F_BIT_20 ((uint32_t)0x00100000)
+#define F_BIT_21 ((uint32_t)0x00200000)
+#define F_BIT_22 ((uint32_t)0x00400000)
+#define F_BIT_23 ((uint32_t)0x00800000)
+#define F_BIT_24 ((uint32_t)0x01000000)
+#define F_BIT_25 ((uint32_t)0x02000000)
+#define F_BIT_26 ((uint32_t)0x04000000)
+#define F_BIT_27 ((uint32_t)0x08000000)
+#define F_BIT_28 ((uint32_t)0x10000000)
+#define F_BIT_29 ((uint32_t)0x20000000)
+#define F_BIT_30 ((uint32_t)0x40000000)
+#define F_BIT_31 ((uint32_t)0x80000000)
+
+/* Common 2^n size values, useful in embedded programming. */
+#define C_SIZE_1B ((uint32_t)0x00000001)
+#define C_SIZE_2B ((uint32_t)0x00000002)
+#define C_SIZE_4B ((uint32_t)0x00000004)
+#define C_SIZE_8B ((uint32_t)0x00000008)
+#define C_SIZE_16B ((uint32_t)0x00000010)
+#define C_SIZE_32B ((uint32_t)0x00000020)
+#define C_SIZE_64B ((uint32_t)0x00000040)
+#define C_SIZE_128B ((uint32_t)0x00000080)
+#define C_SIZE_256B ((uint32_t)0x00000100)
+#define C_SIZE_512B ((uint32_t)0x00000200)
+#define C_SIZE_1KB ((uint32_t)0x00000400)
+#define C_SIZE_2KB ((uint32_t)0x00000800)
+#define C_SIZE_4KB ((uint32_t)0x00001000)
+#define C_SIZE_8KB ((uint32_t)0x00002000)
+#define C_SIZE_16KB ((uint32_t)0x00004000)
+#define C_SIZE_32KB ((uint32_t)0x00008000)
+#define C_SIZE_64KB ((uint32_t)0x00010000)
+#define C_SIZE_128KB ((uint32_t)0x00020000)
+#define C_SIZE_256KB ((uint32_t)0x00040000)
+#define C_SIZE_512KB ((uint32_t)0x00080000)
+#define C_SIZE_1MB ((uint32_t)0x00100000)
+#define C_SIZE_2MB ((uint32_t)0x00200000)
+#define C_SIZE_4MB ((uint32_t)0x00400000)
+#define C_SIZE_8MB ((uint32_t)0x00800000)
+#define C_SIZE_16MB ((uint32_t)0x01000000)
+#define C_SIZE_32MB ((uint32_t)0x02000000)
+#define C_SIZE_64MB ((uint32_t)0x04000000)
+#define C_SIZE_128MB ((uint32_t)0x08000000)
+#define C_SIZE_256MB ((uint32_t)0x10000000)
+#define C_SIZE_512MB ((uint32_t)0x20000000)
+#define C_SIZE_1GB ((uint32_t)0x40000000)
+#define C_SIZE_2GB ((uint32_t)0x80000000)
+
+/**
+ * @endcond
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+#endif /* End (_ARM_CSTD_) */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h
new file mode 100755
index 000000000000..55637cf8afc0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_compilers.h
@@ -0,0 +1,617 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _ARM_CSTD_COMPILERS_H_
+#define _ARM_CSTD_COMPILERS_H_
+
+/* ============================================================================
+ Document default definitions - assuming nothing set at this point.
+============================================================================ */
+/**
+ * @addtogroup arm_cstd_coding_standard
+ * @{
+ */
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if toolchain is Microsoft Visual Studio, 0
+ * otherwise.
+ */
+#define CSTD_TOOLCHAIN_MSVC 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if toolchain is the GNU Compiler Collection, 0
+ * otherwise.
+ */
+#define CSTD_TOOLCHAIN_GCC 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if toolchain is ARM RealView Compiler Tools, 0
+ * otherwise. Note - if running RVCT in GCC mode this define will be set to 0;
+ * @c CSTD_TOOLCHAIN_GCC and @c CSTD_TOOLCHAIN_RVCT_GCC_MODE will both be
+ * defined as 1.
+ */
+#define CSTD_TOOLCHAIN_RVCT 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if toolchain is ARM RealView Compiler Tools running
+ * in GCC mode, 0 otherwise.
+ */
+#define CSTD_TOOLCHAIN_RVCT_GCC_MODE 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if processor is an x86 32-bit machine, 0 otherwise.
+ */
+#define CSTD_CPU_X86_32 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if processor is an x86-64 (AMD64) machine, 0
+ * otherwise.
+ */
+#define CSTD_CPU_X86_64 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if processor is an ARM machine, 0 otherwise.
+ */
+#define CSTD_CPU_ARM 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if processor is an AARCH64 machine, 0 otherwise.
+ */
+#define CSTD_CPU_AARCH64 0
+
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if processor is a MIPS machine, 0 otherwise.
+ */
+#define CSTD_CPU_MIPS 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if CPU is 32-bit, 0 otherwise.
+ */
+#define CSTD_CPU_32BIT 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if CPU is 64-bit, 0 otherwise.
+ */
+#define CSTD_CPU_64BIT 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if processor configured as big-endian, 0 if it
+ * is little-endian.
+ */
+#define CSTD_CPU_BIG_ENDIAN 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a version of Windows, 0 if
+ * it is not.
+ */
+#define CSTD_OS_WINDOWS 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 32-bit version of Windows,
+ * 0 if it is not.
+ */
+#define CSTD_OS_WIN32 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 64-bit version of Windows,
+ * 0 if it is not.
+ */
+#define CSTD_OS_WIN64 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is Linux, 0 if it is not.
+ */
+#define CSTD_OS_LINUX 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if we are compiling Linux kernel code, 0 otherwise.
+ */
+#define CSTD_OS_LINUX_KERNEL 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 32-bit version of Linux,
+ * 0 if it is not.
+ */
+#define CSTD_OS_LINUX32 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 64-bit version of Linux,
+ * 0 if it is not.
+ */
+#define CSTD_OS_LINUX64 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is Android, 0 if it is not.
+ */
+#define CSTD_OS_ANDROID 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if we are compiling Android kernel code, 0 otherwise.
+ */
+#define CSTD_OS_ANDROID_KERNEL 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 32-bit version of Android,
+ * 0 if it is not.
+ */
+#define CSTD_OS_ANDROID32 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 64-bit version of Android,
+ * 0 if it is not.
+ */
+#define CSTD_OS_ANDROID64 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a version of Apple OS,
+ * 0 if it is not.
+ */
+#define CSTD_OS_APPLEOS 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 32-bit version of Apple OS,
+ * 0 if it is not.
+ */
+#define CSTD_OS_APPLEOS32 0
+
+/**
+ * @hideinitializer
+ * Defined with value of 1 if operating system is a 64-bit version of Apple OS,
+ * 0 if it is not.
+ */
+#define CSTD_OS_APPLEOS64 0
+
+/**
+ * @def CSTD_OS_SYMBIAN
+ * @hideinitializer
+ * Defined with value of 1 if operating system is Symbian, 0 if it is not.
+ */
+#define CSTD_OS_SYMBIAN 0
+
+/**
+ * @def CSTD_OS_NONE
+ * @hideinitializer
+ * Defined with value of 1 if there is no operating system (bare metal), 0
+ * otherwise
+ */
+#define CSTD_OS_NONE 0
+
+/* ============================================================================
+ Determine the compiler in use
+============================================================================ */
+
+/* Default empty definitions of the compiler-specific warning push/pop macros. These are overridden
+ * where applicable by the preprocessor defines below. */
+#define CSTD_PUSH_WARNING_GCC_WADDRESS
+#define CSTD_POP_WARNING_GCC_WADDRESS
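+
+/* Illustrative use of the push/pop pair above (a sketch; "some_extern_object"
+ * and "do_something" are placeholder names). The pair brackets code which
+ * deliberately compares an address that GCC's -Waddress would otherwise warn
+ * about:
+ *
+ *	CSTD_PUSH_WARNING_GCC_WADDRESS;
+ *	if (&some_extern_object != NULL)
+ *		do_something();
+ *	CSTD_POP_WARNING_GCC_WADDRESS;
+ */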
+
+#if defined(_MSC_VER)
+ #undef CSTD_TOOLCHAIN_MSVC
+ #define CSTD_TOOLCHAIN_MSVC 1
+
+#elif defined(__GNUC__)
+ #undef CSTD_TOOLCHAIN_GCC
+ #define CSTD_TOOLCHAIN_GCC 1
+
+ /* Detect RVCT pretending to be GCC. */
+ #if defined(__ARMCC_VERSION)
+ #undef CSTD_TOOLCHAIN_RVCT_GCC_MODE
+ #define CSTD_TOOLCHAIN_RVCT_GCC_MODE 1
+ #endif
+
+ #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6 && MALI_GCC_WORKAROUND_MIDCOM_4598 == 0)
+ /* As a workaround to MIDCOM-4598 (GCC internal defect), these pragmas are not compiled if the GCC version
+ * is within a certain range, or if a #define is enabled by the build system. For more, see a comment
+ * in the build system also referring to the MIDCOM issue mentioned, where the environment is updated
+ * for the GNU toolchain. */
+ #undef CSTD_PUSH_WARNING_GCC_WADDRESS
+ #define CSTD_PUSH_WARNING_GCC_WADDRESS \
+ do\
+ {\
+ _Pragma("GCC diagnostic push")\
+ _Pragma("GCC diagnostic ignored \"-Waddress\"")\
+ }while(MALI_FALSE)
+
+ #undef CSTD_POP_WARNING_GCC_WADDRESS
+ #define CSTD_POP_WARNING_GCC_WADDRESS \
+ do\
+ {\
+ _Pragma("GCC diagnostic pop")\
+ }while(MALI_FALSE)
+ #endif
+
+#elif defined(__ARMCC_VERSION)
+ #undef CSTD_TOOLCHAIN_RVCT
+ #define CSTD_TOOLCHAIN_RVCT 1
+
+#else
+ #warning "Unsupported or unknown toolchain"
+
+#endif
+
+/* ============================================================================
+ Determine the processor
+============================================================================ */
+#if 1 == CSTD_TOOLCHAIN_MSVC
+ #if defined(_M_IX86)
+ #undef CSTD_CPU_X86_32
+ #define CSTD_CPU_X86_32 1
+
+ #elif defined(_M_X64) || defined(_M_AMD64)
+ #undef CSTD_CPU_X86_64
+ #define CSTD_CPU_X86_64 1
+
+ #elif defined(_M_ARM)
+ #undef CSTD_CPU_ARM
+ #define CSTD_CPU_ARM 1
+
+ #elif defined(_M_MIPS)
+ #undef CSTD_CPU_MIPS
+ #define CSTD_CPU_MIPS 1
+
+ #else
+ #warning "Unsupported or unknown host CPU for MSVC tools"
+
+ #endif
+
+#elif 1 == CSTD_TOOLCHAIN_GCC
+ #if defined(__amd64__)
+ #undef CSTD_CPU_X86_64
+ #define CSTD_CPU_X86_64 1
+
+ #elif defined(__i386__)
+ #undef CSTD_CPU_X86_32
+ #define CSTD_CPU_X86_32 1
+
+ #elif defined(__arm__)
+ #undef CSTD_CPU_ARM
+ #define CSTD_CPU_ARM 1
+
+ #elif defined(__aarch64__)
+ #undef CSTD_CPU_AARCH64
+ #define CSTD_CPU_AARCH64 1
+
+ #elif defined(__mips__)
+ #undef CSTD_CPU_MIPS
+ #define CSTD_CPU_MIPS 1
+
+ #else
+ #warning "Unsupported or unknown host CPU for GCC tools"
+
+ #endif
+
+#elif 1 == CSTD_TOOLCHAIN_RVCT
+ #undef CSTD_CPU_ARM
+ #define CSTD_CPU_ARM 1
+
+#else
+ #warning "Unsupported or unknown toolchain"
+
+#endif
+
+/* ============================================================================
+ Determine the Processor Endianness
+============================================================================ */
+
+#if ((1 == CSTD_CPU_X86_32) || (1 == CSTD_CPU_X86_64))
+ /* Note: x86 and x86-64 are always little endian, so leave at default. */
+
+#elif 1 == CSTD_CPU_AARCH64
+ /* No big endian support? */
+
+#elif 1 == CSTD_TOOLCHAIN_RVCT
+ #if defined(__BIG_ENDIAN)
+ #undef CSTD_ENDIAN_BIG
+ #define CSTD_ENDIAN_BIG 1
+ #endif
+
+#elif ((1 == CSTD_TOOLCHAIN_GCC) && (1 == CSTD_CPU_ARM))
+ #if defined(__ARMEB__)
+ #undef CSTD_ENDIAN_BIG
+ #define CSTD_ENDIAN_BIG 1
+ #endif
+
+#elif ((1 == CSTD_TOOLCHAIN_GCC) && (1 == CSTD_CPU_MIPS))
+ #if defined(__MIPSEB__)
+ #undef CSTD_ENDIAN_BIG
+ #define CSTD_ENDIAN_BIG 1
+ #endif
+
+#elif 1 == CSTD_TOOLCHAIN_MSVC
+ /* Note: Microsoft only support little endian, so leave at default. */
+
+#else
+ #warning "Unsupported or unknown CPU"
+
+#endif
+
+/* ============================================================================
+ Determine the operating system and addressing width
+============================================================================ */
+#if 1 == CSTD_TOOLCHAIN_MSVC
+ #if defined(_WIN32) && !defined(_WIN64)
+ #undef CSTD_OS_WINDOWS
+ #define CSTD_OS_WINDOWS 1
+ #undef CSTD_OS_WIN32
+ #define CSTD_OS_WIN32 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+
+ #elif defined(_WIN32) && defined(_WIN64)
+ #undef CSTD_OS_WINDOWS
+ #define CSTD_OS_WINDOWS 1
+ #undef CSTD_OS_WIN64
+ #define CSTD_OS_WIN64 1
+ #undef CSTD_CPU_64BIT
+ #define CSTD_CPU_64BIT 1
+
+ #else
+ #warning "Unsupported or unknown host OS for MSVC tools"
+
+ #endif
+
+#elif 1 == CSTD_TOOLCHAIN_GCC
+ #if defined(_WIN32) && defined(_WIN64)
+ #undef CSTD_OS_WINDOWS
+ #define CSTD_OS_WINDOWS 1
+ #undef CSTD_OS_WIN64
+ #define CSTD_OS_WIN64 1
+ #undef CSTD_CPU_64BIT
+ #define CSTD_CPU_64BIT 1
+
+ #elif defined(_WIN32) && !defined(_WIN64)
+ #undef CSTD_OS_WINDOWS
+ #define CSTD_OS_WINDOWS 1
+ #undef CSTD_OS_WIN32
+ #define CSTD_OS_WIN32 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+
+ #elif defined(ANDROID)
+ #undef CSTD_OS_ANDROID
+ #define CSTD_OS_ANDROID 1
+
+ #if defined(__KERNEL__)
+ #undef CSTD_OS_ANDROID_KERNEL
+ #define CSTD_OS_ANDROID_KERNEL 1
+ #endif
+
+ #if defined(__LP64__) || defined(_LP64)
+ #undef CSTD_OS_ANDROID64
+ #define CSTD_OS_ANDROID64 1
+ #undef CSTD_CPU_64BIT
+ #define CSTD_CPU_64BIT 1
+ #else
+ #undef CSTD_OS_ANDROID32
+ #define CSTD_OS_ANDROID32 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+ #endif
+
+ #elif defined(__KERNEL__) || defined(__linux)
+ #undef CSTD_OS_LINUX
+ #define CSTD_OS_LINUX 1
+
+ #if defined(__KERNEL__)
+ #undef CSTD_OS_LINUX_KERNEL
+ #define CSTD_OS_LINUX_KERNEL 1
+ #endif
+
+ #if defined(__LP64__) || defined(_LP64)
+ #undef CSTD_OS_LINUX64
+ #define CSTD_OS_LINUX64 1
+ #undef CSTD_CPU_64BIT
+ #define CSTD_CPU_64BIT 1
+ #else
+ #undef CSTD_OS_LINUX32
+ #define CSTD_OS_LINUX32 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+ #endif
+
+ #elif defined(__APPLE__)
+ #undef CSTD_OS_APPLEOS
+ #define CSTD_OS_APPLEOS 1
+
+ #if defined(__LP64__) || defined(_LP64)
+ #undef CSTD_OS_APPLEOS64
+ #define CSTD_OS_APPLEOS64 1
+ #undef CSTD_CPU_64BIT
+ #define CSTD_CPU_64BIT 1
+ #else
+ #undef CSTD_OS_APPLEOS32
+ #define CSTD_OS_APPLEOS32 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+ #endif
+
+ #elif defined(__SYMBIAN32__)
+ #undef CSTD_OS_SYMBIAN
+ #define CSTD_OS_SYMBIAN 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+
+ #else
+ #undef CSTD_OS_NONE
+ #define CSTD_OS_NONE 1
+ #undef CSTD_CPU_32BIT
+ #define CSTD_CPU_32BIT 1
+
+#endif
+
+#elif 1 == CSTD_TOOLCHAIN_RVCT
+
+ #if defined(ANDROID)
+ #undef CSTD_OS_ANDROID
+ #undef CSTD_OS_ANDROID32
+ #define CSTD_OS_ANDROID 1
+ #define CSTD_OS_ANDROID32 1
+
+ #elif defined(__linux)
+ #undef CSTD_OS_LINUX
+ #undef CSTD_OS_LINUX32
+ #define CSTD_OS_LINUX 1
+ #define CSTD_OS_LINUX32 1
+
+ #elif defined(__SYMBIAN32__)
+ #undef CSTD_OS_SYMBIAN
+ #define CSTD_OS_SYMBIAN 1
+
+ #else
+ #undef CSTD_OS_NONE
+ #define CSTD_OS_NONE 1
+
+#endif
+
+#else
+ #warning "Unsupported or unknown host OS"
+
+#endif
+
+/* ============================================================================
+ Determine the correct linker symbol Import and Export Macros
+============================================================================ */
+/**
+ * @defgroup arm_cstd_linkage_specifiers Linkage Specifiers
+ * @{
+ *
+ * This set of macros contain system-dependent linkage specifiers which
+ * determine the visibility of symbols across DLL boundaries. A header for a
+ * particular DLL should define a set of local macros derived from these,
+ * and should not use these macros to decorate functions directly as there may
+ * be multiple DLLs being used.
+ *
+ * These DLL library local macros should be (with appropriate library prefix)
+ * <tt>[MY_LIBRARY]_API</tt>, <tt>[MY_LIBRARY]_IMPL</tt>, and
+ * <tt>[MY_LIBRARY]_LOCAL</tt>.
+ *
+ * - <tt>[MY_LIBRARY]_API</tt> should be used to decorate the function
+ * declarations in the header. It should be defined as either
+ * @c CSTD_LINK_IMPORT or @c CSTD_LINK_EXPORT, depending whether the
+ * current situation is a compile of the DLL itself (use export) or a
+ * compile of an external user of the DLL (use import).
+ * - <tt>[MY_LIBRARY]_IMPL</tt> should be defined as @c CSTD_LINK_IMPL
+ * and should be used to decorate the definition of functions in the C
+ * file.
+ * - <tt>[MY_LIBRARY]_LOCAL</tt> should be used to decorate function
+ * declarations which are exported across translation units within the
+ * DLL, but which are not exported outside of the DLL boundary.
+ *
+ * Functions which are @c static in either a C file or in a header file do not
+ * need any form of linkage decoration, and should therefore have no linkage
+ * macro applied to them.
+ */
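+
+/*
+ * An illustrative set of library-local macros derived from the specifiers
+ * below. "MY_LIBRARY" is the placeholder prefix and "MY_LIBRARY_BUILD" is an
+ * assumed build-system define marking a compile of the DLL itself:
+ *
+ *	#if defined(MY_LIBRARY_BUILD)
+ *		#define MY_LIBRARY_API  CSTD_LINK_EXPORT
+ *	#else
+ *		#define MY_LIBRARY_API  CSTD_LINK_IMPORT
+ *	#endif
+ *	#define MY_LIBRARY_IMPL   CSTD_LINK_IMPL
+ *	#define MY_LIBRARY_LOCAL  CSTD_LINK_LOCAL
+ */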
+
+/**
+ * @def CSTD_LINK_IMPORT
+ * Specifies a function as being imported to a translation unit across a DLL
+ * boundary.
+ */
+
+/**
+ * @def CSTD_LINK_EXPORT
+ * Specifies a function as being exported across a DLL boundary by a
+ * translation unit.
+ */
+
+/**
+ * @def CSTD_LINK_IMPL
+ * Specifies a function which will be exported across a DLL boundary as
+ * being implemented by a translation unit.
+ */
+
+/**
+ * @def CSTD_LINK_LOCAL
+ * Specifies a function which is internal to a DLL, and which should not be
+ * exported outside of it.
+ */
+
+/**
+ * @}
+ */
+
+#if 1 == CSTD_OS_LINUX
+ #define CSTD_LINK_IMPORT __attribute__((visibility("default")))
+ #define CSTD_LINK_EXPORT __attribute__((visibility("default")))
+ #define CSTD_LINK_IMPL __attribute__((visibility("default")))
+ #define CSTD_LINK_LOCAL __attribute__((visibility("hidden")))
+
+#elif 1 == CSTD_OS_WINDOWS
+ #define CSTD_LINK_IMPORT __declspec(dllimport)
+ #define CSTD_LINK_EXPORT __declspec(dllexport)
+ #define CSTD_LINK_IMPL __declspec(dllexport)
+ #define CSTD_LINK_LOCAL
+
+#elif 1 == CSTD_OS_SYMBIAN
+ #define CSTD_LINK_IMPORT IMPORT_C
+ #define CSTD_LINK_EXPORT IMPORT_C
+ #define CSTD_LINK_IMPL EXPORT_C
+ #define CSTD_LINK_LOCAL
+
+#elif 1 == CSTD_OS_APPLEOS
+ #define CSTD_LINK_IMPORT __attribute__((visibility("default")))
+ #define CSTD_LINK_EXPORT __attribute__((visibility("default")))
+ #define CSTD_LINK_IMPL __attribute__((visibility("default")))
+ #define CSTD_LINK_LOCAL __attribute__((visibility("hidden")))
+
+#elif 1 == CSTD_OS_ANDROID
+ #define CSTD_LINK_IMPORT __attribute__((visibility("default")))
+ #define CSTD_LINK_EXPORT __attribute__((visibility("default")))
+ #define CSTD_LINK_IMPL __attribute__((visibility("default")))
+ #define CSTD_LINK_LOCAL __attribute__((visibility("hidden")))
+
+#else /* CSTD_OS_NONE */
+ #define CSTD_LINK_IMPORT
+ #define CSTD_LINK_EXPORT
+ #define CSTD_LINK_IMPL
+ #define CSTD_LINK_LOCAL
+
+#endif
+
+/**
+ * @}
+ */
+
+#endif /* End (_ARM_CSTD_COMPILERS_H_) */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h
new file mode 100755
index 000000000000..20862ec1fa79
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_pop.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _ARM_CSTD_PACK_POP_H_
+#define _ARM_CSTD_PACK_POP_H_
+
+#if 1 == CSTD_TOOLCHAIN_MSVC
+ #include <poppack.h>
+#endif
+
+#endif /* End (_ARM_CSTD_PACK_POP_H_) */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h
new file mode 100755
index 000000000000..bc24e6942b11
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_pack_push.h
@@ -0,0 +1,27 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _ARM_CSTD_PACK_PUSH_H_
+#define _ARM_CSTD_PACK_PUSH_H_
+
+#if 1 == CSTD_TOOLCHAIN_MSVC
+ #include <pshpack1.h>
+#endif
+
+#endif /* End (_ARM_CSTD_PACK_PUSH_H_) */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h
new file mode 100755
index 000000000000..efeefa590c65
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _ARM_CSTD_TYPES_H_
+#define _ARM_CSTD_TYPES_H_
+
+#if 1 == CSTD_TOOLCHAIN_MSVC
+ #include "arm_cstd_types_msvc.h"
+#elif 1 == CSTD_TOOLCHAIN_GCC
+ #include "arm_cstd_types_gcc.h"
+#elif 1 == CSTD_TOOLCHAIN_RVCT
+ #include "arm_cstd_types_rvct.h"
+#else
+ #error "Toolchain not recognized"
+#endif
+
+#endif /* End (_ARM_CSTD_TYPES_H_) */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h
new file mode 100755
index 000000000000..679819655002
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_gcc.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _ARM_CSTD_TYPES_GCC_H_
+#define _ARM_CSTD_TYPES_GCC_H_
+
+/* ============================================================================
+ Type definitions
+============================================================================ */
+/* All modern versions of GCC support stdint outside of C99 Mode. */
+/* However, the Linux kernel limits which headers are available. */
+#if 1 == CSTD_OS_LINUX_KERNEL
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <linux/stddef.h>
+ #include <linux/version.h>
+
+	/* Fix up any types which CSTD provides but which Linux is missing. */
+ /* Note Linux assumes pointers are "long", so this is safe. */
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
+ typedef unsigned long uintptr_t;
+ #endif
+ typedef long intptr_t;
+
+#else
+ #include <stdint.h>
+ #include <stddef.h>
+ #include <limits.h>
+#endif
+
+typedef uint32_t bool_t;
+
+#if !defined(TRUE)
+ #define TRUE ((bool_t)1)
+#endif
+
+#if !defined(FALSE)
+ #define FALSE ((bool_t)0)
+#endif
+
+/* ============================================================================
+ Keywords
+============================================================================ */
+/* Doxygen documentation for these is in the RVCT header. */
+#define ASM __asm__
+
+#define INLINE __inline__
+
+#define FORCE_INLINE __attribute__((__always_inline__)) __inline__
+
+#define NEVER_INLINE __attribute__((__noinline__))
+
+#define PURE __attribute__((__pure__))
+
+#define PACKED __attribute__((__packed__))
+
+/* GCC does not support pointers to UNALIGNED data, so we do not define it to
+ * force a compile error if this macro is used. */
+
+#define RESTRICT __restrict__
+
+/* RVCT in GCC mode does not support the CHECK_RESULT attribute. */
+#if 0 == CSTD_TOOLCHAIN_RVCT_GCC_MODE
+ #define CHECK_RESULT __attribute__((__warn_unused_result__))
+#else
+ #define CHECK_RESULT
+#endif
+
+/* RVCT in GCC mode does not support the __func__ name outside of C99. */
+#if (0 == CSTD_TOOLCHAIN_RVCT_GCC_MODE)
+ #define CSTD_FUNC __func__
+#else
+ #define CSTD_FUNC __FUNCTION__
+#endif
+
+#endif /* End (_ARM_CSTD_TYPES_GCC_H_) */
diff --git a/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h
new file mode 100755
index 000000000000..a8efda0040a1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/arm_cstd/arm_cstd_types_rvct.h
@@ -0,0 +1,192 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _ARM_CSTD_TYPES_RVCT_H_
+#define _ARM_CSTD_TYPES_RVCT_H_
+
+/* ============================================================================
+ Type definitions
+============================================================================ */
+#include <stddef.h>
+#include <limits.h>
+
+#if 199901L <= __STDC_VERSION__
+ #include <inttypes.h>
+#else
+ typedef unsigned char uint8_t;
+ typedef signed char int8_t;
+ typedef unsigned short uint16_t;
+ typedef signed short int16_t;
+ typedef unsigned int uint32_t;
+ typedef signed int int32_t;
+ typedef unsigned __int64 uint64_t;
+ typedef signed __int64 int64_t;
+ typedef ptrdiff_t intptr_t;
+ typedef size_t uintptr_t;
+#endif
+
+typedef uint32_t bool_t;
+
+#if !defined(TRUE)
+ #define TRUE ((bool_t)1)
+#endif
+
+#if !defined(FALSE)
+ #define FALSE ((bool_t)0)
+#endif
+
+/* ============================================================================
+ Keywords
+============================================================================ */
+/**
+ * @addtogroup arm_cstd_coding_standard
+ * @{
+ */
+
+/**
+ * @def ASM
+ * @hideinitializer
+ * Mark an assembler block. Such blocks are often compiler specific, so often
+ * need to be surrounded in appropriate @c ifdef and @c endif blocks
+ * using the relevant @c CSTD_TOOLCHAIN macro.
+ */
+#define ASM __asm
+
+/**
+ * @def INLINE
+ * @hideinitializer
+ * Mark a definition as something which should be inlined. This is not always
+ * possible on a given compiler, and may be disabled at lower optimization
+ * levels.
+ */
+#define INLINE __inline
+
+/**
+ * @def FORCE_INLINE
+ * @hideinitializer
+ * Mark a definition as something which should be inlined. This provides a much
+ * stronger hint to the compiler than @c INLINE, and if supported should always
+ * result in an inlined function being emitted. If not supported this falls
+ * back to using the @c INLINE definition.
+ */
+#define FORCE_INLINE __forceinline
+
+/**
+ * @def NEVER_INLINE
+ * @hideinitializer
+ * Mark a definition as something which should not be inlined. This provides a
+ * stronger hint to the compiler that the function should not be inlined,
+ * bypassing any heuristic rules the compiler normally applies. If not
+ * supported by a toolchain this falls back to being an empty macro.
+ */
+#define NEVER_INLINE __declspec(noinline)
+
+/**
+ * @def PURE
+ * @hideinitializer
+ * Denotes that a function's return value depends only on its inputs, enabling
+ * more efficient optimizations. Falls back to an empty macro if not supported.
+ */
+#define PURE __pure
+
+/**
+ * @def PACKED
+ * @hideinitializer
+ * Denotes that a structure should be stored in a packed form. This macro must
+ * be used in conjunction with the @c arm_cstd_pack_* headers for portability:
+ *
+ * @code
+ * #include <cstd/arm_cstd_pack_push.h>
+ *
+ * struct PACKED myStruct {
+ * ...
+ * };
+ *
+ * #include <cstd/arm_cstd_pack_pop.h>
+ * @endcode
+ */
+#define PACKED __packed
+
+/**
+ * @def UNALIGNED
+ * @hideinitializer
+ * Denotes that a pointer points to a buffer with lower alignment than the
+ * natural alignment required by the C standard. This should only be used
+ * in extreme cases, as the emitted code is normally more efficient if memory
+ * is aligned.
+ *
+ * @warning This is \b NON-PORTABLE. The GNU tools do not support unaligned
+ * pointer qualifiers and have no equivalent construction.
+ */
+#define UNALIGNED __packed
+
+/**
+ * @def RESTRICT
+ * @hideinitializer
+ * Denotes that a pointer does not overlap with any other pointers currently in
+ * scope, increasing the range of optimizations which can be performed by the
+ * compiler.
+ *
+ * @warning Specification of @c RESTRICT is a contract between the programmer
+ * and the compiler. If you place @c RESTRICT on buffers which do actually
+ * overlap, the behavior is undefined and likely to vary at different
+ * optimization levels.
+ */
+#define RESTRICT __restrict
+
+/**
+ * @def CHECK_RESULT
+ * @hideinitializer
+ * Function attribute which causes a warning to be emitted if the function's
+ * return value is not used by the caller. Compiles to an empty macro if
+ * there is no supported mechanism for this check in the underlying compiler.
+ *
+ * @note At the time of writing this is only supported by GCC. RVCT does not
+ * support this attribute, even in GCC mode, so engineers are encouraged to
+ * compile their code using GCC even if primarily working with another
+ * compiler.
+ *
+ * @code
+ * CHECK_RESULT int my_func( void );
+ * @endcode
+ */
+#define CHECK_RESULT
+
+/**
+ * @def CSTD_FUNC
+ * Specify the @c CSTD_FUNC macro, a portable construct containing the name of
+ * the current function. On most compilers it is illegal to use this macro
+ * outside of a function scope. If not supported by the compiler we define
+ * @c CSTD_FUNC as an empty string.
+ *
+ * @warning Due to the implementation of this on most modern compilers this
+ * expands to a magically defined "static const" variable, not a constant
+ * string. This makes injecting @c CSTD_FUNC directly in to compile-time
+ * strings impossible, so if you want to make the function name part of a
+ * larger string you must use a printf-like function with a @c @%s template
+ * which is populated with @c CSTD_FUNC.
+ */
+#define CSTD_FUNC __FUNCTION__
+
+/**
+ * @}
+ */
+
+#endif /* End (_ARM_CSTD_TYPES_RVCT_H_) */
diff --git a/drivers/gpu/arm/midgard/malisw/mali_malisw.h b/drivers/gpu/arm/midgard/malisw/mali_malisw.h
new file mode 100755
index 000000000000..15faf15bf13a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/mali_malisw.h
@@ -0,0 +1,242 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+/**
+ * @file mali_malisw.h
+ * Driver-wide include for common macros and types.
+ */
+
+/**
+ * @defgroup malisw Mali software definitions and types
+ * @{
+ */
+
+#include <stddef.h>
+
+#include "mali_stdtypes.h"
+
+/** @brief Gets the container object when given a pointer to a member of an object. */
+#define CONTAINER_OF(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type,member)))
+
+/** @brief Gets the number of elements in the fixed-length array s. */
+#define NELEMS(s) (sizeof(s)/sizeof((s)[0]))
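+
+/*
+ * Illustrative use of the two helpers above ("my_item", "items" and
+ * "value_ptr" are placeholder names; value_ptr points at the value member of
+ * one of the array entries):
+ *
+ *	struct my_item { int key; int value; };
+ *	struct my_item items[8];
+ *
+ *	struct my_item *it = CONTAINER_OF( value_ptr, struct my_item, value );
+ *	size_t n = NELEMS( items );   // n == 8
+ */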
+
+/**
+ * @brief The lesser of two values.
+ * May evaluate its arguments more than once.
+ * @see CSTD_MIN
+ */
+#define MIN(x,y) CSTD_MIN(x,y)
+
+/**
+ * @brief The greater of two values.
+ * May evaluate its arguments more than once.
+ * @see CSTD_MAX
+ */
+#define MAX(x,y) CSTD_MAX(x,y)
+
+/**
+ * @brief Clamp value x to within min and max inclusive
+ * May evaluate its arguments more than once.
+ * @see CSTD_CLAMP
+ */
+#define CLAMP( x, min, max ) CSTD_CLAMP( x, min, max )
+
+/**
+ * @brief Convert a pointer into a u64 for storing in a data structure.
+ * This is commonly used when pairing a 32-bit CPU with a 64-bit peripheral,
+ * such as a Midgard GPU. C's type promotion is complex and a straight cast
+ * does not work reliably as pointers are often considered as signed.
+ */
+#define PTR_TO_U64( x ) CSTD_PTR_TO_U64( x )
+
+/**
+ * @name Mali library linkage specifiers
+ * These directly map to the cstd versions described in detail here: @ref arm_cstd_linkage_specifiers
+ * @{
+ */
+#define MALI_IMPORT CSTD_LINK_IMPORT
+#define MALI_EXPORT CSTD_LINK_EXPORT
+#define MALI_IMPL CSTD_LINK_IMPL
+#if defined(CONFIG_MALI_DEBUG) || !MALI_CUSTOMER_RELEASE
+#define MALI_LOCAL CSTD_LINK_EXPORT
+#else
+#define MALI_LOCAL CSTD_LINK_LOCAL
+#endif
+
+/** @brief Decorate exported function prototypes.
+ *
+ * The file containing the implementation of the function should define this to be MALI_EXPORT before including
+ * malisw/mali_malisw.h.
+ */
+#ifndef MALI_API
+#define MALI_API MALI_IMPORT
+#endif
+/** @} */
+
+/** @name Testable static functions
+ * @{
+ *
+ * These macros can be used to allow functions to be static in release builds but exported from a shared library in unit
+ * test builds, allowing them to be tested or used to assist testing.
+ *
+ * Example mali_foo_bar.c containing the function to test:
+ *
+ * @code
+ * #define MALI_API MALI_EXPORT
+ *
+ * #include <malisw/mali_malisw.h>
+ * #include "mali_foo_testable_statics.h"
+ *
+ * MALI_TESTABLE_STATIC_IMPL void my_func()
+ * {
+ * //Implementation
+ * }
+ * @endcode
+ *
+ * Example mali_foo_testable_statics.h:
+ *
+ * @code
+ * #if 1 == MALI_UNIT_TEST
+ * #include <malisw/mali_malisw.h>
+ *
+ * MALI_TESTABLE_STATIC_API void my_func();
+ *
+ * #endif
+ * @endcode
+ *
+ * Example mali_foo_tests.c:
+ *
+ * @code
+ * #include <foo/src/mali_foo_testable_statics.h>
+ *
+ * void my_test_func()
+ * {
+ * my_func();
+ * }
+ * @endcode
+ */
+
+/** @brief Decorate testable static function implementations.
+ *
+ * A header file containing a MALI_TESTABLE_STATIC_API-decorated prototype for each static function will be required
+ * when MALI_UNIT_TEST == 1 in order to link the function from the test.
+ */
+#if 1 == MALI_UNIT_TEST
+#define MALI_TESTABLE_STATIC_IMPL MALI_IMPL
+#else
+#define MALI_TESTABLE_STATIC_IMPL static
+#endif
+
+/** @brief Decorate testable static function prototypes.
+ *
+ * @note Prototypes should @em only be declared when MALI_UNIT_TEST == 1
+ */
+#define MALI_TESTABLE_STATIC_API MALI_API
+/** @} */
+
+/** @name Testable local functions
+ * @{
+ *
+ * These macros can be used to allow functions to be local to a shared library in release builds but be exported in unit
+ * test builds, allowing them to be tested or used to assist testing.
+ *
+ * Example mali_foo_bar.c containing the function to test:
+ *
+ * @code
+ * #define MALI_API MALI_EXPORT
+ *
+ * #include <malisw/mali_malisw.h>
+ * #include "mali_foo_bar.h"
+ *
+ * MALI_TESTABLE_LOCAL_IMPL void my_func()
+ * {
+ * //Implementation
+ * }
+ * @endcode
+ *
+ * Example mali_foo_bar.h:
+ *
+ * @code
+ * #include <malisw/mali_malisw.h>
+ *
+ * MALI_TESTABLE_LOCAL_API void my_func();
+ *
+ * @endcode
+ *
+ * Example mali_foo_tests.c:
+ *
+ * @code
+ * #include <foo/src/mali_foo_bar.h>
+ *
+ * void my_test_func()
+ * {
+ * my_func();
+ * }
+ * @endcode
+ */
+
+/** @brief Decorate testable local function implementations.
+ *
+ * This can be used to have a function normally local to the shared library except in debug builds where it will be
+ * exported.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define MALI_TESTABLE_LOCAL_IMPL MALI_IMPL
+#else
+#define MALI_TESTABLE_LOCAL_IMPL MALI_LOCAL
+#endif /* CONFIG_MALI_DEBUG */
+
+/** @brief Decorate testable local function prototypes.
+ *
+ * This can be used to have a function normally local to the shared library except in debug builds where it will be
+ * exported.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define MALI_TESTABLE_LOCAL_API MALI_API
+#else
+#define MALI_TESTABLE_LOCAL_API MALI_LOCAL
+#endif /* CONFIG_MALI_DEBUG */
+/** @} */
+
+/**
+ * Flag a cast as a reinterpretation, usually of a pointer type.
+ * @see CSTD_REINTERPRET_CAST
+ */
+#define REINTERPRET_CAST(type) CSTD_REINTERPRET_CAST(type)
+
+/**
+ * Flag a cast as casting away const, usually of a pointer type.
+ * @see CSTD_CONST_CAST
+ */
+#define CONST_CAST(type) (type) CSTD_CONST_CAST(type)
+
+/**
+ * Flag a cast as a (potentially complex) value conversion, usually of a numerical type.
+ * @see CSTD_STATIC_CAST
+ */
+#define STATIC_CAST(type) (type) CSTD_STATIC_CAST(type)
+
+
+/** @} */
+
+#endif /* _MALISW_H_ */
diff --git a/drivers/gpu/arm/midgard/malisw/mali_stdtypes.h b/drivers/gpu/arm/midgard/malisw/mali_stdtypes.h
new file mode 100755
index 000000000000..5dabe26ec01c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/malisw/mali_stdtypes.h
@@ -0,0 +1,265 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALISW_STDTYPES_H_
+#define _MALISW_STDTYPES_H_
+
+/**
+ * @file mali_stdtypes.h
+ * This file defines the standard types used by the Mali codebase.
+ */
+
+/**
+ * @addtogroup malisw
+ * @{
+ */
+
+/**
+ * @defgroup malisw_stdtypes Mali software standard types
+ *
+ * Basic driver-wide types.
+ */
+
+/**
+ * @addtogroup malisw_stdtypes
+ * @{
+ */
+
+#include "arm_cstd/arm_cstd.h"
+
+/**
+ * @name Scalar types.
+ * These are the scalar types used within the mali driver.
+ * @{
+ */
+/* Note: when building as part of the Linux kernel, avoid redefining these, as the kernel already provides them. */
+#if 0 == CSTD_OS_LINUX_KERNEL
+ typedef uint64_t u64;
+ typedef uint32_t u32;
+ typedef uint16_t u16;
+ typedef uint8_t u8;
+
+ typedef int64_t s64;
+ typedef int32_t s32;
+ typedef int16_t s16;
+ typedef int8_t s8;
+#endif
+
+typedef double f64;
+typedef float f32;
+typedef u16 f16;
+
+typedef u32 mali_fixed16_16;
+/* @} */
+
+/**
+ * @name Boolean types.
+ * The intended use is for bool8 to be used when storing boolean values in
+ * structures, casting to mali_bool to be used in code sections.
+ * @{
+ */
+typedef bool_t mali_bool;
+typedef u8 mali_bool8;
+
+#define MALI_FALSE FALSE
+#define MALI_TRUE TRUE
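+
+/* Illustrative sketch (the structure and function below are hypothetical):
+ * store the value as mali_bool8 inside a structure, operate on it as
+ * mali_bool in code.
+ *
+ * @code
+ * struct hypothetical_state { mali_bool8 enabled; };
+ *
+ * static void hypothetical_enable(struct hypothetical_state *state)
+ * {
+ *         if ((mali_bool)state->enabled == MALI_FALSE)
+ *                 state->enabled = (mali_bool8)MALI_TRUE;
+ * }
+ * @endcode
+ */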
+/* @} */
+
+/**
+ * @name Integer bounding values
+ * Maximum and minimum values for integer types
+ * @{
+ */
+#ifndef U64_MAX
+#define U64_MAX UINT64_MAX
+#endif
+
+#ifndef U32_MAX
+#define U32_MAX UINT32_MAX
+#endif
+
+#ifndef U16_MAX
+#define U16_MAX UINT16_MAX
+#endif
+
+#ifndef U8_MAX
+#define U8_MAX UINT8_MAX
+#endif
+
+#ifndef S64_MAX
+#define S64_MAX INT64_MAX
+#endif
+
+#ifndef S64_MIN
+#define S64_MIN INT64_MIN
+#endif
+
+#ifndef S32_MAX
+#define S32_MAX INT32_MAX
+#endif
+
+#ifndef S32_MIN
+#define S32_MIN INT32_MIN
+#endif
+
+#ifndef S16_MAX
+#define S16_MAX INT16_MAX
+#endif
+
+#ifndef S16_MIN
+#define S16_MIN INT16_MIN
+#endif
+
+#ifndef S8_MAX
+#define S8_MAX INT8_MAX
+#endif
+
+#ifndef S8_MIN
+#define S8_MIN INT8_MIN
+#endif
+
+/* @} */
+
+/**
+ * @name GPU address types
+ * Types for integers which hold a GPU pointer or GPU pointer offsets.
+ * @{
+ */
+typedef u64 mali_addr64;
+typedef u32 mali_addr32;
+typedef u64 mali_size64;
+typedef s64 mali_offset64;
+/* 32 bit offsets and sizes are always for native types and so use ptrdiff_t and size_t respectively */
+/* @} */
+
+/**
+ * @name Mali error types
+ * @brief The common error type for the mali drivers
+ * The mali_error type, all driver error handling should be of this type unless
+ * it must deal with a specific APIs error type.
+ * @{
+ */
+typedef enum
+{
+ /**
+ * @brief Common Mali errors for the entire driver
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ * @{
+ */
+ MALI_ERROR_NONE = 0,
+ MALI_ERROR_OUT_OF_GPU_MEMORY,
+ MALI_ERROR_OUT_OF_MEMORY,
+ MALI_ERROR_FUNCTION_FAILED,
+ /* @} */
+ /**
+ * @brief Mali errors for Client APIs to pass to EGL when creating EGLImages
+ * These errors must only be returned to EGL from one of the Client APIs as part of the
+ * (clientapi)_egl_image_interface.h
+ * @{
+ */
+ MALI_ERROR_EGLP_BAD_ACCESS,
+ MALI_ERROR_EGLP_BAD_PARAMETER,
+ /* @} */
+ /**
+ * @brief Mali errors for the MCL module.
+ * These errors must only be used within the private components of the OpenCL implementation that report
+ * directly to API functions for cases where errors cannot be detected in the entrypoints file. They must
+ * not be passed between driver components.
+ * These are errors in the mali error space specifically for the MCL module, hence the MCLP prefix.
+ * @{
+ */
+ MALI_ERROR_MCLP_DEVICE_NOT_FOUND,
+ MALI_ERROR_MCLP_DEVICE_NOT_AVAILABLE,
+ MALI_ERROR_MCLP_COMPILER_NOT_AVAILABLE,
+ MALI_ERROR_MCLP_MEM_OBJECT_ALLOCATION_FAILURE,
+ MALI_ERROR_MCLP_PROFILING_INFO_NOT_AVAILABLE,
+ MALI_ERROR_MCLP_MEM_COPY_OVERLAP,
+ MALI_ERROR_MCLP_IMAGE_FORMAT_MISMATCH,
+ MALI_ERROR_MCLP_IMAGE_FORMAT_NOT_SUPPORTED,
+ MALI_ERROR_MCLP_BUILD_PROGRAM_FAILURE,
+ MALI_ERROR_MCLP_MAP_FAILURE,
+ MALI_ERROR_MCLP_MISALIGNED_SUB_BUFFER_OFFSET,
+ MALI_ERROR_MCLP_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,
+ MALI_ERROR_MCLP_INVALID_VALUE,
+ MALI_ERROR_MCLP_INVALID_DEVICE_TYPE,
+ MALI_ERROR_MCLP_INVALID_PLATFORM,
+ MALI_ERROR_MCLP_INVALID_DEVICE,
+ MALI_ERROR_MCLP_INVALID_CONTEXT,
+ MALI_ERROR_MCLP_INVALID_QUEUE_PROPERTIES,
+ MALI_ERROR_MCLP_INVALID_COMMAND_QUEUE,
+ MALI_ERROR_MCLP_INVALID_HOST_PTR,
+ MALI_ERROR_MCLP_INVALID_MEM_OBJECT,
+ MALI_ERROR_MCLP_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+ MALI_ERROR_MCLP_INVALID_IMAGE_SIZE,
+ MALI_ERROR_MCLP_INVALID_SAMPLER,
+ MALI_ERROR_MCLP_INVALID_BINARY,
+ MALI_ERROR_MCLP_INVALID_BUILD_OPTIONS,
+ MALI_ERROR_MCLP_INVALID_PROGRAM,
+ MALI_ERROR_MCLP_INVALID_PROGRAM_EXECUTABLE,
+ MALI_ERROR_MCLP_INVALID_KERNEL_NAME,
+ MALI_ERROR_MCLP_INVALID_KERNEL_DEFINITION,
+ MALI_ERROR_MCLP_INVALID_KERNEL,
+ MALI_ERROR_MCLP_INVALID_ARG_INDEX,
+ MALI_ERROR_MCLP_INVALID_ARG_VALUE,
+ MALI_ERROR_MCLP_INVALID_ARG_SIZE,
+ MALI_ERROR_MCLP_INVALID_KERNEL_ARGS,
+ MALI_ERROR_MCLP_INVALID_WORK_DIMENSION,
+ MALI_ERROR_MCLP_INVALID_WORK_GROUP_SIZE,
+ MALI_ERROR_MCLP_INVALID_WORK_ITEM_SIZE,
+ MALI_ERROR_MCLP_INVALID_GLOBAL_OFFSET,
+ MALI_ERROR_MCLP_INVALID_EVENT_WAIT_LIST,
+ MALI_ERROR_MCLP_INVALID_EVENT,
+ MALI_ERROR_MCLP_INVALID_OPERATION,
+ MALI_ERROR_MCLP_INVALID_GL_OBJECT,
+ MALI_ERROR_MCLP_INVALID_BUFFER_SIZE,
+ MALI_ERROR_MCLP_INVALID_MIP_LEVEL,
+ MALI_ERROR_MCLP_INVALID_GLOBAL_WORK_SIZE,
+ MALI_ERROR_MCLP_INVALID_GL_SHAREGROUP_REFERENCE_KHR,
+ MALI_ERROR_MCLP_INVALID_EGL_OBJECT,
+ /* @} */
+ /**
+ * @brief Mali errors for the BASE module
+ * These errors must only be used within the private components of the Base implementation. They will not be
+ * passed to other modules by the base driver.
+ * These are errors in the mali error space specifically for the BASE module, hence the BASEP prefix.
+ * @{
+ */
+ MALI_ERROR_BASEP_INVALID_FUNCTION,
+ /* @} */
+ /** A dependency exists upon a resource that the client application wants to modify, so the driver must either
+ * create a copy of the resource (if possible) or block until the dependency has been satisfied.
+ */
+ MALI_ERROR_RESOURCE_IN_USE,
+
+ /**
+ * @brief A stride value was too big.
+ *
+ * A surface descriptor can store strides of up to 2<sup>31</sup>-1 bytes but strides greater than
+ * 2<sup>28</sup>-1 bytes cannot be expressed in bits without overflow.
+ */
+ MALI_ERROR_STRIDE_TOO_BIG
+
+} mali_error;
+/* @} */
+
+/* @} */
+
+/* @} */
+
+#endif /* _MALISW_STDTYPES_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/Kbuild b/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100755
index 000000000000..558657bbced9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+ obj-y += $(platform_name)/
+endif
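+
+# For example (hypothetical value): with CONFIG_MALI_PLATFORM_THIRDPARTY_NAME
+# set to "my_platform", the $(shell echo ...) above strips the quotes and the
+# resulting rule is equivalent to:
+#
+#   obj-y += my_platform/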
diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100755
index 000000000000..8fb4e917c4fa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
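+# For example (hypothetical platform name), the platform Kconfig would be
+# pulled in with a line such as:
+#
+#   source "drivers/gpu/arm/midgard/platform/my_platform/Kconfig"
+#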
+
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100755
index 000000000000..6e424017ffd0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-y += juno_mali_opp.o
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100755
index 000000000000..9a4c5a4c941e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,75 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/opp.h>
+#include <linux/scpi_protocol.h>
+
+#include <mali_kbase.h>
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+ struct scpi_ops *scpi;
+ struct scpi_dvfs_info *info;
+ int i;
+
+ scpi = get_scpi_ops();
+ if (!scpi)
+ return 0; /* Really need to defer until scpi available */
+
+ /* Hard coded for Juno. 2 is GPU domain */
+ info = scpi->dvfs_get_info(2);
+ if (IS_ERR_OR_NULL(info))
+ return PTR_ERR(info);
+
+ for (i = 0; i < info->count; i++) {
+ struct scpi_opp *e = &info->opps[i];
+ dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+ e->freq, e->m_volt);
+ opp_add(dev, e->freq, e->m_volt * 1000);
+ }
+
+ return 0;
+}
+
+int setup_opps(void)
+{
+ struct device_node *np;
+ struct platform_device *pdev;
+ int err;
+
+ np = of_find_node_by_name(NULL, "gpu");
+ if (!np) {
+ printk(KERN_ERR "Failed to find DT entry for Mali\n");
+ return -EFAULT;
+ }
+
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ printk(KERN_ERR "Failed to find device for Mali\n");
+ of_node_put(np);
+ return -EFAULT;
+ }
+
+ err = init_juno_opps_from_scpi(&pdev->dev);
+
+ of_node_put(np);
+
+ return err;
+}
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100755
index 000000000000..5a33d33e32e5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,156 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 65,
+ .mmu_irq_number = 66,
+ .gpu_irq_number = 64,
+ .io_memory_region = {
+ .start = 0x2D000000,
+ .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+static struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long juno_model_static_power(unsigned long voltage)
+{
+ struct thermal_zone_device *tz;
+ unsigned long temperature, temp;
+ unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+ const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+ tz = thermal_zone_get_zone_by_name("gpu");
+ if (IS_ERR(tz)) {
+ pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+ PTR_ERR(tz));
+ temperature = FALLBACK_STATIC_TEMPERATURE;
+ } else {
+ int ret;
+
+ ret = tz->ops->get_temp(tz, &temperature);
+ if (ret) {
+ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+ ret);
+ temperature = FALLBACK_STATIC_TEMPERATURE;
+ }
+ }
+
+ /* Calculate the temperature scaling factor to be applied to the
+ * voltage-scaled power.
+ */
+ temp = temperature / 1000;
+ temp_squared = temp * temp;
+ temp_cubed = temp_squared * temp;
+ temp_scaling_factor =
+ (2 * temp_cubed)
+ - (80 * temp_squared)
+ + (4700 * temp)
+ + 32000;
+
+ return (((coefficient * voltage_cubed) >> 20)
+ * temp_scaling_factor)
+ / 1000000;
+}
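+
+/*
+ * Worked example (illustrative only, with assumed inputs): for voltage =
+ * 820 (mV) and a temperature reading of 55000 (55 degrees C), the integer
+ * arithmetic above gives coefficient = 603, voltage_cubed = 538445 and
+ * temp_scaling_factor = 381250, so the function returns
+ * ((603 * 538445) >> 20) * 381250 / 1000000 = 309 * 381250 / 1000000 = 117,
+ * interpreted here as roughly 117 mW to match the dynamic model below.
+ */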
+
+static unsigned long juno_model_dynamic_power(unsigned long freq,
+ unsigned long voltage)
+{
+ /* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+ * The coefficient (c) is in mW/(MHz mV mV).
+ *
+ * This function calculates the dynamic power after this formula:
+ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+ */
+ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+ const unsigned long f_mhz = freq / 1000000; /* MHz */
+ const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+ return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
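+
+/*
+ * Worked example (illustrative only, with assumed inputs): at 820 mV and
+ * 600 MHz, v2 = (820 * 820) / 1000 = 672 and f_mhz = 600, so the function
+ * returns (3600 * 672 * 600) / 1000000 = 1451 mW.
+ */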
+
+struct devfreq_cooling_ops juno_model_ops = {
+ .get_static_power = juno_model_static_power,
+ .get_dynamic_power = juno_model_dynamic_power,
+};
+
+static struct kbase_attribute config_attributes[] = {
+ { KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS, 500 },
+ { KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS, ((uintptr_t)&pm_callbacks) },
+ { KBASE_CONFIG_ATTR_POWER_MODEL_CALLBACKS, ((uintptr_t)&juno_model_ops) },
+
+ { KBASE_CONFIG_ATTR_END, 0 }
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+ .attributes = config_attributes,
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100755
index 000000000000..196de7129a57
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100755
index 000000000000..b0d8e3249b80
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success, non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100755
index 000000000000..084a15613436
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100755
index 000000000000..81259485ee00
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100755
index 000000000000..90561fc5aa77
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+/* Versatile Express (VE) configuration defaults shared between config_attributes[]
+ * and config_attributes_hw_issue_8408[]. Settings are not shared for
+ * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS.
+ */
+#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 15000000u /* 15ms, an aggressive tick for testing purposes. This will reduce performance significantly */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 1 /* between 15ms and 30ms before soft-stop a job */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 1 /* between 15ms and 30ms before soft-stop a CL job */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 333 /* 5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 2000 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 166 /* 2.5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 100000 /* 1500s (25mins) before NSS hard-stop */
+#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 500 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 3000 /* 7.5s before resetting GPU - for issue 8401 */
+#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 500 /* 45s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 100166 /* 1502s before resetting GPU */
+
+#define KBASE_VE_JS_SCHEDULING_TICK_NS 1250000000u /* 1.25s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS 2 /* 2.5s before soft-stop a job */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 1 /* 1.25s before soft-stop a CL job */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS 4 /* 5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 24 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL 2 /* 2.5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 1200 /* 1500s before NSS hard-stop */
+#define KBASE_VE_JS_RESET_TICKS_SS 6 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401 36 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_RESET_TICKS_CL 3 /* 3.75s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_NSS 1201 /* 1502s before resetting GPU */
+
+#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */
+#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an aggressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */
+#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks)
+#define KBASE_VE_CPU_SPEED_FUNC ((uintptr_t)&kbase_get_vexpress_cpu_clock_speed)
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0xFC010000,
+ .end = 0xFC010000 + (4096 * 4) - 1
+ }
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+static struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */
+static struct kbase_attribute config_attributes[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+ KBASE_VE_CPU_SPEED_FUNC},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+/* Same as the config_attributes array above, except with the different
+ * settings for JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS that are needed
+ * for BASE_HW_ISSUE_8408.
+ */
+struct kbase_attribute config_attributes_hw_issue_8408[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+ KBASE_VE_CPU_SPEED_FUNC},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+ .attributes = config_attributes,
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100755
index 000000000000..3dd6c0f55046
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,180 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vexpress_cpu_clock_speed
+ * @brief Retrieves the CPU clock speed.
+ * The implementation is platform specific.
+ * @param[out] cpu_clock - the value of CPU clock speed in MHz
+ * @return 0 on success, 1 otherwise
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+
+
+ if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed)
+ {
+ *cpu_clock = cpu_clock_speed;
+ return 0;
+ }
+ else
+ {
+ int result = 0;
+ u32 reg_val = 0;
+ u32 osc2_value = 0;
+ u32 pa_divide = 0;
+ u32 pb_divide = 0;
+ u32 pc_divide = 0;
+ void __iomem *pSysCfgReg = NULL;
+ void __iomem *pSCCReg = NULL;
+
+ /* Init the value in case something goes wrong */
+ *cpu_clock = 0;
+
+ /* Map CPU register into virtual memory */
+ pSysCfgReg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+ if (pSysCfgReg == NULL) {
+ result = 1;
+
+ goto pSysCfgReg_map_failed;
+ }
+
+ pSCCReg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+ if (pSCCReg == NULL) {
+ result = 1;
+
+ goto pSCCReg_map_failed;
+ }
+
+ raw_spin_lock(&syscfg_lock);
+
+ /*Read SYS regs - OSC2 */
+ reg_val = readl(pSysCfgReg + SYS_CFGCTRL_OFFSET);
+
+ /* Verify that no other request is in progress */
+ if (!(reg_val & SYS_CFGCTRL_START_BIT_VALUE)) {
+ /* Reset the CFGSTAT reg */
+ writel(0, (pSysCfgReg + SYS_CFGSTAT_OFFSET));
+
+ writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE | DCC_DEFAULT_BIT_VALUE | SYS_CFG_OSC_FUNC_BIT_VALUE | SITE_DEFAULT_BIT_VALUE | BOARD_STACK_POS_DEFAULT_BIT_VALUE | DEVICE_DEFAULT_BIT_VALUE, (pSysCfgReg + SYS_CFGCTRL_OFFSET));
+ /* Wait for the transaction to complete */
+ while (!(readl(pSysCfgReg + SYS_CFGSTAT_OFFSET) & SYS_CFG_COMPLETE_BIT_VALUE))
+ ;
+ /* Read SYS_CFGSTAT Register to get the status of submitted transaction */
+ reg_val = readl(pSysCfgReg + SYS_CFGSTAT_OFFSET);
+
+ /*------------------------------------------------------------------------------------------*/
+ /* Check for possible errors */
+ if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+ /* Error while setting register */
+ result = 1;
+ } else {
+ osc2_value = readl(pSysCfgReg + SYS_CFGDATA_OFFSET);
+ /* Read the SCC CFGRW0 register */
+ reg_val = readl(pSCCReg);
+
+ /*
+ Select the appropriate feed:
+ CFGRW0[0] - CLKOB
+ CFGRW0[1] - CLKOC
+ CFGRW0[2] - FACLK (CLK B FROM AXICLK PLL)
+ */
+ /* Calculate the FCLK */
+ if (IS_SINGLE_BIT_SET(reg_val, 0)) { /*CFGRW0[0] - CLKOB */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PA_DIVIDE_BIT_SHIFT)) >> FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[10:7] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PB_DIVIDE_BIT_SHIFT)) >> FCLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else {
+ if (IS_SINGLE_BIT_SET(reg_val, 1)) { /*CFGRW0[1] - CLKOC */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PA_DIVIDE_BIT_SHIFT)) >> FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[14:11] */
+ pc_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PC_DIVIDE_BIT_SHIFT)) >> FCLK_PC_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+ } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { /*CFGRW0[2] - FACLK */
+ /* CFGRW0[18:15] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PA_DIVIDE_BIT_SHIFT)) >> AXICLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[22:19] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PB_DIVIDE_BIT_SHIFT)) >> AXICLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else {
+ result = 1;
+ }
+ }
+ }
+ } else {
+ result = 1;
+ }
+ raw_spin_unlock(&syscfg_lock);
+ /* Convert result expressed in Hz to MHz units. */
+ *cpu_clock /= HZ_IN_MHZ;
+ if (!result)
+ {
+ cpu_clock_speed = *cpu_clock;
+ }
+
+ /* Unmap memory */
+ iounmap(pSCCReg);
+
+ pSCCReg_map_failed:
+ iounmap(pSysCfgReg);
+
+ pSysCfgReg_map_failed:
+
+ return result;
+ }
+}
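+
+/*
+ * Worked example (illustrative only, with assumed register values): if
+ * SYS_CFGDATA reports OSC2 as 50000000 (i.e. 50 MHz expressed in Hz) and
+ * CFGRW0 selects CLKOB with pa_divide = 0 and pb_divide = 4, then
+ * *cpu_clock = 50000000 * (0 + 1) / (4 + 1) = 10000000, which the final
+ * division by HZ_IN_MHZ reports as 10 MHz.
+ */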
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100755
index 000000000000..f607d1800a72
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpuprops_clock_speed_function.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100755
index 000000000000..d9bfabc28b6b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100755
index 000000000000..81259485ee00
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100755
index 000000000000..0ec2821d256f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,279 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+/* Versatile Express (VE) configuration defaults shared between config_attributes[]
+ * and config_attributes_hw_issue_8408[]. Settings are not shared for
+ * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS.
+ */
+#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 3000000000u /* 3s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 10 /* 30s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 5 /* 15s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 20 /* 60s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 120 /* 360s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 10 /* 30s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 6000 /* 18000s */
+#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 30 /* 180s */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 180 /* 540s */
+#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 30 /* 180s */
+#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 6010 /* 18030s*/
+
+#define KBASE_VE_JS_SCHEDULING_TICK_NS 3000000000u /* 3s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS 10 /* 30s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 5 /* 15s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS 20 /* 60s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 120 /* 360s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL 10 /* 30s */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 6000 /* 18000s */
+#define KBASE_VE_JS_RESET_TICKS_SS 30 /* 180s */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401 180 /* 540s */
+#define KBASE_VE_JS_RESET_TICKS_CL 30 /* 180s */
+#define KBASE_VE_JS_RESET_TICKS_NSS 6010 /* 18030s*/
+
+#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */
+#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an aggressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */
+#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks)
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0x2f010000,
+ .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+static struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */
+static struct kbase_attribute config_attributes[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+/* Same as the config_attributes array above, except with the different
+ * settings for JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS that are needed
+ * for BASE_HW_ISSUE_8408.
+ */
+struct kbase_attribute config_attributes_hw_issue_8408[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+ .attributes = config_attributes,
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100755
index 000000000000..0cb41ce8952d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100755
index 000000000000..ad6bfb2d59a8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 000000000000..f1921c02bb8c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+/* Versatile Express (VE) configuration defaults shared between config_attributes[]
+ * and config_attributes_hw_issue_8408[]. Settings are not shared for
+ * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS.
+ */
+#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 15000000u /* 15ms, an aggressive tick for testing purposes. This will reduce performance significantly */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 1 /* between 15ms and 30ms before soft-stop a job */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 1 /* between 15ms and 30ms before soft-stop a CL job */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 333 /* 5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 2000 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 166 /* 2.5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 100000 /* 1500s (25mins) before NSS hard-stop */
+#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 500 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 3000 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 500 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 100166 /* 1502s before resetting GPU */
+
+#define KBASE_VE_JS_SCHEDULING_TICK_NS 1250000000u /* 1.25s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS 2 /* 2.5s before soft-stop a job */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 1 /* 1.25s before soft-stop a CL job */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS 4 /* 5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 24 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL 2 /* 2.5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 1200 /* 1500s before NSS hard-stop */
+#define KBASE_VE_JS_RESET_TICKS_SS 6 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401 36 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_RESET_TICKS_CL 3 /* 3.75s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_NSS 1201 /* 1502s before resetting GPU */
+
+#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */
+#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an aggressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */
+#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks)
+#define KBASE_VE_CPU_SPEED_FUNC ((uintptr_t)&kbase_get_vexpress_cpu_clock_speed)
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 75,
+ .mmu_irq_number = 76,
+ .gpu_irq_number = 77,
+ .io_memory_region = {
+ .start = 0x2F000000,
+ .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+static struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */
+static struct kbase_attribute config_attributes[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+ KBASE_VE_CPU_SPEED_FUNC},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+/* As the config_attributes array above, except with different settings for
+ * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS, which
+ * are needed for BASE_HW_ISSUE_8408.
+ */
+struct kbase_attribute config_attributes_hw_issue_8408[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+ KBASE_VE_CPU_SPEED_FUNC},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+ .attributes = config_attributes,
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
+
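The job-scheduler limits defined in this file are tick counts rather than times: each limit takes effect after (tick count x JS_SCHEDULING_TICK_NS) of wall-clock time, which is how comments such as "5s before hard-stop" are derived; debug builds use the same arithmetic against the 15 ms JS_SCHEDULING_TICK_NS_DEBUG tick. A minimal, self-contained sketch of that arithmetic using the release-build values above (ticks_to_ms() is an illustrative helper, not part of the driver):

#include <stdio.h>
#include <stdint.h>

/* Release-build values from mali_kbase_config_vexpress.c above */
#define JS_SCHEDULING_TICK_NS 1250000000u	/* 1.25 s per scheduler tick */
#define JS_SOFT_STOP_TICKS    2
#define JS_HARD_STOP_TICKS_SS 4
#define JS_RESET_TICKS_SS     6

/* Illustrative helper: wall-clock milliseconds for a given tick count */
static uint64_t ticks_to_ms(uint32_t ticks)
{
	return ((uint64_t)ticks * JS_SCHEDULING_TICK_NS) / 1000000u;
}

int main(void)
{
	printf("soft-stop after %llu ms\n",
	       (unsigned long long)ticks_to_ms(JS_SOFT_STOP_TICKS));	/* 2500 ms */
	printf("hard-stop after %llu ms\n",
	       (unsigned long long)ticks_to_ms(JS_HARD_STOP_TICKS_SS));	/* 5000 ms */
	printf("GPU reset after %llu ms\n",
	       (unsigned long long)ticks_to_ms(JS_RESET_TICKS_SS));	/* 7500 ms */
	return 0;
}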
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 000000000000..1577f8cef787
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos) ((val) & (1 << (pos)))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vexpress_cpu_clock_speed
+ * @brief Retrieves the CPU clock speed.
+ * The implementation is platform specific.
+ * @param[out] cpu_clock - the value of CPU clock speed in MHz
+ * @return 0 on success, 1 otherwise
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+ /* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+ *cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 000000000000..f607d1800a72
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpuprops_clock_speed_function.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/Kbuild
new file mode 100755
index 000000000000..32c707030b0f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_platform.h
new file mode 100755
index 000000000000..ad576a533ef8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_platform.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 40000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 40000
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c
new file mode 100755
index 000000000000..c0ec1dd57f04
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,293 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/* Versatile Express (VE) configuration defaults shared between config_attributes[]
+ * and config_attributes_hw_issue_8408[]. Settings are not shared for
+ * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS.
+ */
+#define KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG 15000000u /* 15ms, an aggressive tick for testing purposes. This will reduce performance significantly */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG 1 /* between 15ms and 30ms before soft-stop a job */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG 1 /* between 15ms and 30ms before soft-stop a CL job */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG 333 /* 5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG 2000 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG 166 /* 2.5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG 100000 /* 1500s (25mins) before NSS hard-stop */
+#define KBASE_VE_JS_RESET_TICKS_SS_DEBUG 500 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG 3000 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_RESET_TICKS_CL_DEBUG 500 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_NSS_DEBUG 100166 /* 1502s before resetting GPU */
+
+#define KBASE_VE_JS_SCHEDULING_TICK_NS 1250000000u /* 1.25s */
+#define KBASE_VE_JS_SOFT_STOP_TICKS 2 /* 2.5s before soft-stop a job */
+#define KBASE_VE_JS_SOFT_STOP_TICKS_CL 1 /* 1.25s before soft-stop a CL job */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS 4 /* 5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_SS_8401 24 /* 30s before hard-stop, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_HARD_STOP_TICKS_CL 2 /* 2.5s before hard-stop */
+#define KBASE_VE_JS_HARD_STOP_TICKS_NSS 1200 /* 1500s before NSS hard-stop */
+#define KBASE_VE_JS_RESET_TICKS_SS 6 /* 7.5s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_SS_8401 36 /* 45s before resetting GPU, for a certain GLES2 test at 128x128 (bound by combined vertex+tiler job) - for issue 8401 */
+#define KBASE_VE_JS_RESET_TICKS_CL 3 /* 3.75s before resetting GPU */
+#define KBASE_VE_JS_RESET_TICKS_NSS 1201 /* 1502s before resetting GPU */
+
+#define KBASE_VE_JS_RESET_TIMEOUT_MS 3000 /* 3s before cancelling stuck jobs */
+#define KBASE_VE_JS_CTX_TIMESLICE_NS 1000000 /* 1ms - an aggressive timeslice for testing purposes (causes lots of scheduling out for >4 ctxs) */
+#define KBASE_VE_POWER_MANAGEMENT_CALLBACKS ((uintptr_t)&pm_callbacks)
+#define KBASE_VE_CPU_SPEED_FUNC ((uintptr_t)&kbase_get_vexpress_cpu_clock_speed)
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0xFC010000,
+ .end = 0xFC010000 + (4096 * 4) - 1
+ }
+
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+static struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+/* Please keep table config_attributes in sync with config_attributes_hw_issue_8408 */
+static struct kbase_attribute config_attributes[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS_CL,
+ KBASE_VE_JS_SOFT_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_CL,
+ KBASE_VE_JS_HARD_STOP_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_CL,
+ KBASE_VE_JS_RESET_TICKS_CL},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+ KBASE_VE_CPU_SPEED_FUNC},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+/* As the config_attributes array above, except with different settings for
+ * JS_HARD_STOP_TICKS_SS and JS_RESET_TICKS_SS, which
+ * are needed for BASE_HW_ISSUE_8408.
+ */
+struct kbase_attribute config_attributes_hw_issue_8408[] = {
+#ifdef CONFIG_MALI_DEBUG
+/* Use more aggressive scheduling timeouts in debug builds for testing purposes */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401_DEBUG},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS_DEBUG},
+#else /* CONFIG_MALI_DEBUG */
+/* In release builds same as the defaults but scaled for 5MHz FPGA */
+ {
+ KBASE_CONFIG_ATTR_JS_SCHEDULING_TICK_NS,
+ KBASE_VE_JS_SCHEDULING_TICK_NS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_SOFT_STOP_TICKS,
+ KBASE_VE_JS_SOFT_STOP_TICKS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_SS,
+ KBASE_VE_JS_HARD_STOP_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_HARD_STOP_TICKS_NSS,
+ KBASE_VE_JS_HARD_STOP_TICKS_NSS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_SS,
+ KBASE_VE_JS_RESET_TICKS_SS_8401},
+
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TICKS_NSS,
+ KBASE_VE_JS_RESET_TICKS_NSS},
+#endif /* CONFIG_MALI_DEBUG */
+ {
+ KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS,
+ KBASE_VE_JS_RESET_TIMEOUT_MS},
+
+ {
+ KBASE_CONFIG_ATTR_JS_CTX_TIMESLICE_NS,
+ KBASE_VE_JS_CTX_TIMESLICE_NS},
+
+ {
+ KBASE_CONFIG_ATTR_POWER_MANAGEMENT_CALLBACKS,
+ KBASE_VE_POWER_MANAGEMENT_CALLBACKS},
+
+ {
+ KBASE_CONFIG_ATTR_CPU_SPEED_FUNC,
+ KBASE_VE_CPU_SPEED_FUNC},
+
+ {
+ KBASE_CONFIG_ATTR_END,
+ 0}
+};
+
+static struct kbase_platform_config virtex7_platform_config = {
+ .attributes = config_attributes,
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &virtex7_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c
new file mode 100755
index 000000000000..f0036a345453
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,178 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos) ((val) & (1 << (pos)))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vexpress_cpu_clock_speed
+ * @brief Retrieves the CPU clock speed.
+ * The implementation is platform specific.
+ * @param[out] cpu_clock - the value of CPU clock speed in MHz
+ * @return 0 on success, 1 otherwise
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+
+
+ if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed)
+ {
+ *cpu_clock = cpu_clock_speed;
+ return 0;
+ }
+ else
+ {
+ int result = 0;
+ u32 reg_val = 0;
+ u32 osc2_value = 0;
+ u32 pa_divide = 0;
+ u32 pb_divide = 0;
+ u32 pc_divide = 0;
+ void *volatile pSysCfgReg = 0;
+ void *volatile pSCCReg = 0;
+
+		/* Init the value in case something goes wrong */
+ *cpu_clock = 0;
+
+ /* Map CPU register into virtual memory */
+ pSysCfgReg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+ if (pSysCfgReg == NULL) {
+ result = 1;
+
+ goto pSysCfgReg_map_failed;
+ }
+
+ pSCCReg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+ if (pSCCReg == NULL) {
+ result = 1;
+
+ goto pSCCReg_map_failed;
+ }
+
+ raw_spin_lock(&syscfg_lock);
+
+ /*Read SYS regs - OSC2 */
+ reg_val = readl(pSysCfgReg + SYS_CFGCTRL_OFFSET);
+
+		/* Verify that there is no other ongoing request */
+ if (!(reg_val & SYS_CFGCTRL_START_BIT_VALUE)) {
+			/* Reset the CFGSTAT reg */
+ writel(0, (pSysCfgReg + SYS_CFGSTAT_OFFSET));
+
+ writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE | DCC_DEFAULT_BIT_VALUE | SYS_CFG_OSC_FUNC_BIT_VALUE | SITE_DEFAULT_BIT_VALUE | BOARD_STACK_POS_DEFAULT_BIT_VALUE | DEVICE_DEFAULT_BIT_VALUE, (pSysCfgReg + SYS_CFGCTRL_OFFSET));
+ /* Wait for the transaction to complete */
+ while (!(readl(pSysCfgReg + SYS_CFGSTAT_OFFSET) & SYS_CFG_COMPLETE_BIT_VALUE))
+ ;
+ /* Read SYS_CFGSTAT Register to get the status of submitted transaction */
+ reg_val = readl(pSysCfgReg + SYS_CFGSTAT_OFFSET);
+
+ /*------------------------------------------------------------------------------------------*/
+ /* Check for possible errors */
+ if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+ /* Error while setting register */
+ result = 1;
+ } else {
+ osc2_value = readl(pSysCfgReg + SYS_CFGDATA_OFFSET);
+ /* Read the SCC CFGRW0 register */
+ reg_val = readl(pSCCReg);
+
+ /*
+ Select the appropriate feed:
+ CFGRW0[0] - CLKOB
+ CFGRW0[1] - CLKOC
+				   CFGRW0[2] - FACLK (CLKB FROM AXICLK PLL)
+ */
+ /* Calculate the FCLK */
+ if (IS_SINGLE_BIT_SET(reg_val, 0)) { /*CFGRW0[0] - CLKOB */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PA_DIVIDE_BIT_SHIFT)) >> FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[10:7] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PB_DIVIDE_BIT_SHIFT)) >> FCLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else {
+ if (IS_SINGLE_BIT_SET(reg_val, 1)) { /*CFGRW0[1] - CLKOC */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PA_DIVIDE_BIT_SHIFT)) >> FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[14:11] */
+ pc_divide = ((reg_val & (FEED_REG_BIT_MASK << FCLK_PC_DIVIDE_BIT_SHIFT)) >> FCLK_PC_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+ } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { /*CFGRW0[2] - FACLK */
+ /* CFGRW0[18:15] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PA_DIVIDE_BIT_SHIFT)) >> AXICLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[22:19] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK << AXICLK_PB_DIVIDE_BIT_SHIFT)) >> AXICLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else {
+ result = 1;
+ }
+ }
+ }
+ } else {
+ result = 1;
+ }
+ raw_spin_unlock(&syscfg_lock);
+		/* Convert result expressed in Hz to MHz units. */
+ *cpu_clock /= HZ_IN_MHZ;
+ if (!result)
+ {
+ cpu_clock_speed = *cpu_clock;
+ }
+
+ /* Unmap memory */
+ iounmap(pSCCReg);
+
+ pSCCReg_map_failed:
+ iounmap(pSysCfgReg);
+
+ pSysCfgReg_map_failed:
+
+ return result;
+ }
+}
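kbase_get_vexpress_cpu_clock_speed() above derives the CPU clock from the OSC2 value read back through SYS_CFGDATA and the PA/PB divider fields packed into the SCC CFGRW0 register, using FCLK = OSC2 * (PA + 1) / (PB + 1). A standalone sketch of the same field extraction and arithmetic, with hypothetical register values chosen only for illustration (nothing here is read from hardware):

#include <stdio.h>
#include <stdint.h>

#define HZ_IN_MHZ			1000000u
#define FEED_REG_BIT_MASK		0x0Fu
#define FCLK_PA_DIVIDE_BIT_SHIFT	0x03
#define FCLK_PB_DIVIDE_BIT_SHIFT	0x07

int main(void)
{
	/* Hypothetical values: OSC2 reports 50 MHz, CFGRW0 selects CLKOB
	 * (bit 0 set) with PA divider field = 1 and PB divider field = 0. */
	uint32_t osc2_value = 50u * HZ_IN_MHZ;
	uint32_t cfgrw0 = (1u << FCLK_PA_DIVIDE_BIT_SHIFT) | 1u;

	/* Same field extraction as the driver performs on CFGRW0 */
	uint32_t pa = (cfgrw0 >> FCLK_PA_DIVIDE_BIT_SHIFT) & FEED_REG_BIT_MASK; /* CFGRW0[6:3] */
	uint32_t pb = (cfgrw0 >> FCLK_PB_DIVIDE_BIT_SHIFT) & FEED_REG_BIT_MASK; /* CFGRW0[10:7] */

	/* FCLK = OSC2 * (PA + 1) / (PB + 1), then convert Hz to MHz */
	uint32_t cpu_mhz = (uint32_t)((uint64_t)osc2_value * (pa + 1) / (pb + 1) / HZ_IN_MHZ);

	printf("cpu clock = %u MHz\n", cpu_mhz);	/* 100 MHz with these values */
	return 0;
}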
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h
new file mode 100755
index 000000000000..3f6c68ece392
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_virtex7_40mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpuprops_clock_speed_function.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100755
index 000000000000..daaa8c0198a5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions common to Linux OSs, used on the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session
+{
+ int dummy; /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H_ */
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b16c50ee769c..a9ca07916a94 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -79,6 +79,8 @@ config DRM_TDFX
Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
graphics card. If M is selected, the module will be called tdfx.
+source "drivers/gpu/drm/arm/Kconfig"
+
config DRM_R128
tristate "ATI Rage 128"
depends on DRM && PCI
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 1c9f24396002..daa41fcb63a8 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -30,6 +30,7 @@ CFLAGS_drm_trace_points.o := -I$(src)
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_USB) += drm_usb.o
+obj-$(CONFIG_DRM_ARM) += arm/
obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_TDFX) += tdfx/
obj-$(CONFIG_DRM_R128) += r128/
diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig
new file mode 100644
index 000000000000..acb046fec516
--- /dev/null
+++ b/drivers/gpu/drm/arm/Kconfig
@@ -0,0 +1,26 @@
+config DRM_ARM
+ bool "ARM Ltd. drivers"
+ depends on DRM && OF && (ARM || ARM64)
+ select DMA_CMA
+ select DRM_KMS_HELPER
+ select DRM_KMS_CMA_HELPER
+ select DRM_GEM_CMA_HELPER
+ select VIDEOMODE_HELPERS
+ help
+ Choose this option to select drivers for ARM's devices
+
+config DRM_HDLCD
+ tristate "ARM HDLCD"
+ depends on DRM_ARM
+ select I2C
+ help
+ Choose this option if you have an ARM High Definition Colour LCD
+ controller.
+
+ If M is selected the module will be called hdlcd.
+
+config DRM_VIRTUAL_HDLCD
+ bool "Support for virtual HDLCD"
+ depends on DRM_HDLCD
+ help
+ Enable support for virtual HDLCD as emulated by ARM's Fast Models.
diff --git a/drivers/gpu/drm/arm/Makefile b/drivers/gpu/drm/arm/Makefile
new file mode 100644
index 000000000000..429644c28a2f
--- /dev/null
+++ b/drivers/gpu/drm/arm/Makefile
@@ -0,0 +1,4 @@
+
+hdlcd-y := hdlcd_drv.o hdlcd_crtc.o hdlcd_hdmi_encoder.o hdlcd_vexpress_encoder.o hdlcd_fb.o
+hdlcd-$(CONFIG_DRM_VIRTUAL_HDLCD) += hdlcd_virt_encoder.o
+obj-$(CONFIG_DRM_HDLCD) += hdlcd.o
diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
new file mode 100644
index 000000000000..9a81467d72f4
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * Implementation of a CRTC class for the HDLCD driver.
+ */
+
+#include <linux/clk.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+
+#include "hdlcd_drv.h"
+#include "hdlcd_regs.h"
+
+/*
+ * The HDLCD controller is a dumb RGB streamer that gets connected to
+ * a single HDMI transmitter or, in the case of the ARM Models, it gets
+ * emulated by the software that does the actual rendering.
+ *
+ */
+static void hdlcd_crtc_destroy(struct drm_crtc *crtc)
+{
+ drm_crtc_cleanup(crtc);
+}
+
+void hdlcd_set_scanout(struct hdlcd_drm_private *hdlcd, bool wait)
+{
+ struct drm_framebuffer *fb = hdlcd->crtc.fb;
+ struct hdlcd_bo *bo;
+ unsigned int depth, bpp;
+ dma_addr_t scanout_start;
+ int ret;
+
+ drm_fb_get_bpp_depth(fb->pixel_format, &depth, &bpp);
+ bo = hdlcd->bo;
+
+ scanout_start = bo->dma_addr + fb->offsets[0] +
+ (hdlcd->crtc.y * fb->pitches[0]) + (hdlcd->crtc.x * bpp/8);
+
+ hdlcd_write(hdlcd, HDLCD_REG_FB_BASE, scanout_start);
+
+ if (wait && hdlcd->dpms == DRM_MODE_DPMS_ON) {
+ drm_vblank_get(fb->dev, 0);
+ hdlcd->frame_completion.done = 0;
+ do {
+ ret = wait_for_completion_interruptible_timeout(&hdlcd->frame_completion,
+ msecs_to_jiffies(50));
+ } while (ret <= 0);
+ drm_vblank_put(fb->dev, 0);
+ } else {
+ dev_info(fb->dev->dev, "%s: wait called with DPMS set to %d\n",
+ __func__, hdlcd->dpms);
+ }
+}
+
+static int hdlcd_crtc_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event)
+{
+ struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+
+ if (hdlcd->dpms == DRM_MODE_DPMS_ON) {
+ /* don't schedule any page flipping if one is in progress */
+ if (hdlcd->event)
+ return -EBUSY;
+
+ hdlcd->event = event;
+ drm_vblank_get(crtc->dev, 0);
+ }
+
+ crtc->fb = fb;
+
+ if (hdlcd->dpms == DRM_MODE_DPMS_ON) {
+ hdlcd_set_scanout(hdlcd, true);
+ } else {
+ unsigned long flags;
+
+ /* not active, update registers immediately */
+ hdlcd_set_scanout(hdlcd, false);
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ if (event)
+ drm_send_vblank_event(crtc->dev, 0, event);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ }
+
+ return 0;
+}
+
+static const struct drm_crtc_funcs hdlcd_crtc_funcs = {
+ .destroy = hdlcd_crtc_destroy,
+ .set_config = drm_crtc_helper_set_config,
+ .page_flip = hdlcd_crtc_page_flip,
+};
+
+static void hdlcd_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+
+ hdlcd->dpms = mode;
+ if (mode == DRM_MODE_DPMS_ON)
+ hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 1);
+ else
+ hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 0);
+}
+
+static bool hdlcd_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+static void hdlcd_crtc_prepare(struct drm_crtc *crtc)
+{
+ drm_vblank_pre_modeset(crtc->dev, 0);
+ hdlcd_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void hdlcd_crtc_commit(struct drm_crtc *crtc)
+{
+ drm_vblank_post_modeset(crtc->dev, 0);
+ hdlcd_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+}
+
+static bool hdlcd_fb_mode_equal(struct drm_framebuffer *oldfb,
+ struct drm_framebuffer *newfb)
+{
+ if (!oldfb || !newfb)
+ return false;
+
+ if (oldfb->pixel_format == newfb->pixel_format &&
+ oldfb->width == newfb->width &&
+ oldfb->height == newfb->height)
+ return true;
+
+ return false;
+}
+
+static int hdlcd_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *oldfb)
+{
+ struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+ unsigned int depth, bpp, polarities;
+ unsigned char red_width = 0, green_width = 0, blue_width = 0, alpha_width = 0;
+ unsigned int default_color = 0x00000000;
+
+#ifdef HDLCD_SHOW_UNDERRUN
+ default_color = 0x00ff000000;
+#endif
+
+ /* This function gets called when the only change is the start of
+ the scanout buffer. Detect that and bail out early */
+ if (hdlcd->initialised && hdlcd_fb_mode_equal(oldfb, crtc->fb)) {
+ hdlcd_set_scanout(hdlcd, true);
+ return 0;
+ }
+
+ /* Preset the number of bits per colour */
+ drm_fb_get_bpp_depth(crtc->fb->pixel_format, &depth, &bpp);
+ switch (depth) {
+ case 32:
+ alpha_width = 8;
+ case 24:
+ case 8: /* pseudocolor */
+ red_width = 8; green_width = 8; blue_width = 8;
+ break;
+ case 16: /* 565 format */
+ red_width = 5; green_width = 6; blue_width = 5;
+ break;
+ }
+
+ /* switch to using the more useful bytes per pixel */
+ bpp = (bpp + 7) / 8;
+
+ polarities = HDLCD_POLARITY_DATAEN | HDLCD_POLARITY_DATA;
+
+ if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
+ polarities |= HDLCD_POLARITY_HSYNC;
+ if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
+ polarities |= HDLCD_POLARITY_VSYNC;
+
+ /* Allow max number of outstanding requests and largest burst size */
+ hdlcd_write(hdlcd, HDLCD_REG_BUS_OPTIONS,
+ HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16);
+
+ hdlcd_write(hdlcd, HDLCD_REG_PIXEL_FORMAT, (bpp - 1) << 3);
+
+ hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, crtc->fb->width * bpp);
+ hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_COUNT, crtc->fb->height - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_PITCH, crtc->fb->width * bpp);
+ hdlcd_write(hdlcd, HDLCD_REG_V_BACK_PORCH,
+ mode->vtotal - mode->vsync_end - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_V_FRONT_PORCH,
+ mode->vsync_start - mode->vdisplay - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_V_SYNC,
+ mode->vsync_end - mode->vsync_start - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_V_DATA, mode->vdisplay - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_H_BACK_PORCH,
+ mode->htotal - mode->hsync_end - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_H_FRONT_PORCH,
+ mode->hsync_start - mode->hdisplay - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_H_SYNC,
+ mode->hsync_end - mode->hsync_start - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_H_DATA, mode->hdisplay - 1);
+ hdlcd_write(hdlcd, HDLCD_REG_POLARITIES, polarities);
+
+ /*
+ * The format of the HDLCD_REG_<color>_SELECT register is:
+ * - bits[23:16] - default value for that color component
+ * - bits[11:8] - number of bits to extract for each color component
+ * - bits[4:0] - index of the lowest bit to extract
+ *
+ * The default color value is used when bits[11:8] read zero, when the
+ * pixel is outside the visible frame area or when there is a
+ * buffer underrun.
+ */
+ hdlcd_write(hdlcd, HDLCD_REG_BLUE_SELECT, default_color |
+ alpha_width | /* offset */
+ (blue_width & 0xf) << 8);
+ hdlcd_write(hdlcd, HDLCD_REG_GREEN_SELECT, default_color |
+ (blue_width + alpha_width) | /* offset */
+ ((green_width & 0xf) << 8));
+ hdlcd_write(hdlcd, HDLCD_REG_RED_SELECT, default_color |
+ (blue_width + green_width + alpha_width) | /* offset */
+ ((red_width & 0xf) << 8));
+
+ clk_prepare(hdlcd->clk);
+ clk_set_rate(hdlcd->clk, mode->clock * 1000);
+ clk_enable(hdlcd->clk);
+
+ hdlcd_set_scanout(hdlcd, false);
+ hdlcd->initialised = true;
+
+ return 0;
+}
+
+int hdlcd_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *oldfb)
+{
+ struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+
+ hdlcd_set_scanout(hdlcd, true);
+ return 0;
+}
+
+static void hdlcd_crtc_load_lut(struct drm_crtc *crtc)
+{
+}
+
+static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = {
+ .dpms = hdlcd_crtc_dpms,
+ .mode_fixup = hdlcd_crtc_mode_fixup,
+ .prepare = hdlcd_crtc_prepare,
+ .commit = hdlcd_crtc_commit,
+ .mode_set = hdlcd_crtc_mode_set,
+ .mode_set_base = hdlcd_crtc_mode_set_base,
+ .load_lut = hdlcd_crtc_load_lut,
+};
+
+int hdlcd_setup_crtc(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ int ret;
+
+ drm_mode_config_init(dev);
+ hdlcd_drm_mode_config_init(dev);
+
+ ret = drm_crtc_init(dev, &hdlcd->crtc, &hdlcd_crtc_funcs);
+ if (ret < 0)
+ goto crtc_setup_err;
+
+ drm_crtc_helper_add(&hdlcd->crtc, &hdlcd_crtc_helper_funcs);
+
+ return 0;
+
+crtc_setup_err:
+ drm_mode_config_cleanup(dev);
+
+ return ret;
+}
+
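hdlcd_crtc_mode_set() above programs each HDLCD_REG_<colour>_SELECT register as offset | (width << 8) | default_color, matching the layout described in its comment (bits[4:0] index of the lowest bit, bits[11:8] component width, bits[23:16] default value). A minimal sketch of that packing for the depth-32 case handled above; hdlcd_select() is an illustrative helper, not part of the driver:

#include <stdio.h>
#include <stdint.h>

/* Pack one HDLCD_REG_<colour>_SELECT value:
 * bits[4:0]   = index of the lowest bit of the component,
 * bits[11:8]  = number of bits in the component,
 * bits[23:16] = default colour value (0 here). */
static uint32_t hdlcd_select(uint32_t offset, uint32_t width, uint32_t def)
{
	return def | offset | ((width & 0xf) << 8);
}

int main(void)
{
	/* Depth-32 case from hdlcd_crtc_mode_set(): 8 bits each for
	 * alpha, blue, green and red, default colour left at 0. */
	uint32_t alpha = 8, blue = 8, green = 8, red = 8;

	printf("BLUE_SELECT  = 0x%08x\n", hdlcd_select(alpha, blue, 0));		/* 0x00000808 */
	printf("GREEN_SELECT = 0x%08x\n", hdlcd_select(blue + alpha, green, 0));	/* 0x00000810 */
	printf("RED_SELECT   = 0x%08x\n", hdlcd_select(blue + green + alpha, red, 0));	/* 0x00000818 */
	return 0;
}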
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
new file mode 100644
index 000000000000..a3019ab75a40
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * ARM HDLCD Driver
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+
+#include "hdlcd_drv.h"
+#include "hdlcd_regs.h"
+
+
+static int hdlcd_unload(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+
+ drm_kms_helper_poll_fini(dev);
+ if (hdlcd->fb_helper)
+ drm_fb_helper_fini(hdlcd->fb_helper);
+
+ drm_vblank_cleanup(dev);
+ drm_mode_config_cleanup(dev);
+
+ drm_irq_uninstall(dev);
+
+ if (!IS_ERR(hdlcd->clk))
+ clk_put(hdlcd->clk);
+
+ platform_set_drvdata(dev->platformdev, NULL);
+
+ if (hdlcd->mmio)
+ iounmap(hdlcd->mmio);
+
+ dev->dev_private = NULL;
+ kfree(hdlcd);
+
+ return 0;
+}
+
+static int hdlcd_load(struct drm_device *dev, unsigned long flags)
+{
+ struct platform_device *pdev = dev->platformdev;
+ struct hdlcd_drm_private *hdlcd;
+ struct resource *res;
+ phandle slave_phandle;
+ u32 version;
+ int ret;
+
+ hdlcd = kzalloc(sizeof(*hdlcd), GFP_KERNEL);
+ if (!hdlcd) {
+ dev_err(dev->dev, "failed to allocate driver data\n");
+ return -ENOMEM;
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ atomic_set(&hdlcd->buffer_underrun_count, 0);
+ atomic_set(&hdlcd->bus_error_count, 0);
+ atomic_set(&hdlcd->vsync_count, 0);
+ atomic_set(&hdlcd->dma_end_count, 0);
+#endif
+ hdlcd->initialised = false;
+ dev->dev_private = hdlcd;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(dev->dev, "failed to get memory resource\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ hdlcd->mmio = ioremap_nocache(res->start, resource_size(res));
+ if (!hdlcd->mmio) {
+ dev_err(dev->dev, "failed to map control registers area\n");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ hdlcd->clk = clk_get(dev->dev, "pxlclk");
+ if (IS_ERR(hdlcd->clk)) {
+		dev_err(dev->dev, "unable to get a usable clock\n");
+ ret = PTR_ERR(hdlcd->clk);
+ goto fail;
+ }
+
+ if (of_property_read_u32(pdev->dev.of_node, "i2c-slave", &slave_phandle)) {
+ dev_warn(dev->dev, "no i2c-slave handle provided, disabling physical connector\n");
+ hdlcd->slave_node = NULL;
+ } else
+ hdlcd->slave_node = of_find_node_by_phandle(slave_phandle);
+
+ version = hdlcd_read(hdlcd, HDLCD_REG_VERSION);
+ if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
+ dev_err(dev->dev, "unknown product id: 0x%x\n", version);
+ ret = -EINVAL;
+ goto fail;
+ }
+ dev_info(dev->dev, "found ARM HDLCD version r%dp%d\n",
+ (version & HDLCD_VERSION_MAJOR_MASK) >> 8,
+ version & HDLCD_VERSION_MINOR_MASK);
+
+ ret = hdlcd_setup_crtc(dev);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to create crtc\n");
+ goto fail;
+ }
+
+ /*
+ * It only makes sense to create the virtual connector if we don't have
+ * a physical way of controlling output
+ */
+ if (hdlcd->slave_node) {
+ ret = hdlcd_create_digital_connector(dev, hdlcd);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to create digital connector, trying board setup: %d\n", ret);
+ ret = hdlcd_create_vexpress_connector(dev, hdlcd);
+ }
+
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to create board connector: %d\n", ret);
+ goto fail;
+ }
+ } else {
+ ret = hdlcd_create_virtual_connector(dev);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to create virtual connector: %d\n", ret);
+ goto fail;
+ }
+ }
+
+ ret = hdlcd_fbdev_init(dev);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to init the framebuffer (%d)\n", ret);
+ goto fail;
+ }
+
+ platform_set_drvdata(pdev, dev);
+
+ ret = drm_irq_install(dev);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to install IRQ handler\n");
+ goto fail;
+ }
+
+ init_completion(&hdlcd->frame_completion);
+ ret = drm_vblank_init(dev, 1);
+ if (ret < 0) {
+ dev_err(dev->dev, "failed to initialise vblank\n");
+ goto fail;
+ } else {
+ dev_info(dev->dev, "initialised vblank\n");
+ }
+
+ drm_kms_helper_poll_init(dev);
+
+ return 0;
+
+fail:
+ hdlcd_unload(dev);
+ return ret;
+}
+
+static void hdlcd_preclose(struct drm_device *dev, struct drm_file *file)
+{
+}
+
+static void hdlcd_lastclose(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+
+ drm_modeset_lock_all(dev);
+ if (hdlcd->fb_helper)
+ drm_fb_helper_restore_fbdev_mode(hdlcd->fb_helper);
+ drm_modeset_unlock_all(dev);
+}
+
+static irqreturn_t hdlcd_irq(int irq, void *arg)
+{
+ struct drm_device *dev = arg;
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ unsigned long irq_status;
+
+ irq_status = hdlcd_read(hdlcd, HDLCD_REG_INT_STATUS);
+
+#ifdef CONFIG_DEBUG_FS
+ if (irq_status & HDLCD_INTERRUPT_UNDERRUN) {
+ atomic_inc(&hdlcd->buffer_underrun_count);
+ }
+ if (irq_status & HDLCD_INTERRUPT_DMA_END) {
+ atomic_inc(&hdlcd->dma_end_count);
+ }
+ if (irq_status & HDLCD_INTERRUPT_BUS_ERROR) {
+ atomic_inc(&hdlcd->bus_error_count);
+ }
+ if (irq_status & HDLCD_INTERRUPT_VSYNC) {
+ atomic_inc(&hdlcd->vsync_count);
+ }
+#endif
+ if (irq_status & HDLCD_INTERRUPT_VSYNC) {
+ struct drm_pending_vblank_event *event;
+ unsigned long flags;
+
+ drm_handle_vblank(dev, 0);
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ if (hdlcd->event) {
+ event = hdlcd->event;
+ hdlcd->event = NULL;
+ drm_send_vblank_event(dev, 0, event);
+ drm_vblank_put(dev, 0);
+ }
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+ if (irq_status & HDLCD_INTERRUPT_DMA_END) {
+		/* send completion when reading the frame has finished */
+ complete_all(&hdlcd->frame_completion);
+ }
+
+ /* acknowledge interrupt(s) */
+ hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, irq_status);
+
+ return IRQ_HANDLED;
+}
+
+static void hdlcd_irq_preinstall(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ /* Ensure interrupts are disabled */
+ hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, 0);
+ hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, ~0);
+}
+
+static int hdlcd_irq_postinstall(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ unsigned int irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+#ifdef CONFIG_DEBUG_FS
+ /* enable debug interrupts */
+ irq_mask |= HDLCD_DEBUG_INT_MASK;
+#endif
+
+ /* enable DMA completion interrupts */
+ irq_mask |= HDLCD_INTERRUPT_DMA_END;
+ hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask);
+
+ return 0;
+}
+
+static void hdlcd_irq_uninstall(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ /* disable all the interrupts that we might have enabled */
+ unsigned int irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+#ifdef CONFIG_DEBUG_FS
+ /* disable debug interrupts */
+ irq_mask &= ~HDLCD_DEBUG_INT_MASK;
+#endif
+
+ /* disable vsync and dma interrupts */
+ irq_mask &= ~(HDLCD_INTERRUPT_VSYNC | HDLCD_INTERRUPT_DMA_END);
+
+ hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask);
+}
+
+static int hdlcd_enable_vblank(struct drm_device *dev, int crtc)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ unsigned int mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+ hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, mask | HDLCD_INTERRUPT_VSYNC);
+
+ return 0;
+}
+
+static void hdlcd_disable_vblank(struct drm_device *dev, int crtc)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ unsigned int mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+ hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, mask & ~HDLCD_INTERRUPT_VSYNC);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int hdlcd_show_underrun_count(struct seq_file *m, void *arg)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+
+ seq_printf(m, "underrun : %d\n", atomic_read(&hdlcd->buffer_underrun_count));
+ seq_printf(m, "dma_end : %d\n", atomic_read(&hdlcd->dma_end_count));
+ seq_printf(m, "bus_error: %d\n", atomic_read(&hdlcd->bus_error_count));
+ seq_printf(m, "vsync : %d\n", atomic_read(&hdlcd->vsync_count));
+ return 0;
+}
+
+static struct drm_info_list hdlcd_debugfs_list[] = {
+ { "interrupt_count", hdlcd_show_underrun_count, 0 },
+};
+
+static int hdlcd_debugfs_init(struct drm_minor *minor)
+{
+ return drm_debugfs_create_files(hdlcd_debugfs_list,
+ ARRAY_SIZE(hdlcd_debugfs_list), minor->debugfs_root, minor);
+}
+
+static void hdlcd_debugfs_cleanup(struct drm_minor *minor)
+{
+ drm_debugfs_remove_files(hdlcd_debugfs_list,
+ ARRAY_SIZE(hdlcd_debugfs_list), minor);
+}
+#endif
+
+static const struct file_operations hdlcd_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = drm_compat_ioctl,
+#endif
+ .mmap = drm_gem_cma_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = no_llseek,
+};
+
+int drm_gem_dumb_destroy(struct drm_file *file,
+ struct drm_device *dev,
+ uint32_t handle)
+{
+ return drm_gem_handle_delete(file, handle);
+}
+
+static struct drm_driver hdlcd_driver = {
+ .driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM |
+ DRIVER_MODESET | DRIVER_PRIME,
+ .load = hdlcd_load,
+ .unload = hdlcd_unload,
+ .preclose = hdlcd_preclose,
+ .lastclose = hdlcd_lastclose,
+ .irq_handler = hdlcd_irq,
+ .irq_preinstall = hdlcd_irq_preinstall,
+ .irq_postinstall = hdlcd_irq_postinstall,
+ .irq_uninstall = hdlcd_irq_uninstall,
+ .get_vblank_counter = drm_vblank_count,
+ .enable_vblank = hdlcd_enable_vblank,
+ .disable_vblank = hdlcd_disable_vblank,
+
+ .gem_free_object = drm_gem_cma_free_object,
+ .gem_vm_ops = &drm_gem_cma_vm_ops,
+
+ .dumb_create = drm_gem_cma_dumb_create,
+ .dumb_map_offset = drm_gem_cma_dumb_map_offset,
+ .dumb_destroy = drm_gem_dumb_destroy,
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_export = drm_gem_prime_export,
+ .gem_prime_import = drm_gem_prime_import,
+#ifdef CONFIG_DEBUG_FS
+ .debugfs_init = hdlcd_debugfs_init,
+ .debugfs_cleanup = hdlcd_debugfs_cleanup,
+#endif
+ .fops = &hdlcd_fops,
+
+ .name = "hdlcd",
+ .desc = "ARM HDLCD Controller DRM",
+ .date = "20130505",
+ .major = 1,
+ .minor = 0,
+};
+
+
+static int hdlcd_probe(struct platform_device *pdev)
+{
+ return drm_platform_init(&hdlcd_driver, pdev);
+}
+
+static int hdlcd_remove(struct platform_device *pdev)
+{
+ drm_put_dev(platform_get_drvdata(pdev));
+ return 0;
+}
+
+static struct of_device_id hdlcd_of_match[] = {
+ { .compatible = "arm,hdlcd" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, hdlcd_of_match);
+
+static struct platform_driver hdlcd_platform_driver = {
+ .probe = hdlcd_probe,
+ .remove = hdlcd_remove,
+ .driver = {
+ .name = "hdlcd",
+ .owner = THIS_MODULE,
+ .of_match_table = hdlcd_of_match,
+ },
+};
+
+static int __init hdlcd_init(void)
+{
+ return platform_driver_register(&hdlcd_platform_driver);
+}
+
+static void __exit hdlcd_exit(void)
+{
+ platform_driver_unregister(&hdlcd_platform_driver);
+}
+
+/* need late_initcall() so we load after i2c driver */
+late_initcall(hdlcd_init);
+module_exit(hdlcd_exit);
+
+MODULE_AUTHOR("Liviu Dudau");
+MODULE_DESCRIPTION("ARM HDLCD DRM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.h b/drivers/gpu/drm/arm/hdlcd_drv.h
new file mode 100644
index 000000000000..3fc3bddbb841
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_drv.h
@@ -0,0 +1,82 @@
+/*
+ * ARM HDLCD Controller register definition
+ */
+
+#ifndef __HDLCD_DRV_H__
+#define __HDLCD_DRV_H__
+
+struct hdlcd_bo {
+ struct drm_gem_object gem;
+ dma_addr_t dma_addr;
+ void *cpu_addr;
+};
+
+struct hdlcd_drm_private {
+ void __iomem *mmio;
+ struct clk *clk;
+ struct drm_fb_helper *fb_helper;
+ struct hdlcd_bo *bo;
+ struct drm_pending_vblank_event *event;
+ struct drm_crtc crtc;
+ struct device_node *slave_node;
+ struct completion frame_completion;
+#ifdef CONFIG_DEBUG_FS
+ atomic_t buffer_underrun_count;
+ atomic_t bus_error_count;
+ atomic_t vsync_count;
+ atomic_t dma_end_count;
+#endif
+ int dpms;
+ bool initialised;
+};
+
+#define to_hdlcd_bo_obj(x) container_of(x, struct hdlcd_bo, gem)
+#define crtc_to_hdlcd_priv(x) container_of(x, struct hdlcd_drm_private, crtc)
+
+static inline void
+hdlcd_write(struct hdlcd_drm_private *hdlcd, unsigned int reg, u32 value)
+{
+ writel(value, hdlcd->mmio + reg);
+}
+
+static inline u32 hdlcd_read(struct hdlcd_drm_private *hdlcd, unsigned int reg)
+{
+ return readl(hdlcd->mmio + reg);
+}
+
+/*
+ * Developers using HDLCD may wish to enable these settings if
+ * display disruption is apparent and they suspect HDLCD
+ * access to RAM may be starved.
+ *
+ * Turn HDLCD default color to red instead of default black so
+ * that it's easier to see data underruns (compared to other
+ * visual disruptions)
+ */
+#define HDLCD_SHOW_UNDERRUN
+
+/* setup the crtc subclass */
+int hdlcd_setup_crtc(struct drm_device *dev);
+
+/* functions for creating a suitable connector */
+extern int hdlcd_create_digital_connector(struct drm_device *dev,
+ struct hdlcd_drm_private *hdlcd);
+extern int hdlcd_create_vexpress_connector(struct drm_device *dev,
+ struct hdlcd_drm_private *hdlcd);
+#ifdef CONFIG_DRM_VIRTUAL_HDLCD
+extern int hdlcd_create_virtual_connector(struct drm_device *dev);
+#else
+static inline int hdlcd_create_virtual_connector(struct drm_device *dev)
+{
+ return -ENXIO;
+}
+#endif
+
+void hdlcd_set_scanout(struct hdlcd_drm_private *hdlcd, bool wait);
+void hdlcd_drm_mode_config_init(struct drm_device *dev);
+int hdlcd_fbdev_init(struct drm_device *dev);
+
+/* common function used by all connectors */
+extern struct drm_encoder *hdlcd_connector_best_encoder(struct drm_connector *con);
+
+#endif /* __HDLCD_DRV_H__ */
diff --git a/drivers/gpu/drm/arm/hdlcd_fb.c b/drivers/gpu/drm/arm/hdlcd_fb.c
new file mode 100644
index 000000000000..0e96361dc2a3
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_fb.c
@@ -0,0 +1,682 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * Implementation of the DRM fbdev compatibility mode for the HDLCD driver.
+ * Mainly used for doing dumb double buffering as expected by the ARM Mali driver.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <linux/dma-buf.h>
+
+#include "hdlcd_drv.h"
+#include "hdlcd_regs.h"
+
+#define MAX_FRAMES 2
+
+#define to_hdlcd_fb(x) container_of(x, struct hdlcd_fb, fb)
+
+struct hdlcd_dmabuf_attachment {
+ struct sg_table *sgt;
+ enum dma_data_direction dir;
+};
+
+static struct drm_framebuffer *hdlcd_fb_alloc(struct drm_device *dev,
+ struct drm_mode_fb_cmd2 *mode_cmd);
+
+static int hdlcd_dmabuf_attach(struct dma_buf *dma_buf,
+ struct device *target_dev, struct dma_buf_attachment *attach)
+{
+ struct hdlcd_dmabuf_attachment *hdlcd_attach;
+
+ hdlcd_attach = kzalloc(sizeof(*hdlcd_attach), GFP_KERNEL);
+ if (!hdlcd_attach)
+ return -ENOMEM;
+
+ hdlcd_attach->dir = DMA_NONE;
+ attach->priv = hdlcd_attach;
+
+ return 0;
+}
+
+static void hdlcd_dmabuf_detach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
+{
+ struct hdlcd_dmabuf_attachment *hdlcd_attach = attach->priv;
+ struct sg_table *sgt;
+
+ if (!hdlcd_attach)
+ return;
+
+ sgt = hdlcd_attach->sgt;
+ if (sgt) {
+ sg_free_table(sgt);
+ }
+
+ kfree(sgt);
+ kfree(hdlcd_attach);
+ attach->priv = NULL;
+}
+
+static void hdlcd_dmabuf_release(struct dma_buf *dma_buf)
+{
+ struct hdlcd_drm_private *hdlcd = dma_buf->priv;
+ struct drm_gem_object *obj = &hdlcd->bo->gem;
+
+ if (obj->export_dma_buf == dma_buf) {
+ /* drop the reference the export fd holds */
+ obj->export_dma_buf = NULL;
+ drm_gem_object_unreference_unlocked(obj);
+ }
+}
+
+static struct sg_table *hdlcd_map_dma_buf(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
+{
+ struct hdlcd_dmabuf_attachment *hdlcd_attach = attach->priv;
+ struct hdlcd_drm_private *hdlcd = attach->dmabuf->priv;
+ struct sg_table *sgt;
+ int size, ret;
+
+ if (dir == DMA_NONE || !hdlcd_attach)
+ return ERR_PTR(-EINVAL);
+
+ /* return the cached mapping when possible */
+ if (hdlcd_attach->dir == dir)
+ return hdlcd_attach->sgt;
+
+ /* don't allow two different directions for the same attachment */
+ if (hdlcd_attach->dir != DMA_NONE)
+ return ERR_PTR(-EBUSY);
+
+ size = hdlcd->bo->gem.size;
+
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ DRM_ERROR("Failed to allocate sg_table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (ret < 0) {
+ DRM_ERROR("Failed to allocate page table\n");
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg_dma_len(sgt->sgl) = size;
+ sg_set_page(sgt->sgl, pfn_to_page(PFN_DOWN(hdlcd->bo->dma_addr)), size, 0);
+ sg_dma_address(sgt->sgl) = hdlcd->bo->dma_addr;
+ ret = dma_map_sg(attach->dev, sgt->sgl, sgt->orig_nents, dir);
+ if (!ret) {
+ DRM_ERROR("failed to map sgl with IOMMU\n");
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-EIO);
+ }
+ hdlcd_attach->sgt = sgt;
+ hdlcd_attach->dir = dir;
+
+ return sgt;
+}
+
+static void hdlcd_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+ struct hdlcd_dmabuf_attachment *hdlcd_attach = attach->priv;
+
+ if (hdlcd_attach->dir != DMA_NONE)
+ dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+ sg_free_table(sgt);
+ kfree(sgt);
+ hdlcd_attach->sgt = NULL;
+ hdlcd_attach->dir = DMA_NONE;
+}
+
+static void *hdlcd_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
+{
+ return NULL;
+}
+
+static void hdlcd_dmabuf_kunmap(struct dma_buf *dma_buf,
+ unsigned long page_num, void *addr)
+{
+}
+
+static int hdlcd_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+ struct hdlcd_drm_private *hdlcd = dma_buf->priv;
+ struct drm_gem_object *obj = &hdlcd->bo->gem;
+ struct hdlcd_bo *bo = to_hdlcd_bo_obj(obj);
+ int ret;
+
+ mutex_lock(&obj->dev->struct_mutex);
+ ret = drm_gem_mmap_obj(obj, obj->size, vma);
+ mutex_unlock(&obj->dev->struct_mutex);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+ * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+ * the whole buffer.
+ */
+ vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_writecombine(obj->dev->dev, vma, bo->cpu_addr, bo->dma_addr,
+ vma->vm_end - vma->vm_start);
+ if (ret)
+ drm_gem_vm_close(vma);
+
+ return ret;
+}
+
+static void *hdlcd_dmabuf_vmap(struct dma_buf *dma_buf)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static void hdlcd_dmabuf_vunmap(struct dma_buf *dma_buf, void *cpu_addr)
+{
+}
+
+struct dma_buf_ops hdlcd_buf_ops = {
+ .attach = hdlcd_dmabuf_attach,
+ .detach = hdlcd_dmabuf_detach,
+ .map_dma_buf = hdlcd_map_dma_buf,
+ .unmap_dma_buf = hdlcd_unmap_dma_buf,
+ .release = hdlcd_dmabuf_release,
+ .kmap = hdlcd_dmabuf_kmap,
+ .kmap_atomic = hdlcd_dmabuf_kmap,
+ .kunmap = hdlcd_dmabuf_kunmap,
+ .kunmap_atomic = hdlcd_dmabuf_kunmap,
+ .mmap = hdlcd_dmabuf_mmap,
+ .vmap = hdlcd_dmabuf_vmap,
+ .vunmap = hdlcd_dmabuf_vunmap,
+};
+
+/*
+ * Used for sharing buffers with Mali userspace
+ */
+struct fb_dmabuf_export {
+ uint32_t fd;
+ uint32_t flags;
+};
+
+#define FBIOGET_DMABUF _IOR('F', 0x21, struct fb_dmabuf_export)
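+
+/*
+ * Illustrative userspace usage (not part of this patch): a client such as
+ * the Mali libraries can ask the fbdev emulation for a dma-buf fd roughly
+ * like this:
+ *
+ *	struct fb_dmabuf_export ebuf = { .flags = 0 };
+ *	int fb = open("/dev/fb0", O_RDWR);
+ *
+ *	if (fb >= 0 && ioctl(fb, FBIOGET_DMABUF, &ebuf) == 0)
+ *		use_dmabuf_fd(ebuf.fd);	// import into another driver/device
+ */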
+
+static int hdlcd_get_dmabuf_ioctl(struct fb_info *info, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct fb_dmabuf_export ebuf;
+ struct drm_fb_helper *helper = info->par;
+ struct hdlcd_drm_private *hdlcd = helper->dev->dev_private;
+ struct dma_buf *dma_buf;
+ int fd;
+
+ if (copy_from_user(&ebuf, argp, sizeof(ebuf)))
+ return -EFAULT;
+
+ dma_buf = dma_buf_export(hdlcd, &hdlcd_buf_ops,
+ info->screen_size, ebuf.flags | O_RDWR);
+ if (IS_ERR(dma_buf)) {
+ dev_info(info->dev, "Failed to export DMA buffer\n");
+ goto err_export;
+ }
+
+ fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+ if (fd < 0) {
+ dev_info(info->dev, "Failed to get file descriptor for DMA buffer\n");
+ goto err_export_fd;
+ }
+ ebuf.fd = fd;
+
+ if (copy_to_user(argp, &ebuf, sizeof(ebuf)))
+ goto err_export_fd;
+
+ return 0;
+
+err_export_fd:
+ dma_buf_put(dma_buf);
+err_export:
+ return -EFAULT;
+}
+
+static int hdlcd_wait_for_vsync(struct fb_info *info)
+{
+#if 0
+ struct drm_fb_helper *helper = info->par;
+ struct hdlcd_drm_private *hdlcd = helper->dev->dev_private;
+ int ret;
+
+ drm_vblank_get(helper->dev, 0);
+ reinit_completion(&hdlcd->frame_completion);
+ do {
+ ret = wait_for_completion_interruptible_timeout(&hdlcd->frame_completion,
+ msecs_to_jiffies(50));
+ } while (ret == -ERESTARTSYS);
+ drm_vblank_put(helper->dev, 0);
+ if (!ret)
+ return -ETIMEDOUT;
+#endif
+
+ return 0;
+}
+
+static int hdlcd_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case FBIOGET_DMABUF:
+ return hdlcd_get_dmabuf_ioctl(info, cmd, arg);
+ case FBIO_WAITFORVSYNC:
+ return hdlcd_wait_for_vsync(info);
+ default:
+ printk(KERN_INFO "HDLCD FB does not handle ioctl 0x%x\n", cmd);
+ }
+
+ return -ENOTTY;
+}
+
+static int hdlcd_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+ struct drm_fb_helper *helper = info->par;
+ struct hdlcd_drm_private *hdlcd = helper->dev->dev_private;
+ struct drm_gem_object *obj = &hdlcd->bo->gem;
+ struct hdlcd_bo *bo = to_hdlcd_bo_obj(obj);
+ DEFINE_DMA_ATTRS(attrs);
+ size_t vm_size;
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+
+ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+ vm_size = vma->vm_end - vma->vm_start;
+
+ if (vm_size > bo->gem.size)
+ return -EINVAL;
+
+ return dma_mmap_attrs(helper->dev->dev, vma, bo->cpu_addr,
+ bo->dma_addr, bo->gem.size, &attrs);
+}
+
+static int hdlcd_fb_check_var(struct fb_var_screeninfo *var,
+ struct fb_info *info)
+{
+ struct drm_fb_helper *fb_helper = info->par;
+ struct drm_framebuffer *fb = fb_helper->fb;
+ int depth;
+
+ if (var->pixclock != 0 || in_dbg_master())
+ return -EINVAL;
+
+ /* Need to resize the fb object !!! */
+ if (var->bits_per_pixel > fb->bits_per_pixel ||
+ var->xres > fb->width || var->yres > fb->height ||
+ var->xres_virtual > fb->width || var->yres_virtual > fb->height * MAX_FRAMES) {
+ DRM_DEBUG("fb userspace requested width/height/bpp is greater than current fb "
+ "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n",
+ var->xres, var->yres, var->bits_per_pixel,
+ var->xres_virtual, var->yres_virtual,
+ fb->width, fb->height, fb->bits_per_pixel);
+ return -EINVAL;
+ }
+
+ switch (var->bits_per_pixel) {
+ case 16:
+ depth = (var->green.length == 6) ? 16 : 15;
+ break;
+ case 32:
+ depth = (var->transp.length > 0) ? 32 : 24;
+ break;
+ default:
+ depth = var->bits_per_pixel;
+ break;
+ }
+
+ switch (depth) {
+ case 8:
+ var->red.offset = 0;
+ var->green.offset = 0;
+ var->blue.offset = 0;
+ var->red.length = 8;
+ var->green.length = 8;
+ var->blue.length = 8;
+ var->transp.length = 0;
+ var->transp.offset = 0;
+ break;
+ case 15:
+ var->red.offset = 10;
+ var->green.offset = 5;
+ var->blue.offset = 0;
+ var->red.length = 5;
+ var->green.length = 5;
+ var->blue.length = 5;
+ var->transp.length = 1;
+ var->transp.offset = 15;
+ break;
+ case 16:
+ var->red.offset = 11;
+ var->green.offset = 5;
+ var->blue.offset = 0;
+ var->red.length = 5;
+ var->green.length = 6;
+ var->blue.length = 5;
+ var->transp.length = 0;
+ var->transp.offset = 0;
+ break;
+ case 24:
+ var->red.offset = 16;
+ var->green.offset = 8;
+ var->blue.offset = 0;
+ var->red.length = 8;
+ var->green.length = 8;
+ var->blue.length = 8;
+ var->transp.length = 0;
+ var->transp.offset = 0;
+ break;
+ case 32:
+ var->red.offset = 16;
+ var->green.offset = 8;
+ var->blue.offset = 0;
+ var->red.length = 8;
+ var->green.length = 8;
+ var->blue.length = 8;
+ var->transp.length = 8;
+ var->transp.offset = 24;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct fb_ops hdlcd_fb_ops = {
+ .owner = THIS_MODULE,
+ .fb_fillrect = sys_fillrect,
+ .fb_copyarea = sys_copyarea,
+ .fb_imageblit = sys_imageblit,
+ .fb_check_var = hdlcd_fb_check_var,
+ .fb_set_par = drm_fb_helper_set_par,
+ .fb_blank = drm_fb_helper_blank,
+ .fb_pan_display = drm_fb_helper_pan_display,
+ .fb_setcmap = drm_fb_helper_setcmap,
+ .fb_ioctl = hdlcd_fb_ioctl,
+ .fb_compat_ioctl = hdlcd_fb_ioctl,
+ .fb_mmap = hdlcd_fb_mmap,
+};
+
+static struct hdlcd_bo *hdlcd_fb_bo_create(struct drm_device *drm, size_t size)
+{
+ int err;
+ struct hdlcd_bo *bo;
+ struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ DEFINE_DMA_ATTRS(attrs);
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+
+ size = round_up(size, PAGE_SIZE);
+ err = drm_gem_object_init(drm, &bo->gem, size);
+ if (err)
+ goto err_init;
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ bo->cpu_addr = dma_alloc_attrs(drm->dev, size, &bo->dma_addr,
+ GFP_KERNEL | __GFP_NOWARN, &attrs);
+
+ if (!bo->cpu_addr) {
+ dev_err(drm->dev, "failed to allocate buffer of size %zu\n", size);
+ err = -ENOMEM;
+ goto err_dma;
+ }
+
+#if 0
+ err = drm_gem_create_mmap_offset(&bo->gem);
+ if (err)
+ goto err_map;
+#endif
+
+ hdlcd->bo = bo;
+ return bo;
+
+#if 0
+err_map:
+ dma_free_attrs(drm->dev, size, bo->cpu_addr, bo->dma_addr, &attrs);
+#endif
+err_dma:
+ drm_gem_object_release(&bo->gem);
+err_init:
+ kfree(bo);
+
+ return ERR_PTR(err);
+}
+
+void hdlcd_fb_bo_free(struct hdlcd_bo *bo)
+{
+ DEFINE_DMA_ATTRS(attrs);
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+
+ drm_gem_object_release(&bo->gem);
+ dma_free_attrs(bo->gem.dev->dev, bo->gem.size, bo->cpu_addr, bo->dma_addr, &attrs);
+ kfree(bo);
+}
+
+static int hdlcd_fb_probe(struct drm_fb_helper *helper,
+ struct drm_fb_helper_surface_size *sizes)
+{
+ struct hdlcd_drm_private *hdlcd = helper->dev->dev_private;
+ struct fb_info *info;
+ struct hdlcd_bo *bo = hdlcd->bo;
+ struct drm_mode_fb_cmd2 cmd = { 0 };
+ unsigned int bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
+ unsigned long offset;
+ size_t size;
+ int err;
+
+ cmd.width = sizes->surface_width;
+ cmd.height = sizes->surface_height;
+ cmd.pitches[0] = ALIGN(sizes->surface_width * bytes_per_pixel, 64);
+ cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+ sizes->surface_depth);
+
+ size = PAGE_ALIGN(cmd.pitches[0] * cmd.height * MAX_FRAMES);
+
+ bo = hdlcd_fb_bo_create(helper->dev, size);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ info = framebuffer_alloc(0, helper->dev->dev);
+ if (!info) {
+ dev_err(helper->dev->dev, "failed to allocate framebuffer info\n");
+ hdlcd_fb_bo_free(bo);
+ return -ENOMEM;
+ }
+
+ helper->fb = hdlcd_fb_alloc(helper->dev, &cmd);
+ helper->fbdev = info;
+
+ info->par = helper;
+ info->flags = FBINFO_FLAG_DEFAULT;
+ info->fbops = &hdlcd_fb_ops;
+
+ err = fb_alloc_cmap(&info->cmap, 256, 0);
+ if (err < 0) {
+ dev_err(helper->dev->dev, "failed to allocate color map: %d\n", err);
+ goto cleanup;
+ }
+
+ drm_fb_helper_fill_fix(info, helper->fb->pitches[0], helper->fb->depth);
+ drm_fb_helper_fill_var(info, helper, helper->fb->width, sizes->surface_height);
+
+ offset = info->var.xoffset * bytes_per_pixel +
+ info->var.yoffset * helper->fb->pitches[0];
+
+ helper->dev->mode_config.fb_base = (resource_size_t)bo->dma_addr;
+ info->screen_base = (void __iomem *)bo->cpu_addr + offset;
+ info->screen_size = size;
+ info->var.yres_virtual = info->var.yres * MAX_FRAMES;
+ info->fix.smem_start = (unsigned long)bo->dma_addr + offset;
+ info->fix.smem_len = size;
+
+ return 0;
+
+cleanup:
+ drm_framebuffer_unregister_private(helper->fb);
+ hdlcd_fb_bo_free(bo);
+ framebuffer_release(info);
+ return err;
+}
+
+static struct drm_fb_helper_funcs hdlcd_fb_helper_funcs = {
+ .fb_probe = hdlcd_fb_probe,
+};
+
+static void hdlcd_fb_destroy(struct drm_framebuffer *fb)
+{
+ struct hdlcd_drm_private *hdlcd = fb->dev->dev_private;
+ drm_gem_object_unreference_unlocked(&hdlcd->bo->gem);
+ drm_framebuffer_cleanup(fb);
+ kfree(hdlcd->bo);
+}
+
+static int hdlcd_fb_create_handle(struct drm_framebuffer *fb,
+ struct drm_file *file_priv,
+ unsigned int *handle)
+{
+ struct hdlcd_drm_private *hdlcd = fb->dev->dev_private;
+ return drm_gem_handle_create(file_priv, &hdlcd->bo->gem, handle);
+}
+
+static int hdlcd_fb_dirty(struct drm_framebuffer *fb,
+ struct drm_file *file_priv, unsigned flags,
+ unsigned color, struct drm_clip_rect *clips,
+ unsigned num_clips)
+{
+ return 0;
+}
+
+static struct drm_framebuffer_funcs hdlcd_fb_funcs = {
+ .destroy = hdlcd_fb_destroy,
+ .create_handle = hdlcd_fb_create_handle,
+ .dirty = hdlcd_fb_dirty,
+};
+
+static struct drm_framebuffer *hdlcd_fb_alloc(struct drm_device *dev,
+ struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ int err;
+ struct drm_framebuffer *fb;
+
+ fb = kzalloc(sizeof(*fb), GFP_KERNEL);
+ if (!fb)
+ return ERR_PTR(-ENOMEM);
+
+ drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+ err = drm_framebuffer_init(dev, fb, &hdlcd_fb_funcs);
+ if (err) {
+ dev_err(dev->dev, "failed to initialize framebuffer.\n");
+ kfree(fb);
+ return ERR_PTR(err);
+ }
+
+ return fb;
+}
+
+static struct drm_framebuffer *hdlcd_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct drm_gem_object *obj;
+ struct hdlcd_bo *bo;
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+
+ obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+ if (!obj) {
+ dev_err(dev->dev, "failed to lookup GEM object\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ bo = to_hdlcd_bo_obj(obj);
+ hdlcd->fb_helper->fb = hdlcd_fb_alloc(dev, mode_cmd);
+ if (IS_ERR(hdlcd->fb_helper->fb)) {
+ dev_err(dev->dev, "failed to allocate DRM framebuffer\n");
+ }
+ hdlcd->bo = bo;
+
+ return hdlcd->fb_helper->fb;
+}
+
+int hdlcd_fbdev_init(struct drm_device *dev)
+{
+ int ret;
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+
+ hdlcd->fb_helper = kzalloc(sizeof(*hdlcd->fb_helper), GFP_KERNEL);
+ if (!hdlcd->fb_helper)
+ return -ENOMEM;
+
+ hdlcd->fb_helper->funcs = &hdlcd_fb_helper_funcs;
+ ret = drm_fb_helper_init(dev, hdlcd->fb_helper, dev->mode_config.num_crtc,
+ dev->mode_config.num_connector);
+ if (ret < 0) {
+ dev_err(dev->dev, "Failed to initialize DRM fb helper.\n");
+ goto err_free;
+ }
+
+ ret = drm_fb_helper_single_add_all_connectors(hdlcd->fb_helper);
+ if (ret < 0) {
+ dev_err(dev->dev, "Failed to add connectors.\n");
+ goto err_helper_fini;
+ }
+
+ drm_helper_disable_unused_functions(dev);
+
+ /* disable all the possible outputs/crtcs before entering KMS mode */
+ ret = drm_fb_helper_initial_config(hdlcd->fb_helper, 32);
+ if (ret < 0) {
+ dev_err(dev->dev, "Failed to set initial hw configuration.\n");
+ goto err_helper_fini;
+ }
+
+ return 0;
+
+err_helper_fini:
+ drm_fb_helper_fini(hdlcd->fb_helper);
+
+err_free:
+ kfree(hdlcd->fb_helper);
+ hdlcd->fb_helper = NULL;
+
+ return ret;
+}
+
+static void hdlcd_fb_output_poll_changed(struct drm_device *dev)
+{
+ struct hdlcd_drm_private *hdlcd = dev->dev_private;
+ struct drm_fb_helper *fb_helper = hdlcd->fb_helper;
+
+ drm_fb_helper_hotplug_event(fb_helper);
+}
+
+static const struct drm_mode_config_funcs hdlcd_mode_config_funcs = {
+ .fb_create = hdlcd_fb_create,
+ .output_poll_changed = hdlcd_fb_output_poll_changed,
+};
+
+
+void hdlcd_drm_mode_config_init(struct drm_device *dev)
+{
+ dev->mode_config.min_width = 0;
+ dev->mode_config.min_height = 0;
+ dev->mode_config.max_width = HDLCD_MAX_XRES;
+ dev->mode_config.max_height = HDLCD_MAX_YRES;
+ dev->mode_config.funcs = &hdlcd_mode_config_funcs;
+}
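Taken together, hdlcd_drm_mode_config_init() supplies the mode-config limits and funcs while hdlcd_fbdev_init() builds the fbdev emulation on top of the KMS objects. A rough load-time sequence, as a sketch only (the real probe code lives in hdlcd_drv.c earlier in this patch and may order things differently):

    static int hdlcd_load_sketch(struct drm_device *dev)
    {
    	int ret;

    	drm_mode_config_init(dev);
    	hdlcd_drm_mode_config_init(dev);	/* limits + hdlcd_mode_config_funcs */

    	ret = hdlcd_setup_crtc(dev);		/* CRTC plus a connector/encoder pair */
    	if (ret)
    		return ret;

    	return hdlcd_fbdev_init(dev);		/* fbdev emulation over the KMS setup */
    }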
diff --git a/drivers/gpu/drm/arm/hdlcd_hdmi_encoder.c b/drivers/gpu/drm/arm/hdlcd_hdmi_encoder.c
new file mode 100644
index 000000000000..5f6a4b4ff9f6
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_hdmi_encoder.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ */
+
+/*
+ * Theory of operation:
+ *
+ * The DRM framework expects the CRTC -> Encoder -> Connector chain,
+ * where the CRTC is reading from framebuffer, passes data to the
+ * encoder and that formats the signals to something usable by the
+ * attached connector(s). Connectors can use i2c links to talk with
+ * attached monitors.
+ *
+ * The HDMI transmitter is a different beast: it is both an encoder
+ * and a connector in DRM parlance *and* can only be reached via i2c.
+ * It implements an i2c pass through mode for the situation where one
+ * wants to talk with the attached monitor. To complicate things
+ * even further, the VExpress boards that have the SiI9022 chip share
+ * the i2c line between the on-board microcontroller and the CoreTiles.
+ * This leads to a situation where the microcontroller might be able to
+ * talk with the SiI9022 transmitter while the CoreTile cannot. And the
+ * micro has a very small brain and a list of hardcoded modes that
+ * it can program into the HDMI transmitter, so only a limited set
+ * of resolutions will be valid.
+ *
+ * This file handles only the case where the i2c connection is available
+ * to the kernel. For the case where we have to ask the microcontroller
+ * to do the modesetting for us see the hdlcd_vexpress_encoder.c file.
+ */
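+
+/*
+ * Sketch of how the two connector paths are chosen (the actual selection is
+ * made in hdlcd_drv.c, earlier in this patch, and may differ in detail):
+ *
+ *	if (hdlcd->slave_node)		// i2c slave reachable from the kernel
+ *		err = hdlcd_create_digital_connector(dev, hdlcd);
+ *	else
+ *		err = hdlcd_create_vexpress_connector(dev, hdlcd);
+ */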
+
+#include <linux/i2c.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_encoder_slave.h>
+#include <drm/i2c/tda998x.h>
+#include <video/display_timing.h>
+#include <video/of_display_timing.h>
+#include <video/videomode.h>
+
+#include "hdlcd_drv.h"
+
+static inline struct drm_encoder_slave *
+hdlcd_get_slave_encoder(struct drm_connector * connector)
+{
+ return to_encoder_slave(hdlcd_connector_best_encoder(connector));
+}
+
+static void hdlcd_connector_destroy(struct drm_connector *connector)
+{
+}
+
+static enum drm_connector_status
+hdlcd_connector_detect(struct drm_connector *connector, bool force)
+{
+ struct drm_encoder_slave *slave;
+ if (!connector->encoder)
+ connector->encoder = hdlcd_connector_best_encoder(connector);
+
+ slave = hdlcd_get_slave_encoder(connector);
+ if (!slave || !slave->slave_funcs)
+ return connector_status_unknown;
+
+ return slave->slave_funcs->detect(connector->encoder, connector);
+}
+
+static const struct drm_connector_funcs hdlcd_connector_funcs = {
+ .destroy = hdlcd_connector_destroy,
+ .dpms = drm_helper_connector_dpms,
+ .detect = hdlcd_connector_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+};
+
+struct drm_encoder *
+hdlcd_connector_best_encoder(struct drm_connector *connector)
+{
+ int enc_id = connector->encoder_ids[0];
+ struct drm_mode_object *obj;
+ struct drm_encoder *encoder;
+
+ if (connector->encoder)
+ return connector->encoder;
+
+ if (enc_id) {
+ obj = drm_mode_object_find(connector->dev, enc_id,
+ DRM_MODE_OBJECT_ENCODER);
+ if (obj) {
+ encoder = obj_to_encoder(obj);
+ return encoder;
+ }
+ }
+ return NULL;
+}
+
+static int hdlcd_hdmi_con_get_modes(struct drm_connector *connector)
+{
+ struct drm_encoder_slave *slave = hdlcd_get_slave_encoder(connector);
+
+ if (slave && slave->slave_funcs)
+ return slave->slave_funcs->get_modes(&slave->base, connector);
+
+ return 0;
+}
+
+static int hdlcd_hdmi_con_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ struct drm_encoder_slave *slave = hdlcd_get_slave_encoder(connector);
+
+ if (slave && slave->slave_funcs)
+ return slave->slave_funcs->mode_valid(connector->encoder, mode);
+
+ return MODE_ERROR;
+}
+
+static const struct drm_connector_helper_funcs hdlcd_hdmi_con_helper_funcs = {
+ .get_modes = hdlcd_hdmi_con_get_modes,
+ .mode_valid = hdlcd_hdmi_con_mode_valid,
+ .best_encoder = hdlcd_connector_best_encoder,
+};
+
+
+static struct drm_encoder_funcs hdlcd_encoder_funcs = {
+ .destroy = drm_i2c_encoder_destroy,
+};
+
+static void hdlcd_hdmi_encoder_disable(struct drm_encoder *encoder)
+{
+ drm_i2c_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static struct drm_encoder_helper_funcs hdlcd_encoder_helper_funcs = {
+ .dpms = drm_i2c_encoder_dpms,
+ .save = drm_i2c_encoder_save,
+ .restore = drm_i2c_encoder_restore,
+ .mode_fixup = drm_i2c_encoder_mode_fixup,
+ .prepare = drm_i2c_encoder_prepare,
+ .commit = drm_i2c_encoder_commit,
+ .mode_set = drm_i2c_encoder_mode_set,
+ .detect = drm_i2c_encoder_detect,
+ .disable = hdlcd_hdmi_encoder_disable,
+};
+
+static struct tda998x_encoder_params tda998x_params = {
+ .swap_a = 2,
+ .swap_b = 3,
+ .swap_c = 4,
+ .swap_d = 5,
+ .swap_e = 0,
+ .swap_f = 1,
+};
+
+int hdlcd_create_digital_connector(struct drm_device *dev,
+ struct hdlcd_drm_private *hdlcd)
+{
+ int err;
+ struct i2c_board_info i2c_info = { };
+ struct drm_encoder_slave *slave;
+ struct drm_connector *connector;
+ struct device_node *node = hdlcd->slave_node;
+
+ slave = kzalloc(sizeof(*slave), GFP_KERNEL);
+ if (!slave)
+ return -ENOMEM;
+
+ slave->base.possible_crtcs = 1;
+ slave->base.possible_clones = 0;
+
+ err = drm_encoder_init(dev, &slave->base, &hdlcd_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS);
+ if (err)
+ goto encoder_init_err;
+
+ drm_encoder_helper_add(&slave->base, &hdlcd_encoder_helper_funcs);
+
+ /* get the driver for the i2c slave node */
+ i2c_info.of_node = node;
+ err = of_modalias_node(node, i2c_info.type, sizeof(i2c_info.type));
+ if (err < 0) {
+ dev_err(dev->dev, "failed to get a module alias for node %s\n",
+ node->full_name);
+ }
+
+ /* Hack: this needs to be specified in the device tree */
+ i2c_info.platform_data = &tda998x_params;
+
+ err = drm_i2c_encoder_init(dev, slave, NULL, &i2c_info);
+ of_node_put(node);
+ if (err)
+ goto connector_alloc_err;
+
+ connector = kzalloc(sizeof(*connector), GFP_KERNEL);
+ if (!connector) {
+ err = -ENOMEM;
+ goto connector_alloc_err;
+ }
+
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+ DRM_CONNECTOR_POLL_DISCONNECT;
+ err = drm_connector_init(dev, connector, &hdlcd_connector_funcs,
+ DRM_MODE_CONNECTOR_DVID);
+ if (err)
+ goto connector_init_err;
+
+ drm_connector_helper_add(connector, &hdlcd_hdmi_con_helper_funcs);
+
+ connector->encoder = &slave->base;
+ err = drm_mode_connector_attach_encoder(connector, &slave->base);
+ if (err) {
+ goto connector_attach_err;
+ }
+
+ drm_sysfs_connector_add(connector);
+
+ slave->base.dev->dev->platform_data = hdlcd;
+ return err;
+
+connector_attach_err:
+ drm_connector_cleanup(connector);
+connector_init_err:
+ kfree(connector);
+connector_alloc_err:
+ drm_encoder_cleanup(&slave->base);
+encoder_init_err:
+ kfree(slave);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/arm/hdlcd_regs.h b/drivers/gpu/drm/arm/hdlcd_regs.h
new file mode 100644
index 000000000000..571dad4a6f37
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_regs.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * ARM HDLCD Controller register definition
+ */
+
+#ifndef __HDLCD_REGS_H__
+#define __HDLCD_REGS_H__
+
+/* register offsets */
+#define HDLCD_REG_VERSION 0x0000 /* ro */
+#define HDLCD_REG_INT_RAWSTAT 0x0010 /* rw */
+#define HDLCD_REG_INT_CLEAR 0x0014 /* wo */
+#define HDLCD_REG_INT_MASK 0x0018 /* rw */
+#define HDLCD_REG_INT_STATUS 0x001c /* ro */
+#define HDLCD_REG_FB_BASE 0x0100 /* rw */
+#define HDLCD_REG_FB_LINE_LENGTH 0x0104 /* rw */
+#define HDLCD_REG_FB_LINE_COUNT 0x0108 /* rw */
+#define HDLCD_REG_FB_LINE_PITCH 0x010c /* rw */
+#define HDLCD_REG_BUS_OPTIONS 0x0110 /* rw */
+#define HDLCD_REG_V_SYNC 0x0200 /* rw */
+#define HDLCD_REG_V_BACK_PORCH 0x0204 /* rw */
+#define HDLCD_REG_V_DATA 0x0208 /* rw */
+#define HDLCD_REG_V_FRONT_PORCH 0x020c /* rw */
+#define HDLCD_REG_H_SYNC 0x0210 /* rw */
+#define HDLCD_REG_H_BACK_PORCH 0x0214 /* rw */
+#define HDLCD_REG_H_DATA 0x0218 /* rw */
+#define HDLCD_REG_H_FRONT_PORCH 0x021c /* rw */
+#define HDLCD_REG_POLARITIES 0x0220 /* rw */
+#define HDLCD_REG_COMMAND 0x0230 /* rw */
+#define HDLCD_REG_PIXEL_FORMAT 0x0240 /* rw */
+#define HDLCD_REG_BLUE_SELECT 0x0244 /* rw */
+#define HDLCD_REG_GREEN_SELECT 0x0248 /* rw */
+#define HDLCD_REG_RED_SELECT 0x024c /* rw */
+
+/* version */
+#define HDLCD_PRODUCT_ID 0x1CDC0000
+#define HDLCD_PRODUCT_MASK 0xFFFF0000
+#define HDLCD_VERSION_MAJOR_MASK 0x0000FF00
+#define HDLCD_VERSION_MINOR_MASK 0x000000FF
+
+/* interrupts */
+#define HDLCD_INTERRUPT_DMA_END (1 << 0)
+#define HDLCD_INTERRUPT_BUS_ERROR (1 << 1)
+#define HDLCD_INTERRUPT_VSYNC (1 << 2)
+#define HDLCD_INTERRUPT_UNDERRUN (1 << 3)
+#define HDLCD_DEBUG_INT_MASK (HDLCD_INTERRUPT_DMA_END | \
+ HDLCD_INTERRUPT_BUS_ERROR | \
+ HDLCD_INTERRUPT_UNDERRUN)
+
+/* polarities */
+#define HDLCD_POLARITY_VSYNC (1 << 0)
+#define HDLCD_POLARITY_HSYNC (1 << 1)
+#define HDLCD_POLARITY_DATAEN (1 << 2)
+#define HDLCD_POLARITY_DATA (1 << 3)
+#define HDLCD_POLARITY_PIXELCLK (1 << 4)
+
+/* commands */
+#define HDLCD_COMMAND_DISABLE (0 << 0)
+#define HDLCD_COMMAND_ENABLE (1 << 0)
+
+/* pixel format */
+#define HDLCD_PIXEL_FMT_LITTLE_ENDIAN (0 << 31)
+#define HDLCD_PIXEL_FMT_BIG_ENDIAN (1 << 31)
+#define HDLCD_BYTES_PER_PIXEL_MASK (3 << 3)
+
+/* bus options */
+#define HDLCD_BUS_BURST_MASK 0x01f
+#define HDLCD_BUS_MAX_OUTSTAND 0xf00
+#define HDLCD_BUS_BURST_NONE (0 << 0)
+#define HDLCD_BUS_BURST_1 (1 << 0)
+#define HDLCD_BUS_BURST_2 (1 << 1)
+#define HDLCD_BUS_BURST_4 (1 << 2)
+#define HDLCD_BUS_BURST_8 (1 << 3)
+#define HDLCD_BUS_BURST_16 (1 << 4)
+
+/* Max resolution supported is 4096x4096, 32bpp */
+#define HDLCD_MAX_XRES 4096
+#define HDLCD_MAX_YRES 4096
+
+#define NR_PALETTE 256
+
+#endif /* __HDLCD_REGS_H__ */
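The VERSION register packs the product id in the top half and the major/minor revision in the bottom; a probe-time sanity check built from the masks above might look like this sketch (the helper name is illustrative and not part of the patch):

    static int hdlcd_check_version(struct hdlcd_drm_private *hdlcd)
    {
    	u32 version = hdlcd_read(hdlcd, HDLCD_REG_VERSION);

    	if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID)
    		return -ENODEV;

    	pr_info("HDLCD revision %u.%u\n",
    		(version & HDLCD_VERSION_MAJOR_MASK) >> 8,
    		version & HDLCD_VERSION_MINOR_MASK);
    	return 0;
    }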
diff --git a/drivers/gpu/drm/arm/hdlcd_vexpress_encoder.c b/drivers/gpu/drm/arm/hdlcd_vexpress_encoder.c
new file mode 100644
index 000000000000..e96a62343648
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_vexpress_encoder.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ */
+
+#include <linux/i2c.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_encoder_slave.h>
+#include <video/display_timing.h>
+#include <video/of_display_timing.h>
+#include <video/videomode.h>
+
+#include <linux/vexpress.h>
+
+#include "hdlcd_drv.h"
+
+/*
+ * use the functionality of vexpress-config to set the output mode
+ * for HDLCD on Versatile Express boards that lack proper control
+ * of the DDC i2c chip.
+ */
+
+static struct vexpress_config_func *vconfig_func;
+
+/*
+ * Predefined modes that are available through the VExpress micro
+ */
+static const struct {
+ int hsize, vsize, vrefresh, dvimode;
+} vexpress_dvimodes[] = {
+ { 640, 480, 60, 0 }, /* VGA */
+ { 800, 600, 60, 1 }, /* SVGA */
+ { 1024, 768, 60, 2 }, /* XGA */
+ { 1280, 1024, 60, 3 }, /* SXGA */
+ { 1600, 1200, 60, 4 }, /* UXGA */
+ { 1920, 1080, 60, 5 }, /* HD1080 */
+};
+
+static void hdlcd_connector_destroy(struct drm_connector *connector)
+{
+}
+
+static enum drm_connector_status
+hdlcd_connector_detect(struct drm_connector *connector, bool force)
+{
+ if (vconfig_func)
+ return connector_status_connected;
+ return connector_status_disconnected;
+}
+
+static const struct drm_connector_funcs hdlcd_connector_funcs = {
+ .destroy = hdlcd_connector_destroy,
+ .dpms = drm_helper_connector_dpms,
+ .detect = hdlcd_connector_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+};
+
+static int hdlcd_vexpress_con_get_modes(struct drm_connector *connector)
+{
+ int i;
+ struct drm_display_mode *mode;
+
+ /* Add the predefined modes */
+ for (i = 0; i < ARRAY_SIZE(vexpress_dvimodes); i++) {
+ mode = drm_mode_find_dmt(connector->dev,
+ vexpress_dvimodes[i].hsize,
+ vexpress_dvimodes[i].vsize,
+ vexpress_dvimodes[i].vrefresh, false);
+ if (!mode)
+ continue;
+ /* prefer the 1280x1024 mode */
+ if (i == 3)
+ mode->type |= DRM_MODE_TYPE_PREFERRED;
+ drm_mode_probed_add(connector, mode);
+ }
+
+ return i;
+}
+
+/*
+ * mode valid is only called for detected modes and we know that
+ * the restricted list is correct ;)
+ */
+static int hdlcd_vexpress_con_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs hdlcd_vexpress_con_helper_funcs = {
+ .get_modes = hdlcd_vexpress_con_get_modes,
+ .mode_valid = hdlcd_vexpress_con_mode_valid,
+ .best_encoder = hdlcd_connector_best_encoder,
+};
+
+
+static void hdlcd_vexpress_encoder_destroy(struct drm_encoder *encoder)
+{
+ drm_encoder_cleanup(encoder);
+ kfree(encoder);
+ if (vconfig_func)
+ vexpress_config_func_put(vconfig_func);
+}
+
+static const struct drm_encoder_funcs hdlcd_vexpress_encoder_funcs = {
+ .destroy = hdlcd_vexpress_encoder_destroy,
+};
+
+static void hdlcd_vexpress_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+ /* VExpress micro has no support for DPMS */
+}
+
+static bool hdlcd_vexpress_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ /* nothing needs to be done here */
+ return true;
+}
+
+static void hdlcd_vexpress_encoder_prepare(struct drm_encoder *encoder)
+{
+ hdlcd_vexpress_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void hdlcd_vexpress_encoder_commit(struct drm_encoder *encoder)
+{
+ hdlcd_vexpress_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static void hdlcd_vexpress_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ int i, vrefresh = drm_mode_vrefresh(mode);
+
+ if (vconfig_func) {
+ for (i = 0; i < ARRAY_SIZE(vexpress_dvimodes); i++) {
+ if (vexpress_dvimodes[i].hsize != mode->hdisplay)
+ continue;
+ if (vexpress_dvimodes[i].vsize != mode->vdisplay)
+ continue;
+ if (vexpress_dvimodes[i].vrefresh != vrefresh)
+ continue;
+
+ vexpress_config_write(vconfig_func, 0,
+ vexpress_dvimodes[i].dvimode);
+ return;
+ }
+ }
+}
+
+static const struct drm_encoder_helper_funcs
+hdlcd_vexpress_encoder_helper_funcs = {
+ .dpms = hdlcd_vexpress_encoder_dpms,
+ .mode_fixup = hdlcd_vexpress_encoder_mode_fixup,
+ .prepare = hdlcd_vexpress_encoder_prepare,
+ .commit = hdlcd_vexpress_encoder_commit,
+ .mode_set = hdlcd_vexpress_encoder_mode_set,
+};
+
+static const struct of_device_id vexpress_dvi_match[] = {
+ { .compatible = "arm,vexpress-dvimode" },
+ {}
+};
+
+int hdlcd_create_vexpress_connector(struct drm_device *dev,
+ struct hdlcd_drm_private *hdlcd)
+{
+ int err;
+ struct drm_connector *connector;
+ struct device_node *node;
+ struct drm_encoder *encoder;
+
+ node = of_find_matching_node(NULL, vexpress_dvi_match);
+ if (!node)
+ return -ENXIO;
+
+ vconfig_func = vexpress_config_func_get_by_node(node);
+ if (!vconfig_func) {
+ dev_err(dev->dev, "failed to get an output connector\n");
+ return -ENXIO;
+ }
+
+ encoder = kzalloc(sizeof(*encoder), GFP_KERNEL);
+ if (!encoder) {
+ err = -ENOMEM;
+ goto encoder_alloc_fail;
+ }
+
+ encoder->possible_crtcs = 1;
+ encoder->possible_clones = 0;
+ err = drm_encoder_init(dev, encoder, &hdlcd_vexpress_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS);
+ if (err)
+ goto encoder_init_fail;
+
+ drm_encoder_helper_add(encoder, &hdlcd_vexpress_encoder_helper_funcs);
+
+ connector = kzalloc(sizeof(*connector), GFP_KERNEL);
+ if (!connector) {
+ err = -ENOMEM;
+ goto connector_alloc_err;
+ }
+
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ connector->polled = 0;
+ err = drm_connector_init(dev, connector, &hdlcd_connector_funcs,
+ DRM_MODE_CONNECTOR_DVID);
+ if (err)
+ goto connector_init_err;
+
+ drm_connector_helper_add(connector, &hdlcd_vexpress_con_helper_funcs);
+
+ connector->encoder = encoder;
+ err = drm_mode_connector_attach_encoder(connector, encoder);
+ if (err)
+ goto connector_attach_err;
+
+ return 0;
+
+connector_attach_err:
+ drm_connector_cleanup(connector);
+connector_init_err:
+ kfree(connector);
+connector_alloc_err:
+ drm_encoder_cleanup(encoder);
+encoder_init_fail:
+ kfree(encoder);
+encoder_alloc_fail:
+ vexpress_config_func_put(vconfig_func);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/arm/hdlcd_virt_encoder.c b/drivers/gpu/drm/arm/hdlcd_virt_encoder.c
new file mode 100644
index 000000000000..d9feb066a311
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_virt_encoder.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ */
+
+#include <linux/i2c.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <video/display_timing.h>
+#include <video/of_display_timing.h>
+#include <video/videomode.h>
+
+#include "hdlcd_drv.h"
+
+struct hdlcd_connector {
+ struct drm_connector connector;
+ struct display_timings *timings;
+};
+
+#define conn_to_hdlcd(x) container_of(x, struct hdlcd_connector, connector)
+
+static void hdlcd_connector_destroy(struct drm_connector *connector)
+{
+ struct hdlcd_connector *hdlcd = conn_to_hdlcd(connector);
+
+ drm_connector_cleanup(connector);
+ kfree(hdlcd);
+}
+
+static enum drm_connector_status hdlcd_connector_detect(
+ struct drm_connector *connector, bool force)
+{
+ return connector_status_connected;
+}
+
+static const struct drm_connector_funcs hdlcd_connector_funcs = {
+ .destroy = hdlcd_connector_destroy,
+ .dpms = drm_helper_connector_dpms,
+ .detect = hdlcd_connector_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+};
+
+static int hdlcd_connector_get_modes(struct drm_connector *connector)
+{
+ struct hdlcd_connector *hdlcd = conn_to_hdlcd(connector);
+ struct display_timings *timings = hdlcd->timings;
+ int i;
+
+ for (i = 0; i < timings->num_timings; i++) {
+ struct drm_display_mode *mode = drm_mode_create(connector->dev);
+ struct videomode vm;
+
+ if (videomode_from_timings(timings, &vm, i))
+ break;
+
+ drm_display_mode_from_videomode(&vm, mode);
+ mode->type = DRM_MODE_TYPE_DRIVER;
+ if (timings->native_mode == i)
+ mode->type = DRM_MODE_TYPE_PREFERRED;
+
+ drm_mode_set_name(mode);
+ drm_mode_probed_add(connector, mode);
+ }
+
+ return i;
+}
+
+static int hdlcd_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs hdlcd_virt_con_helper_funcs = {
+ .get_modes = hdlcd_connector_get_modes,
+ .mode_valid = hdlcd_connector_mode_valid,
+ .best_encoder = hdlcd_connector_best_encoder,
+};
+
+static void hdlcd_encoder_destroy(struct drm_encoder *encoder)
+{
+ drm_encoder_cleanup(encoder);
+ kfree(encoder);
+}
+
+static const struct drm_encoder_funcs hdlcd_encoder_funcs = {
+ .destroy = hdlcd_encoder_destroy,
+};
+
+static void hdlcd_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+}
+
+static bool hdlcd_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ /* nothing needed */
+ return true;
+}
+
+static void hdlcd_encoder_prepare(struct drm_encoder *encoder)
+{
+ hdlcd_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void hdlcd_encoder_commit(struct drm_encoder *encoder)
+{
+ hdlcd_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static void hdlcd_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ /* nothing needed */
+}
+
+static const struct drm_encoder_helper_funcs hdlcd_encoder_helper_funcs = {
+ .dpms = hdlcd_encoder_dpms,
+ .mode_fixup = hdlcd_encoder_mode_fixup,
+ .prepare = hdlcd_encoder_prepare,
+ .commit = hdlcd_encoder_commit,
+ .mode_set = hdlcd_encoder_mode_set,
+};
+
+int hdlcd_create_virtual_connector(struct drm_device *dev)
+{
+ struct drm_encoder *encoder;
+ struct hdlcd_connector *hdlcdc;
+ struct drm_connector *connector;
+ int ret;
+
+ encoder = kzalloc(sizeof(*encoder), GFP_KERNEL);
+ if (!encoder)
+ return -ENOMEM;
+
+ encoder->possible_crtcs = 1;
+ encoder->possible_clones = 0;
+
+ ret = drm_encoder_init(dev, encoder, &hdlcd_encoder_funcs,
+ DRM_MODE_ENCODER_VIRTUAL);
+ if (ret)
+ goto encoder_init_err;
+
+ drm_encoder_helper_add(encoder, &hdlcd_encoder_helper_funcs);
+
+ hdlcdc = kzalloc(sizeof(*hdlcdc), GFP_KERNEL);
+ if (!hdlcdc) {
+ ret = -ENOMEM;
+ goto connector_alloc_err;
+ }
+
+ hdlcdc->timings = of_get_display_timings(dev->platformdev->dev.of_node);
+ if (!hdlcdc->timings) {
+ dev_err(dev->dev, "failed to get display panel timings\n");
+ ret = -ENXIO;
+ goto connector_init_err;
+ }
+
+ connector = &hdlcdc->connector;
+
+ /* bogus values, pretend we're a 24" screen */
+ connector->display_info.width_mm = 519;
+ connector->display_info.height_mm = 324;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ connector->polled = 0;
+ ret = drm_connector_init(dev, connector, &hdlcd_connector_funcs,
+ DRM_MODE_CONNECTOR_VIRTUAL);
+ if (ret)
+ goto connector_init_err;
+
+ drm_connector_helper_add(connector, &hdlcd_virt_con_helper_funcs);
+
+ connector->encoder = encoder;
+ ret = drm_mode_connector_attach_encoder(connector, encoder);
+ if (ret)
+ goto attach_err;
+
+ return ret;
+
+attach_err:
+ drm_connector_cleanup(connector);
+connector_init_err:
+ kfree(hdlcdc);
+connector_alloc_err:
+ drm_encoder_cleanup(encoder);
+encoder_init_err:
+ kfree(encoder);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 83f0ba5859c0..4184970c1e36 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1597,13 +1597,8 @@ drm_mode_std(struct drm_connector *connector, struct edid *edid,
}
/* check whether it can be found in default mode table */
- if (drm_monitor_supports_rb(edid)) {
- mode = drm_mode_find_dmt(dev, hsize, vsize, vrefresh_rate,
- true);
- if (mode)
- return mode;
- }
- mode = drm_mode_find_dmt(dev, hsize, vsize, vrefresh_rate, false);
+ mode = drm_mode_find_dmt(dev, hsize, vsize, vrefresh_rate,
+ drm_monitor_supports_rb(edid));
if (mode)
return mode;
@@ -1994,7 +1989,8 @@ do_inferred_modes(struct detailed_timing *timing, void *c)
closure->edid,
timing);
- if (!version_greater(closure->edid, 1, 1))
+ if (!version_greater(closure->edid, 1, 1) ||
+ !(closure->edid->features & DRM_EDID_FEATURE_DEFAULT_GTF))
return; /* GTF not defined yet */
switch (range->flags) {
@@ -2951,8 +2947,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
num_modes += add_cvt_modes(connector, edid);
num_modes += add_standard_modes(connector, edid);
num_modes += add_established_modes(connector, edid);
- if (edid->features & DRM_EDID_FEATURE_DEFAULT_GTF)
- num_modes += add_inferred_modes(connector, edid);
+ num_modes += add_inferred_modes(connector, edid);
num_modes += add_cea_modes(connector, edid);
if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c
index 0cfb60f54766..36c0aa740bcd 100644
--- a/drivers/gpu/drm/drm_encoder_slave.c
+++ b/drivers/gpu/drm/drm_encoder_slave.c
@@ -25,6 +25,7 @@
*/
#include <linux/module.h>
+#include <linux/of_i2c.h>
#include <drm/drm_encoder_slave.h>
@@ -61,7 +62,10 @@ int drm_i2c_encoder_init(struct drm_device *dev,
request_module("%s%s", I2C_MODULE_PREFIX, info->type);
- client = i2c_new_device(adap, info);
+ if (info->of_node)
+ client = of_find_i2c_device_by_node(info->of_node);
+ else
+ client = i2c_new_device(adap, info);
if (!client) {
err = -ENOMEM;
goto fail;
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 239ef30f4a62..c9d7081acf59 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -640,6 +640,55 @@ void drm_gem_vm_close(struct vm_area_struct *vma)
}
EXPORT_SYMBOL(drm_gem_vm_close);
+/**
+ * drm_gem_mmap_obj - memory map a GEM object
+ * @obj: the GEM object to map
+ * @obj_size: the object size to be mapped, in bytes
+ * @vma: VMA for the area to be mapped
+ *
+ * Set up the VMA to prepare mapping of the GEM object using the gem_vm_ops
+ * provided by the driver. Depending on their requirements, drivers can either
+ * provide a fault handler in their gem_vm_ops (in which case any accesses to
+ * the object will be trapped, to perform migration, GTT binding, surface
+ * register allocation, or performance monitoring), or mmap the buffer memory
+ * synchronously after calling drm_gem_mmap_obj.
+ *
+ * This function is mainly intended to implement the DMABUF mmap operation, when
+ * the GEM object is not looked up based on its fake offset. To implement the
+ * DRM mmap operation, drivers should use the drm_gem_mmap() function.
+ *
+ * Return 0 on success or -EINVAL if the object size is smaller than the VMA
+ * size, or if no gem_vm_ops are provided.
+ */
+int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
+ struct vm_area_struct *vma)
+{
+ struct drm_device *dev = obj->dev;
+
+ /* Check for valid size. */
+ if (obj_size < vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ if (!dev->driver->gem_vm_ops)
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = dev->driver->gem_vm_ops;
+ vma->vm_private_data = obj;
+ vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+ /* Take a ref for this mapping of the object, so that the fault
+ * handler can dereference the mmap offset's pointer to the object.
+ * This reference is cleaned up by the corresponding vm_close
+ * (which should happen whether the vma was created by this call, or
+ * by a vm_open due to mremap or partial unmap or whatever).
+ */
+ drm_gem_object_reference(obj);
+
+ drm_vm_open_locked(dev, vma);
+ return 0;
+}
+EXPORT_SYMBOL(drm_gem_mmap_obj);
/**
* drm_gem_mmap - memory map routine for GEM objects
@@ -649,11 +698,9 @@ EXPORT_SYMBOL(drm_gem_vm_close);
* If a driver supports GEM object mapping, mmap calls on the DRM file
* descriptor will end up here.
*
- * If we find the object based on the offset passed in (vma->vm_pgoff will
+ * Look up the GEM object based on the offset passed in (vma->vm_pgoff will
* contain the fake offset we created when the GTT map ioctl was called on
- * the object), we set up the driver fault handler so that any accesses
- * to the object can be trapped, to perform migration, GTT binding, surface
- * register allocation, or performance monitoring.
+ * the object) and map it with a call to drm_gem_mmap_obj().
*/
int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
@@ -661,7 +708,6 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
struct drm_device *dev = priv->minor->dev;
struct drm_gem_mm *mm = dev->mm_private;
struct drm_local_map *map = NULL;
- struct drm_gem_object *obj;
struct drm_hash_item *hash;
int ret = 0;
@@ -682,32 +728,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
goto out_unlock;
}
- /* Check for valid size. */
- if (map->size < vma->vm_end - vma->vm_start) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- obj = map->handle;
- if (!obj->dev->driver->gem_vm_ops) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
- vma->vm_ops = obj->dev->driver->gem_vm_ops;
- vma->vm_private_data = map->handle;
- vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-
- /* Take a ref for this mapping of the object, so that the fault
- * handler can dereference the mmap offset's pointer to the object.
- * This reference is cleaned up by the corresponding vm_close
- * (which should happen whether the vma was created by this call, or
- * by a vm_open due to mremap or partial unmap or whatever).
- */
- drm_gem_object_reference(obj);
-
- drm_vm_open_locked(dev, vma);
+ ret = drm_gem_mmap_obj(map->handle, map->size, vma);
out_unlock:
mutex_unlock(&dev->struct_mutex);
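The new drm_gem_mmap_obj() helper is what dma-buf exporters like the HDLCD driver above call from their .mmap handler, where no fake-offset lookup is involved. A minimal sketch of such a handler, assuming the exporter stored the GEM object in dma_buf->priv (the handler name is hypothetical):

    static int my_dmabuf_mmap(struct dma_buf *buf, struct vm_area_struct *vma)
    {
    	struct drm_gem_object *obj = buf->priv;
    	int ret;

    	/* drm_gem_mmap_obj() expects struct_mutex to be held */
    	mutex_lock(&obj->dev->struct_mutex);
    	ret = drm_gem_mmap_obj(obj, obj->size, vma);
    	mutex_unlock(&obj->dev->struct_mutex);

    	return ret;
    }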
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index c25b7a0eb0e3..23d9f408ef18 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -35,8 +35,12 @@ static unsigned int get_gem_mmap_offset(struct drm_gem_object *obj)
static void drm_gem_cma_buf_destroy(struct drm_device *drm,
struct drm_gem_cma_object *cma_obj)
{
- dma_free_writecombine(drm->dev, cma_obj->base.size, cma_obj->vaddr,
- cma_obj->paddr);
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+
+ dma_free_attrs(drm->dev, cma_obj->base.size,
+ cma_obj->vaddr, cma_obj->paddr, &attrs);
}
/*
@@ -51,6 +55,9 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
struct drm_gem_cma_object *cma_obj;
struct drm_gem_object *gem_obj;
int ret;
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
size = round_up(size, PAGE_SIZE);
@@ -58,8 +65,8 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
if (!cma_obj)
return ERR_PTR(-ENOMEM);
- cma_obj->vaddr = dma_alloc_writecombine(drm->dev, size,
- &cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN);
+ cma_obj->vaddr = dma_alloc_attrs(drm->dev, size,
+ &cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN, &attrs);
if (!cma_obj->vaddr) {
dev_err(drm->dev, "failed to allocate buffer with size %d\n", size);
ret = -ENOMEM;
@@ -139,7 +146,6 @@ err_handle_create:
void drm_gem_cma_free_object(struct drm_gem_object *gem_obj)
{
struct drm_gem_cma_object *cma_obj;
-
if (gem_obj->map_list.map)
drm_gem_free_mmap_offset(gem_obj);
diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig
index 4d341db462a2..508e1e6f3194 100644
--- a/drivers/gpu/drm/i2c/Kconfig
+++ b/drivers/gpu/drm/i2c/Kconfig
@@ -11,6 +11,13 @@ config DRM_I2C_CH7006
This driver is currently only useful if you're also using
the nouveau driver.
+config DRM_I2C_SII9022
+ tristate "Silicon Image SiI9022 HDMI transmitter"
+ default m if DRM_HDLCD
+ help
+ Support for SiI9022 HDMI transmitter, used on the
+ ARM Versatile Express motherboards.
+
config DRM_I2C_SIL164
tristate "Silicon Image sil164 TMDS transmitter"
default m if DRM_NOUVEAU
diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index 43aa33baebed..c1e2ee0004c3 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -3,8 +3,11 @@ ccflags-y := -Iinclude/drm
ch7006-y := ch7006_drv.o ch7006_mode.o
obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o
+sii9022-y := sii9022_drv.o
+obj-$(CONFIG_DRM_I2C_SII9022) += sii9022.o
+
sil164-y := sil164_drv.o
obj-$(CONFIG_DRM_I2C_SIL164) += sil164.o
-tda998x-y := tda998x_drv.o
+tda998x-y := tda998x_drv.o tda998x_codec.o
obj-$(CONFIG_DRM_I2C_NXP_TDA998X) += tda998x.o
diff --git a/drivers/gpu/drm/i2c/sii9022_drv.c b/drivers/gpu/drm/i2c/sii9022_drv.c
new file mode 100644
index 000000000000..5ba82d0182f5
--- /dev/null
+++ b/drivers/gpu/drm/i2c/sii9022_drv.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (C) 2013 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * Silicon Image SiI9022 driver for DRM I2C encoder slave
+ */
+
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_encoder_slave.h>
+
+#define SII9022_INFORMAT_REG 0x09
+#define SII9022_OUTFORMAT_REG 0x0a
+#define SII9022_SYS_CONTROL_REG 0x1a
+# define SII9022_DDC_OUT_MODE (1 << 0)
+# define SII9022_DDC_BUS_STAT (1 << 1)
+# define SII9022_DDC_BUS_GRANT (1 << 2)
+# define SII9022_AV_MUTE (1 << 3)
+# define SII9022_OUTPUT_EN (1 << 4)
+#define SII9022_ID_REG 0x1b
+#define SII9022_POWER_REG 0x1e
+#define SII9022_SEC_CTRL_REG 0x2a
+#define SII9022_SEC_STATUS_REG 0x29
+#define SII9022_SEC_VERSION_REG 0x30
+#define SII9022_INTR_REG 0x3c
+#define SII9022_INTR_STATUS 0x3d
+# define SII9022_HOTPLUG_EVENT (1 << 0)
+# define SII9022_RECEIVER_EVENT (1 << 1)
+# define SII9022_HOTPLUG_STATE (1 << 2)
+# define SII9022_RECEIVER_STATE (1 << 3)
+#define SII9022_VENDOR_ID 0xb0
+#define SII9022_INTERNAL_PAGE 0xbc
+#define SII9022_INTERNAL_INDEX 0xbd
+#define SII9022_INTERNAL_REG 0xbe
+#define SII9022_CTRL_REG 0xc7
+
+
+struct sii9022_video_regs {
+ uint16_t pixel_clock;
+ uint16_t vrefresh;
+ uint16_t cols;
+ uint16_t lines;
+ uint8_t pixel_data;
+};
+
+static void sii9022_write(struct i2c_client *client, uint8_t addr, uint8_t val)
+{
+ uint8_t buf[] = { addr, val };
+ int ret;
+
+ ret = i2c_master_send(client, buf, ARRAY_SIZE(buf));
+ if (ret < 0)
+ dev_err(&client->dev, "Error writing to subaddress 0x%x: %d\n", addr, ret);
+}
+
+static uint8_t sii9022_read(struct i2c_client *client, uint8_t addr)
+{
+ uint8_t val;
+ int ret;
+
+ ret = i2c_master_send(client, &addr, sizeof(addr));
+ if (ret < 0)
+ goto fail;
+
+ ret = i2c_master_recv(client, &val, sizeof(val));
+ if (ret < 0)
+ goto fail;
+
+ return val;
+
+fail:
+ dev_err(&client->dev, "Error reading from subaddress 0x%x: %d\n", addr, ret);
+ return 0;
+}
+
+static void sii9022_encoder_set_config(struct drm_encoder *encoder, void *params)
+{
+}
+
+static void sii9022_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+ struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ uint8_t val;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_OFF:
+ val = sii9022_read(client, SII9022_SYS_CONTROL_REG);
+ val |= SII9022_OUTPUT_EN;
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, val);
+ /* wait for AVI InfoFrames to flush */
+ mdelay(128);
+ /* use D2 for OFF as we cannot control the reset pin */
+ sii9022_write(client, SII9022_POWER_REG, 0x2);
+ break;
+ case DRM_MODE_DPMS_ON:
+ val = sii9022_read(client, SII9022_SYS_CONTROL_REG);
+ val &= ~SII9022_OUTPUT_EN;
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, val);
+ /* fall through */
+ default:
+ sii9022_write(client, SII9022_POWER_REG, mode);
+ break;
+ }
+}
+
+static enum drm_connector_status
+sii9022_encoder_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ enum drm_connector_status con_status = connector_status_unknown;
+ uint8_t status = sii9022_read(client, SII9022_INTR_STATUS);
+
+ if (status & SII9022_HOTPLUG_STATE)
+ con_status = connector_status_connected;
+ else
+ con_status = connector_status_disconnected;
+
+ /* clear the event status bits */
+ sii9022_write(client, SII9022_INTR_STATUS,
+ 0xff /*SII9022_HOTPLUG_EVENT | SII9022_RECEIVER_EVENT */);
+ status = sii9022_read(client, SII9022_INTR_STATUS);
+
+ return con_status;
+}
+
+static int sii9022_encoder_get_modes(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ struct edid *edid = NULL;
+ uint8_t status;
+ int ret = 0, timeout = 10;
+
+ /* Disable HDCP link security */
+ do {
+ sii9022_write(client, SII9022_SEC_CTRL_REG, 0);
+ status = sii9022_read(client, SII9022_SEC_CTRL_REG);
+ } while (status);
+ status = sii9022_read(client, SII9022_SEC_STATUS_REG);
+
+ /* first, request the pass-through mode in order to read the edid */
+ status = sii9022_read(client, SII9022_SYS_CONTROL_REG);
+ status |= SII9022_DDC_BUS_GRANT;
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, status);
+ do {
+ /* wait for state change */
+ status = sii9022_read(client, SII9022_SYS_CONTROL_REG);
+ --timeout;
+ } while (((status & SII9022_DDC_BUS_STAT) != SII9022_DDC_BUS_STAT) && timeout);
+
+ if (!timeout) {
+ dev_warn(&client->dev, "timeout waiting for DDC bus grant\n");
+ goto release_ddc;
+ }
+
+ /* write back the value read in order to close the i2c switch */
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, status);
+
+ edid = drm_get_edid(connector, client->adapter);
+ if (!edid) {
+ dev_err(&client->dev, "failed to get EDID data\n");
+ ret = -1;
+ }
+
+release_ddc:
+ timeout = 10;
+ do {
+ status &= ~(SII9022_DDC_BUS_STAT | SII9022_DDC_BUS_GRANT);
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, status);
+ status = sii9022_read(client, SII9022_SYS_CONTROL_REG);
+ --timeout;
+ } while ((status & (SII9022_DDC_BUS_STAT | SII9022_DDC_BUS_GRANT)) && timeout);
+
+ if (edid) {
+ drm_mode_connector_update_edid_property(connector, edid);
+ ret = drm_add_edid_modes(connector, edid);
+ if (drm_detect_hdmi_monitor(edid))
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, status | 1);
+ else
+ sii9022_write(client, SII9022_SYS_CONTROL_REG, status & 0xfe);
+ kfree(edid);
+ }
+
+ return ret;
+}
+
+static bool sii9022_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+static int sii9022_encoder_mode_valid(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ return MODE_OK;
+}
+
+static void sii9022_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ /* TPI expects the pixel clock in units of 10 kHz; mode->clock is in kHz */
+ int clk = adjusted_mode->clock / 10;
+ int i, vrefresh = adjusted_mode->vrefresh * 100;
+ uint8_t buf[15]; /* start address reg + 14 bytes max */
+
+ /* Set Video Mode (8 registers block) */
+ buf[0] = 0; /* start register */
+ buf[1] = clk & 0xff;
+ buf[2] = (clk & 0xff00) >> 8;
+ buf[3] = vrefresh & 0xff;
+ buf[4] = (vrefresh & 0xff00) >> 8;
+ buf[5] = adjusted_mode->crtc_hdisplay & 0xff;
+ buf[6] = (adjusted_mode->crtc_hdisplay & 0xff00) >> 8;
+ buf[7] = adjusted_mode->crtc_vdisplay & 0xff;
+ buf[8] = (adjusted_mode->crtc_vdisplay & 0xff00) >> 8;
+
+ if (i2c_master_send(client, buf, 9) < 0) {
+ dev_err(&client->dev, "Could not write video mode data\n");
+ return;
+ }
+
+ /* input is full range RGB */
+ sii9022_write(client, SII9022_INFORMAT_REG, 0x04);
+ /* output is full range digital RGB */
+ sii9022_write(client, SII9022_OUTFORMAT_REG, 0x17);
+
+ /* set the AVI InfoFrame (14 register block) */
+ buf[0] = 0x0c; /* start register */
+ buf[1] = 0x0e; /* AVI_DBYTE0 = checksum */
+ buf[2] = 0x10; /* AVI_DBYTE1 */
+ buf[3] = 0x50; /* AVI_DBYTE2 (colorimetry) */
+ buf[4] = 0; /* AVI_DBYTE3 (scaling) */
+ buf[5] = drm_match_cea_mode(adjusted_mode);
+ buf[6] = 0; /* AVI_DBYTE5 (pixel repetition factor) */
+ buf[7] = 0;
+ buf[8] = 0;
+ buf[9] = 0;
+ buf[10] = 0;
+ buf[11] = 0;
+ buf[12] = 0;
+ buf[13] = 0;
+ buf[14] = 0;
+
+ /*
+ * InfoFrame checksum: 0x100 minus the 8-bit sum of the header bytes
+ * (type 0x82, version 0x02, length 13) and the payload bytes.
+ */
+ buf[1] = 0x82 + 0x02 + 13;
+ for (i = 2; i < 15; i++)
+ buf[1] += buf[i];
+ buf[1] = 0x100 - buf[1];
+
+ if (i2c_master_send(client, buf, ARRAY_SIZE(buf)) < 0) {
+ dev_err(&client->dev, "Could not write video mode data\n");
+ return;
+ }
+}
+
+static struct drm_encoder_slave_funcs sii9022_encoder_funcs = {
+ .set_config = sii9022_encoder_set_config,
+ .dpms = sii9022_encoder_dpms,
+ .detect = sii9022_encoder_detect,
+ .get_modes = sii9022_encoder_get_modes,
+ .mode_fixup = sii9022_encoder_mode_fixup,
+ .mode_valid = sii9022_encoder_mode_valid,
+ .mode_set = sii9022_encoder_mode_set,
+};
+
+static int sii9022_encoder_init(struct i2c_client *client,
+ struct drm_device *dev,
+ struct drm_encoder_slave *encoder)
+{
+ encoder->slave_funcs = &sii9022_encoder_funcs;
+
+ return 0;
+}
+
+static struct i2c_device_id sii9022_ids[] = {
+ { "sii9022-tpi", 0 },
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, sii9022_ids);
+
+static int sii9022_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+ int dev_id, dev_rev;
+
+ /* first step is to enable the TPI mode */
+ sii9022_write(client, SII9022_CTRL_REG, 0x00);
+ dev_id = sii9022_read(client, SII9022_ID_REG);
+ dev_rev = sii9022_read(client, SII9022_ID_REG+1);
+ if (dev_id != SII9022_VENDOR_ID) {
+ dev_err(&client->dev, "sii9022 not found\n");
+ return -ENODEV;
+ }
+ dev_id = sii9022_read(client, SII9022_SEC_VERSION_REG);
+ dev_info(&client->dev, "found %s chip (rev %01u.%01u)\n",
+ dev_id ? "SiI9024" : "SiI9022",
+ (dev_rev >> 4) & 0xf, dev_rev & 0xf);
+
+ /* disable interrupts */
+ sii9022_write(client, SII9022_INTR_REG, 0x00);
+
+ return 0;
+}
+
+static int sii9022_remove(struct i2c_client *client)
+{
+ return 0;
+}
+
+static struct drm_i2c_encoder_driver sii9022_driver = {
+ .i2c_driver = {
+ .probe = sii9022_probe,
+ .remove = sii9022_remove,
+ .driver = {
+ .name = "sii9022-tpi",
+ },
+ .id_table = sii9022_ids,
+ .class = I2C_CLASS_DDC,
+ },
+ .encoder_init = sii9022_encoder_init,
+};
+
+static int __init sii9022_init(void)
+{
+ return drm_i2c_encoder_register(THIS_MODULE, &sii9022_driver);
+}
+
+static void __exit sii9022_exit(void)
+{
+ drm_i2c_encoder_unregister(&sii9022_driver);
+}
+
+module_init(sii9022_init);
+module_exit(sii9022_exit);
+
+MODULE_ALIAS(I2C_MODULE_PREFIX "sii9022-tpi");
+MODULE_AUTHOR("Liviu Dudau <Liviu.Dudau@arm.com>");
+MODULE_DESCRIPTION("Silicon Image SiI9022 HDMI transmitter driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/i2c/tda998x_codec.c b/drivers/gpu/drm/i2c/tda998x_codec.c
new file mode 100644
index 000000000000..c84d2b58f42d
--- /dev/null
+++ b/drivers/gpu/drm/i2c/tda998x_codec.c
@@ -0,0 +1,248 @@
+/*
+ * ALSA SoC TDA998X CODEC
+ *
+ * Copyright (C) 2014 Jean-Francois Moine
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <sound/soc.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <linux/of.h>
+#include <linux/i2c.h>
+#include <drm/drm_encoder_slave.h>
+#include <drm/i2c/tda998x.h>
+
+#include "tda998x_drv.h"
+
+#define TDA998X_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \
+ SNDRV_PCM_FMTBIT_S20_3LE | \
+ SNDRV_PCM_FMTBIT_S24_LE | \
+ SNDRV_PCM_FMTBIT_S32_LE)
+
+static int tda_startup(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct tda998x_priv *priv = snd_soc_codec_get_drvdata(dai->codec);
+ u8 *eld = priv->eld;
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ u8 *sad;
+ int sad_count;
+ unsigned eld_ver, mnl, rate_mask;
+ unsigned max_channels, fmt;
+ u64 formats;
+ struct snd_pcm_hw_constraint_list *rate_constraints =
+ &priv->rate_constraints;
+ static const u32 hdmi_rates[] = {
+ 32000, 44100, 48000, 88200, 96000, 176400, 192000
+ };
+
+ /* check if streaming is already active */
+ if (priv->dai_id != AFMT_NO_AUDIO)
+ return -EBUSY;
+ priv->dai_id = dai->id;
+
+ if (!eld)
+ return 0;
+
+ /* adjust the hw params from the ELD (EDID) */
+ eld_ver = eld[0] >> 3;
+ if (eld_ver != 2 && eld_ver != 31)
+ return 0;
+
+ mnl = eld[4] & 0x1f; /* monitor name length */
+ if (mnl > 16)
+ return 0;
+
+ sad_count = eld[5] >> 4; /* number of short audio descriptors (SADs) */
+ sad = eld + 20 + mnl; /* SADs follow the monitor name string */
+
+ /* Start from the basic audio settings */
+ max_channels = 2;
+ rate_mask = 0;
+ fmt = 0;
+ while (sad_count--) {
+ switch (sad[0] & 0x78) {
+ case 0x08: /* PCM */
+ max_channels = max(max_channels, (sad[0] & 7) + 1u);
+ rate_mask |= sad[1];
+ fmt |= sad[2] & 0x07;
+ break;
+ }
+ sad += 3;
+ }
+
+ /* set the constraints */
+ rate_constraints->list = hdmi_rates;
+ rate_constraints->count = ARRAY_SIZE(hdmi_rates);
+ rate_constraints->mask = rate_mask;
+ snd_pcm_hw_constraint_list(runtime, 0,
+ SNDRV_PCM_HW_PARAM_RATE,
+ rate_constraints);
+
+ formats = 0;
+ if (fmt & 1)
+ formats |= SNDRV_PCM_FMTBIT_S16_LE;
+ if (fmt & 2)
+ formats |= SNDRV_PCM_FMTBIT_S20_3LE;
+ if (fmt & 4)
+ formats |= SNDRV_PCM_FMTBIT_S24_LE;
+ snd_pcm_hw_constraint_mask64(runtime,
+ SNDRV_PCM_HW_PARAM_FORMAT,
+ formats);
+
+ snd_pcm_hw_constraint_minmax(runtime,
+ SNDRV_PCM_HW_PARAM_CHANNELS,
+ 1, max_channels);
+ return 0;
+}
+
+static int tda_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params,
+ struct snd_soc_dai *dai)
+{
+ struct tda998x_priv *priv = snd_soc_codec_get_drvdata(dai->codec);
+
+ /* Requires an attached display */
+ if (!priv->encoder->crtc)
+ return -ENODEV;
+
+ /* if same input and same parameters, do not do a full switch */
+ if (dai->id == priv->params.audio_format &&
+ params_format(params) == priv->audio_sample_format) {
+ tda998x_audio_start(priv, 0);
+ return 0;
+ }
+ priv->params.audio_sample_rate = params_rate(params);
+ priv->params.audio_format = dai->id;
+ priv->audio_sample_format = params_format(params);
+ priv->params.audio_cfg =
+ priv->audio_ports[dai->id == AFMT_I2S ? 0 : 1];
+ tda998x_audio_start(priv, 1);
+ return 0;
+}
+
+static void tda_shutdown(struct snd_pcm_substream *substream,
+ struct snd_soc_dai *dai)
+{
+ struct tda998x_priv *priv = snd_soc_codec_get_drvdata(dai->codec);
+
+ tda998x_audio_stop(priv);
+ priv->dai_id = AFMT_NO_AUDIO;
+}
+
+static const struct snd_soc_dai_ops tda_ops = {
+ .startup = tda_startup,
+ .hw_params = tda_hw_params,
+ .shutdown = tda_shutdown,
+};
+
+static struct snd_soc_dai_driver tda998x_dai[] = {
+ {
+ .name = "i2s-hifi",
+ .id = AFMT_I2S,
+ .playback = {
+ .stream_name = "HDMI I2S Playback",
+ .channels_min = 1,
+ .channels_max = 8,
+ .rates = SNDRV_PCM_RATE_CONTINUOUS,
+ .rate_min = 5512,
+ .rate_max = 192000,
+ .formats = TDA998X_FORMATS,
+ },
+ .ops = &tda_ops,
+ },
+ {
+ .name = "spdif-hifi",
+ .id = AFMT_SPDIF,
+ .playback = {
+ .stream_name = "HDMI SPDIF Playback",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = SNDRV_PCM_RATE_CONTINUOUS,
+ .rate_min = 22050,
+ .rate_max = 192000,
+ .formats = TDA998X_FORMATS,
+ },
+ .ops = &tda_ops,
+ },
+};
+
+static const struct snd_soc_dapm_widget tda_widgets[] = {
+ SND_SOC_DAPM_OUTPUT("hdmi-out"),
+};
+static const struct snd_soc_dapm_route tda_routes[] = {
+ { "hdmi-out", NULL, "HDMI I2S Playback" },
+ { "hdmi-out", NULL, "HDMI SPDIF Playback" },
+};
+
+static int tda_probe(struct snd_soc_codec *codec)
+{
+ struct i2c_client *i2c_client = to_i2c_client(codec->dev);
+ struct tda998x_priv *priv = i2c_get_clientdata(i2c_client);
+ struct device_node *np = codec->dev->of_node;
+ int i, j, ret;
+ const char *p;
+
+ if (!priv)
+ return -ENODEV;
+ snd_soc_codec_set_drvdata(codec, priv);
+
+ if (!np)
+ return 0;
+
+ /* get the audio input ports */
+ for (i = 0; i < 2; i++) {
+ u32 port;
+
+ ret = of_property_read_u32_index(np, "audio-ports", i, &port);
+ if (ret) {
+ if (i == 0)
+ dev_err(codec->dev,
+ "bad or missing audio-ports\n");
+ break;
+ }
+ ret = of_property_read_string_index(np, "audio-port-names",
+ i, &p);
+ if (ret) {
+ dev_err(codec->dev,
+ "missing audio-port-names[%d]\n", i);
+ break;
+ }
+ if (strcmp(p, "i2s") == 0) {
+ j = 0;
+ } else if (strcmp(p, "spdif") == 0) {
+ j = 1;
+ } else {
+ dev_err(codec->dev,
+ "bad audio-port-names '%s'\n", p);
+ break;
+ }
+ priv->audio_ports[j] = port;
+ }
+ return 0;
+}
+
+static const struct snd_soc_codec_driver soc_codec_tda998x = {
+ .probe = tda_probe,
+ .dapm_widgets = tda_widgets,
+ .num_dapm_widgets = ARRAY_SIZE(tda_widgets),
+ .dapm_routes = tda_routes,
+ .num_dapm_routes = ARRAY_SIZE(tda_routes),
+};
+
+int tda998x_codec_register(struct device *dev)
+{
+ return snd_soc_register_codec(dev,
+ &soc_codec_tda998x,
+ tda998x_dai, ARRAY_SIZE(tda998x_dai));
+}
+
+void tda998x_codec_unregister(struct device *dev)
+{
+ snd_soc_unregister_codec(dev);
+}
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index e68b58a1aaf9..63723e8fea87 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -17,23 +17,22 @@
+#include <linux/hdmi.h>
#include <linux/module.h>
+#include <linux/irq.h>
+#include <sound/asoundef.h>
+#include <sound/pcm_params.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_encoder_slave.h>
#include <drm/drm_edid.h>
+#include <drm/i2c/tda998x.h>
+#include "tda998x_drv.h"
#define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
-struct tda998x_priv {
- struct i2c_client *cec;
- uint16_t rev;
- uint8_t current_page;
- int dpms;
-};
-
#define to_tda998x_priv(x) ((struct tda998x_priv *)to_encoder_slave(x)->slave_priv)
/* The TDA9988 series of devices use a paged register scheme.. to simplify
@@ -68,10 +67,13 @@ struct tda998x_priv {
# define I2C_MASTER_DIS_MM (1 << 0)
# define I2C_MASTER_DIS_FILT (1 << 1)
# define I2C_MASTER_APP_STRT_LAT (1 << 2)
+#define REG_FEAT_POWERDOWN REG(0x00, 0x0e) /* read/write */
+# define FEAT_POWERDOWN_SPDIF (1 << 3)
#define REG_INT_FLAGS_0 REG(0x00, 0x0f) /* read/write */
#define REG_INT_FLAGS_1 REG(0x00, 0x10) /* read/write */
#define REG_INT_FLAGS_2 REG(0x00, 0x11) /* read/write */
# define INT_FLAGS_2_EDID_BLK_RD (1 << 1)
+#define REG_ENA_ACLK REG(0x00, 0x16) /* read/write */
#define REG_ENA_VP_0 REG(0x00, 0x18) /* read/write */
#define REG_ENA_VP_1 REG(0x00, 0x19) /* read/write */
#define REG_ENA_VP_2 REG(0x00, 0x1a) /* read/write */
@@ -110,6 +112,10 @@ struct tda998x_priv {
#define REG_VIP_CNTRL_5 REG(0x00, 0x25) /* write */
# define VIP_CNTRL_5_CKCASE (1 << 0)
# define VIP_CNTRL_5_SP_CNT(x) (((x) & 3) << 1)
+#define REG_MUX_AP REG(0x00, 0x26) /* read/write */
+# define MUX_AP_SELECT_I2S 0x64
+# define MUX_AP_SELECT_SPDIF 0x40
+#define REG_MUX_VP_VIP_OUT REG(0x00, 0x27) /* read/write */
#define REG_MAT_CONTRL REG(0x00, 0x80) /* write */
# define MAT_CONTRL_MAT_SC(x) (((x) & 3) << 0)
# define MAT_CONTRL_MAT_BP (1 << 2)
@@ -130,8 +136,12 @@ struct tda998x_priv {
#define REG_VS_LINE_END_1_LSB REG(0x00, 0xae) /* write */
#define REG_VS_PIX_END_1_MSB REG(0x00, 0xaf) /* write */
#define REG_VS_PIX_END_1_LSB REG(0x00, 0xb0) /* write */
+#define REG_VS_LINE_STRT_2_MSB REG(0x00, 0xb1) /* write */
+#define REG_VS_LINE_STRT_2_LSB REG(0x00, 0xb2) /* write */
#define REG_VS_PIX_STRT_2_MSB REG(0x00, 0xb3) /* write */
#define REG_VS_PIX_STRT_2_LSB REG(0x00, 0xb4) /* write */
+#define REG_VS_LINE_END_2_MSB REG(0x00, 0xb5) /* write */
+#define REG_VS_LINE_END_2_LSB REG(0x00, 0xb6) /* write */
#define REG_VS_PIX_END_2_MSB REG(0x00, 0xb7) /* write */
#define REG_VS_PIX_END_2_LSB REG(0x00, 0xb8) /* write */
#define REG_HS_PIX_START_MSB REG(0x00, 0xb9) /* write */
@@ -142,21 +152,29 @@ struct tda998x_priv {
#define REG_VWIN_START_1_LSB REG(0x00, 0xbe) /* write */
#define REG_VWIN_END_1_MSB REG(0x00, 0xbf) /* write */
#define REG_VWIN_END_1_LSB REG(0x00, 0xc0) /* write */
+#define REG_VWIN_START_2_MSB REG(0x00, 0xc1) /* write */
+#define REG_VWIN_START_2_LSB REG(0x00, 0xc2) /* write */
+#define REG_VWIN_END_2_MSB REG(0x00, 0xc3) /* write */
+#define REG_VWIN_END_2_LSB REG(0x00, 0xc4) /* write */
#define REG_DE_START_MSB REG(0x00, 0xc5) /* write */
#define REG_DE_START_LSB REG(0x00, 0xc6) /* write */
#define REG_DE_STOP_MSB REG(0x00, 0xc7) /* write */
#define REG_DE_STOP_LSB REG(0x00, 0xc8) /* write */
#define REG_TBG_CNTRL_0 REG(0x00, 0xca) /* write */
+# define TBG_CNTRL_0_TOP_TGL (1 << 0)
+# define TBG_CNTRL_0_TOP_SEL (1 << 1)
+# define TBG_CNTRL_0_DE_EXT (1 << 2)
+# define TBG_CNTRL_0_TOP_EXT (1 << 3)
# define TBG_CNTRL_0_FRAME_DIS (1 << 5)
# define TBG_CNTRL_0_SYNC_MTHD (1 << 6)
# define TBG_CNTRL_0_SYNC_ONCE (1 << 7)
#define REG_TBG_CNTRL_1 REG(0x00, 0xcb) /* write */
-# define TBG_CNTRL_1_VH_TGL_0 (1 << 0)
-# define TBG_CNTRL_1_VH_TGL_1 (1 << 1)
-# define TBG_CNTRL_1_VH_TGL_2 (1 << 2)
-# define TBG_CNTRL_1_VHX_EXT_DE (1 << 3)
-# define TBG_CNTRL_1_VHX_EXT_HS (1 << 4)
-# define TBG_CNTRL_1_VHX_EXT_VS (1 << 5)
+# define TBG_CNTRL_1_H_TGL (1 << 0)
+# define TBG_CNTRL_1_V_TGL (1 << 1)
+# define TBG_CNTRL_1_TGL_EN (1 << 2)
+# define TBG_CNTRL_1_X_EXT (1 << 3)
+# define TBG_CNTRL_1_H_EXT (1 << 4)
+# define TBG_CNTRL_1_V_EXT (1 << 5)
# define TBG_CNTRL_1_DWIN_DIS (1 << 6)
#define REG_ENABLE_SPACE REG(0x00, 0xd6) /* write */
#define REG_HVF_CNTRL_0 REG(0x00, 0xe4) /* write */
@@ -171,7 +189,14 @@ struct tda998x_priv {
# define HVF_CNTRL_1_PAD(x) (((x) & 3) << 4)
# define HVF_CNTRL_1_SEMI_PLANAR (1 << 6)
#define REG_RPT_CNTRL REG(0x00, 0xf0) /* write */
-
+#define REG_I2S_FORMAT REG(0x00, 0xfc) /* read/write */
+# define I2S_FORMAT(x) (((x) & 3) << 0)
+#define REG_AIP_CLKSEL REG(0x00, 0xfd) /* write */
+# define AIP_CLKSEL_AIP_SPDIF (0 << 3)
+# define AIP_CLKSEL_AIP_I2S (1 << 3)
+# define AIP_CLKSEL_FS_ACLK (0 << 0)
+# define AIP_CLKSEL_FS_MCLK (1 << 0)
+# define AIP_CLKSEL_FS_FS64SPDIF (2 << 0)
/* Page 02h: PLL settings */
#define REG_PLL_SERIAL_1 REG(0x02, 0x00) /* read/write */
@@ -179,7 +204,7 @@ struct tda998x_priv {
# define PLL_SERIAL_1_SRL_IZ(x) (((x) & 3) << 1)
# define PLL_SERIAL_1_SRL_MAN_IZ (1 << 6)
#define REG_PLL_SERIAL_2 REG(0x02, 0x01) /* read/write */
-# define PLL_SERIAL_2_SRL_NOSC(x) (((x) & 3) << 0)
+# define PLL_SERIAL_2_SRL_NOSC(x) ((x) << 0)
# define PLL_SERIAL_2_SRL_PR(x) (((x) & 0xf) << 4)
#define REG_PLL_SERIAL_3 REG(0x02, 0x02) /* read/write */
# define PLL_SERIAL_3_SRL_CCIR (1 << 0)
@@ -194,6 +219,12 @@ struct tda998x_priv {
#define REG_PLL_SCGR1 REG(0x02, 0x09) /* read/write */
#define REG_PLL_SCGR2 REG(0x02, 0x0a) /* read/write */
#define REG_AUDIO_DIV REG(0x02, 0x0e) /* read/write */
+# define AUDIO_DIV_SERCLK_1 0
+# define AUDIO_DIV_SERCLK_2 1
+# define AUDIO_DIV_SERCLK_4 2
+# define AUDIO_DIV_SERCLK_8 3
+# define AUDIO_DIV_SERCLK_16 4
+# define AUDIO_DIV_SERCLK_32 5
#define REG_SEL_CLK REG(0x02, 0x11) /* read/write */
# define SEL_CLK_SEL_CLK1 (1 << 0)
# define SEL_CLK_SEL_VRF_CLK(x) (((x) & 3) << 1)
@@ -212,6 +243,11 @@ struct tda998x_priv {
/* Page 10h: information frames and packets */
+#define REG_IF1_HB0 REG(0x10, 0x20) /* read/write */
+#define REG_IF2_HB0 REG(0x10, 0x40) /* read/write */
+#define REG_IF3_HB0 REG(0x10, 0x60) /* read/write */
+#define REG_IF4_HB0 REG(0x10, 0x80) /* read/write */
+#define REG_IF5_HB0 REG(0x10, 0xa0) /* read/write */
/* Page 11h: audio settings and content info packets */
@@ -221,14 +257,39 @@ struct tda998x_priv {
# define AIP_CNTRL_0_LAYOUT (1 << 2)
# define AIP_CNTRL_0_ACR_MAN (1 << 5)
# define AIP_CNTRL_0_RST_CTS (1 << 6)
+#define REG_CA_I2S REG(0x11, 0x01) /* read/write */
+# define CA_I2S_CA_I2S(x) (((x) & 31) << 0)
+# define CA_I2S_HBR_CHSTAT (1 << 6)
+#define REG_LATENCY_RD REG(0x11, 0x04) /* read/write */
+#define REG_ACR_CTS_0 REG(0x11, 0x05) /* read/write */
+#define REG_ACR_CTS_1 REG(0x11, 0x06) /* read/write */
+#define REG_ACR_CTS_2 REG(0x11, 0x07) /* read/write */
+#define REG_ACR_N_0 REG(0x11, 0x08) /* read/write */
+#define REG_ACR_N_1 REG(0x11, 0x09) /* read/write */
+#define REG_ACR_N_2 REG(0x11, 0x0a) /* read/write */
+#define REG_CTS_N REG(0x11, 0x0c) /* read/write */
+# define CTS_N_K(x) (((x) & 7) << 0)
+# define CTS_N_M(x) (((x) & 3) << 4)
#define REG_ENC_CNTRL REG(0x11, 0x0d) /* read/write */
# define ENC_CNTRL_RST_ENC (1 << 0)
# define ENC_CNTRL_RST_SEL (1 << 1)
# define ENC_CNTRL_CTL_CODE(x) (((x) & 3) << 2)
+#define REG_DIP_FLAGS REG(0x11, 0x0e) /* read/write */
+# define DIP_FLAGS_ACR (1 << 0)
+# define DIP_FLAGS_GC (1 << 1)
+#define REG_DIP_IF_FLAGS REG(0x11, 0x0f) /* read/write */
+# define DIP_IF_FLAGS_IF1 (1 << 1)
+# define DIP_IF_FLAGS_IF2 (1 << 2)
+# define DIP_IF_FLAGS_IF3 (1 << 3)
+# define DIP_IF_FLAGS_IF4 (1 << 4)
+# define DIP_IF_FLAGS_IF5 (1 << 5)
+#define REG_CH_STAT_B(x) REG(0x11, 0x14 + (x)) /* read/write */
/* Page 12h: HDCP and OTP */
#define REG_TX3 REG(0x12, 0x9a) /* read/write */
+#define REG_TX4 REG(0x12, 0x9b) /* read/write */
+# define TX4_PD_RAM (1 << 1)
#define REG_TX33 REG(0x12, 0xb8) /* read/write */
# define TX33_HDMI (1 << 1)
@@ -239,11 +300,16 @@ struct tda998x_priv {
/* CEC registers: (not paged)
*/
+#define REG_CEC_INTSTATUS 0xee /* read */
+# define CEC_INTSTATUS_CEC (1 << 0)
+# define CEC_INTSTATUS_HDMI (1 << 1)
#define REG_CEC_FRO_IM_CLK_CTRL 0xfb /* read/write */
# define CEC_FRO_IM_CLK_CTRL_GHOST_DIS (1 << 7)
# define CEC_FRO_IM_CLK_CTRL_ENA_OTP (1 << 6)
# define CEC_FRO_IM_CLK_CTRL_IMCLK_SEL (1 << 1)
# define CEC_FRO_IM_CLK_CTRL_FRO_DIV (1 << 0)
+#define REG_CEC_RXSHPDINTENA 0xfc /* read/write */
+#define REG_CEC_RXSHPDINT 0xfd /* read */
#define REG_CEC_RXSHPDLEV 0xfe /* read */
# define CEC_RXSHPDLEV_RXSENS (1 << 0)
# define CEC_RXSHPDLEV_HPD (1 << 1)
@@ -263,21 +329,21 @@ struct tda998x_priv {
#define TDA19988 0x0301
static void
-cec_write(struct drm_encoder *encoder, uint16_t addr, uint8_t val)
+cec_write(struct tda998x_priv *priv, uint16_t addr, uint8_t val)
{
- struct i2c_client *client = to_tda998x_priv(encoder)->cec;
+ struct i2c_client *client = priv->cec;
uint8_t buf[] = {addr, val};
int ret;
- ret = i2c_master_send(client, buf, ARRAY_SIZE(buf));
+ ret = i2c_master_send(client, buf, sizeof(buf));
if (ret < 0)
dev_err(&client->dev, "Error %d writing to cec:0x%x\n", ret, addr);
}
static uint8_t
-cec_read(struct drm_encoder *encoder, uint8_t addr)
+cec_read(struct tda998x_priv *priv, uint8_t addr)
{
- struct i2c_client *client = to_tda998x_priv(encoder)->cec;
+ struct i2c_client *client = priv->cec;
uint8_t val;
int ret;
@@ -296,32 +362,36 @@ fail:
return 0;
}
-static void
-set_page(struct drm_encoder *encoder, uint16_t reg)
+static int
+set_page(struct tda998x_priv *priv, uint16_t reg)
{
- struct tda998x_priv *priv = to_tda998x_priv(encoder);
-
if (REG2PAGE(reg) != priv->current_page) {
- struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ struct i2c_client *client = priv->hdmi;
uint8_t buf[] = {
REG_CURPAGE, REG2PAGE(reg)
};
int ret = i2c_master_send(client, buf, sizeof(buf));
- if (ret < 0)
- dev_err(&client->dev, "Error %d writing to REG_CURPAGE\n", ret);
+ if (ret < 0) {
+ dev_err(&client->dev, "setpage %04x err %d\n",
+ reg, ret);
+ return ret;
+ }
priv->current_page = REG2PAGE(reg);
}
+ return 0;
}
static int
-reg_read_range(struct drm_encoder *encoder, uint16_t reg, char *buf, int cnt)
+reg_read_range(struct tda998x_priv *priv, uint16_t reg, char *buf, int cnt)
{
- struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ struct i2c_client *client = priv->hdmi;
uint8_t addr = REG2ADDR(reg);
int ret;
- set_page(encoder, reg);
+ ret = set_page(priv, reg);
+ if (ret < 0)
+ return ret;
ret = i2c_master_send(client, &addr, sizeof(addr));
if (ret < 0)
@@ -338,81 +408,359 @@ fail:
return ret;
}
-static uint8_t
-reg_read(struct drm_encoder *encoder, uint16_t reg)
+static void
+reg_write_range(struct tda998x_priv *priv, uint16_t reg, uint8_t *p, int cnt)
+{
+ struct i2c_client *client = priv->hdmi;
+ uint8_t buf[cnt+1];
+ int ret;
+
+ buf[0] = REG2ADDR(reg);
+ memcpy(&buf[1], p, cnt);
+
+ ret = set_page(priv, reg);
+ if (ret < 0)
+ return;
+
+ ret = i2c_master_send(client, buf, cnt + 1);
+ if (ret < 0)
+ dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+}
+
+static int
+reg_read(struct tda998x_priv *priv, uint16_t reg)
{
uint8_t val = 0;
- reg_read_range(encoder, reg, &val, sizeof(val));
+ int ret;
+
+ ret = reg_read_range(priv, reg, &val, sizeof(val));
+ if (ret < 0)
+ return ret;
return val;
}
static void
-reg_write(struct drm_encoder *encoder, uint16_t reg, uint8_t val)
+reg_write(struct tda998x_priv *priv, uint16_t reg, uint8_t val)
{
- struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ struct i2c_client *client = priv->hdmi;
uint8_t buf[] = {REG2ADDR(reg), val};
int ret;
- set_page(encoder, reg);
+ ret = set_page(priv, reg);
+ if (ret < 0)
+ return;
- ret = i2c_master_send(client, buf, ARRAY_SIZE(buf));
+ ret = i2c_master_send(client, buf, sizeof(buf));
if (ret < 0)
dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
}
static void
-reg_write16(struct drm_encoder *encoder, uint16_t reg, uint16_t val)
+reg_write16(struct tda998x_priv *priv, uint16_t reg, uint16_t val)
{
- struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+ struct i2c_client *client = priv->hdmi;
uint8_t buf[] = {REG2ADDR(reg), val >> 8, val};
int ret;
- set_page(encoder, reg);
+ ret = set_page(priv, reg);
+ if (ret < 0)
+ return;
- ret = i2c_master_send(client, buf, ARRAY_SIZE(buf));
+ ret = i2c_master_send(client, buf, sizeof(buf));
if (ret < 0)
dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
}
static void
-reg_set(struct drm_encoder *encoder, uint16_t reg, uint8_t val)
+reg_set(struct tda998x_priv *priv, uint16_t reg, uint8_t val)
{
- reg_write(encoder, reg, reg_read(encoder, reg) | val);
+ int old_val;
+
+ old_val = reg_read(priv, reg);
+ if (old_val >= 0)
+ reg_write(priv, reg, old_val | val);
}
static void
-reg_clear(struct drm_encoder *encoder, uint16_t reg, uint8_t val)
+reg_clear(struct tda998x_priv *priv, uint16_t reg, uint8_t val)
{
- reg_write(encoder, reg, reg_read(encoder, reg) & ~val);
+ int old_val;
+
+ old_val = reg_read(priv, reg);
+ if (old_val >= 0)
+ reg_write(priv, reg, old_val & ~val);
}
static void
-tda998x_reset(struct drm_encoder *encoder)
+tda998x_reset(struct tda998x_priv *priv)
{
/* reset audio and i2c master: */
- reg_set(encoder, REG_SOFTRESET, SOFTRESET_AUDIO | SOFTRESET_I2C_MASTER);
+ reg_write(priv, REG_SOFTRESET, SOFTRESET_AUDIO | SOFTRESET_I2C_MASTER);
msleep(50);
- reg_clear(encoder, REG_SOFTRESET, SOFTRESET_AUDIO | SOFTRESET_I2C_MASTER);
+ reg_write(priv, REG_SOFTRESET, 0);
msleep(50);
/* reset transmitter: */
- reg_set(encoder, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR);
- reg_clear(encoder, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR);
+ reg_set(priv, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR);
+ reg_clear(priv, REG_MAIN_CNTRL0, MAIN_CNTRL0_SR);
/* PLL registers common configuration */
- reg_write(encoder, REG_PLL_SERIAL_1, 0x00);
- reg_write(encoder, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(1));
- reg_write(encoder, REG_PLL_SERIAL_3, 0x00);
- reg_write(encoder, REG_SERIALIZER, 0x00);
- reg_write(encoder, REG_BUFFER_OUT, 0x00);
- reg_write(encoder, REG_PLL_SCG1, 0x00);
- reg_write(encoder, REG_AUDIO_DIV, 0x03);
- reg_write(encoder, REG_SEL_CLK, SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK);
- reg_write(encoder, REG_PLL_SCGN1, 0xfa);
- reg_write(encoder, REG_PLL_SCGN2, 0x00);
- reg_write(encoder, REG_PLL_SCGR1, 0x5b);
- reg_write(encoder, REG_PLL_SCGR2, 0x00);
- reg_write(encoder, REG_PLL_SCG2, 0x10);
+ reg_write(priv, REG_PLL_SERIAL_1, 0x00);
+ reg_write(priv, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(1));
+ reg_write(priv, REG_PLL_SERIAL_3, 0x00);
+ reg_write(priv, REG_SERIALIZER, 0x00);
+ reg_write(priv, REG_BUFFER_OUT, 0x00);
+ reg_write(priv, REG_PLL_SCG1, 0x00);
+ reg_write(priv, REG_AUDIO_DIV, AUDIO_DIV_SERCLK_8);
+ reg_write(priv, REG_SEL_CLK, SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK);
+ reg_write(priv, REG_PLL_SCGN1, 0xfa);
+ reg_write(priv, REG_PLL_SCGN2, 0x00);
+ reg_write(priv, REG_PLL_SCGR1, 0x5b);
+ reg_write(priv, REG_PLL_SCGR2, 0x00);
+ reg_write(priv, REG_PLL_SCG2, 0x10);
+
+ /* Write the default value to the MUX register */
+ reg_write(priv, REG_MUX_VP_VIP_OUT, 0x24);
+}
+
+/*
+ * only 2 interrupts may occur: screen plug/unplug and EDID read
+ */
+static irqreturn_t tda998x_irq_thread(int irq, void *data)
+{
+ struct tda998x_priv *priv = data;
+ u8 sta, cec, lvl, flag0, flag1, flag2;
+
+ if (!priv)
+ return IRQ_HANDLED;
+ sta = cec_read(priv, REG_CEC_INTSTATUS);
+ cec = cec_read(priv, REG_CEC_RXSHPDINT);
+ lvl = cec_read(priv, REG_CEC_RXSHPDLEV);
+ flag0 = reg_read(priv, REG_INT_FLAGS_0);
+ flag1 = reg_read(priv, REG_INT_FLAGS_1);
+ flag2 = reg_read(priv, REG_INT_FLAGS_2);
+ DRM_DEBUG_DRIVER(
+ "tda irq sta %02x cec %02x lvl %02x f0 %02x f1 %02x f2 %02x\n",
+ sta, cec, lvl, flag0, flag1, flag2);
+ if ((flag2 & INT_FLAGS_2_EDID_BLK_RD) && priv->wq_edid_wait) {
+ priv->wq_edid_wait = 0;
+ wake_up(&priv->wq_edid);
+ } else if (cec != 0) { /* HPD change */
+ if (priv->encoder && priv->encoder->dev)
+ drm_helper_hpd_irq_event(priv->encoder->dev);
+ }
+ return IRQ_HANDLED;
+}
+
+static uint8_t tda998x_cksum(uint8_t *buf, size_t bytes)
+{
+ uint8_t sum = 0;
+
+ while (bytes--)
+ sum += *buf++;
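+ /* two's complement: adding this checksum makes the total byte sum 0 mod 256 */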
+ return (255 - sum) + 1;
+}
+
+#define HB(x) (x)
+#define PB(x) (HB(2) + 1 + (x))
+
+static void
+tda998x_write_if(struct tda998x_priv *priv, uint8_t bit, uint16_t addr,
+ uint8_t *buf, size_t size)
+{
+ buf[PB(0)] = tda998x_cksum(buf, size);
+
+ reg_clear(priv, REG_DIP_IF_FLAGS, bit);
+ reg_write_range(priv, addr, buf, size);
+ reg_set(priv, REG_DIP_IF_FLAGS, bit);
+}
+
+static void
+tda998x_write_aif(struct tda998x_priv *priv, struct tda998x_encoder_params *p)
+{
+ u8 buf[PB(HDMI_AUDIO_INFOFRAME_SIZE) + 1];
+
+ memset(buf, 0, sizeof(buf));
+ buf[HB(0)] = HDMI_INFOFRAME_TYPE_AUDIO;
+ buf[HB(1)] = 0x01;
+ buf[HB(2)] = HDMI_AUDIO_INFOFRAME_SIZE;
+ buf[PB(1)] = p->audio_frame[1] & 0x07; /* CC */
+ buf[PB(2)] = p->audio_frame[2] & 0x1c; /* SF */
+ buf[PB(4)] = p->audio_frame[4];
+ buf[PB(5)] = p->audio_frame[5] & 0xf8; /* DM_INH + LSV */
+
+ tda998x_write_if(priv, DIP_IF_FLAGS_IF4, REG_IF4_HB0, buf,
+ sizeof(buf));
+}
+
+static void
+tda998x_write_avi(struct tda998x_priv *priv, struct drm_display_mode *mode)
+{
+ u8 buf[PB(HDMI_AVI_INFOFRAME_SIZE) + 1];
+
+ memset(buf, 0, sizeof(buf));
+ buf[HB(0)] = HDMI_INFOFRAME_TYPE_AVI;
+ buf[HB(1)] = 0x02;
+ buf[HB(2)] = HDMI_AVI_INFOFRAME_SIZE;
+ buf[PB(1)] = HDMI_SCAN_MODE_UNDERSCAN;
+ buf[PB(2)] = HDMI_ACTIVE_ASPECT_PICTURE;
+ buf[PB(3)] = HDMI_QUANTIZATION_RANGE_FULL << 2;
+ buf[PB(4)] = drm_match_cea_mode(mode);
+
+ tda998x_write_if(priv, DIP_IF_FLAGS_IF2, REG_IF2_HB0, buf,
+ sizeof(buf));
+}
+
+static void tda998x_audio_mute(struct tda998x_priv *priv, bool on)
+{
+ if (on) {
+ reg_set(priv, REG_SOFTRESET, SOFTRESET_AUDIO);
+ reg_clear(priv, REG_SOFTRESET, SOFTRESET_AUDIO);
+ reg_set(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
+ } else {
+ reg_clear(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
+ }
+}
+
+static void
+tda998x_configure_audio(struct tda998x_priv *priv,
+ struct drm_display_mode *mode, struct tda998x_encoder_params *p)
+{
+ uint8_t buf[6], clksel_aip, clksel_fs, cts_n, adiv, aclk;
+ uint32_t n, cts;
+
+ /* Enable audio ports */
+ reg_write(priv, REG_ENA_AP, p->audio_cfg);
+
+ /* Set audio input source */
+ switch (p->audio_format) {
+ case AFMT_SPDIF:
+ reg_write(priv, REG_MUX_AP, MUX_AP_SELECT_SPDIF);
+ clksel_aip = AIP_CLKSEL_AIP_SPDIF;
+ clksel_fs = AIP_CLKSEL_FS_FS64SPDIF;
+ cts_n = CTS_N_M(3) | CTS_N_K(3);
+ aclk = 0; /* no clock */
+ break;
+
+ case AFMT_I2S:
+ reg_write(priv, REG_MUX_AP, MUX_AP_SELECT_I2S);
+ clksel_aip = AIP_CLKSEL_AIP_I2S;
+ clksel_fs = AIP_CLKSEL_FS_ACLK;
+ /* with I2S input, the CTS_N predivider depends on
+ * the sample width */
+ switch (priv->audio_sample_format) {
+ case SNDRV_PCM_FORMAT_S16_LE:
+ cts_n = CTS_N_M(3) | CTS_N_K(1);
+ break;
+ case SNDRV_PCM_FORMAT_S24_LE:
+ cts_n = CTS_N_M(3) | CTS_N_K(2);
+ break;
+ default:
+ case SNDRV_PCM_FORMAT_S32_LE:
+ cts_n = CTS_N_M(3) | CTS_N_K(3);
+ break;
+ }
+ aclk = 1; /* clock enable */
+ break;
+
+ default:
+ BUG();
+ return;
+ }
+
+ reg_write(priv, REG_AIP_CLKSEL, clksel_aip);
+ reg_clear(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_LAYOUT |
+ AIP_CNTRL_0_ACR_MAN); /* auto CTS */
+ reg_write(priv, REG_CTS_N, cts_n);
+ reg_write(priv, REG_ENA_ACLK, aclk);
+
+ /*
+ * Audio input somehow depends on HDMI line rate which is
+ * related to pixclk. Testing showed that modes with pixclk
+ * >100MHz need a larger divider while <40MHz need the default.
+ * There is no detailed info in the datasheet, so we just
+ * assume >100MHz requires a larger divider.
+ */
+ adiv = AUDIO_DIV_SERCLK_8;
+ if (mode->clock > 100000)
+ adiv++; /* AUDIO_DIV_SERCLK_16 */
+
+ /* S/PDIF asks for a larger divider */
+ if (p->audio_format == AFMT_SPDIF)
+ adiv++; /* AUDIO_DIV_SERCLK_16 or _32 */
+
+ reg_write(priv, REG_AUDIO_DIV, adiv);
+
+ /*
+ * This is the approximate value of N, which happens to be
+ * the recommended values for non-coherent clocks.
+ */
+ n = 128 * p->audio_sample_rate / 1000;
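+ /* e.g. 48 kHz audio gives n = 128 * 48000 / 1000 = 6144 */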
+
+ /* Write the CTS and N values */
+ if ((n > 0) && (mode->clock > 0)) {
+ /*
+ * The average CTS value is calculated as:
+ *
+ * fTMDS * n / (128 * fs)
+ *
+ * which equates to:
+ *
+ * fTMDS / 1000
+ *
+ * for non-coherent clocks.
+ */
+ cts = mode->clock;
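+ /* e.g. a 74.25 MHz pixel clock (mode->clock == 74250) gives CTS = 74250 */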
+ } else {
+ cts = 82500;
+ }
+ buf[0] = cts;
+ buf[1] = cts >> 8;
+ buf[2] = cts >> 16;
+ buf[3] = n;
+ buf[4] = n >> 8;
+ buf[5] = n >> 16;
+ reg_write_range(priv, REG_ACR_CTS_0, buf, 6);
+
+ /* Set CTS clock reference */
+ reg_write(priv, REG_AIP_CLKSEL, clksel_aip | clksel_fs);
+
+ /* Reset CTS generator */
+ reg_set(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS);
+ reg_clear(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS);
+
+ /* Write the channel status */
+ buf[0] = IEC958_AES0_CON_NOT_COPYRIGHT;
+ buf[1] = 0x00;
+ buf[2] = IEC958_AES3_CON_FS_NOTID;
+ buf[3] = IEC958_AES4_CON_ORIGFS_NOTID |
+ IEC958_AES4_CON_MAX_WORDLEN_24;
+ reg_write_range(priv, REG_CH_STAT_B(0), buf, 4);
+
+ tda998x_audio_mute(priv, true);
+ msleep(20);
+ tda998x_audio_mute(priv, false);
+
+ /* Write the audio information packet */
+ tda998x_write_aif(priv, p);
+}
+
+/* tda998x codec interface */
+void tda998x_audio_start(struct tda998x_priv *priv,
+ int full)
+{
+ struct tda998x_encoder_params *p = &priv->params;
+
+ if (!full) {
+ reg_write(priv, REG_ENA_AP, p->audio_cfg);
+ return;
+ }
+ tda998x_configure_audio(priv, &priv->encoder->crtc->hwmode, p);
+}
+
+void tda998x_audio_stop(struct tda998x_priv *priv)
+{
+ reg_write(priv, REG_ENA_AP, 0);
}
/* DRM encoder functions */
@@ -420,6 +768,23 @@ tda998x_reset(struct drm_encoder *encoder)
static void
tda998x_encoder_set_config(struct drm_encoder *encoder, void *params)
{
+ struct tda998x_priv *priv = to_tda998x_priv(encoder);
+ struct tda998x_encoder_params *p = params;
+
+ priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(p->swap_a) |
+ (p->mirr_a ? VIP_CNTRL_0_MIRR_A : 0) |
+ VIP_CNTRL_0_SWAP_B(p->swap_b) |
+ (p->mirr_b ? VIP_CNTRL_0_MIRR_B : 0);
+ priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(p->swap_c) |
+ (p->mirr_c ? VIP_CNTRL_1_MIRR_C : 0) |
+ VIP_CNTRL_1_SWAP_D(p->swap_d) |
+ (p->mirr_d ? VIP_CNTRL_1_MIRR_D : 0);
+ priv->vip_cntrl_2 = VIP_CNTRL_2_SWAP_E(p->swap_e) |
+ (p->mirr_e ? VIP_CNTRL_2_MIRR_E : 0) |
+ VIP_CNTRL_2_SWAP_F(p->swap_f) |
+ (p->mirr_f ? VIP_CNTRL_2_MIRR_F : 0);
+
+ priv->params = *p;
}
static void
@@ -436,25 +801,20 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode)
switch (mode) {
case DRM_MODE_DPMS_ON:
- /* enable audio and video ports */
- reg_write(encoder, REG_ENA_AP, 0xff);
- reg_write(encoder, REG_ENA_VP_0, 0xff);
- reg_write(encoder, REG_ENA_VP_1, 0xff);
- reg_write(encoder, REG_ENA_VP_2, 0xff);
+ /* enable video ports, audio will be enabled later */
+ reg_write(priv, REG_ENA_VP_0, 0xff);
+ reg_write(priv, REG_ENA_VP_1, 0xff);
+ reg_write(priv, REG_ENA_VP_2, 0xff);
/* set muxing after enabling ports: */
- reg_write(encoder, REG_VIP_CNTRL_0,
- VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3));
- reg_write(encoder, REG_VIP_CNTRL_1,
- VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1));
- reg_write(encoder, REG_VIP_CNTRL_2,
- VIP_CNTRL_2_SWAP_E(4) | VIP_CNTRL_2_SWAP_F(5));
+ reg_write(priv, REG_VIP_CNTRL_0, priv->vip_cntrl_0);
+ reg_write(priv, REG_VIP_CNTRL_1, priv->vip_cntrl_1);
+ reg_write(priv, REG_VIP_CNTRL_2, priv->vip_cntrl_2);
break;
case DRM_MODE_DPMS_OFF:
- /* disable audio and video ports */
- reg_write(encoder, REG_ENA_AP, 0x00);
- reg_write(encoder, REG_ENA_VP_0, 0x00);
- reg_write(encoder, REG_ENA_VP_1, 0x00);
- reg_write(encoder, REG_ENA_VP_2, 0x00);
+ /* disable video ports */
+ reg_write(priv, REG_ENA_VP_0, 0x00);
+ reg_write(priv, REG_ENA_VP_1, 0x00);
+ reg_write(priv, REG_ENA_VP_2, 0x00);
break;
}
@@ -494,132 +854,199 @@ tda998x_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *adjusted_mode)
{
struct tda998x_priv *priv = to_tda998x_priv(encoder);
- uint16_t hs_start, hs_end, line_start, line_end;
- uint16_t vwin_start, vwin_end, de_start, de_end;
- uint16_t ref_pix, ref_line, pix_start2;
+ uint16_t ref_pix, ref_line, n_pix, n_line;
+ uint16_t hs_pix_s, hs_pix_e;
+ uint16_t vs1_pix_s, vs1_pix_e, vs1_line_s, vs1_line_e;
+ uint16_t vs2_pix_s, vs2_pix_e, vs2_line_s, vs2_line_e;
+ uint16_t vwin1_line_s, vwin1_line_e;
+ uint16_t vwin2_line_s, vwin2_line_e;
+ uint16_t de_pix_s, de_pix_e;
uint8_t reg, div, rep;
- hs_start = mode->hsync_start - mode->hdisplay;
- hs_end = mode->hsync_end - mode->hdisplay;
- line_start = 1;
- line_end = 1 + mode->vsync_end - mode->vsync_start;
- vwin_start = mode->vtotal - mode->vsync_start;
- vwin_end = vwin_start + mode->vdisplay;
- de_start = mode->htotal - mode->hdisplay;
- de_end = mode->htotal;
-
- pix_start2 = 0;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- pix_start2 = (mode->htotal / 2) + hs_start;
-
- /* TODO how is this value calculated? It is 2 for all common
- * formats in the tables in out of tree nxp driver (assuming
- * I've properly deciphered their byzantine table system)
+ /*
+ * Internally TDA998x is using ITU-R BT.656 style sync but
+ * we get VESA style sync. TDA998x is using a reference pixel
+ * relative to ITU to sync to the input frame and for output
+ * sync generation. Currently, we are using reference detection
+ * from HS/VS, i.e. REFPIX/REFLINE denote frame start sync point
+ * which is position of rising VS with coincident rising HS.
+ *
+ * Now there are some issues to take care of:
+ * - HDMI data islands require sync-before-active
+ * - TDA998x register values must be > 0 to be enabled
+ * - REFLINE needs an additional offset of +1
+ * - REFPIX needs an additional offset of +1 for UYVY and +3 for RGB
+ *
+ * So we add +1 to all horizontal and vertical register values,
+ * plus an additional +3 for REFPIX as we are using RGB input only.
*/
- ref_line = 2;
-
- /* this might changes for other color formats from the CRTC: */
- ref_pix = 3 + hs_start;
+ n_pix = mode->htotal;
+ n_line = mode->vtotal;
+
+ hs_pix_e = mode->hsync_end - mode->hdisplay;
+ hs_pix_s = mode->hsync_start - mode->hdisplay;
+ de_pix_e = mode->htotal;
+ de_pix_s = mode->htotal - mode->hdisplay;
+ ref_pix = 3 + hs_pix_s;
+
+ /*
+ * Attached LCD controllers may generate broken sync. Allow
+ * those to adjust the position of the rising VS edge by adding
+ * HSKEW to ref_pix.
+ */
+ if (adjusted_mode->flags & DRM_MODE_FLAG_HSKEW)
+ ref_pix += adjusted_mode->hskew;
+
+ if ((mode->flags & DRM_MODE_FLAG_INTERLACE) == 0) {
+ ref_line = 1 + mode->vsync_start - mode->vdisplay;
+ vwin1_line_s = mode->vtotal - mode->vdisplay - 1;
+ vwin1_line_e = vwin1_line_s + mode->vdisplay;
+ vs1_pix_s = vs1_pix_e = hs_pix_s;
+ vs1_line_s = mode->vsync_start - mode->vdisplay;
+ vs1_line_e = vs1_line_s +
+ mode->vsync_end - mode->vsync_start;
+ vwin2_line_s = vwin2_line_e = 0;
+ vs2_pix_s = vs2_pix_e = 0;
+ vs2_line_s = vs2_line_e = 0;
+ } else {
+ ref_line = 1 + (mode->vsync_start - mode->vdisplay)/2;
+ vwin1_line_s = (mode->vtotal - mode->vdisplay)/2;
+ vwin1_line_e = vwin1_line_s + mode->vdisplay/2;
+ vs1_pix_s = vs1_pix_e = hs_pix_s;
+ vs1_line_s = (mode->vsync_start - mode->vdisplay)/2;
+ vs1_line_e = vs1_line_s +
+ (mode->vsync_end - mode->vsync_start)/2;
+ vwin2_line_s = vwin1_line_s + mode->vtotal/2;
+ vwin2_line_e = vwin2_line_s + mode->vdisplay/2;
+ vs2_pix_s = vs2_pix_e = hs_pix_s + mode->htotal/2;
+ vs2_line_s = vs1_line_s + mode->vtotal/2;
+ vs2_line_e = vs2_line_s +
+ (mode->vsync_end - mode->vsync_start)/2;
+ }
div = 148500 / mode->clock;
-
- DBG("clock=%d, div=%u", mode->clock, div);
- DBG("hs_start=%u, hs_end=%u, line_start=%u, line_end=%u",
- hs_start, hs_end, line_start, line_end);
- DBG("vwin_start=%u, vwin_end=%u, de_start=%u, de_end=%u",
- vwin_start, vwin_end, de_start, de_end);
- DBG("ref_line=%u, ref_pix=%u, pix_start2=%u",
- ref_line, ref_pix, pix_start2);
+ if (div != 0) {
+ div--;
+ if (div > 3)
+ div = 3;
+ }
/* mute the audio FIFO: */
- reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
+ reg_set(priv, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
/* set HDMI HDCP mode off: */
- reg_set(encoder, REG_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS);
- reg_clear(encoder, REG_TX33, TX33_HDMI);
+ reg_write(priv, REG_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS);
+ reg_clear(priv, REG_TX33, TX33_HDMI);
+ reg_write(priv, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(0));
- reg_write(encoder, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(0));
/* no pre-filter or interpolator: */
- reg_write(encoder, REG_HVF_CNTRL_0, HVF_CNTRL_0_PREFIL(0) |
+ reg_write(priv, REG_HVF_CNTRL_0, HVF_CNTRL_0_PREFIL(0) |
HVF_CNTRL_0_INTPOL(0));
- reg_write(encoder, REG_VIP_CNTRL_5, VIP_CNTRL_5_SP_CNT(0));
- reg_write(encoder, REG_VIP_CNTRL_4, VIP_CNTRL_4_BLANKIT(0) |
+ reg_write(priv, REG_VIP_CNTRL_5, VIP_CNTRL_5_SP_CNT(0));
+ reg_write(priv, REG_VIP_CNTRL_4, VIP_CNTRL_4_BLANKIT(0) |
VIP_CNTRL_4_BLC(0));
- reg_clear(encoder, REG_PLL_SERIAL_3, PLL_SERIAL_3_SRL_CCIR);
- reg_clear(encoder, REG_PLL_SERIAL_1, PLL_SERIAL_1_SRL_MAN_IZ);
- reg_clear(encoder, REG_PLL_SERIAL_3, PLL_SERIAL_3_SRL_DE);
- reg_write(encoder, REG_SERIALIZER, 0);
- reg_write(encoder, REG_HVF_CNTRL_1, HVF_CNTRL_1_VQR(0));
+ reg_clear(priv, REG_PLL_SERIAL_1, PLL_SERIAL_1_SRL_MAN_IZ);
+ reg_clear(priv, REG_PLL_SERIAL_3, PLL_SERIAL_3_SRL_CCIR |
+ PLL_SERIAL_3_SRL_DE);
+ reg_write(priv, REG_SERIALIZER, 0);
+ reg_write(priv, REG_HVF_CNTRL_1, HVF_CNTRL_1_VQR(0));
/* TODO enable pixel repeat for pixel rates less than 25Msamp/s */
rep = 0;
- reg_write(encoder, REG_RPT_CNTRL, 0);
- reg_write(encoder, REG_SEL_CLK, SEL_CLK_SEL_VRF_CLK(0) |
+ reg_write(priv, REG_RPT_CNTRL, 0);
+ reg_write(priv, REG_SEL_CLK, SEL_CLK_SEL_VRF_CLK(0) |
SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK);
- reg_write(encoder, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) |
+ reg_write(priv, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) |
PLL_SERIAL_2_SRL_PR(rep));
- reg_write16(encoder, REG_VS_PIX_STRT_2_MSB, pix_start2);
- reg_write16(encoder, REG_VS_PIX_END_2_MSB, pix_start2);
-
/* set color matrix bypass flag: */
- reg_set(encoder, REG_MAT_CONTRL, MAT_CONTRL_MAT_BP);
+ reg_write(priv, REG_MAT_CONTRL, MAT_CONTRL_MAT_BP |
+ MAT_CONTRL_MAT_SC(1));
/* set BIAS tmds value: */
- reg_write(encoder, REG_ANA_GENERAL, 0x09);
-
- reg_clear(encoder, REG_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_MTHD);
+ reg_write(priv, REG_ANA_GENERAL, 0x09);
- reg_write(encoder, REG_VIP_CNTRL_3, 0);
- reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_SYNC_HS);
- if (mode->flags & DRM_MODE_FLAG_NVSYNC)
- reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_V_TGL);
+ /*
+ * Sync on rising HSYNC/VSYNC
+ */
+ reg = VIP_CNTRL_3_SYNC_HS;
+ /*
+ * TDA19988 requires high-active sync at input stage,
+ * so invert low-active sync provided by master encoder here
+ */
if (mode->flags & DRM_MODE_FLAG_NHSYNC)
- reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_H_TGL);
-
- reg_write(encoder, REG_VIDFORMAT, 0x00);
- reg_write16(encoder, REG_NPIX_MSB, mode->hdisplay - 1);
- reg_write16(encoder, REG_NLINE_MSB, mode->vdisplay - 1);
- reg_write16(encoder, REG_VS_LINE_STRT_1_MSB, line_start);
- reg_write16(encoder, REG_VS_LINE_END_1_MSB, line_end);
- reg_write16(encoder, REG_VS_PIX_STRT_1_MSB, hs_start);
- reg_write16(encoder, REG_VS_PIX_END_1_MSB, hs_start);
- reg_write16(encoder, REG_HS_PIX_START_MSB, hs_start);
- reg_write16(encoder, REG_HS_PIX_STOP_MSB, hs_end);
- reg_write16(encoder, REG_VWIN_START_1_MSB, vwin_start);
- reg_write16(encoder, REG_VWIN_END_1_MSB, vwin_end);
- reg_write16(encoder, REG_DE_START_MSB, de_start);
- reg_write16(encoder, REG_DE_STOP_MSB, de_end);
+ reg |= VIP_CNTRL_3_H_TGL;
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ reg |= VIP_CNTRL_3_V_TGL;
+ reg_write(priv, REG_VIP_CNTRL_3, reg);
+
+ reg_write(priv, REG_VIDFORMAT, 0x00);
+ reg_write16(priv, REG_REFPIX_MSB, ref_pix);
+ reg_write16(priv, REG_REFLINE_MSB, ref_line);
+ reg_write16(priv, REG_NPIX_MSB, n_pix);
+ reg_write16(priv, REG_NLINE_MSB, n_line);
+ reg_write16(priv, REG_VS_LINE_STRT_1_MSB, vs1_line_s);
+ reg_write16(priv, REG_VS_PIX_STRT_1_MSB, vs1_pix_s);
+ reg_write16(priv, REG_VS_LINE_END_1_MSB, vs1_line_e);
+ reg_write16(priv, REG_VS_PIX_END_1_MSB, vs1_pix_e);
+ reg_write16(priv, REG_VS_LINE_STRT_2_MSB, vs2_line_s);
+ reg_write16(priv, REG_VS_PIX_STRT_2_MSB, vs2_pix_s);
+ reg_write16(priv, REG_VS_LINE_END_2_MSB, vs2_line_e);
+ reg_write16(priv, REG_VS_PIX_END_2_MSB, vs2_pix_e);
+ reg_write16(priv, REG_HS_PIX_START_MSB, hs_pix_s);
+ reg_write16(priv, REG_HS_PIX_STOP_MSB, hs_pix_e);
+ reg_write16(priv, REG_VWIN_START_1_MSB, vwin1_line_s);
+ reg_write16(priv, REG_VWIN_END_1_MSB, vwin1_line_e);
+ reg_write16(priv, REG_VWIN_START_2_MSB, vwin2_line_s);
+ reg_write16(priv, REG_VWIN_END_2_MSB, vwin2_line_e);
+ reg_write16(priv, REG_DE_START_MSB, de_pix_s);
+ reg_write16(priv, REG_DE_STOP_MSB, de_pix_e);
if (priv->rev == TDA19988) {
/* let incoming pixels fill the active space (if any) */
- reg_write(encoder, REG_ENABLE_SPACE, 0x01);
+ reg_write(priv, REG_ENABLE_SPACE, 0x00);
}
- reg_write16(encoder, REG_REFPIX_MSB, ref_pix);
- reg_write16(encoder, REG_REFLINE_MSB, ref_line);
-
- reg = TBG_CNTRL_1_VHX_EXT_DE |
- TBG_CNTRL_1_VHX_EXT_HS |
- TBG_CNTRL_1_VHX_EXT_VS |
- TBG_CNTRL_1_DWIN_DIS | /* HDCP off */
- TBG_CNTRL_1_VH_TGL_2;
- if (mode->flags & (DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC))
- reg |= TBG_CNTRL_1_VH_TGL_0;
- reg_set(encoder, REG_TBG_CNTRL_1, reg);
+ /*
+ * Always generate sync polarity relative to input sync and
+ * revert input stage toggled sync at output stage
+ */
+ reg = TBG_CNTRL_1_DWIN_DIS | TBG_CNTRL_1_TGL_EN;
+ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+ reg |= TBG_CNTRL_1_H_TGL;
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ reg |= TBG_CNTRL_1_V_TGL;
+ reg_write(priv, REG_TBG_CNTRL_1, reg);
/* must be last register set: */
- reg_clear(encoder, REG_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_ONCE);
+ reg_write(priv, REG_TBG_CNTRL_0, 0);
+
+ /* Only setup the info frames if the sink is HDMI */
+ if (priv->is_hdmi_sink) {
+ /* We need to turn HDMI HDCP stuff on to get audio through */
+ reg &= ~TBG_CNTRL_1_DWIN_DIS;
+ reg_write(priv, REG_TBG_CNTRL_1, reg);
+ reg_write(priv, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(1));
+ reg_set(priv, REG_TX33, TX33_HDMI);
+
+ tda998x_write_avi(priv, adjusted_mode);
+
+ if (priv->params.audio_cfg)
+ tda998x_configure_audio(priv, adjusted_mode,
+ &priv->params);
+ }
}
static enum drm_connector_status
tda998x_encoder_detect(struct drm_encoder *encoder,
struct drm_connector *connector)
{
- uint8_t val = cec_read(encoder, REG_CEC_RXSHPDLEV);
+ struct tda998x_priv *priv = to_tda998x_priv(encoder);
+ uint8_t val = cec_read(priv, REG_CEC_RXSHPDLEV);
+
return (val & CEC_RXSHPDLEV_HPD) ? connector_status_connected :
connector_status_disconnected;
}
@@ -627,69 +1054,106 @@ tda998x_encoder_detect(struct drm_encoder *encoder,
static int
read_edid_block(struct drm_encoder *encoder, uint8_t *buf, int blk)
{
+ struct tda998x_priv *priv = to_tda998x_priv(encoder);
uint8_t offset, segptr;
int ret, i;
- /* enable EDID read irq: */
- reg_set(encoder, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
-
offset = (blk & 1) ? 128 : 0;
segptr = blk / 2;
- reg_write(encoder, REG_DDC_ADDR, 0xa0);
- reg_write(encoder, REG_DDC_OFFS, offset);
- reg_write(encoder, REG_DDC_SEGM_ADDR, 0x60);
- reg_write(encoder, REG_DDC_SEGM, segptr);
+ reg_write(priv, REG_DDC_ADDR, 0xa0);
+ reg_write(priv, REG_DDC_OFFS, offset);
+ reg_write(priv, REG_DDC_SEGM_ADDR, 0x60);
+ reg_write(priv, REG_DDC_SEGM, segptr);
/* enable reading EDID: */
- reg_write(encoder, REG_EDID_CTRL, 0x1);
+ priv->wq_edid_wait = 1;
+ reg_write(priv, REG_EDID_CTRL, 0x1);
/* flag must be cleared by sw: */
- reg_write(encoder, REG_EDID_CTRL, 0x0);
+ reg_write(priv, REG_EDID_CTRL, 0x0);
/* wait for block read to complete: */
- for (i = 100; i > 0; i--) {
- uint8_t val = reg_read(encoder, REG_INT_FLAGS_2);
- if (val & INT_FLAGS_2_EDID_BLK_RD)
- break;
- msleep(1);
+ if (priv->hdmi->irq) {
+ i = wait_event_timeout(priv->wq_edid,
+ !priv->wq_edid_wait,
+ msecs_to_jiffies(100));
+ if (i < 0) {
+ dev_err(&priv->hdmi->dev, "read edid wait err %d\n", i);
+ return i;
+ }
+ } else {
+ for (i = 10; i > 0; i--) {
+ msleep(10);
+ ret = reg_read(priv, REG_INT_FLAGS_2);
+ if (ret < 0)
+ return ret;
+ if (ret & INT_FLAGS_2_EDID_BLK_RD)
+ break;
+ }
}
- if (i == 0)
+ if (i == 0) {
+ dev_err(&priv->hdmi->dev, "read edid timeout\n");
return -ETIMEDOUT;
+ }
- ret = reg_read_range(encoder, REG_EDID_DATA_0, buf, EDID_LENGTH);
+ ret = reg_read_range(priv, REG_EDID_DATA_0, buf, EDID_LENGTH);
if (ret != EDID_LENGTH) {
- dev_err(encoder->dev->dev, "failed to read edid block %d: %d",
- blk, ret);
+ dev_err(&priv->hdmi->dev, "failed to read edid block %d: %d\n",
+ blk, ret);
return ret;
}
- reg_clear(encoder, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
-
return 0;
}
+static int
+read_validate_edid_block(struct drm_encoder *encoder, uint8_t *buf, int blk)
+{
+ bool print_bad_edid = drm_debug & DRM_UT_KMS;
+ int ret;
+ int retries = 1;
+
+ do {
+ bool print_bad = print_bad_edid && (retries == 0);
+
+ ret = read_edid_block(encoder, buf, blk);
+ /* Fail on I2C error */
+ if (ret)
+ break;
+
+ /* But retry checksum errored blocks */
+ if (drm_edid_block_valid(buf, blk, print_bad))
+ break;
+ else
+ ret = -EINVAL;
+ } while (retries-- > 0);
+
+ return ret;
+}
+
static uint8_t *
do_get_edid(struct drm_encoder *encoder)
{
- int j = 0, valid_extensions = 0;
+ struct tda998x_priv *priv = to_tda998x_priv(encoder);
+ int j, valid_extensions = 0;
uint8_t *block, *new;
- bool print_bad_edid = drm_debug & DRM_UT_KMS;
if ((block = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL)
return NULL;
- /* base block fetch */
- if (read_edid_block(encoder, block, 0))
- goto fail;
+ if (priv->rev == TDA19988)
+ reg_clear(priv, REG_TX4, TX4_PD_RAM);
- if (!drm_edid_block_valid(block, 0, print_bad_edid))
+ /* base block fetch */
+ if (read_validate_edid_block(encoder, block, 0))
goto fail;
/* if there's no extensions, we're done */
if (block[0x7e] == 0)
- return block;
+ goto done;
new = krealloc(block, (block[0x7e] + 1) * EDID_LENGTH, GFP_KERNEL);
if (!new)
@@ -698,10 +1162,7 @@ do_get_edid(struct drm_encoder *encoder)
for (j = 1; j <= block[0x7e]; j++) {
uint8_t *ext_block = block + (valid_extensions + 1) * EDID_LENGTH;
- if (read_edid_block(encoder, ext_block, j))
- goto fail;
-
- if (!drm_edid_block_valid(ext_block, j, print_bad_edid))
+ if (read_validate_edid_block(encoder, ext_block, j))
goto fail;
valid_extensions++;
@@ -716,10 +1177,16 @@ do_get_edid(struct drm_encoder *encoder)
block = new;
}
+done:
+ if (priv->rev == TDA19988)
+ reg_set(priv, REG_TX4, TX4_PD_RAM);
+
return block;
fail:
- dev_warn(encoder->dev->dev, "failed to read EDID\n");
+ if (priv->rev == TDA19988)
+ reg_set(priv, REG_TX4, TX4_PD_RAM);
+ dev_warn(&priv->hdmi->dev, "failed to read EDID\n");
kfree(block);
return NULL;
}
@@ -728,12 +1195,19 @@ static int
tda998x_encoder_get_modes(struct drm_encoder *encoder,
struct drm_connector *connector)
{
+ struct tda998x_priv *priv = to_tda998x_priv(encoder);
struct edid *edid = (struct edid *)do_get_edid(encoder);
int n = 0;
if (edid) {
drm_mode_connector_update_edid_property(connector, edid);
n = drm_add_edid_modes(connector, edid);
+ priv->is_hdmi_sink = drm_detect_hdmi_monitor(edid);
+
+ /* keep the EDID as ELD for the audio subsystem */
+ drm_edid_to_eld(connector, edid);
+ priv->eld = connector->eld;
+
kfree(edid);
}
@@ -744,7 +1218,13 @@ static int
tda998x_encoder_create_resources(struct drm_encoder *encoder,
struct drm_connector *connector)
{
- DBG("");
+ struct tda998x_priv *priv = to_tda998x_priv(encoder);
+
+ if (priv->hdmi->irq)
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+ else
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+ DRM_CONNECTOR_POLL_DISCONNECT;
return 0;
}
@@ -763,6 +1243,17 @@ tda998x_encoder_destroy(struct drm_encoder *encoder)
{
struct tda998x_priv *priv = to_tda998x_priv(encoder);
drm_i2c_encoder_destroy(encoder);
+
+ /* disable all IRQs and free the IRQ handler */
+ cec_write(priv, REG_CEC_RXSHPDINTENA, 0);
+ reg_clear(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
+ if (priv->hdmi->irq)
+ free_irq(priv->hdmi->irq, priv);
+
+ tda998x_codec_unregister(&priv->hdmi->dev);
+
+ if (priv->cec)
+ i2c_unregister_device(priv->cec);
kfree(priv);
}
@@ -800,56 +1291,137 @@ tda998x_encoder_init(struct i2c_client *client,
struct drm_device *dev,
struct drm_encoder_slave *encoder_slave)
{
- struct drm_encoder *encoder = &encoder_slave->base;
struct tda998x_priv *priv;
+ struct device_node *np = client->dev.of_node;
+ u32 video;
+ int rev_lo, rev_hi, ret;
+ unsigned short cec_addr;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
- priv->current_page = 0;
- priv->cec = i2c_new_dummy(client->adapter, 0x34);
+ priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3);
+ priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1);
+ priv->vip_cntrl_2 = VIP_CNTRL_2_SWAP_E(4) | VIP_CNTRL_2_SWAP_F(5);
+
+ priv->params.audio_frame[1] = 1; /* channels - 1 */
+ priv->params.audio_sample_rate = 48000; /* 48kHz */
+
+ priv->current_page = 0xff;
+ priv->hdmi = client;
+ /* CEC I2C address bound to TDA998x I2C addr by configuration pins */
+ cec_addr = 0x34 + (client->addr & 0x03);
+ priv->cec = i2c_new_dummy(client->adapter, cec_addr);
+ if (!priv->cec) {
+ kfree(priv);
+ return -ENODEV;
+ }
+
+ priv->encoder = &encoder_slave->base;
priv->dpms = DRM_MODE_DPMS_OFF;
encoder_slave->slave_priv = priv;
encoder_slave->slave_funcs = &tda998x_encoder_funcs;
+ i2c_set_clientdata(client, priv);
/* wake up the device: */
- cec_write(encoder, REG_CEC_ENAMODS,
+ cec_write(priv, REG_CEC_ENAMODS,
CEC_ENAMODS_EN_RXSENS | CEC_ENAMODS_EN_HDMI);
- tda998x_reset(encoder);
+ tda998x_reset(priv);
/* read version: */
- priv->rev = reg_read(encoder, REG_VERSION_LSB) |
- reg_read(encoder, REG_VERSION_MSB) << 8;
+ rev_lo = reg_read(priv, REG_VERSION_LSB);
+ rev_hi = reg_read(priv, REG_VERSION_MSB);
+ if (rev_lo < 0 || rev_hi < 0) {
+ ret = rev_lo < 0 ? rev_lo : rev_hi;
+ goto fail;
+ }
+
+ priv->rev = rev_lo | rev_hi << 8;
/* mask off feature bits: */
priv->rev &= ~0x30; /* not-hdcp and not-scalar bit */
switch (priv->rev) {
- case TDA9989N2: dev_info(dev->dev, "found TDA9989 n2"); break;
- case TDA19989: dev_info(dev->dev, "found TDA19989"); break;
- case TDA19989N2: dev_info(dev->dev, "found TDA19989 n2"); break;
- case TDA19988: dev_info(dev->dev, "found TDA19988"); break;
+ case TDA9989N2:
+ dev_info(&client->dev, "found TDA9989 n2");
+ break;
+ case TDA19989:
+ dev_info(&client->dev, "found TDA19989");
+ break;
+ case TDA19989N2:
+ dev_info(&client->dev, "found TDA19989 n2");
+ break;
+ case TDA19988:
+ dev_info(&client->dev, "found TDA19988");
+ break;
default:
- DBG("found unsupported device: %04x", priv->rev);
+ dev_err(&client->dev, "found unsupported device: %04x\n",
+ priv->rev);
goto fail;
}
/* after reset, enable DDC: */
- reg_write(encoder, REG_DDC_DISABLE, 0x00);
+ reg_write(priv, REG_DDC_DISABLE, 0x00);
/* set clock on DDC channel: */
- reg_write(encoder, REG_TX3, 39);
+ reg_write(priv, REG_TX3, 39);
/* if necessary, disable multi-master: */
if (priv->rev == TDA19989)
- reg_set(encoder, REG_I2C_MASTER, I2C_MASTER_DIS_MM);
+ reg_set(priv, REG_I2C_MASTER, I2C_MASTER_DIS_MM);
- cec_write(encoder, REG_CEC_FRO_IM_CLK_CTRL,
+ cec_write(priv, REG_CEC_FRO_IM_CLK_CTRL,
CEC_FRO_IM_CLK_CTRL_GHOST_DIS | CEC_FRO_IM_CLK_CTRL_IMCLK_SEL);
+ /* initialize the optional IRQ */
+ if (client->irq) {
+ int irqf_trigger;
+
+ /* init read EDID waitqueue */
+ init_waitqueue_head(&priv->wq_edid);
+
+ /* clear pending interrupts */
+ reg_read(priv, REG_INT_FLAGS_0);
+ reg_read(priv, REG_INT_FLAGS_1);
+ reg_read(priv, REG_INT_FLAGS_2);
+
+ irqf_trigger =
+ irqd_get_trigger_type(irq_get_irq_data(client->irq));
+ ret = request_threaded_irq(client->irq, NULL,
+ tda998x_irq_thread,
+ irqf_trigger | IRQF_ONESHOT,
+ "tda998x", priv);
+ if (ret) {
+ dev_err(&client->dev,
+ "failed to request IRQ#%u: %d\n",
+ client->irq, ret);
+ goto fail;
+ }
+
+ /* enable HPD irq */
+ cec_write(priv, REG_CEC_RXSHPDINTENA, CEC_RXSHPDLEV_HPD);
+ }
+
+ /* enable EDID read irq: */
+ reg_set(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
+
+ /* register the audio CODEC */
+ tda998x_codec_register(&client->dev);
+
+ if (!np)
+ return 0; /* non-DT */
+
+ /* get the optional video properties */
+ ret = of_property_read_u32(np, "video-ports", &video);
+ if (ret == 0) {
+ priv->vip_cntrl_0 = video >> 16;
+ priv->vip_cntrl_1 = video >> 8;
+ priv->vip_cntrl_2 = video;
+ }
+
return 0;
fail:
@@ -864,6 +1436,14 @@ fail:
return -ENXIO;
}
+#ifdef CONFIG_OF
+static const struct of_device_id tda998x_dt_ids[] = {
+ { .compatible = "nxp,tda998x", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, tda998x_dt_ids);
+#endif
+
static struct i2c_device_id tda998x_ids[] = {
{ "tda998x", 0 },
{ }
@@ -876,6 +1456,7 @@ static struct drm_i2c_encoder_driver tda998x_driver = {
.remove = tda998x_remove,
.driver = {
.name = "tda998x",
+ .of_match_table = of_match_ptr(tda998x_dt_ids),
},
.id_table = tda998x_ids,
},
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.h b/drivers/gpu/drm/i2c/tda998x_drv.h
new file mode 100644
index 000000000000..e6c8dd5dbe21
--- /dev/null
+++ b/drivers/gpu/drm/i2c/tda998x_drv.h
@@ -0,0 +1,32 @@
+/* tda998x private data */
+
+struct tda998x_priv {
+ struct i2c_client *cec;
+ struct i2c_client *hdmi;
+ uint16_t rev;
+ uint8_t current_page;
+ int dpms;
+ bool is_hdmi_sink;
+ u8 vip_cntrl_0;
+ u8 vip_cntrl_1;
+ u8 vip_cntrl_2;
+ struct tda998x_encoder_params params;
+
+ wait_queue_head_t wq_edid;
+ volatile int wq_edid_wait;
+ struct drm_encoder *encoder;
+
+ u8 audio_ports[2];
+ int audio_sample_format;
+ int dai_id; /* DAI ID when streaming active */
+
+ u8 *eld;
+
+ struct snd_pcm_hw_constraint_list rate_constraints;
+};
+
+int tda998x_codec_register(struct device *dev);
+void tda998x_codec_unregister(struct device *dev);
+
+void tda998x_audio_start(struct tda998x_priv *priv, int full);
+void tda998x_audio_stop(struct tda998x_priv *priv);
diff --git a/drivers/gpu/drm/pl111/Kbuild b/drivers/gpu/drm/pl111/Kbuild
new file mode 100755
index 000000000000..f10d58c70dff
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+pl111_drm-y += pl111_drm_device.o \
+ pl111_drm_connector.o \
+ pl111_drm_crtc.o \
+ pl111_drm_cursor.o \
+ pl111_drm_dma_buf.o \
+ pl111_drm_encoder.o \
+ pl111_drm_fb.o \
+ pl111_drm_gem.o \
+ pl111_drm_pl111.o \
+ pl111_drm_platform.o \
+ pl111_drm_suspend.o \
+ pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig
new file mode 100755
index 000000000000..60b465c56c51
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+config DRM_PL111
+ tristate "DRM Support for PL111 CLCD Controller"
+ depends on DRM
+ select DRM_KMS_HELPER
+ select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+ help
+ Choose this option for DRM support for the PL111 CLCD controller.
+ If M is selected the module will be called pl111_drm.
+
diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile
new file mode 100755
index 000000000000..2869f587266b
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target)
+endif
+
+all: pl111_drm
+
+pl111_drm:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
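Usage note (illustrative, not part of the patch): with this Makefile the module is built out of tree by pointing KDIR at a configured kernel build directory, e.g. "make KDIR=/path/to/kernel-build ARCH=arm" run from this directory; both paths are placeholders. The $(error ...) guard above aborts the build when KDIR is omitted, and ARCH defaults to the host architecture via "uname -m".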
diff --git a/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100755
index 000000000000..43f212efac6b
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/*
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ *
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE 0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS 256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE 4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD 16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK 0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND 0x0
+#define CLCD_CRSR_LBBP_FOREGROUND 0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT 0x2
+#define CLCD_CRSR_LBBP_INVERSE 0x3
+
+
+#define CLCD_CRSR_CTRL 0x00000c00
+#define CLCD_CRSR_CONFIG 0x00000c04
+#define CLCD_CRSR_PALETTE_0 0x00000c08
+#define CLCD_CRSR_PALETTE_1 0x00000c0c
+#define CLCD_CRSR_XY 0x00000c10
+#define CLCD_CRSR_CLIP 0x00000c14
+#define CLCD_CRSR_IMSC 0x00000c20
+#define CLCD_CRSR_ICR 0x00000c24
+#define CLCD_CRSR_RIS 0x00000c28
+#define CLCD_MIS 0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON (1 << 0)
+#define CRSR_CTRL_CRSR_MAX 3
+#define CRSR_CTRL_CRSR_NUM_SHIFT 4
+#define CRSR_CTRL_CRSR_NUM_MASK \
+ (CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0 0
+#define CRSR_CTRL_CURSOR_1 1
+#define CRSR_CTRL_CURSOR_2 2
+#define CRSR_CTRL_CURSOR_3 3
+
+#define CRSR_CONFIG_CRSR_SIZE (1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC (1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT 0
+#define CRSR_PALETTE_GREEN_SHIFT 8
+#define CRSR_PALETTE_BLUE_SHIFT 16
+
+#define CRSR_PALETTE_RED_MASK 0x000000ff
+#define CRSR_PALETTE_GREEN_MASK 0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK 0x00ff0000
+#define CRSR_PALETTE_MASK (~0xff000000)
+
+#define CRSR_XY_MASK 0x000003ff
+#define CRSR_XY_X_SHIFT 0
+#define CRSR_XY_Y_SHIFT 16
+
+#define CRSR_XY_X_MASK CRSR_XY_MASK
+#define CRSR_XY_Y_MASK (CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK 0x3f
+#define CRSR_CLIP_X_SHIFT 0
+#define CRSR_CLIP_Y_SHIFT 8
+
+#define CRSR_CLIP_X_MASK CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK (CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM (1<<0)
+#define CRSR_ICR_CRSR_IC (1<<0)
+#define CRSR_RIS_CRSR_RIS (1<<0)
+#define CRSR_MIS_CRSR_MIS (1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100755
index 000000000000..e151e8c02b11
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR "ARM Ltd."
+#define DRIVER_NAME "pl111_drm"
+#define DRIVER_DESC "DRM module for PL111"
+#define DRIVER_LICENCE "GPL"
+#define DRIVER_ALIAS "platform:pl111_drm"
+#define DRIVER_DATE "20101111"
+#define DRIVER_VERSION "0.2"
+#define DRIVER_MAJOR 2
+#define DRIVER_MINOR 1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+ dma_addr_t fb_dev_addr;
+ void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+ struct page **pages;
+ dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+ struct drm_gem_object gem_object;
+ u32 type;
+ union {
+ struct pl111_gem_bo_dma dma;
+ struct pl111_gem_bo_shm shm;
+ } backing_data;
+ struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+ struct drm_framebuffer fb;
+ struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /* This is the kds set associated to the dma_buf we want to flip */
+ struct kds_resource_set *kds_res_set;
+#endif
+ struct drm_framebuffer *fb;
+ struct drm_crtc *crtc;
+ struct list_head link;
+ bool page_flip;
+ struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+ struct drm_crtc crtc;
+ int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /* This protects "old_kds_res_set" and "displaying_fb" */
+ spinlock_t current_displaying_lock;
+ /*
+ * When a buffer is displayed its associated kds resource
+ * will be obtained and stored here. Every time a buffer
+ * flip is completed this old kds set is released and assigned
+ * the kds set of the new buffer.
+ */
+ struct kds_resource_set *old_kds_res_set;
+ /*
+ * Stores which frame buffer is currently being displayed by
+ * this CRTC or NULL if nothing is being displayed. It is used
+ * to tell whether we need to obtain a set of kds resources for
+ * exported buffer objects.
+ */
+ struct drm_framebuffer *displaying_fb;
+#endif
+ struct drm_display_mode *new_mode;
+ struct drm_display_mode *current_mode;
+ int last_bpp;
+
+ /*
+ * This spinlock protects "update_queue", "current_update_res"
+ * and calls to do_flip_to_res() which updates the CLCD base
+ * registers.
+ */
+ spinlock_t base_update_lock;
+ /*
+ * The resource that caused a base address update. Only one can be
+ * pending, hence it's != NULL if there's a pending update
+ */
+ struct pl111_drm_flip_resource *current_update_res;
+ /* Queue of things waiting to update the base address */
+ struct list_head update_queue;
+
+ void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+ struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+ struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+ struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+ struct pl111_drm_crtc *pl111_crtc;
+
+ struct amba_device *amba_dev;
+ unsigned long mmio_start;
+ __u32 mmio_len;
+ void *regs;
+ struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct kds_callback kds_cb;
+ struct kds_callback kds_obtain_current_cb;
+#endif
+ /*
+ * Number of flips that were started in show_framebuffer_on_crtc(),
+ * but haven't completed yet - because we do deferred flipping
+ */
+ atomic_t nr_flips_in_flight;
+ wait_queue_head_t wait_for_flips;
+
+ /*
+ * Used to prevent race between pl111_dma_buf_release and
+ * drm_gem_prime_handle_to_fd
+ */
+ struct mutex export_dma_buf_lock;
+
+ uint32_t number_crtcs;
+
+ /* Cache for flip resources used to avoid kmalloc on each page flip */
+ struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+ CURSOR_32X32,
+ CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+ CURSOR_SYNC_NONE,
+ CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create():
+ * it allows driver-specific flags to be passed in to control the kind of
+ * buffer to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+
+/****** TODO MIDEGL-1718: this should be moved to include/uapi/drm/pl111_drm.h ********/
+
+/*
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ * (0 -> SHM)
+ * (1 -> DMA)
+ * bit [2:1]: kind of mapping
+ * (0x0 -> uncached)
+ * (0x1 -> write combine)
+ * (0x2 -> cached)
+ */
+#define PL111_BOT_MASK (0x7)
+#define PL111_BOT_SHM (0x0 << 0)
+#define PL111_BOT_DMA (0x1 << 0)
+#define PL111_BOT_UNCACHED (0x0 << 1)
+#define PL111_BOT_WC (0x1 << 1)
+#define PL111_BOT_CACHED (0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @height: user-desired buffer height in pixels.
+ * @width: user-desired buffer width in pixels.
+ * @bpp: user-desired bits per pixel.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ * - PL111_BOT_DMA / PL111_BOT_SHM
+ * - PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned handle to the created gem object.
+ * - this handle will be set by the gem module on the kernel side.
+ * @pitch: returned pitch of the created buffer.
+ * @size: returned memory allocation size; page-aligned internally.
+ */
+struct drm_pl111_gem_create {
+ uint32_t height;
+ uint32_t width;
+ uint32_t bpp;
+ uint32_t flags;
+ /* handle, pitch, size will be returned */
+ uint32_t handle;
+ uint32_t pitch;
+ uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE 0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \
+ DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
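Illustrative sketch (not part of the patch): a minimal userspace program driving the create ioctl defined above, assuming the structure and ioctl number have been exported through a uapi header as the TODO above suggests. The device node path /dev/dri/card0, the local copies of the definitions, main() and the minimal error handling are placeholders for the example; depending on the installed headers, drm.h may need to come from libdrm instead.

/* Hypothetical userspace example -- not part of this driver. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/drm.h>		/* for DRM_IOWR and DRM_COMMAND_BASE */

/* Local copies of the driver definitions shown above. */
#define PL111_BOT_DMA	(0x1 << 0)
#define PL111_BOT_WC	(0x1 << 1)

struct drm_pl111_gem_create {
	uint32_t height;
	uint32_t width;
	uint32_t bpp;
	uint32_t flags;
	uint32_t handle;
	uint32_t pitch;
	uint64_t size;
};

#define DRM_PL111_GEM_CREATE		0x00
#define DRM_IOCTL_PL111_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + \
		DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)

int main(void)
{
	struct drm_pl111_gem_create req;
	int fd = open("/dev/dri/card0", O_RDWR);	/* placeholder node */

	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.width  = 640;
	req.height = 480;
	req.bpp    = 32;
	/* bit 0 selects DMA backing, bits [2:1] select write-combine */
	req.flags  = PL111_BOT_DMA | PL111_BOT_WC;

	if (ioctl(fd, DRM_IOCTL_PL111_GEM_CREATE, &req) == 0)
		printf("handle=%u pitch=%u size=%llu\n", req.handle,
		       req.pitch, (unsigned long long)req.size);

	close(fd);
	return 0;
}

The handle returned this way should then behave like any other GEM handle; the driver structure further down wires up dumb_map_offset and the PRIME hooks for mapping and export.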
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+ (container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+ (container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+ container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+ container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+ container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_connector.c b/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100755
index 000000000000..5d9cf80de618
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+ int w, h, type;
+} pl111_drm_modes[] = {
+ { 640, 480, DRM_MODE_TYPE_PREFERRED},
+ { 800, 600, 0},
+ {1024, 768, 0},
+ { -1, -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+ struct pl111_drm_connector *pl111_connector =
+ PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ drm_sysfs_connector_remove(connector);
+ drm_connector_cleanup(connector);
+ kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+ *connector, bool force)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+ return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ if (connector->encoder != NULL) {
+ return connector->encoder; /* Return attached encoder */
+ } else {
+ /*
+ * If there is no attached encoder we choose the best candidate
+ * from the list.
+ * For PL111 there is only one encoder so we return the first
+ * one we find.
+ * Other h/w would require a suitable criterion below.
+ */
+ struct drm_encoder *encoder = NULL;
+ struct drm_device *dev = connector->dev;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+ head) {
+ if (1) { /* criterion ? */
+ break;
+ }
+ }
+ return encoder; /* return best candidate encoder */
+ }
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+ int i = 0;
+ int count = 0;
+
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ while (pl111_drm_modes[i].w != -1) {
+ struct drm_display_mode *mode =
+ drm_mode_find_dmt(connector->dev,
+ pl111_drm_modes[i].w,
+ pl111_drm_modes[i].h,
+ 60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ , false
+#endif
+ );
+
+ if (mode != NULL) {
+ mode->type |= pl111_drm_modes[i].type;
+ drm_mode_probed_add(connector, mode);
+ count++;
+ }
+
+ i++;
+ }
+
+ DRM_DEBUG_KMS("found %d modes\n", count);
+
+ return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+ return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = pl111_connector_destroy,
+ .detect = pl111_connector_detect,
+ .dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+ .get_modes = pl111_connector_helper_get_modes,
+ .mode_valid = pl111_connector_helper_mode_valid,
+ .best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+ struct pl111_drm_connector *pl111_connector;
+
+ pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+ GFP_KERNEL);
+
+ if (pl111_connector == NULL) {
+		pr_err("Failed to allocate pl111_drm_connector\n");
+ return NULL;
+ }
+
+ drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+ DRM_MODE_CONNECTOR_DVII);
+
+ drm_connector_helper_add(&pl111_connector->connector,
+ &connector_helper_funcs);
+
+ drm_sysfs_connector_add(&pl111_connector->connector);
+
+ return pl111_connector;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100755
index 000000000000..a06c991dbf23
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,421 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+ struct drm_device *dev = pl111_crtc->crtc.dev;
+ struct pl111_drm_flip_resource *old_flip_res;
+ struct pl111_gem_bo *bo;
+ unsigned long irq_flags;
+ int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ unsigned long flags;
+#endif
+
+ spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+ /*
+ * Cache the flip resource that caused the IRQ since it will be
+ * dispatched later. Early return if the IRQ isn't associated to
+ * a base register update.
+ *
+ * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+ */
+ old_flip_res = pl111_crtc->current_update_res;
+ if (!old_flip_res) {
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+ return;
+ }
+ pl111_crtc->current_update_res = NULL;
+
+ /* Prepare the next flip (if any) of the queue as soon as possible. */
+ if (!list_empty(&pl111_crtc->update_queue)) {
+ struct pl111_drm_flip_resource *flip_res;
+ /* Remove the head of the list */
+ flip_res = list_first_entry(&pl111_crtc->update_queue,
+ struct pl111_drm_flip_resource, link);
+ list_del(&flip_res->link);
+ do_flip_to_res(flip_res);
+ /*
+		 * current_update_res will be set, which guarantees that
+		 * another incoming flip_res gets queued instead of being
+		 * handled immediately
+ */
+ }
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+ /* Finalize properly the flip that caused the IRQ */
+ DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+ bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ release_kds_resource_and_display(old_flip_res);
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+ /* Release DMA buffer on this flip */
+ if (bo->gem_object.export_dma_buf != NULL)
+ dma_buf_put(bo->gem_object.export_dma_buf);
+
+ drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+ /* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ if (old_flip_res->event) {
+ spin_lock_bh(&dev->event_lock);
+ drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+ old_flip_res->event);
+ spin_unlock_bh(&dev->event_lock);
+ }
+#else
+ if (old_flip_res->event) {
+ struct drm_pending_vblank_event *e = old_flip_res->event;
+ struct timeval now;
+ unsigned int seq;
+
+ DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+ old_flip_res->event);
+
+ spin_lock_bh(&dev->event_lock);
+ seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+ &now);
+ e->pipe = pl111_crtc->crtc_index;
+ e->event.sequence = seq;
+ e->event.tv_sec = now.tv_sec;
+ e->event.tv_usec = now.tv_usec;
+
+ list_add_tail(&e->base.link,
+ &e->base.file_priv->event_list);
+
+ wake_up_interruptible(&e->base.file_priv->event_wait);
+ spin_unlock_bh(&dev->event_lock);
+ }
+#endif
+
+ drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+ /*
+ * workqueue.c:process_one_work():
+ * "It is permissible to free the struct work_struct from
+ * inside the function that is called from it"
+ */
+ kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+ flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+ if (flips_in_flight == 0 ||
+ flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+ wake_up(&priv.wait_for_flips);
+
+ DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+ struct pl111_drm_flip_resource *flip_res = cb1;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+ pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, bool page_flip,
+ struct drm_pending_vblank_event *event)
+{
+ struct pl111_gem_bo *bo;
+ struct pl111_drm_flip_resource *flip_res;
+ int flips_in_flight;
+ int old_flips_in_flight;
+
+ crtc->fb = fb;
+
+ bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+ if (bo == NULL) {
+ DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+ return -EINVAL;
+ }
+
+ /* If this is a full modeset, wait for all outstanding flips to complete
+ * before continuing. This avoids unnecessary complication from being
+ * able to queue up multiple modesets and queues of mixed modesets and
+ * page flips.
+ *
+ * Modesets should be uncommon and will not be performant anyway, so
+ * making them synchronous should have negligible performance impact.
+ */
+ if (!page_flip) {
+ int ret = wait_event_killable(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * There can be more 'early display' flips in flight than there are
+ * buffers, and there is (currently) no explicit bound on the number of
+ * flips. Hence, we need a new allocation for each one.
+ *
+	 * Note: this could be optimized if we knew a bound on the flips,
+	 * since an application can only usefully keep a limited number of
+	 * buffers in flight before it starts hogging memory.
+ */
+ flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+ if (flip_res == NULL) {
+ pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * increment flips in flight, whilst blocking when we reach
+ * NR_FLIPS_IN_FLIGHT_THRESHOLD
+ */
+ do {
+ /*
+		 * Note: the wait condition assigns flips_in_flight as a side
+		 * effect; the cmpxchg below only increments the counter if no
+		 * other flip raced with us, otherwise we re-read and retry.
+ */
+ int ret = wait_event_killable(priv.wait_for_flips,
+ (flips_in_flight =
+ atomic_read(&priv.nr_flips_in_flight))
+ < NR_FLIPS_IN_FLIGHT_THRESHOLD);
+ if (ret != 0) {
+ kmem_cache_free(priv.page_flip_slab, flip_res);
+ return ret;
+ }
+
+ old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+ flips_in_flight, flips_in_flight + 1);
+ } while (old_flips_in_flight != flips_in_flight);
+
+ flip_res->fb = fb;
+ flip_res->crtc = crtc;
+ flip_res->page_flip = page_flip;
+ flip_res->event = event;
+ INIT_LIST_HEAD(&flip_res->link);
+ DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ if (bo->gem_object.export_dma_buf != NULL) {
+ struct dma_buf *buf = bo->gem_object.export_dma_buf;
+ unsigned long shared[1] = { 0 };
+ struct kds_resource *resource_list[1] = {
+ get_dma_buf_kds_resource(buf) };
+ int err;
+
+ get_dma_buf(buf);
+ DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+ /* Wait for the KDS resource associated with this buffer */
+ err = kds_async_waitall(&flip_res->kds_res_set,
+ &priv.kds_cb, flip_res, fb, 1, shared,
+ resource_list);
+ BUG_ON(err);
+ } else {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+ DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+ /* No dma-buf attached so just call the callback directly */
+ flip_res->kds_res_set = NULL;
+ pl111_crtc->show_framebuffer_cb(flip_res, fb);
+ }
+#else
+ if (bo->gem_object.export_dma_buf != NULL) {
+ struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+ get_dma_buf(buf);
+ DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+ } else {
+ DRM_DEBUG_KMS("No dma_buf for this flip\n");
+ }
+
+ /* No dma-buf attached to this so just call the callback directly */
+ {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ pl111_crtc->show_framebuffer_cb(flip_res, fb);
+ }
+#endif
+
+ /* For the same reasons as the wait at the start of this function,
+ * wait for the modeset to complete before continuing.
+ */
+ if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event)
+{
+ DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+ __func__, crtc, fb, event);
+ return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ int ret;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ struct drm_display_mode *duplicated_mode;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, w=%d h=%d bpp=%d\n",
+ adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+ crtc->fb->bits_per_pixel);
+
+ duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+ if (!duplicated_mode)
+ return -ENOMEM;
+
+ pl111_crtc->new_mode = duplicated_mode;
+ ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+ if (ret != 0) {
+ pl111_crtc->new_mode = pl111_crtc->current_mode;
+ drm_mode_destroy(crtc->dev, duplicated_mode);
+ }
+
+ return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+ const struct drm_display_mode *mode,
+#else
+ struct drm_display_mode *mode,
+#endif
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+ /*
+ * 1024x768 with more than 16 bits per pixel may not work
+ * correctly on Versatile Express due to bandwidth issues
+ */
+ if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+ crtc->fb->bits_per_pixel > 16) {
+ DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+ }
+#endif
+
+ return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+ int ret;
+
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	/* Don't disable the CRTC until there are no flips in flight, as the
+	 * IRQ will be disabled. */
+	ret = wait_event_killable(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+	if (ret) {
+		pr_err("pl111_crtc_helper_disable: wait for flips failed\n");
+ return;
+ }
+ clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+ .cursor_set = pl111_crtc_cursor_set,
+ .cursor_move = pl111_crtc_cursor_move,
+ .set_config = drm_crtc_helper_set_config,
+ .page_flip = pl111_crtc_page_flip,
+ .destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+ .mode_set = pl111_crtc_helper_mode_set,
+ .prepare = pl111_crtc_helper_prepare,
+ .commit = pl111_crtc_helper_commit,
+ .mode_fixup = pl111_crtc_helper_mode_fixup,
+ .disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+ struct pl111_drm_crtc *pl111_crtc;
+
+ pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+ if (pl111_crtc == NULL) {
+		pr_err("Failed to allocate pl111_drm_crtc\n");
+ return NULL;
+ }
+
+ drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+ drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+ pl111_crtc->crtc_index = pl111_crtc_num;
+ pl111_crtc_num++;
+ pl111_crtc->crtc.enabled = 0;
+ pl111_crtc->last_bpp = 0;
+ pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ pl111_crtc->displaying_fb = NULL;
+ pl111_crtc->old_kds_res_set = NULL;
+ spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+ pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+ INIT_LIST_HEAD(&pl111_crtc->update_queue);
+ spin_lock_init(&pl111_crtc->base_update_lock);
+
+ return pl111_crtc;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100755
index 000000000000..87f1007d16dd
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD (1 << 7)
+#define ARGB_ALPHA_SHIFT 24
+#define ARGB_ALPHA_MASK (0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT 16
+#define ARGB_RED_MASK (0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT 8
+#define ARGB_GREEN_MASK (0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT 0
+#define ARGB_BLUE_MASK (0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+ if (size == CURSOR_64X64)
+ reg_data |= CRSR_CONFIG_CRSR_SIZE;
+ else
+ reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+ if (sync == CURSOR_SYNC_VSYNC)
+ reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+ else
+ reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+ reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+ reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+ if (enable)
+ reg_data |= CRSR_CTRL_CRSR_ON;
+ else
+ reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+ u32 reg_data = (x & CRSR_XY_MASK) |
+ ((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+ writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+ u32 reg_data;
+
+ /*
+ * Do not allow setting clipping values larger than
+ * the cursor size since the cursor is already fully hidden
+ * when x,y = PL111_MAX_CURSOR_WIDTH.
+ */
+ if (x > PL111_MAX_CURSOR_WIDTH)
+ x = PL111_MAX_CURSOR_WIDTH;
+	if (y > PL111_MAX_CURSOR_HEIGHT)
+		y = PL111_MAX_CURSOR_HEIGHT;
+
+ reg_data = (x & CRSR_CLIP_MASK) |
+ ((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+ writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+ writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+ pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+ pl111_set_cursor_size(CURSOR_64X64);
+ pl111_set_cursor_palette(0x0, 0x00ffffff);
+ pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+ pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
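+ * For example, the pixel with x % 16 == 0 lands in bits [7:6] of the
+ * word, while the pixel with x % 16 == 3 lands in bits [1:0].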
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+ 6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+ uint32_t value)
+{
+ u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+ uint32_t shift;
+ uint32_t data;
+
+ shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+ /* Get the word containing this pixel */
+ cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+ /* Update pixel in cursor RAM */
+ data = readl(cursor_ram);
+ data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+ data |= value << shift;
+ writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+ u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+ u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+ u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+ u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+ u32 blue = (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+ /*
+	 * Converting from 8-bit alpha transparency to binary transparency
+	 * is the best we can achieve.
+ */
+ if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+ u32 gray, max, min;
+
+ /*
+ * Convert to gray using the lightness method:
+ * gray = [max(R,G,B) + min(R,G,B)]/2
+ */
+ min = min(red, green);
+ min = min(min, blue);
+ max = max(red, green);
+ max = max(max, blue);
+ gray = (min + max) >> 1; /* divide by 2 */
+ /* Apply binary threshold to the gray value calculated */
+ if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+ lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+ else
+ lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+ }
+
+ return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ * to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ /* Add 1 on width to insert trailing NULL */
+ char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ unsigned int x;
+ unsigned int y;
+
+ for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+ for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+ u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+ string_cursor[x] = 'T';
+ else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+ string_cursor[x] = 'F';
+ else if (value == CLCD_CRSR_LBBP_INVERSE)
+ string_cursor[x] = 'I';
+ else
+ string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ set_lbbp_pixel(data, x, y, value);
+ ++data;
+ }
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+ DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ }
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height)
+{
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+
+ DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+ handle, width, height);
+
+ if (!handle) {
+ pl111_cursor_disable();
+ return 0;
+ }
+
+ if ((width != PL111_MAX_CURSOR_WIDTH) ||
+ (height != PL111_MAX_CURSOR_HEIGHT))
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object for handle = %d\n",
+ handle);
+ return -ENOENT;
+ }
+
+ /*
+ * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+ * ARGB888 buffer object in the input.
+ *
+ */
+ if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+		DRM_ERROR("Cannot set cursor with an obj size = %zu\n",
+ obj->size);
+ drm_gem_object_unreference_unlocked(obj);
+ return -EINVAL;
+ }
+
+ bo = PL111_BO_FROM_GEM(obj);
+ if (!(bo->type & PL111_BOT_DMA)) {
+ DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+ obj);
+ drm_gem_object_unreference_unlocked(obj);
+ return -EINVAL;
+ }
+
+ pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+ /*
+ * Since we copy the contents of the buffer to the HW cursor internal
+ * memory this GEM object is not needed anymore.
+ */
+ drm_gem_object_unreference_unlocked(obj);
+
+ pl111_cursor_enable();
+
+ return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int x_clip = 0;
+ int y_clip = 0;
+
+ DRM_DEBUG("x %d y %d\n", x, y);
+
+ /*
+ * The cursor image is clipped automatically at the screen limits when
+ * it extends beyond the screen image to the right or bottom but
+ * we must clip it using pl111 HW features for negative values.
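+	 * For example, x = -10 is programmed as position x = 0 with
+	 * x_clip = 10, so the hardware hides the left 10 columns of the
+	 * cursor image.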
+ */
+ if (x < 0) {
+ x_clip = -x;
+ x = 0;
+ }
+ if (y < 0) {
+ y_clip = -y;
+ y = 0;
+ }
+
+ pl111_set_cursor_clipping(x_clip, y_clip);
+ pl111_set_cursor_position(x, y);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_device.c b/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100755
index 000000000000..b8ea5f6725ec
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,326 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+ wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+ bool *cb_has_called = (bool *) cb2;
+
+ *cb_has_called = true;
+ wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+ pl111_crtc->displaying_fb = flip_res->fb;
+
+ /* Release the previous buffer */
+ if (pl111_crtc->old_kds_res_set != NULL) {
+ /*
+ * Can flip to the same buffer, but must not release the current
+ * resource set
+ */
+ BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+ }
+ /* Record the current buffer, to release on the next buffer flip */
+ pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+ DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+ DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+ DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+ return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+ DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+ .fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+ struct drm_mode_config *mode_config;
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ struct pl111_drm_connector *pl111_connector;
+ struct pl111_drm_encoder *pl111_encoder;
+ int ret = 0;
+
+ if (priv == NULL)
+ return -EINVAL;
+
+ drm_mode_config_init(dev);
+ mode_config = &dev->mode_config;
+ mode_config->funcs = &mode_config_funcs;
+ mode_config->min_width = 1;
+ mode_config->max_width = 1024;
+ mode_config->min_height = 1;
+ mode_config->max_height = 768;
+
+ priv->pl111_crtc = pl111_crtc_create(dev);
+ if (priv->pl111_crtc == NULL) {
+ pr_err("Failed to create pl111_drm_crtc\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ priv->number_crtcs = 1;
+
+ pl111_connector = pl111_connector_create(dev);
+ if (pl111_connector == NULL) {
+ pr_err("Failed to create pl111_drm_connector\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ pl111_encoder = pl111_encoder_create(dev, 1);
+ if (pl111_encoder == NULL) {
+ pr_err("Failed to create pl111_drm_encoder\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+ &pl111_encoder->encoder);
+ if (ret != 0) {
+ DRM_ERROR("Failed to attach encoder\n");
+ goto out_config;
+ }
+
+ pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+ goto finish;
+
+out_config:
+ drm_mode_config_cleanup(dev);
+finish:
+ DRM_DEBUG("%s returned %d\n", __func__, ret);
+ return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+ drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+ int ret = 0;
+
+ pr_info("DRM %s\n", __func__);
+
+ mutex_init(&priv.export_dma_buf_lock);
+ atomic_set(&priv.nr_flips_in_flight, 0);
+ init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+ if (ret != 0) {
+ pr_err("Failed to initialise KDS callback\n");
+ goto finish;
+ }
+
+ ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+ initial_kds_obtained);
+ if (ret != 0) {
+ pr_err("Failed to init KDS obtain callback\n");
+ kds_callback_term(&priv.kds_cb);
+ goto finish;
+ }
+#endif
+
+ /* Create a cache for page flips */
+ priv.page_flip_slab = kmem_cache_create("page flip slab",
+ sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+ if (priv.page_flip_slab == NULL) {
+ DRM_ERROR("Failed to create slab\n");
+ ret = -ENOMEM;
+ goto out_kds_callbacks;
+ }
+
+ dev->dev_private = &priv;
+
+ ret = pl111_modeset_init(dev);
+ if (ret != 0) {
+ pr_err("Failed to init modeset\n");
+ goto out_slab;
+ }
+
+ ret = pl111_device_init(dev);
+ if (ret != 0) {
+ DRM_ERROR("Failed to init MMIO and IRQ\n");
+ goto out_modeset;
+ }
+
+ ret = drm_vblank_init(dev, 1);
+ if (ret != 0) {
+ DRM_ERROR("Failed to init vblank\n");
+ goto out_vblank;
+ }
+
+ goto finish;
+
+out_vblank:
+ pl111_device_fini(dev);
+out_modeset:
+ pl111_modeset_fini(dev);
+out_slab:
+ kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ kds_callback_term(&priv.kds_obtain_current_cb);
+ kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+ DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+ return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+
+ kmem_cache_destroy(priv.page_flip_slab);
+
+ drm_vblank_cleanup(dev);
+ pl111_modeset_fini(dev);
+ pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ kds_callback_term(&priv.kds_obtain_current_cb);
+ kds_callback_term(&priv.kds_cb);
+#endif
+ return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+ .fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+#else
+ .open = pl111_gem_vm_open,
+ .close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = pl111_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .fasync = drm_fasync,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+ DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+ .driver_features =
+ DRIVER_MODESET | DRIVER_FB_DMA | DRIVER_GEM | DRIVER_PRIME,
+ .load = pl111_drm_load,
+ .unload = pl111_drm_unload,
+ .context_dtor = NULL,
+ .preclose = pl111_drm_preclose,
+ .lastclose = pl111_drm_lastclose,
+ .suspend = pl111_drm_suspend,
+ .resume = pl111_drm_resume,
+ .get_vblank_counter = drm_vblank_count,
+ .enable_vblank = pl111_enable_vblank,
+ .disable_vblank = pl111_disable_vblank,
+ .ioctls = pl111_ioctls,
+ .fops = &drm_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+ .dumb_create = pl111_dumb_create,
+ .dumb_destroy = pl111_dumb_destroy,
+ .dumb_map_offset = pl111_dumb_map_offset,
+ .gem_free_object = pl111_gem_free_object,
+ .gem_vm_ops = &pl111_gem_vm_ops,
+ .prime_handle_to_fd = &pl111_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_export = &pl111_gem_prime_export,
+ .gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialising, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+	driver.num_ioctls = DRM_ARRAY_SIZE(pl111_ioctls);
+	driver.kdriver.platform_device = dev;
+	return drm_platform_init(&driver, dev);
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ drm_platform_exit(&driver, dev);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100755
index 000000000000..a0d65f1d5faf
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,615 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+ struct pl111_gem_bo *bo,
+ struct dma_buf *dma_buf)
+{
+ unsigned long shared[1] = { 0 };
+ struct kds_resource *resource_list[1];
+ struct kds_resource_set *kds_res_set;
+ struct drm_crtc *crtc;
+ bool cb_has_called = false;
+ unsigned long flags;
+ int err;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+ DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+ bo, dma_buf);
+
+ resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+ get_dma_buf(dma_buf);
+
+ /*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * the 'locked ignore' behaviour
+ */
+ err = kds_async_waitall(&kds_res_set,
+ &priv.kds_obtain_current_cb, &wake,
+ &cb_has_called, 1, shared, resource_list);
+ BUG_ON(err);
+ wait_event(wake, cb_has_called == true);
+
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb) {
+ struct pl111_drm_framebuffer *pl111_fb;
+ struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+ if (pl111_fb->bo == bo) {
+ DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+ DRM_DEBUG_KMS(" is being displayed, keeping\n");
+ /* There shouldn't be a previous buffer to release */
+ BUG_ON(pl111_crtc->old_kds_res_set);
+
+ if (kds_res_set == NULL) {
+ err = kds_async_waitall(&kds_res_set,
+ &priv.kds_obtain_current_cb,
+ &wake, &cb_has_called,
+ 1, shared, resource_list);
+ BUG_ON(err);
+ wait_event(wake, cb_has_called == true);
+ }
+
+ /* Current buffer will need releasing on next flip */
+ pl111_crtc->old_kds_res_set = kds_res_set;
+
+ /*
+ * Clear kds_res_set, so a new kds_res_set is allocated
+ * for additional CRTCs
+ */
+ kds_res_set = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+ }
+
+ /* kds_res_set will be NULL here if any CRTCs are displaying fb */
+ if (kds_res_set != NULL) {
+ DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+ DRM_DEBUG_KMS(" not being displayed, discarding\n");
+ /* They're not being displayed, release them */
+ kds_resource_set_release(&kds_res_set);
+ }
+
+ dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+ struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = buffer->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int ret;
+
+ DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+ mutex_lock(&dev->struct_mutex);
+ ret = drm_gem_mmap_obj(obj, obj->size, vma);
+ mutex_unlock(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+ struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = buffer->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+
+ DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+ mutex_lock(&dev->struct_mutex);
+
+ /* Check for valid size. */
+	if (obj->size < vma->vm_end - vma->vm_start) {
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+
+ BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+ vma->vm_ops = dev->driver->gem_vm_ops;
+ vma->vm_private_data = obj;
+
+ /* Take a ref for this mapping of the object, so that the fault
+ * handler can dereference the mmap offset's pointer to the object.
+ * This reference is cleaned up by the corresponding vm_close
+ * (which should happen whether the vma was created by this call, or
+ * by a vm_open due to mremap or partial unmap or whatever).
+ */
+ drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+ pl111_drm_vm_open_locked(dev, vma);
+#else
+ drm_vm_open_locked(dev, vma);
+#endif
+
+ mutex_unlock(&dev->struct_mutex);
+
+ return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+ /*
+ * Need to release the dma_buf's reference on the gem object it was
+ * exported from, and also clear the gem object's export_dma_buf
+ * pointer to this dma_buf as it no longer exists
+ */
+ struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+ struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct drm_crtc *crtc;
+ unsigned long flags;
+#endif
+ bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+ head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb) {
+ struct pl111_drm_framebuffer *pl111_fb;
+ struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+ if (pl111_fb->bo == bo) {
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+ pl111_crtc->old_kds_res_set = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+ }
+#endif
+ mutex_lock(&priv.export_dma_buf_lock);
+
+ obj->export_dma_buf = NULL;
+ drm_gem_object_unreference_unlocked(obj);
+
+ mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+ struct dma_buf_attachment *attach)
+{
+ DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+ dev, attach);
+
+ attach->priv = dev;
+
+ return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+ struct dma_buf_attachment *attach)
+{
+ DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+ attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+ *attach,
+ enum dma_data_direction
+ direction)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int size, n_pages, nents;
+ struct scatterlist *s, *sg;
+ struct sg_table *sgt;
+ int ret, i;
+
+ DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+ attach, bo);
+
+ /*
+	 * Nothing to do if we are trying to map a dmabuf that has been
+	 * imported; just return the existing sgt.
+ */
+ if (obj->import_attach) {
+ BUG_ON(!bo->sgt);
+ return bo->sgt;
+ }
+
+ size = obj->size;
+ n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (bo->type & PL111_BOT_DMA) {
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ DRM_ERROR("Failed to allocate sg_table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			kfree(sgt);
+			return ERR_PTR(-ENOMEM);
+ }
+ sg_dma_len(sgt->sgl) = size;
+ /* We use DMA coherent mappings for PL111_BOT_DMA so we must
+ * use the virtual address returned at buffer allocation */
+ sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+ sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+ } else { /* PL111_BOT_SHM */
+ struct page **pages;
+ int pg = 0;
+
+ mutex_lock(&dev->struct_mutex);
+ pages = get_pages(obj);
+ if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			mutex_unlock(&dev->struct_mutex);
+			return ERR_CAST(pages);
+ }
+ sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL) {
+			mutex_unlock(&dev->struct_mutex);
+			return ERR_PTR(-ENOMEM);
+		}
+
+ pl111_gem_sync_to_dma(bo);
+
+ /*
+ * At this point the pages have been dma-mapped by either
+ * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+ * for cached. So the physical addresses can be assigned
+ * to the sg entries.
+ * drm_prime_pages_to_sg() may have combined contiguous pages
+ * into chunks so we assign the physical address of the first
+ * page of a chunk to the chunk and check that the physical
+ * addresses of the rest of the pages in that chunk are also
+ * contiguous.
+ */
+ sg = sgt->sgl;
+ nents = sgt->nents;
+
+ for_each_sg(sg, s, nents, i) {
+ int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+ sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+ for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+ BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+ bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+ }
+
+ pg += n_pages_in_chunk;
+ }
+
+ mutex_unlock(&dev->struct_mutex);
+ }
+ bo->sgt = sgt;
+ return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction direction)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+ attach, bo);
+
+ sg_free_table(sgt);
+ kfree(sgt);
+ bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ void *vaddr = NULL;
+
+ /* Make sure we cannot access outside the memory range */
+ if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+ return NULL;
+
+ if (bo->type & PL111_BOT_DMA) {
+ vaddr = (bo->backing_data.dma.fb_cpu_addr +
+ (pageno << PAGE_SHIFT));
+ } else {
+ vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+ }
+
+ return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+ size_t len)
+{
+ struct scatterlist *sg;
+ unsigned int count;
+ size_t size = 0;
+ dma_addr_t dma_handle = 0;
+
+ /* Find the entry that starts at "start" and has length "len",
+ * or fall through and return a NULL dma_handle */
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ if ((size == start) && (len == sg_dma_len(sg))) {
+ dma_handle = sg_dma_address(sg);
+ break;
+ }
+ size += sg_dma_len(sg);
+ }
+ return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ struct sg_table *sgt = bo->sgt;
+ dma_addr_t dma_handle;
+
+ if ((start + len) > bo->gem_object.size)
+ return -EINVAL;
+
+ if (!(bo->type & PL111_BOT_DMA)) {
+ struct device *dev = bo->gem_object.dev->dev;
+
+ dma_handle = pl111_find_matching_sg(sgt, start, len);
+ if (!dma_handle)
+ return -EINVAL;
+
+ dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+ }
+ /* PL111_BOT_DMA uses coherent mappings, no need to sync */
+ return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ struct sg_table *sgt = bo->sgt;
+ dma_addr_t dma_handle;
+
+ if ((start + len) > bo->gem_object.size)
+ return;
+
+ if (!(bo->type & PL111_BOT_DMA)) {
+ struct device *dev = bo->gem_object.dev->dev;
+
+ dma_handle = pl111_find_matching_sg(sgt, start, len);
+ if (!dma_handle)
+ return;
+
+ dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+ }
+ /* PL111_BOT_DMA uses coherent mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+ .release = &pl111_dma_buf_release,
+ .attach = &pl111_dma_buf_attach,
+ .detach = &pl111_dma_buf_detach,
+ .map_dma_buf = &pl111_dma_buf_map_dma_buf,
+ .unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+ .kmap_atomic = &pl111_dma_buf_kmap,
+ .kmap = &pl111_dma_buf_kmap,
+ .begin_cpu_access = &pl111_dma_buf_begin_cpu,
+ .end_cpu_access = &pl111_dma_buf_end_cpu,
+ .mmap = &pl111_dma_buf_mmap,
+};
+
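+/*
+ * For reference, the importer-side sequence that exercises the ops above
+ * (core dma-buf API of this kernel era, shown for illustration only; error
+ * handling omitted):
+ *
+ *	attach = dma_buf_attach(buf, importer_dev);
+ *	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ *	... device DMA using sgt ...
+ *	dma_buf_begin_cpu_access(buf, 0, buf->size, DMA_FROM_DEVICE);
+ *	... CPU access via dma_buf_kmap()/dma_buf_kunmap() ...
+ *	dma_buf_end_cpu_access(buf, 0, buf->size, DMA_FROM_DEVICE);
+ *	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+ *	dma_buf_detach(buf, attach);
+ */
+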
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attachment;
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+ struct scatterlist *sgl;
+ struct sg_table *sgt;
+ dma_addr_t cont_phys;
+ int ret = 0;
+ int i;
+
+ DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+ /* is this one of our own objects? */
+ if (dma_buf->ops == &pl111_dma_buf_ops) {
+ obj = dma_buf->priv;
+ /* is it from our device? */
+ if (obj->dev == dev) {
+ /*
+ * Importing dmabuf exported from our own gem increases
+ * refcount on gem itself instead of f_count of dmabuf.
+ */
+ drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+ /* Before v3.10.0 the caller is assumed to have taken a ref on the
+ * dma_buf; we don't want it for self-imported buffers, so drop it here */
+ dma_buf_put(dma_buf);
+#endif
+
+ return obj;
+ }
+ }
+
+ attachment = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attachment))
+ return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+ /* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+ get_dma_buf(dma_buf);
+#endif
+
+ sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+ if (IS_ERR_OR_NULL(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto err_buf_detach;
+ }
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo) {
+ DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+ ret = -ENOMEM;
+ goto err_unmap_attach;
+ }
+
+ /* Find out whether the buffer is contiguous or not */
+ sgl = sgt->sgl;
+ cont_phys = sg_phys(sgl);
+ bo->type |= PL111_BOT_DMA;
+ for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+ dma_addr_t real_phys = sg_phys(sgl);
+ if (real_phys != cont_phys) {
+ bo->type &= ~PL111_BOT_DMA;
+ break;
+ }
+ cont_phys += (PAGE_SIZE - sgl->offset);
+ }
+
+ ret = drm_gem_private_object_init(dev, &bo->gem_object,
+ dma_buf->size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not import DMA GEM obj\n");
+ goto err_free_buffer;
+ }
+
+ if (bo->type & PL111_BOT_DMA) {
+ bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+ bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+ DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+ } else { /* PL111_BOT_SHM */
+ DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+ }
+
+ bo->gem_object.import_attach = attachment;
+ bo->sgt = sgt;
+
+ return &bo->gem_object;
+
+err_free_buffer:
+ kfree(bo);
+ bo = NULL;
+err_unmap_attach:
+ dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+ dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+ /* from 3.10.0 we will have taken a ref so drop it here */
+ dma_buf_put(dma_buf);
+#endif
+ return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags)
+{
+ struct dma_buf *new_buf;
+ struct pl111_gem_bo *bo;
+ size_t size;
+
+ DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+ size = obj->size;
+
+ new_buf = dma_buf_export(obj /* priv */, &pl111_dma_buf_ops, size,
+ flags | O_RDWR);
+ if (IS_ERR_OR_NULL(new_buf))
+ return new_buf;
+
+ bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+ /*
+ * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+ * finishes
+ */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /*
+ * Ensure that we hold the kds resource if it's the currently
+ * displayed buffer.
+ */
+ obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+ DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+ return new_buf;
+}
+
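+/*
+ * For reference, a minimal userspace sketch of how the export path below is
+ * typically reached (standard DRM PRIME UAPI, shown for illustration only;
+ * error handling omitted):
+ *
+ *	struct drm_prime_handle req = {
+ *		.handle = handle,	  (GEM handle, e.g. from dumb_create)
+ *		.flags = DRM_CLOEXEC,
+ *	};
+ *	ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &req);
+ *	(req.fd is then a dma_buf fd backed by pl111_gem_prime_export())
+ */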
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t handle, uint32_t flags, int *prime_fd)
+{
+ int result;
+ /*
+ * This will re-use any existing exports, and calls
+ * driver->gem_prime_export to do the first export when needed
+ */
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+ file_priv, handle);
+
+ mutex_lock(&priv.export_dma_buf_lock);
+ result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+ prime_fd);
+ mutex_unlock(&priv.export_dma_buf_lock);
+
+ return result;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100755
index 000000000000..b7dbbf2794ab
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+ return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct pl111_drm_encoder *pl111_encoder =
+ PL111_ENCODER_FROM_ENCODER(encoder);
+
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+ drm_encoder_cleanup(encoder);
+ kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+ .destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+ .mode_fixup = pl111_encoder_helper_mode_fixup,
+ .prepare = pl111_encoder_helper_prepare,
+ .commit = pl111_encoder_helper_commit,
+ .mode_set = pl111_encoder_helper_mode_set,
+ .disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+ int possible_crtcs)
+{
+ struct pl111_drm_encoder *pl111_encoder;
+
+ pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+ if (pl111_encoder == NULL) {
+ pr_err("Failed to allocated pl111_drm_encoder\n");
+ return NULL;
+ }
+
+ drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+ DRM_MODE_ENCODER_DAC);
+
+ drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+ pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+ return pl111_encoder;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_fb.c b/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100755
index 000000000000..546e4d45f4c9
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+ struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct drm_crtc *crtc;
+ unsigned long flags;
+#endif
+ DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+ /*
+ * Because flips are deferred, wait for all previous flips to complete
+ */
+ wait_event(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /*
+ * Release KDS resources if it's currently being displayed. Only occurs
+ * when the last framebuffer is destroyed.
+ */
+ list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+ head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb == framebuffer) {
+ /* Release the current buffers */
+ if (pl111_crtc->old_kds_res_set != NULL) {
+ DRM_DEBUG_KMS("Releasing KDS resources for ");
+ DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+ kds_resource_set_release(
+ &pl111_crtc->old_kds_res_set);
+ }
+ pl111_crtc->old_kds_res_set = NULL;
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+ flags);
+ }
+#endif
+ drm_framebuffer_cleanup(framebuffer);
+
+ if (pl111_fb->bo != NULL)
+ drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+ kfree(pl111_fb);
+
+ DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+ struct drm_file *file_priv,
+ unsigned int *handle)
+{
+ struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+ DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+ if (bo == NULL)
+ return -EINVAL;
+
+ return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+ .destroy = pl111_fb_destroy,
+ .create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct pl111_drm_framebuffer *pl111_fb = NULL;
+ struct drm_framebuffer *fb = NULL;
+ struct drm_gem_object *gem_obj;
+ struct pl111_gem_bo *bo;
+ int err = 0;
+ size_t min_size;
+ int bpp;
+ int depth;
+
+ pr_info("DRM %s\n", __func__);
+ gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+ if (gem_obj == NULL) {
+ DRM_ERROR("Could not get gem obj from handle to create fb\n");
+ err = -ENOENT;
+ goto error;
+ }
+
+ bo = PL111_BO_FROM_GEM(gem_obj);
+ drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+ if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+ DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+ err = -EINVAL;
+ goto error;
+ }
+
+ min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+ + mode_cmd->width * (bpp >> 3);
+
+ if (bo->gem_object.size < min_size) {
+ DRM_ERROR("gem obj size < min size\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* We can't scan out SHM so we can't create an fb for it */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ DRM_ERROR("Can't create FB for non-scanout buffer\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+ case 'Y':
+ case 'U':
+ case 'V':
+ case 'N':
+ case 'T':
+ DRM_ERROR("YUV formats not supported\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+ if (pl111_fb == NULL) {
+ DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+ err = -ENOMEM;
+ goto error;
+ }
+ fb = &pl111_fb->fb;
+
+ err = drm_framebuffer_init(dev, fb, &fb_funcs);
+ if (err) {
+ DRM_ERROR("drm_framebuffer_init failed\n");
+ kfree(fb);
+ fb = NULL;
+ goto error;
+ }
+
+ drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+ /* The only framebuffer formats supported by pl111
+ * are 16 bpp or 32 bpp with 24 bit depth.
+ * See clcd_enable()
+ */
+ if (!((fb->bits_per_pixel == 16) ||
+ (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+ DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+ drm_framebuffer_cleanup(fb);
+ kfree(fb);
+ fb = NULL;
+ err = -EINVAL;
+ goto error;
+ }
+
+ pl111_fb->bo = bo;
+
+ DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+ fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+ return fb;
+
+error:
+ if (gem_obj != NULL)
+ drm_gem_object_unreference_unlocked(gem_obj);
+
+ return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100755
index 000000000000..34e0bad994d7
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+ struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, bool page_flip,
+ struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+ *dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+ int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+ int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+ struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle,
+ uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+ struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+ *flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+ struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+ struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_gem.c b/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100755
index 000000000000..ed5dcabe0082
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+ struct pl111_gem_bo *bo;
+ struct drm_device *dev = obj->dev;
+ DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+ bo = PL111_BO_FROM_GEM(obj);
+
+ if (obj->import_attach)
+ drm_prime_gem_destroy(obj, bo->sgt);
+
+ if (obj->map_list.map != NULL)
+ drm_gem_free_mmap_offset(obj);
+
+ /*
+ * Only free the backing memory if the object has not been imported.
+ * If it has been imported, the exporter is in charge of freeing it
+ * once the dmabuf's refcount drops to zero.
+ */
+ if (obj->import_attach)
+ goto imported_out;
+
+ if (bo->type & PL111_BOT_DMA) {
+ dma_free_writecombine(dev->dev, obj->size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr);
+ } else if (bo->backing_data.shm.pages != NULL) {
+ put_pages(obj, bo->backing_data.shm.pages);
+ }
+
+imported_out:
+ drm_gem_object_release(obj);
+
+ kfree(bo);
+
+ DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+ u32 flags, struct drm_file *file_priv,
+ u32 *handle)
+{
+ int ret = 0;
+ struct pl111_gem_bo *bo = NULL;
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (bo == NULL) {
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+ /*
+ * If the ARCH can't chain we can't have non-contiguous allocs larger
+ * than a single sg can hold.
+ * In this case we fall back to using contiguous memory
+ */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ long unsigned int n_pages =
+ PAGE_ALIGN(size) >> PAGE_SHIFT;
+ if (n_pages > SG_MAX_SINGLE_ALLOC) {
+ bo->type |= PL111_BOT_DMA;
+ /*
+ * Non-contiguous allocation request changed to
+ * contiguous
+ */
+ DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+ n_pages, SG_MAX_SINGLE_ALLOC);
+ }
+ }
+#endif
+ if (bo->type & PL111_BOT_DMA) {
+ /* scanout compatible - use physically contiguous buffer */
+ bo->backing_data.dma.fb_cpu_addr =
+ dma_alloc_writecombine(dev->dev, size,
+ &bo->backing_data.dma.fb_dev_addr,
+ GFP_KERNEL);
+ if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+ DRM_ERROR("dma_alloc_writecombine failed\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+
+ ret = drm_gem_private_object_init(dev, &bo->gem_object,
+ size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not initialise GEM object\n");
+ goto free_dma;
+ }
+ } else { /* PL111_BOT_SHM */
+ /* not scanout compatible - use SHM backed object */
+ ret = drm_gem_object_init(dev, &bo->gem_object, size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+ DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+ }
+
+ DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+ size, flags,
+ (bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+ (bo->type & PL111_BOT_DMA) ?
+ (unsigned long)bo->backing_data.dma.fb_dev_addr:
+ (unsigned long)bo->backing_data.shm.pages,
+ bo->type);
+
+ ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+ if (ret != 0) {
+ DRM_ERROR("DRM failed to create GEM handle\n");
+ goto obj_release;
+ }
+
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_unreference_unlocked(&bo->gem_object);
+
+ return 0;
+
+obj_release:
+ drm_gem_object_release(&bo->gem_object);
+free_dma:
+ if (bo->type & PL111_BOT_DMA)
+ dma_free_writecombine(dev->dev, size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr);
+free_bo:
+ kfree(bo);
+finish:
+ return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_pl111_gem_create *args = data;
+ uint32_t bytes_pp;
+
+ /* Round bpp up, to allow for case where bpp<8 */
+ bytes_pp = args->bpp >> 3;
+ if (args->bpp & ((1 << 3) - 1))
+ bytes_pp++;
+
+ if (args->flags & ~PL111_BOT_MASK) {
+ DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ args->pitch = ALIGN(args->width * bytes_pp, 64);
+ args->size = PAGE_ALIGN(args->pitch * args->height);
+
+ DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+ args->width, args->height, args->pitch, args->bpp,
+ bytes_pp, args->size, args->flags);
+
+ return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+ &args->handle);
+}
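+
+/*
+ * Worked example of the pitch/size calculation above (illustrative values
+ * only): for a hypothetical 1366x768 buffer at 32 bpp, bytes_pp = 4, so
+ * pitch = ALIGN(1366 * 4, 64) = ALIGN(5464, 64) = 5504 bytes and
+ * size = PAGE_ALIGN(5504 * 768) = 4227072 bytes (already a multiple of the
+ * 4 KiB page size). pl111_dumb_create() below performs the same calculation
+ * but forces a contiguous, uncached buffer so the result can be scanned out.
+ */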
+
+int pl111_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+ uint32_t bytes_pp;
+
+ /* Round bpp up, to allow for case where bpp<8 */
+ bytes_pp = args->bpp >> 3;
+ if (args->bpp & ((1 << 3) - 1))
+ bytes_pp++;
+
+ if (args->flags) {
+ DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ args->pitch = ALIGN(args->width * bytes_pp, 64);
+ args->size = PAGE_ALIGN(args->pitch * args->height);
+
+ DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+ args->width, args->height, args->pitch, args->bpp,
+ bytes_pp, args->size, args->flags);
+
+ return pl111_gem_object_create(dev, args->size,
+ PL111_BOT_DMA | PL111_BOT_UNCACHED,
+ file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle)
+{
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+ file_priv, handle);
+ return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle,
+ uint64_t *offset)
+{
+ struct drm_gem_object *obj;
+ int ret = 0;
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+ file_priv, handle);
+
+ /* GEM does all our handle to object mapping */
+ obj = drm_gem_object_lookup(dev, file_priv, handle);
+ if (obj == NULL) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+ if (obj->map_list.map == NULL) {
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret != 0) {
+ drm_gem_object_unreference_unlocked(obj);
+ goto fail;
+ }
+ }
+
+ *offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+
+ drm_gem_object_unreference_unlocked(obj);
+fail:
+ return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+ struct drm_device *dev = bo->gem_object.dev;
+
+ if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+ int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+ struct page **pages = bo->backing_data.shm.pages;
+ bool dirty = false;
+
+ for (i = 0; i < npages; i++) {
+ if (!bo->backing_data.shm.dma_addrs[i]) {
+ DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+ bo->backing_data.shm.dma_addrs[i] =
+ dma_map_page(dev->dev, pages[i], 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dirty = true;
+ }
+ }
+
+ if (dirty) {
+ DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+ /*
+ * TODO MIDEGL-1813
+ *
+ * Use flush_cache_page() and outer_flush_range() to
+ * flush only the user space mappings of the dirty pages
+ */
+ flush_cache_all();
+ outer_flush_all();
+ unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+ bo->gem_object.size, 1);
+ }
+ }
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+ struct drm_device *dev = bo->gem_object.dev;
+
+ /*
+ * TODO MIDEGL-1808
+ *
+ * The following check was meant to detect the CPU trying to access a
+ * buffer that is currently mapped for DMA, which is illegal as described
+ * by the DMA-API.
+ *
+ * However, some of our tests do exactly that. The check then triggers the
+ * message below and skips dma-unmapping the pages so as not to disturb the
+ * DMA device, but the tests fail because the caches are never flushed.
+ */
+
+ /*
+ if (bo->sgt) {
+ DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n", __func__);
+ return;
+ }
+ */
+
+ if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+ bo->backing_data.shm.dma_addrs[pgoff]) {
+ DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+ __func__, bo, bo->gem_object.size,
+ bo->backing_data.shm.dma_addrs[pgoff]);
+ dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ bo->backing_data.shm.dma_addrs[pgoff] = 0;
+ }
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+ struct vm_area_struct *vma, size_t size)
+{
+ DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+ __func__, obj, bo, bo->type);
+
+ vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_flags |= VM_MIXEDMAP;
+
+ if (bo->type & PL111_BOT_WC) {
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ } else if (bo->type & PL111_BOT_CACHED) {
+ /*
+ * Objects that do not have a filp (DMA backed) can't be
+ * mapped as cached now. Write-combine should be enough.
+ */
+ if (WARN_ON(!obj->filp))
+ return -EINVAL;
+
+ /*
+ * As explained in Documentation/dma-buf-sharing.txt
+ * we need this trick so that we can manually zap ptes
+ * in order to fake coherency.
+ */
+ fput(vma->vm_file);
+ vma->vm_pgoff = 0;
+ get_file(obj->filp);
+ vma->vm_file = obj->filp;
+
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ } else { /* PL111_BOT_UNCACHED */
+ vma->vm_page_prot =
+ pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+ }
+ return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+ int ret;
+ struct drm_file *priv = file_priv->private_data;
+ struct drm_device *dev = priv->minor->dev;
+ struct drm_gem_mm *mm = dev->mm_private;
+ struct drm_local_map *map = NULL;
+ struct drm_hash_item *hash;
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+
+ DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+ drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+ map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+ obj = map->handle;
+ bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+ /* for an imported buffer we let the exporter handle the mmap */
+ if (obj->import_attach)
+ return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+ ret = drm_gem_mmap(file_priv, vma);
+ if (ret < 0) {
+ DRM_ERROR("failed to mmap\n");
+ return ret;
+ }
+
+ return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100755
index 000000000000..0b3c6813b36c
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_prepare_enable() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+ __u32 cntl;
+ struct clcd_board *board;
+
+ pr_info("DRM %s\n", __func__);
+
+ clk_prepare_enable(priv.clk);
+
+ /* Enable and Power Up */
+ cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+ DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+ if (fb->bits_per_pixel == 16)
+ cntl |= CNTL_LCDBPP16_565;
+ else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+ cntl |= CNTL_LCDBPP24;
+ else
+ BUG_ON(1);
+
+ cntl |= CNTL_BGR;
+
+ writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+ if (priv.amba_dev->dev.platform_data) {
+ board = priv.amba_dev->dev.platform_data;
+
+ if (board->enable)
+ board->enable(NULL);
+ }
+
+ /* Enable Interrupts */
+ writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+ return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+ struct clcd_board *board;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ unsigned long flags;
+#endif
+
+ pr_info("DRM %s\n", __func__);
+
+ /* Disable Interrupts */
+ writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+ if (priv.amba_dev->dev.platform_data) {
+ board = priv.amba_dev->dev.platform_data;
+
+ if (board->disable)
+ board->disable(NULL);
+ }
+
+ /* Disable and Power Down */
+ writel(0, priv.regs + CLCD_PL111_CNTL);
+
+ /* Disable clock */
+ clk_disable_unprepare(priv.clk);
+
+ pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ /* Release the previous buffers */
+ if (pl111_crtc->old_kds_res_set != NULL)
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+ pl111_crtc->old_kds_res_set = NULL;
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+ return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+ struct drm_framebuffer *fb;
+ struct pl111_gem_bo *bo;
+ size_t min_size;
+ fb = flip_res->fb;
+ bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+ min_size = (fb->height - 1) * fb->pitches[0]
+ + fb->width * (fb->bits_per_pixel >> 3);
+
+ BUG_ON(bo->gem_object.size < min_size);
+
+ /* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+ BUG_ON(bo->type != PL111_BOT_DMA);
+
+ /*
+ * Note the buffer for releasing after IRQ, and don't allow any more
+ * updates until then.
+ *
+ * This clcd controller latches the new address on next vsync. Address
+ * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+ * wait for that before releasing the previous buffer's kds
+ * resources. Otherwise, we'll allow writers to write to the old buffer
+ * whilst it is still being displayed
+ */
+ pl111_crtc->current_update_res = flip_res;
+
+ DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+ fb, bo, bo->backing_data.dma.fb_dev_addr);
+
+ if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+ DRM_ERROR("Could not get vblank reference for crtc %d\n",
+ pl111_crtc->crtc_index);
+
+ /* Set the scanout buffer */
+ writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+ writel(bo->backing_data.dma.fb_dev_addr +
+ ((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+ struct drm_framebuffer *fb)
+{
+ unsigned long irq_flags;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+ spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+ if (list_empty(&pl111_crtc->update_queue) &&
+ !pl111_crtc->current_update_res) {
+ do_flip_to_res(flip_res);
+ } else {
+ /*
+ * Enqueue the update to occur on a future IRQ
+ * This only happens on triple-or-greater buffering
+ */
+ DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+ fb);
+ list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+ }
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+ if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+ pl111_crtc->last_bpp != fb->bits_per_pixel ||
+ !drm_mode_equal(pl111_crtc->new_mode,
+ pl111_crtc->current_mode))) {
+ struct clcd_regs timing;
+
+ pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+ DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+ timing.tim0, timing.tim1, timing.tim2,
+ timing.tim3, timing.pixclock);
+
+ /* This is the actual mode setting part */
+ clk_set_rate(priv.clk, timing.pixclock);
+
+ writel(timing.tim0, priv.regs + CLCD_TIM0);
+ writel(timing.tim1, priv.regs + CLCD_TIM1);
+ writel(timing.tim2, priv.regs + CLCD_TIM2);
+ writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+ clcd_enable(fb);
+ pl111_crtc->last_bpp = fb->bits_per_pixel;
+ }
+
+ if (!flip_res->page_flip) {
+ drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+ pl111_crtc->current_mode = pl111_crtc->new_mode;
+ pl111_crtc->new_mode = NULL;
+ }
+
+ BUG_ON(!pl111_crtc->current_mode);
+
+ /*
+ * If IRQs weren't enabled before, they are now. This will eventually
+ * cause flip_res to be released via pl111_common_irq, which updates
+ * every time the Base Address is latched (i.e. every frame, regardless
+ * of whether we update the base address or not)
+ */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+ u32 irq_stat;
+ struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+ irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+ if (!irq_stat)
+ return IRQ_NONE;
+
+ if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+ pl111_common_irq(pl111_crtc);
+
+ /* Clear the interrupt once done */
+ writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+ return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ int ret;
+
+ if (priv == NULL) {
+ pr_err("%s no private data\n", __func__);
+ return -EINVAL;
+ }
+
+ if (priv->amba_dev == NULL) {
+ pr_err("%s no amba device found\n", __func__);
+ return -EINVAL;
+ }
+
+ /* set up MMIO for register access */
+ priv->mmio_start = priv->amba_dev->res.start;
+ priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+ DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+ priv->mmio_len);
+
+ priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+ if (priv->regs == NULL) {
+ pr_err("%s failed mmio\n", __func__);
+ return -EINVAL;
+ }
+
+ /* turn off interrupts */
+ writel(0, priv->regs + CLCD_PL111_IENB);
+
+ ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+ "pl111_irq_handler", NULL);
+ if (ret != 0) {
+ pr_err("%s failed %d\n", __func__, ret);
+ goto out_mmio;
+ }
+
+ goto finish;
+
+out_mmio:
+ iounmap(priv->regs);
+finish:
+ DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+ return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ u32 cntl;
+
+ if (priv == NULL || priv->regs == NULL)
+ return;
+
+ free_irq(priv->amba_dev->irq[0], NULL);
+
+ cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+ cntl &= ~CNTL_LCDEN;
+ writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+ cntl &= ~CNTL_LCDPWR;
+ writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+ iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+ struct clcd_board *board = dev->dev.platform_data;
+ int ret;
+ pr_info("DRM %s\n", __func__);
+
+ if (!board)
+ dev_warn(&dev->dev, "board data not available\n");
+
+ ret = amba_request_regions(dev, NULL);
+ if (ret != 0) {
+ DRM_ERROR("CLCD: unable to reserve regs region\n");
+ goto out;
+ }
+
+ priv.amba_dev = dev;
+
+ priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+ if (IS_ERR(priv.clk)) {
+ DRM_ERROR("CLCD: unable to get clk.\n");
+ ret = PTR_ERR(priv.clk);
+ goto clk_err;
+ }
+
+ return 0;
+
+clk_err:
+ amba_release_regions(dev);
+out:
+ return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+ DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+ clk_put(priv.clk);
+
+ amba_release_regions(dev);
+
+ priv.amba_dev = NULL;
+
+ return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+ struct clcd_regs *timing)
+{
+ unsigned int ppl, hsw, hfp, hbp;
+ unsigned int lpp, vsw, vfp, vbp;
+ unsigned int cpl;
+
+ memset(timing, 0, sizeof(struct clcd_regs));
+
+ ppl = (mode->hdisplay / 16) - 1;
+ hsw = mode->hsync_end - mode->hsync_start - 1;
+ hfp = mode->hsync_start - mode->hdisplay - 1;
+ hbp = mode->htotal - mode->hsync_end - 1;
+
+ lpp = mode->vdisplay - 1;
+ vsw = mode->vsync_end - mode->vsync_start - 1;
+ vfp = mode->vsync_start - mode->vdisplay;
+ vbp = mode->vtotal - mode->vsync_end;
+
+ cpl = mode->hdisplay - 1;
+
+ timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+ timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+ timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+ timing->tim3 = 0;
+
+ timing->pixclock = mode->clock * 1000;
+}
+
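+/*
+ * Worked example for the conversion above (standard 640x480@60 VGA timings,
+ * shown purely for illustration): hdisplay/hsync_start/hsync_end/htotal =
+ * 640/656/752/800 and vdisplay/vsync_start/vsync_end/vtotal = 480/490/492/525
+ * at a 25175 kHz clock give ppl=39, hsw=95, hfp=15, hbp=47 and lpp=479,
+ * vsw=1, vfp=10, vbp=33, so tim0 = 0x2F0F5F9C, tim1 = 0x210A05DF and
+ * pixclock = 25175000 Hz. pl111_convert_timing_to_drm_mode() below performs
+ * the inverse calculation.
+ */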
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+ struct drm_display_mode *mode)
+{
+ unsigned int ppl, hsw, hfp, hbp;
+ unsigned int lpp, vsw, vfp, vbp;
+
+ ppl = (timing->tim0 >> 2) & 0x3f;
+ hsw = (timing->tim0 >> 8) & 0xff;
+ hfp = (timing->tim0 >> 16) & 0xff;
+ hbp = (timing->tim0 >> 24) & 0xff;
+
+ lpp = timing->tim1 & 0x3ff;
+ vsw = (timing->tim1 >> 10) & 0x3f;
+ vfp = (timing->tim1 >> 16) & 0xff;
+ vbp = (timing->tim1 >> 24) & 0xff;
+
+ mode->hdisplay = (ppl + 1) * 16;
+ mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+ mode->hsync_end = ((ppl + 1) * 16) + hfp + hsw + 2;
+ mode->htotal = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+ mode->hskew = 0;
+
+ mode->vdisplay = lpp + 1;
+ mode->vsync_start = lpp + vfp + 1;
+ mode->vsync_end = lpp + vfp + vsw + 2;
+ mode->vtotal = lpp + vfp + vsw + vbp + 2;
+
+ mode->flags = 0;
+
+ mode->width_mm = 0;
+ mode->height_mm = 0;
+
+ mode->clock = timing->pixclock / 1000;
+ mode->hsync = timing->pixclock / mode->htotal;
+ mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_platform.c b/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100755
index 000000000000..fe7db7392792
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+ pm_message_t state)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ pl111_drm_exit(dev);
+
+ return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+ {
+ .id = 0x00041110,
+ .mask = 0x000ffffe,
+ },
+ {0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+ .drv = {
+ .name = "clcd-pl11x",
+ },
+ .probe = pl111_amba_probe,
+ .remove = pl111_amba_remove,
+ .id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+ .probe = pl111_platform_drm_probe,
+ .remove = pl111_platform_drm_remove,
+ .suspend = pl111_platform_drm_suspend,
+ .resume = pl111_platform_drm_resume,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ },
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+ .name = DRIVER_NAME,
+ .id = -1,
+ .dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+ int ret;
+
+ pr_info("DRM %s\n", __func__);
+
+ pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+ if (pl111_drm_device == NULL) {
+ pr_err("DRM platform_device_register_full() failed\n");
+ return -ENOMEM;
+ }
+
+ ret = amba_driver_register(&pl111_amba_driver);
+ if (ret != 0) {
+ pr_err("DRM amba_driver_register() failed %d\n", ret);
+ goto err_amba_reg;
+ }
+
+ ret = platform_driver_register(&platform_drm_driver);
+ if (ret != 0) {
+ pr_err("DRM platform_driver_register() failed %d\n", ret);
+ goto err_pdrv_reg;
+ }
+
+ return 0;
+
+err_pdrv_reg:
+ amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+ platform_device_unregister(pl111_drm_device);
+
+ return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+ pr_info("DRM %s\n", __func__);
+
+ platform_driver_unregister(&platform_drm_driver);
+ amba_driver_unregister(&pl111_amba_driver);
+ platform_device_unregister(pl111_drm_device);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100755
index 000000000000..a61636f8424f
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_vma.c b/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100755
index 000000000000..c9918f79e939
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,307 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+ bool dirty, bool accessed)
+{
+ int i, npages;
+ struct pl111_gem_bo *bo;
+ npages = obj->size >> PAGE_SHIFT;
+ bo = PL111_BO_FROM_GEM(obj);
+ for (i = 0; i < npages; i++) {
+ if (dirty)
+ set_page_dirty(pages[i]);
+ if (accessed)
+ mark_page_accessed(pages[i]);
+ /* Undo the reference we took when populating the table */
+ page_cache_release(pages[i]);
+ }
+ drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+ int i, npages;
+ struct pl111_gem_bo *bo;
+ npages = obj->size >> PAGE_SHIFT;
+ bo = PL111_BO_FROM_GEM(obj);
+ _drm_gem_put_pages(obj, pages, true, true);
+ if (bo->backing_data.shm.dma_addrs) {
+ for (i = 0; i < npages; i++) {
+ /* Filter pages unmapped because of CPU accesses */
+ if (!bo->backing_data.shm.dma_addrs[i])
+ continue;
+ if (!dma_mapping_error(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i])) {
+ dma_unmap_page(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i],
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+ kfree(bo->backing_data.shm.dma_addrs);
+ bo->backing_data.shm.dma_addrs = NULL;
+ }
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+ gfp_t gfpmask)
+{
+ struct inode *inode;
+ struct address_space *mapping;
+ struct page *p, **pages;
+ int i, npages;
+
+ /* This is the shared memory object that backs the GEM resource */
+ inode = obj->filp->f_path.dentry->d_inode;
+ mapping = inode->i_mapping;
+
+ npages = obj->size >> PAGE_SHIFT;
+
+ pages = drm_malloc_ab(npages, sizeof(struct page *));
+ if (pages == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ gfpmask |= mapping_gfp_mask(mapping);
+
+ for (i = 0; i < npages; i++) {
+ p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+ if (IS_ERR(p))
+ goto fail;
+ pages[i] = p;
+
+ /*
+ * There is a hypothetical issue w/ drivers that require
+ * buffer memory in the low 4GB.. if the pages are un-
+ * pinned, and swapped out, they can end up swapped back
+ * in above 4GB. If pages are already in memory, then
+ * shmem_read_mapping_page_gfp will ignore the gfpmask,
+ * even if the already in-memory page disobeys the mask.
+ *
+ * It is only a theoretical issue today, because none of
+ * the devices with this limitation can be populated with
+ * enough memory to trigger the issue. But this BUG_ON()
+ * is here as a reminder in case the problem with
+ * shmem_read_mapping_page_gfp() isn't solved by the time
+ * it does become a real issue.
+ *
+ * See this thread: http://lkml.org/lkml/2011/7/11/238
+ */
+ BUG_ON((gfpmask & __GFP_DMA32) &&
+ (page_to_pfn(p) >= 0x00100000UL));
+ }
+
+ return pages;
+
+fail:
+ while (i--)
+ page_cache_release(pages[i]);
+
+ drm_free_large(pages);
+ return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+ struct pl111_gem_bo *bo;
+ bo = PL111_BO_FROM_GEM(obj);
+
+ if (bo->backing_data.shm.pages == NULL) {
+ struct page **p;
+ int npages = obj->size >> PAGE_SHIFT;
+ int i;
+
+ p = _drm_gem_get_pages(obj, GFP_KERNEL);
+ if (IS_ERR(p))
+ return ERR_PTR(-ENOMEM);
+
+ bo->backing_data.shm.pages = p;
+
+ if (bo->backing_data.shm.dma_addrs == NULL) {
+ bo->backing_data.shm.dma_addrs =
+ kzalloc(npages * sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (bo->backing_data.shm.dma_addrs == NULL)
+ goto error_out;
+ }
+
+ if (!(bo->type & PL111_BOT_CACHED)) {
+ for (i = 0; i < npages; ++i) {
+ bo->backing_data.shm.dma_addrs[i] =
+ dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i]))
+ goto error_out;
+ }
+ }
+ }
+
+ return bo->backing_data.shm.pages;
+
+error_out:
+ put_pages(obj, bo->backing_data.shm.pages);
+ bo->backing_data.shm.pages = NULL;
+ return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page **pages;
+ pgoff_t pgoff;
+ unsigned long pfn;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int ret;
+
+ mutex_lock(&dev->struct_mutex);
+
+ /* We don't use vmf->pgoff since that has the fake offset: */
+ pgoff = ((unsigned long)vmf->virtual_address -
+ vma->vm_start) >> PAGE_SHIFT;
+
+ if (bo->type & PL111_BOT_DMA) {
+ pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) + pgoff;
+ } else { /* PL111_BOT_SHM */
+ pages = get_pages(obj);
+ if (IS_ERR(pages)) {
+ dev_err(obj->dev->dev,
+ "could not get pages: %ld\n", PTR_ERR(pages));
+ ret = PTR_ERR(pages);
+ goto error;
+ }
+ pfn = page_to_pfn(pages[pgoff]);
+ pl111_gem_sync_to_cpu(bo, pgoff);
+ }
+
+ DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+ bo, PFN_PHYS(pfn), PFN_PHYS(pgoff));
+
+ ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+ mutex_unlock(&dev->struct_mutex);
+
+ switch (ret) {
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ *
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct drm_vma_entry *vma_entry;
+
+ DRM_DEBUG("0x%08lx,0x%08lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ atomic_inc(&dev->vma_count);
+
+ vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+ if (vma_entry) {
+ vma_entry->vma = vma;
+ vma_entry->pid = current->pid;
+ list_add(&vma_entry->head, &dev->vmalist);
+ }
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct drm_vma_entry *pt, *temp;
+
+ DRM_DEBUG("0x%08lx,0x%08lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ atomic_dec(&dev->vma_count);
+
+ list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+ if (pt->vma == vma) {
+ list_del(&pt->head);
+ kfree(pt);
+ break;
+ }
+ }
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+
+ drm_gem_object_reference(obj);
+
+ mutex_lock(&obj->dev->struct_mutex);
+ pl111_drm_vm_open_locked(obj->dev, vma);
+ mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_device *dev = obj->dev;
+
+ mutex_lock(&dev->struct_mutex);
+ pl111_drm_vm_close_locked(obj->dev, vma);
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+}
+#endif
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 8453214ec376..941ab3c287ec 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -768,6 +768,8 @@ static const char *keys[KEY_MAX + 1] = {
[KEY_ALTERASE] = "AlternateErase", [KEY_CANCEL] = "Cancel",
[KEY_BRIGHTNESSDOWN] = "BrightnessDown", [KEY_BRIGHTNESSUP] = "BrightnessUp",
[KEY_MEDIA] = "Media", [KEY_UNKNOWN] = "Unknown",
+ [BTN_DPAD_UP] = "BtnDPadUp", [BTN_DPAD_DOWN] = "BtnDPadDown",
+ [BTN_DPAD_LEFT] = "BtnDPadLeft", [BTN_DPAD_RIGHT] = "BtnDPadRight",
[BTN_0] = "Btn0", [BTN_1] = "Btn1",
[BTN_2] = "Btn2", [BTN_3] = "Btn3",
[BTN_4] = "Btn4", [BTN_5] = "Btn5",
@@ -797,7 +799,8 @@ static const char *keys[KEY_MAX + 1] = {
[BTN_TOOL_MOUSE] = "ToolMouse", [BTN_TOOL_LENS] = "ToolLens",
[BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus",
[BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap",
- [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_GEAR_DOWN] = "WheelBtn",
+ [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap",
+ [BTN_GEAR_DOWN] = "WheelBtn",
[BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok",
[KEY_SELECT] = "Select", [KEY_GOTO] = "Goto",
[KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2",
@@ -852,6 +855,16 @@ static const char *keys[KEY_MAX + 1] = {
[KEY_KBDILLUMDOWN] = "KbdIlluminationDown",
[KEY_KBDILLUMUP] = "KbdIlluminationUp",
[KEY_SWITCHVIDEOMODE] = "SwitchVideoMode",
+ [KEY_BUTTONCONFIG] = "ButtonConfig",
+ [KEY_TASKMANAGER] = "TaskManager",
+ [KEY_JOURNAL] = "Journal",
+ [KEY_CONTROLPANEL] = "ControlPanel",
+ [KEY_APPSELECT] = "AppSelect",
+ [KEY_SCREENSAVER] = "ScreenSaver",
+ [KEY_VOICECOMMAND] = "VoiceCommand",
+ [KEY_BRIGHTNESS_MIN] = "BrightnessMin",
+ [KEY_BRIGHTNESS_MAX] = "BrightnessMax",
+ [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto",
};
static const char *relatives[REL_MAX + 1] = {
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index a3915d12e746..1419f8bd11ba 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -729,6 +729,13 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x06c: map_key_clear(KEY_YELLOW); break;
case 0x06d: map_key_clear(KEY_ZOOM); break;
+ case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break;
+ case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break;
+ case 0x072: map_key_clear(KEY_BRIGHTNESS_TOGGLE); break;
+ case 0x073: map_key_clear(KEY_BRIGHTNESS_MIN); break;
+ case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break;
+ case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break;
+
case 0x082: map_key_clear(KEY_VIDEO_NEXT); break;
case 0x083: map_key_clear(KEY_LAST); break;
case 0x084: map_key_clear(KEY_ENTER); break;
@@ -769,6 +776,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x0bf: map_key_clear(KEY_SLOW); break;
case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break;
+ case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break;
case 0x0e0: map_abs_clear(ABS_VOLUME); break;
case 0x0e2: map_key_clear(KEY_MUTE); break;
case 0x0e5: map_key_clear(KEY_BASSBOOST); break;
@@ -776,6 +784,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x0ea: map_key_clear(KEY_VOLUMEDOWN); break;
case 0x0f5: map_key_clear(KEY_SLOW); break;
+ case 0x181: map_key_clear(KEY_BUTTONCONFIG); break;
case 0x182: map_key_clear(KEY_BOOKMARKS); break;
case 0x183: map_key_clear(KEY_CONFIG); break;
case 0x184: map_key_clear(KEY_WORDPROCESSOR); break;
@@ -789,6 +798,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x18c: map_key_clear(KEY_VOICEMAIL); break;
case 0x18d: map_key_clear(KEY_ADDRESSBOOK); break;
case 0x18e: map_key_clear(KEY_CALENDAR); break;
+ case 0x18f: map_key_clear(KEY_TASKMANAGER); break;
+ case 0x190: map_key_clear(KEY_JOURNAL); break;
case 0x191: map_key_clear(KEY_FINANCE); break;
case 0x192: map_key_clear(KEY_CALC); break;
case 0x193: map_key_clear(KEY_PLAYER); break;
@@ -797,10 +808,16 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x199: map_key_clear(KEY_CHAT); break;
case 0x19c: map_key_clear(KEY_LOGOFF); break;
case 0x19e: map_key_clear(KEY_COFFEE); break;
+ case 0x19f: map_key_clear(KEY_CONTROLPANEL); break;
+ case 0x1a2: map_key_clear(KEY_APPSELECT); break;
+ case 0x1a3: map_key_clear(KEY_NEXT); break;
+ case 0x1a4: map_key_clear(KEY_PREVIOUS); break;
case 0x1a6: map_key_clear(KEY_HELP); break;
case 0x1a7: map_key_clear(KEY_DOCUMENTS); break;
case 0x1ab: map_key_clear(KEY_SPELLCHECK); break;
case 0x1ae: map_key_clear(KEY_KEYBOARD); break;
+ case 0x1b1: map_key_clear(KEY_SCREENSAVER); break;
+ case 0x1b4: map_key_clear(KEY_FILE); break;
case 0x1b6: map_key_clear(KEY_IMAGES); break;
case 0x1b7: map_key_clear(KEY_AUDIO); break;
case 0x1b8: map_key_clear(KEY_VIDEO); break;
@@ -1358,8 +1375,9 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
* UGCI) cram a lot of unrelated inputs into the
* same interface. */
hidinput->report = report;
- if (drv->input_configured)
- drv->input_configured(hid, hidinput);
+ if (drv->input_configured &&
+ drv->input_configured(hid, hidinput))
+ goto out_cleanup;
if (input_register_device(hidinput->input))
goto out_cleanup;
hidinput = NULL;
@@ -1380,8 +1398,9 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
}
if (hidinput) {
- if (drv->input_configured)
- drv->input_configured(hid, hidinput);
+ if (drv->input_configured &&
+ drv->input_configured(hid, hidinput))
+ goto out_cleanup;
if (input_register_device(hidinput->input))
goto out_cleanup;
}
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 3d8e58ac7499..fb9ac1266719 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -443,6 +443,16 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
(usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON)
td->mt_flags |= INPUT_MT_POINTER;
+ /* Only map fields from TouchScreen or TouchPad collections.
+ * We need to ignore fields that belong to other collections
+ * such as Mouse that might have the same GenericDesktop usages. */
+ if (field->application == HID_DG_TOUCHSCREEN)
+ set_bit(INPUT_PROP_DIRECT, hi->input->propbit);
+ else if (field->application == HID_DG_TOUCHPAD)
+ set_bit(INPUT_PROP_POINTER, hi->input->propbit);
+ else
+ return 0;
+
if (usage->usage_index)
prev_usage = &field->usage[usage->usage_index - 1];
@@ -772,12 +782,13 @@ static void mt_touch_report(struct hid_device *hid, struct hid_report *report)
mt_sync_frame(td, report->field[0]->hidinput->input);
}
-static void mt_touch_input_configured(struct hid_device *hdev,
+static int mt_touch_input_configured(struct hid_device *hdev,
struct hid_input *hi)
{
struct mt_device *td = hid_get_drvdata(hdev);
struct mt_class *cls = &td->mtclass;
struct input_dev *input = hi->input;
+ int ret;
if (!td->maxcontacts)
td->maxcontacts = MT_DEFAULT_MAXCONTACT;
@@ -792,9 +803,12 @@ static void mt_touch_input_configured(struct hid_device *hdev,
if (cls->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP)
td->mt_flags |= INPUT_MT_DROP_UNUSED;
- input_mt_init_slots(input, td->maxcontacts, td->mt_flags);
+ ret = input_mt_init_slots(input, td->maxcontacts, td->mt_flags);
+ if (ret)
+ return ret;
td->mt_flags = 0;
+ return 0;
}
static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
@@ -927,19 +941,21 @@ static void mt_post_parse(struct mt_device *td)
cls->quirks &= ~MT_QUIRK_CONTACT_CNT_ACCURATE;
}
-static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
+static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
{
struct mt_device *td = hid_get_drvdata(hdev);
char *name = kstrdup(hdev->name, GFP_KERNEL);
+ int ret = 0;
if (name)
hi->input->name = name;
if (hi->report->id == td->mt_report_id)
- mt_touch_input_configured(hdev, hi);
+ ret = mt_touch_input_configured(hdev, hi);
if (hi->report->id == td->pen_report_id)
mt_pen_input_configured(hdev, hi);
+ return ret;
}
static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index f25f29835b3e..1d465264a05f 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1296,6 +1296,14 @@ config SENSORS_TWL4030_MADC
This driver can also be built as a module. If so it will be called
twl4030-madc-hwmon.
+config SENSORS_V2M_JUNO
+ tristate "V2M Juno"
+ depends on VEXPRESS_CONFIG
+ help
+	  This driver provides support for the hardware sensors available
+	  on ARM's V2M Juno platform. It can report a wide range of
+	  information such as current, voltage, power and energy.
+
config SENSORS_VEXPRESS
tristate "Versatile Express"
depends on VEXPRESS_CONFIG
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index d17d3e64f9f4..72e23b5cdf1c 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -130,6 +130,7 @@ obj-$(CONFIG_SENSORS_TMP102) += tmp102.o
obj-$(CONFIG_SENSORS_TMP401) += tmp401.o
obj-$(CONFIG_SENSORS_TMP421) += tmp421.o
obj-$(CONFIG_SENSORS_TWL4030_MADC)+= twl4030-madc-hwmon.o
+obj-$(CONFIG_SENSORS_V2M_JUNO) += v2m-juno.o
obj-$(CONFIG_SENSORS_VEXPRESS) += vexpress.o
obj-$(CONFIG_SENSORS_VIA_CPUTEMP)+= via-cputemp.o
obj-$(CONFIG_SENSORS_VIA686A) += via686a.o
diff --git a/drivers/hwmon/v2m-juno.c b/drivers/hwmon/v2m-juno.c
new file mode 100644
index 000000000000..1ce2b728779a
--- /dev/null
+++ b/drivers/hwmon/v2m-juno.c
@@ -0,0 +1,388 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2014 ARM Limited
+ */
+
+#define DRVNAME "v2m-juno-hwmon"
+#define pr_fmt(fmt) DRVNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+struct v2m_juno_hwmon_type {
+ const char *name;
+ const struct attribute_group *attr_groups;
+};
+
+static void __iomem *base;
+
+struct v2m_juno_hwmon_dev {
+ struct v2m_juno_hwmon_type *type;
+ struct device *hwmon_dev;
+ const char *name;
+ u32 offset;
+ u32 mult;
+ u32 div;
+};
+
+static ssize_t v2m_juno_hwmon_name_show(struct device *dev,
+ struct device_attribute *dev_attr, char *buffer)
+{
+ struct v2m_juno_hwmon_dev *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buffer, "%s\n", hdev->name);
+}
+
+static ssize_t v2m_juno_hwmon_u32_show(struct device *dev,
+ struct device_attribute *dev_attr, char *buffer)
+{
+ struct v2m_juno_hwmon_dev *hdev = dev_get_drvdata(dev);
+ u64 value;
+
+ value = readl(base + hdev->offset);
+ value *= hdev->mult;
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", value / hdev->div);
+}
+
+static ssize_t v2m_juno_hwmon_u64_show(struct device *dev,
+ struct device_attribute *dev_attr, char *buffer)
+{
+ struct v2m_juno_hwmon_dev *hdev = dev_get_drvdata(dev);
+ u64 value;
+
+ value = readq(base + hdev->offset);
+ value *= hdev->mult;
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", value / hdev->div);
+}
+
+static DEVICE_ATTR(name, S_IRUGO, v2m_juno_hwmon_name_show, NULL);
+
+#define JUNO_HWMON_ATTRS(_name, _input_attr) \
+struct attribute *v2m_juno_hwmon_attrs_##_name[] = { \
+ &dev_attr_name.attr, \
+ &sensor_dev_attr_##_input_attr.dev_attr.attr, \
+ NULL \
+}
+
+#if !defined(CONFIG_REGULATOR_VEXPRESS)
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, v2m_juno_hwmon_u32_show,
+ NULL, 1);
+static JUNO_HWMON_ATTRS(volt, in1_input);
+static struct attribute_group v2m_juno_hwmon_group_volt = {
+ .attrs = v2m_juno_hwmon_attrs_volt,
+};
+static struct v2m_juno_hwmon_type v2m_juno_hwmon_volt = {
+ .name = "v2m_juno_volt",
+ .attr_groups = &v2m_juno_hwmon_group_volt,
+};
+#endif
+
+static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, v2m_juno_hwmon_u32_show,
+ NULL, 1);
+static JUNO_HWMON_ATTRS(amp, curr1_input);
+static struct attribute_group v2m_juno_hwmon_group_amp = {
+ .attrs = v2m_juno_hwmon_attrs_amp,
+};
+static struct v2m_juno_hwmon_type v2m_juno_hwmon_amp = {
+ .name = "v2m_juno_amp",
+ .attr_groups = &v2m_juno_hwmon_group_amp,
+};
+
+static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, v2m_juno_hwmon_u32_show,
+ NULL, 1);
+static JUNO_HWMON_ATTRS(power, power1_input);
+static struct attribute_group v2m_juno_hwmon_group_power = {
+ .attrs = v2m_juno_hwmon_attrs_power,
+};
+static struct v2m_juno_hwmon_type v2m_juno_hwmon_power = {
+ .name = "v2m_juno_power",
+ .attr_groups = &v2m_juno_hwmon_group_power,
+};
+
+static SENSOR_DEVICE_ATTR(energy1_input, S_IRUGO, v2m_juno_hwmon_u64_show,
+ NULL, 1);
+static JUNO_HWMON_ATTRS(energy, energy1_input);
+static struct attribute_group v2m_juno_hwmon_group_energy = {
+ .attrs = v2m_juno_hwmon_attrs_energy,
+};
+static struct v2m_juno_hwmon_type v2m_juno_hwmon_energy = {
+ .name = "v2m_juno_energy",
+ .attr_groups = &v2m_juno_hwmon_group_energy,
+};
+
+/* current adc channels */
+#define SYS_ADC_CH0_PM1_SYS 0xd0
+#define SYS_ADC_CH1_PM2_A57 0xd4
+#define SYS_ADC_CH2_PM3_A53 0xd8
+#define SYS_ADC_CH3_PM4_GPU 0xdc
+
+/* voltage adc channels */
+#define SYS_ADC_CH4_VSYS 0xe0
+#define SYS_ADC_CH5_VA57 0xe4
+#define SYS_ADC_CH6_VA53 0xe8
+#define SYS_ADC_CH7_VGPU 0xec
+
+/* power adc channels */
+#define SYS_EN_CH04_SYS 0xf0
+#define SYS_EN_CH15_A57 0xf4
+#define SYS_EN_CH26_A53 0xf8
+#define SYS_EN_CH37_GPU 0xfc
+
+/* energy adc channels */
+#define SYS_ENM_CH0_L_SYS 0x100
+#define SYS_ENM_CH0_H_SYS 0x104
+#define SYS_ENM_CH1_L_A57 0x108
+#define SYS_ENM_CH1_H_A57 0x10c
+#define SYS_ENM_CH0_L_A53 0x110
+#define SYS_ENM_CH0_H_A53 0x114
+#define SYS_ENM_CH0_L_GPU 0x118
+#define SYS_ENM_CH0_H_GPU 0x11c
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_curr_sys = {
+ .type = &v2m_juno_hwmon_amp,
+ .offset = SYS_ADC_CH0_PM1_SYS,
+ .name = "sys_curr",
+ .mult = 1000,
+ .div = 761
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_curr_a57 = {
+ .type = &v2m_juno_hwmon_amp,
+ .offset = SYS_ADC_CH1_PM2_A57,
+ .name = "a57_curr",
+ .mult = 1000,
+ .div = 381,
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_curr_a53 = {
+ .type = &v2m_juno_hwmon_amp,
+ .offset = SYS_ADC_CH2_PM3_A53,
+ .name = "a53_curr",
+ .mult = 1000,
+ .div = 761
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_curr_gpu = {
+ .type = &v2m_juno_hwmon_amp,
+ .offset = SYS_ADC_CH3_PM4_GPU,
+ .name = "gpu_curr",
+ .mult = 1000,
+ .div = 381
+};
+
+#if !defined(CONFIG_REGULATOR_VEXPRESS)
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_volt_sys = {
+ .type = &v2m_juno_hwmon_volt,
+ .offset = SYS_ADC_CH4_VSYS,
+ .name = "sys_volt",
+ .mult = 1000,
+ .div = 1622
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_volt_a57 = {
+ .type = &v2m_juno_hwmon_volt,
+ .offset = SYS_ADC_CH5_VA57,
+ .name = "a57_volt",
+ .mult = 1000,
+ .div = 1622
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_volt_a53 = {
+ .type = &v2m_juno_hwmon_volt,
+ .offset = SYS_ADC_CH6_VA53,
+ .name = "a53_volt",
+ .mult = 1000,
+ .div = 1622
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_volt_gpu = {
+ .type = &v2m_juno_hwmon_volt,
+ .offset = SYS_ADC_CH7_VGPU,
+ .name = "gpu_volt",
+ .mult = 1000,
+ .div = 1622
+};
+#endif
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_power_sys = {
+ .type = &v2m_juno_hwmon_power,
+ .offset = SYS_EN_CH04_SYS,
+ .name = "sys_power",
+ .mult = 1000,
+ .div = 1234803
+
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_power_a57 = {
+ .type = &v2m_juno_hwmon_power,
+ .offset = SYS_EN_CH15_A57,
+ .name = "a57_power",
+ .mult = 1000,
+ .div = 617402
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_power_a53 = {
+ .type = &v2m_juno_hwmon_power,
+ .offset = SYS_EN_CH26_A53,
+ .name = "a53_power",
+ .mult = 1000,
+ .div = 1234803,
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_power_gpu = {
+ .type = &v2m_juno_hwmon_power,
+ .offset = SYS_EN_CH37_GPU,
+ .mult = 1000,
+ .name = "gpu_power",
+ .div = 617402
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_energy_sys = {
+ .type = &v2m_juno_hwmon_energy,
+ .offset = SYS_ENM_CH0_L_SYS,
+ .name = "sys_energy",
+ .mult = 100,
+ .div = 1234803,
+};
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_energy_a57 = {
+ .type = &v2m_juno_hwmon_energy,
+ .offset = SYS_ENM_CH1_L_A57,
+ .name = "a57_energy",
+ .mult = 100,
+ .div = 617402
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_energy_a53 = {
+ .type = &v2m_juno_hwmon_energy,
+ .offset = SYS_ENM_CH0_L_A53,
+ .name = "a53_energy",
+ .mult = 100,
+ .div = 1234803,
+};
+
+struct v2m_juno_hwmon_dev v2m_juno_hwmon_energy_gpu = {
+ .type = &v2m_juno_hwmon_energy,
+ .offset = SYS_ENM_CH0_L_GPU,
+ .name = "gpu_energy",
+ .mult = 100,
+ .div = 617402
+};
+
+static struct v2m_juno_hwmon_dev *v2m_juno_hwmon_devices[] = {
+#if !defined(CONFIG_REGULATOR_VEXPRESS)
+ &v2m_juno_hwmon_volt_sys,
+ &v2m_juno_hwmon_volt_a57,
+ &v2m_juno_hwmon_volt_a53,
+ &v2m_juno_hwmon_volt_gpu,
+#endif
+ &v2m_juno_hwmon_energy_sys,
+ &v2m_juno_hwmon_energy_a57,
+ &v2m_juno_hwmon_energy_a53,
+ &v2m_juno_hwmon_energy_gpu,
+ &v2m_juno_hwmon_power_sys,
+ &v2m_juno_hwmon_power_a57,
+ &v2m_juno_hwmon_power_a53,
+ &v2m_juno_hwmon_power_gpu,
+ &v2m_juno_hwmon_curr_sys,
+ &v2m_juno_hwmon_curr_a57,
+ &v2m_juno_hwmon_curr_a53,
+ &v2m_juno_hwmon_curr_gpu,
+};
+
+static struct of_device_id v2m_juno_hwmon_of_match[] = {
+ {
+ .compatible = "arm,v2m-juno-meters",
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(of, v2m_juno_hwmon_of_match);
+
+static int v2m_juno_hwmon_probe(struct platform_device *pdev)
+{
+ int i, err;
+ const struct of_device_id *match;
+ struct v2m_juno_hwmon_dev *dev;
+ struct resource *mem;
+
+ match = of_match_device(v2m_juno_hwmon_of_match, &pdev->dev);
+ if (!match)
+ return -ENODEV;
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap_resource(&pdev->dev, mem);
+
+ for (i = 0; i < ARRAY_SIZE(v2m_juno_hwmon_devices); i++) {
+ dev = v2m_juno_hwmon_devices[i];
+ dev->hwmon_dev = hwmon_device_register(&pdev->dev);
+
+ if (IS_ERR(dev->hwmon_dev)) {
+ err = PTR_ERR(dev->hwmon_dev);
+ goto error;
+ }
+ err = sysfs_create_group(&dev->hwmon_dev->kobj,
+ dev->type->attr_groups);
+ if (err)
+ goto error_sysfs;
+
+ dev_set_drvdata(dev->hwmon_dev, dev);
+ }
+
+ return 0;
+
+error:
+ while (--i >= 0) {
+ dev = v2m_juno_hwmon_devices[i];
+ sysfs_remove_group(&dev->hwmon_dev->kobj,
+ dev->type->attr_groups);
+error_sysfs:
+ hwmon_device_unregister(dev->hwmon_dev);
+ }
+
+ return err;
+}
+
+static int v2m_juno_hwmon_remove(struct platform_device *pdev)
+{
+ int i;
+ struct v2m_juno_hwmon_dev *dev;
+
+ for (i = 0; i < ARRAY_SIZE(v2m_juno_hwmon_devices); i++) {
+ dev = v2m_juno_hwmon_devices[i];
+ hwmon_device_unregister(dev->hwmon_dev);
+ sysfs_remove_group(&dev->hwmon_dev->kobj,
+ dev->type->attr_groups);
+ }
+
+ return 0;
+}
+
+static struct platform_driver v2m_juno_hwmon_driver = {
+ .probe = v2m_juno_hwmon_probe,
+ .remove = v2m_juno_hwmon_remove,
+ .driver = {
+ .name = DRVNAME,
+ .owner = THIS_MODULE,
+ .of_match_table = v2m_juno_hwmon_of_match,
+ },
+};
+
+module_platform_driver(v2m_juno_hwmon_driver);
+
+MODULE_AUTHOR("Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>");
+MODULE_DESCRIPTION("V2M Juno hwmon sensors driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:v2m-juno-hwmon");
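The *_show callbacks above apply a fixed-point scaling (value * mult / div) to the raw ADC and energy registers. A minimal sketch of that scaling for the sys_curr channel; the raw register value is hypothetical:

    /* fixed-point scaling done by the *_show callbacks; raw value is made up */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t raw  = 761;  /* hypothetical SYS_ADC_CH0_PM1_SYS reading */
            uint32_t mult = 1000; /* v2m_juno_hwmon_curr_sys.mult */
            uint32_t div  = 761;  /* v2m_juno_hwmon_curr_sys.div */

            /* the hwmon ABI reports currN_input in milliamps */
            printf("sys_curr = %llu\n", (unsigned long long)(raw * mult / div)); /* 1000 */
            return 0;
    }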
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index f24a7385260a..6a661052f7e1 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -67,9 +67,12 @@
#define DW_IC_STATUS 0x70
#define DW_IC_TXFLR 0x74
#define DW_IC_RXFLR 0x78
+#define DW_IC_SDA_HOLD 0x7c
#define DW_IC_TX_ABRT_SOURCE 0x80
#define DW_IC_ENABLE_STATUS 0x9c
#define DW_IC_COMP_PARAM_1 0xf4
+#define DW_IC_COMP_VERSION 0xf8
+#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A
#define DW_IC_COMP_TYPE 0xfc
#define DW_IC_COMP_TYPE_VALUE 0x44570140
@@ -214,7 +217,7 @@ i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
*
* If your hardware is free from tHD;STA issue, try this one.
*/
- return (ic_clk * tSYMBOL + 5000) / 10000 - 8 + offset;
+ return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset;
else
/*
* Conditional expression:
@@ -230,7 +233,8 @@ i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
* The reason why we need to take into account "tf" here,
* is the same as described in i2c_dw_scl_lcnt().
*/
- return (ic_clk * (tSYMBOL + tf) + 5000) / 10000 - 3 + offset;
+ return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000
+ - 3 + offset;
}
static u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
@@ -246,7 +250,7 @@ static u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
* account the fall time of SCL signal (tf). Default tf value
* should be 0.3 us, for safety.
*/
- return ((ic_clk * (tLOW + tf) + 5000) / 10000) - 1 + offset;
+ return ((ic_clk * (tLOW + tf) + 500000) / 1000000) - 1 + offset;
}
static void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
@@ -283,6 +287,7 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
u32 input_clock_khz;
u32 hcnt, lcnt;
u32 reg;
+ u32 sda_falling_time, scl_falling_time;
input_clock_khz = dev->get_clk_rate_khz(dev);
@@ -304,36 +309,60 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
/* set standard and fast speed deviders for high/low periods */
+ sda_falling_time = dev->sda_falling_time ?: 300; /* ns */
+ scl_falling_time = dev->scl_falling_time ?: 300; /* ns */
+
/* Standard-mode */
hcnt = i2c_dw_scl_hcnt(input_clock_khz,
- 40, /* tHD;STA = tHIGH = 4.0 us */
- 3, /* tf = 0.3 us */
+ 4000, /* tHD;STA = tHIGH = 4.0 us */
+ sda_falling_time,
0, /* 0: DW default, 1: Ideal */
0); /* No offset */
lcnt = i2c_dw_scl_lcnt(input_clock_khz,
- 47, /* tLOW = 4.7 us */
- 3, /* tf = 0.3 us */
+ 4700, /* tLOW = 4.7 us */
+ scl_falling_time,
0); /* No offset */
+
+ /* Allow platforms to specify the ideal HCNT and LCNT values */
+ if (dev->ss_hcnt && dev->ss_lcnt) {
+ hcnt = dev->ss_hcnt;
+ lcnt = dev->ss_lcnt;
+ }
dw_writel(dev, hcnt, DW_IC_SS_SCL_HCNT);
dw_writel(dev, lcnt, DW_IC_SS_SCL_LCNT);
dev_dbg(dev->dev, "Standard-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
/* Fast-mode */
hcnt = i2c_dw_scl_hcnt(input_clock_khz,
- 6, /* tHD;STA = tHIGH = 0.6 us */
- 3, /* tf = 0.3 us */
+ 600, /* tHD;STA = tHIGH = 0.6 us */
+ sda_falling_time,
0, /* 0: DW default, 1: Ideal */
0); /* No offset */
lcnt = i2c_dw_scl_lcnt(input_clock_khz,
- 13, /* tLOW = 1.3 us */
- 3, /* tf = 0.3 us */
+ 1300, /* tLOW = 1.3 us */
+ scl_falling_time,
0); /* No offset */
+
+ if (dev->fs_hcnt && dev->fs_lcnt) {
+ hcnt = dev->fs_hcnt;
+ lcnt = dev->fs_lcnt;
+ }
dw_writel(dev, hcnt, DW_IC_FS_SCL_HCNT);
dw_writel(dev, lcnt, DW_IC_FS_SCL_LCNT);
dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+ /* Configure SDA Hold Time if required */
+ if (dev->sda_hold_time) {
+ reg = dw_readl(dev, DW_IC_COMP_VERSION);
+ if (reg >= DW_IC_SDA_HOLD_MIN_VERS)
+ dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
+ else
+ dev_warn(dev->dev,
+ "Hardware too old to adjust SDA hold time.");
+ }
+
/* Configure Tx/Rx FIFO threshold levels */
- dw_writel(dev, dev->tx_fifo_depth - 1, DW_IC_TX_TL);
+ dw_writel(dev, dev->tx_fifo_depth / 2, DW_IC_TX_TL);
dw_writel(dev, 0, DW_IC_RX_TL);
/* configure the i2c master */
@@ -383,6 +412,9 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
/* enforce disabled interrupts (due to HW issues) */
i2c_dw_disable_int(dev);
/* Enable the adapter */
__i2c_dw_enable(dev, true);
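The timing hunks above only change the units of the high/low period arguments from tenths of a microsecond to nanoseconds, with the rounding constants adjusted to match. A minimal sketch showing the first return of i2c_dw_scl_hcnt() producing the same count in both unit systems, assuming a hypothetical 100 MHz (100000 kHz) input clock:

    /* old 0.1us-unit vs new ns-unit rounding in i2c_dw_scl_hcnt() (first return) */
    #include <stdio.h>

    int main(void)
    {
            unsigned int ic_clk = 100000; /* input clock in kHz, i.e. 100 MHz */

            /* old: tHIGH passed as 40 (0.1 us units, 4.0 us) */
            unsigned int old_hcnt = (ic_clk * 40 + 5000) / 10000 - 8;

            /* new: tHIGH passed as 4000 (ns, still 4.0 us) */
            unsigned int new_hcnt = (ic_clk * 4000 + 500000) / 1000000 - 8;

            printf("old=%u new=%u\n", old_hcnt, new_hcnt); /* both 392 */
            return 0;
    }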
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index e761ad18dd61..d66b6cbc9edc 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -61,6 +61,14 @@
* @tx_fifo_depth: depth of the hardware tx fifo
* @rx_fifo_depth: depth of the hardware rx fifo
* @rx_outstanding: current master-rx elements in tx fifo
+ * @ss_hcnt: standard speed HCNT value
+ * @ss_lcnt: standard speed LCNT value
+ * @fs_hcnt: fast speed HCNT value
+ * @fs_lcnt: fast speed LCNT value
+ *
+ * The HCNT and LCNT parameters can be used if the platform knows more
+ * accurate values than the ones computed based only on the input clock
+ * frequency. Leave them at %0 if not used.
*/
struct dw_i2c_dev {
struct device *dev;
@@ -90,6 +98,13 @@ struct dw_i2c_dev {
unsigned int tx_fifo_depth;
unsigned int rx_fifo_depth;
int rx_outstanding;
+ u32 sda_hold_time;
+ u32 sda_falling_time;
+ u32 scl_falling_time;
+ u16 ss_hcnt;
+ u16 ss_lcnt;
+ u16 fs_hcnt;
+ u16 fs_lcnt;
};
#define ACCESS_SWAP 0x00000001
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 35b70a1edf57..2da574a9f03f 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -34,6 +34,7 @@
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_i2c.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
@@ -86,6 +87,7 @@ static int dw_i2c_probe(struct platform_device *pdev)
struct i2c_adapter *adap;
struct resource *mem;
int irq, r;
+ u32 bus_rate = DW_IC_CON_SPEED_STD;
/* NOTE: driver uses the static register mapping */
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -121,6 +123,39 @@ static int dw_i2c_probe(struct platform_device *pdev)
return PTR_ERR(dev->clk);
clk_prepare_enable(dev->clk);
+ if (pdev->dev.of_node) {
+ u32 ht = 0;
+ u32 ic_clk = dev->get_clk_rate_khz(dev);
+
+ of_property_read_u32(pdev->dev.of_node,
+ "i2c-sda-hold-time-ns", &ht);
+ dev->sda_hold_time = div_u64((u64)ic_clk * ht + 500000,
+ 1000000);
+
+ of_property_read_u32(pdev->dev.of_node,
+ "i2c-sda-falling-time-ns",
+ &dev->sda_falling_time);
+ of_property_read_u32(pdev->dev.of_node,
+ "i2c-scl-falling-time-ns",
+ &dev->scl_falling_time);
+
+ r = of_property_read_u32(pdev->dev.of_node, "clock-frequency",
+ &bus_rate);
+ if (r)
+ bus_rate = 400000; /* default clock rate */
+ switch (bus_rate) {
+ case 400000:
+ bus_rate = DW_IC_CON_SPEED_FAST;
+ break;
+ case 100000:
+ bus_rate = DW_IC_CON_SPEED_STD;
+ break;
+ default:
+ dev_err(&pdev->dev, "Invalid bus speed (not 100kHz or 400kHz)\n");
+ return -EINVAL;
+ }
+ }
+
dev->functionality =
I2C_FUNC_I2C |
I2C_FUNC_10BIT_ADDR |
@@ -129,7 +164,7 @@ static int dw_i2c_probe(struct platform_device *pdev)
I2C_FUNC_SMBUS_WORD_DATA |
I2C_FUNC_SMBUS_I2C_BLOCK;
dev->master_cfg = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
- DW_IC_CON_RESTART_EN | DW_IC_CON_SPEED_FAST;
+ DW_IC_CON_RESTART_EN | bus_rate;
/* Try first if we can configure the device from ACPI */
r = dw_i2c_acpi_configure(pdev);
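The new device-tree handling above converts i2c-sda-hold-time-ns into input-clock cycles before i2c_dw_init() writes it to DW_IC_SDA_HOLD. A minimal sketch of that conversion; the clock rate and DT value are hypothetical:

    /* ns -> input-clock-cycle conversion for the SDA hold time; values are made up */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t ic_clk = 100000; /* input clock in kHz (100 MHz), hypothetical */
            uint64_t ht     = 300;    /* i2c-sda-hold-time-ns from DT, hypothetical */

            /* mirrors div_u64((u64)ic_clk * ht + 500000, 1000000) */
            uint64_t sda_hold = (ic_clk * ht + 500000) / 1000000;

            printf("DW_IC_SDA_HOLD = %llu cycles\n", (unsigned long long)sda_hold); /* 30 */
            return 0;
    }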
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 10aa9ef86cec..145c98617936 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -35,6 +35,7 @@
*/
struct iio_event_interface {
wait_queue_head_t wait;
+ struct mutex read_lock;
DECLARE_KFIFO(det_events, struct iio_event_data, 16);
struct list_head dev_attr_list;
@@ -97,14 +98,16 @@ static ssize_t iio_event_chrdev_read(struct file *filep,
if (count < sizeof(struct iio_event_data))
return -EINVAL;
- spin_lock_irq(&ev_int->wait.lock);
+ if (mutex_lock_interruptible(&ev_int->read_lock))
+ return -ERESTARTSYS;
+
if (kfifo_is_empty(&ev_int->det_events)) {
if (filep->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
goto error_unlock;
}
/* Blocking on device; waiting for something to be there */
- ret = wait_event_interruptible_locked_irq(ev_int->wait,
+ ret = wait_event_interruptible(ev_int->wait,
!kfifo_is_empty(&ev_int->det_events));
if (ret)
goto error_unlock;
@@ -114,7 +117,7 @@ static ssize_t iio_event_chrdev_read(struct file *filep,
ret = kfifo_to_user(&ev_int->det_events, buf, count, &copied);
error_unlock:
- spin_unlock_irq(&ev_int->wait.lock);
+ mutex_unlock(&ev_int->read_lock);
return ret ? ret : copied;
}
@@ -371,6 +374,7 @@ static void iio_setup_ev_int(struct iio_event_interface *ev_int)
{
INIT_KFIFO(ev_int->det_events);
init_waitqueue_head(&ev_int->wait);
+ mutex_init(&ev_int->read_lock);
}
static const char *iio_event_group_name = "events";
@@ -434,6 +438,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev)
error_free_setup_event_lines:
__iio_remove_event_config_attrs(indio_dev);
+ mutex_destroy(&indio_dev->event_interface->read_lock);
kfree(indio_dev->event_interface);
error_ret:
@@ -446,5 +451,6 @@ void iio_device_unregister_eventset(struct iio_dev *indio_dev)
return;
__iio_remove_event_config_attrs(indio_dev);
kfree(indio_dev->event_interface->group.attrs);
+ mutex_destroy(&indio_dev->event_interface->read_lock);
kfree(indio_dev->event_interface);
}
diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig
index a11ff74a5127..518efa2a9f52 100644
--- a/drivers/input/Kconfig
+++ b/drivers/input/Kconfig
@@ -174,6 +174,25 @@ config INPUT_APMPOWER
To compile this driver as a module, choose M here: the
module will be called apm-power.
+config INPUT_KEYRESET
+ tristate "Reset key"
+ depends on INPUT
+ select INPUT_KEYCOMBO
+ ---help---
+	  Say Y here if you want to reboot when certain keys are pressed.
+
+ To compile this driver as a module, choose M here: the
+ module will be called keyreset.
+
+config INPUT_KEYCOMBO
+ tristate "Key combo"
+ depends on INPUT
+ ---help---
+	  Say Y here if you want to take an action when certain keys are pressed.
+
+ To compile this driver as a module, choose M here: the
+ module will be called keycombo.
+
comment "Input Device Drivers"
source "drivers/input/keyboard/Kconfig"
diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 5ca3f631497f..ee4c06520bb4 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -25,3 +25,6 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/
obj-$(CONFIG_INPUT_MISC) += misc/
obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o
+obj-$(CONFIG_INPUT_KEYRESET) += keyreset.o
+obj-$(CONFIG_INPUT_KEYCOMBO) += keycombo.o
+
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index c122dd2adc22..f4897c8c1500 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -26,6 +26,7 @@
#include <linux/major.h>
#include <linux/device.h>
#include <linux/cdev.h>
+#include <linux/wakelock.h>
#include "input-compat.h"
struct evdev {
@@ -46,6 +47,9 @@ struct evdev_client {
unsigned int tail;
unsigned int packet_head; /* [future] position of the first element of next packet */
spinlock_t buffer_lock; /* protects access to buffer, head and tail */
+ struct wake_lock wake_lock;
+ bool use_wake_lock;
+ char name[28];
struct fasync_struct *fasync;
struct evdev *evdev;
struct list_head node;
@@ -73,10 +77,14 @@ static void __pass_event(struct evdev_client *client,
client->buffer[client->tail].value = 0;
client->packet_head = client->tail;
+ if (client->use_wake_lock)
+ wake_unlock(&client->wake_lock);
}
if (event->type == EV_SYN && event->code == SYN_REPORT) {
client->packet_head = client->head;
+ if (client->use_wake_lock)
+ wake_lock(&client->wake_lock);
kill_fasync(&client->fasync, SIGIO, POLL_IN);
}
}
@@ -291,6 +299,8 @@ static int evdev_release(struct inode *inode, struct file *file)
mutex_unlock(&evdev->mutex);
evdev_detach_client(evdev, client);
+ if (client->use_wake_lock)
+ wake_lock_destroy(&client->wake_lock);
if (is_vmalloc_addr(client))
vfree(client);
@@ -328,6 +338,8 @@ static int evdev_open(struct inode *inode, struct file *file)
client->bufsize = bufsize;
spin_lock_init(&client->buffer_lock);
+ snprintf(client->name, sizeof(client->name), "%s-%d",
+ dev_name(&evdev->dev), task_tgid_vnr(current));
client->evdev = evdev;
evdev_attach_client(evdev, client);
@@ -394,6 +406,9 @@ static int evdev_fetch_next_event(struct evdev_client *client,
if (have_event) {
*event = client->buffer[client->tail++];
client->tail &= client->bufsize - 1;
+ if (client->use_wake_lock &&
+ client->packet_head == client->tail)
+ wake_unlock(&client->wake_lock);
}
spin_unlock_irq(&client->buffer_lock);
@@ -682,6 +697,35 @@ static int evdev_handle_mt_request(struct input_dev *dev,
return 0;
}
+static int evdev_enable_suspend_block(struct evdev *evdev,
+ struct evdev_client *client)
+{
+ if (client->use_wake_lock)
+ return 0;
+
+ spin_lock_irq(&client->buffer_lock);
+ wake_lock_init(&client->wake_lock, WAKE_LOCK_SUSPEND, client->name);
+ client->use_wake_lock = true;
+ if (client->packet_head != client->tail)
+ wake_lock(&client->wake_lock);
+ spin_unlock_irq(&client->buffer_lock);
+ return 0;
+}
+
+static int evdev_disable_suspend_block(struct evdev *evdev,
+ struct evdev_client *client)
+{
+ if (!client->use_wake_lock)
+ return 0;
+
+ spin_lock_irq(&client->buffer_lock);
+ client->use_wake_lock = false;
+ wake_lock_destroy(&client->wake_lock);
+ spin_unlock_irq(&client->buffer_lock);
+
+ return 0;
+}
+
static long evdev_do_ioctl(struct file *file, unsigned int cmd,
void __user *p, int compat_mode)
{
@@ -763,6 +807,15 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
case EVIOCSKEYCODE_V2:
return evdev_handle_set_keycode_v2(dev, p);
+
+ case EVIOCGSUSPENDBLOCK:
+ return put_user(client->use_wake_lock, ip);
+
+ case EVIOCSSUSPENDBLOCK:
+ if (p)
+ return evdev_enable_suspend_block(evdev, client);
+ else
+ return evdev_disable_suspend_block(evdev, client);
}
size = _IOC_SIZE(cmd);
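A hypothetical user-space sketch of the suspend-block ioctls handled above; it assumes the patched <linux/input.h> exports EVIOCGSUSPENDBLOCK and EVIOCSSUSPENDBLOCK, and the event node path is illustrative only:

    /* toggling per-client suspend blocking from user space; not a definitive API demo */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/input.h>  /* assumed to define EVIOC[GS]SUSPENDBLOCK after this patch */

    int main(void)
    {
            int enabled = 0;
            int fd = open("/dev/input/event0", O_RDONLY | O_NONBLOCK); /* path illustrative */

            if (fd < 0)
                    return 1;

            /* hold a wakelock while this client has unread events buffered */
            ioctl(fd, EVIOCSSUSPENDBLOCK, 1);

            /* query whether the client currently uses a wakelock */
            ioctl(fd, EVIOCGSUSPENDBLOCK, &enabled);
            printf("suspend block enabled: %d\n", enabled);

            close(fd);
            return 0;
    }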
diff --git a/drivers/input/keycombo.c b/drivers/input/keycombo.c
new file mode 100644
index 000000000000..2fba451b91d5
--- /dev/null
+++ b/drivers/input/keycombo.c
@@ -0,0 +1,261 @@
+/* drivers/input/keycombo.c
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/input.h>
+#include <linux/keycombo.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+struct keycombo_state {
+ struct input_handler input_handler;
+ unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+ unsigned long upbit[BITS_TO_LONGS(KEY_CNT)];
+ unsigned long key[BITS_TO_LONGS(KEY_CNT)];
+ spinlock_t lock;
+ struct workqueue_struct *wq;
+ int key_down_target;
+ int key_down;
+ int key_up;
+ struct delayed_work key_down_work;
+ int delay;
+ struct work_struct key_up_work;
+ void (*key_up_fn)(void *);
+ void (*key_down_fn)(void *);
+ void *priv;
+ int key_is_down;
+ struct wakeup_source combo_held_wake_source;
+ struct wakeup_source combo_up_wake_source;
+};
+
+static void do_key_down(struct work_struct *work)
+{
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct keycombo_state *state = container_of(dwork,
+ struct keycombo_state, key_down_work);
+ if (state->key_down_fn)
+ state->key_down_fn(state->priv);
+}
+
+static void do_key_up(struct work_struct *work)
+{
+ struct keycombo_state *state = container_of(work, struct keycombo_state,
+ key_up_work);
+ if (state->key_up_fn)
+ state->key_up_fn(state->priv);
+ __pm_relax(&state->combo_up_wake_source);
+}
+
+static void keycombo_event(struct input_handle *handle, unsigned int type,
+ unsigned int code, int value)
+{
+ unsigned long flags;
+ struct keycombo_state *state = handle->private;
+
+ if (type != EV_KEY)
+ return;
+
+ if (code >= KEY_MAX)
+ return;
+
+ if (!test_bit(code, state->keybit))
+ return;
+
+ spin_lock_irqsave(&state->lock, flags);
+ if (!test_bit(code, state->key) == !value)
+ goto done;
+ __change_bit(code, state->key);
+ if (test_bit(code, state->upbit)) {
+ if (value)
+ state->key_up++;
+ else
+ state->key_up--;
+ } else {
+ if (value)
+ state->key_down++;
+ else
+ state->key_down--;
+ }
+ if (state->key_down == state->key_down_target && state->key_up == 0) {
+ __pm_stay_awake(&state->combo_held_wake_source);
+ state->key_is_down = 1;
+ if (queue_delayed_work(state->wq, &state->key_down_work,
+ state->delay))
+ pr_debug("Key down work already queued!");
+ } else if (state->key_is_down) {
+ if (!cancel_delayed_work(&state->key_down_work)) {
+ __pm_stay_awake(&state->combo_up_wake_source);
+ queue_work(state->wq, &state->key_up_work);
+ }
+ __pm_relax(&state->combo_held_wake_source);
+ state->key_is_down = 0;
+ }
+done:
+ spin_unlock_irqrestore(&state->lock, flags);
+}
+
+static int keycombo_connect(struct input_handler *handler,
+ struct input_dev *dev,
+ const struct input_device_id *id)
+{
+ int i;
+ int ret;
+ struct input_handle *handle;
+ struct keycombo_state *state =
+ container_of(handler, struct keycombo_state, input_handler);
+ for (i = 0; i < KEY_MAX; i++) {
+ if (test_bit(i, state->keybit) && test_bit(i, dev->keybit))
+ break;
+ }
+ if (i == KEY_MAX)
+ return -ENODEV;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->dev = dev;
+ handle->handler = handler;
+ handle->name = KEYCOMBO_NAME;
+ handle->private = state;
+
+ ret = input_register_handle(handle);
+ if (ret)
+ goto err_input_register_handle;
+
+ ret = input_open_device(handle);
+ if (ret)
+ goto err_input_open_device;
+
+ return 0;
+
+err_input_open_device:
+ input_unregister_handle(handle);
+err_input_register_handle:
+ kfree(handle);
+ return ret;
+}
+
+static void keycombo_disconnect(struct input_handle *handle)
+{
+ input_close_device(handle);
+ input_unregister_handle(handle);
+ kfree(handle);
+}
+
+static const struct input_device_id keycombo_ids[] = {
+ {
+ .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+ .evbit = { BIT_MASK(EV_KEY) },
+ },
+ { },
+};
+MODULE_DEVICE_TABLE(input, keycombo_ids);
+
+static int keycombo_probe(struct platform_device *pdev)
+{
+ int ret;
+ int key, *keyp;
+ struct keycombo_state *state;
+ struct keycombo_platform_data *pdata = pdev->dev.platform_data;
+
+ if (!pdata)
+ return -EINVAL;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ spin_lock_init(&state->lock);
+ keyp = pdata->keys_down;
+ while ((key = *keyp++)) {
+ if (key >= KEY_MAX)
+ continue;
+ state->key_down_target++;
+ __set_bit(key, state->keybit);
+ }
+ if (pdata->keys_up) {
+ keyp = pdata->keys_up;
+ while ((key = *keyp++)) {
+ if (key >= KEY_MAX)
+ continue;
+ __set_bit(key, state->keybit);
+ __set_bit(key, state->upbit);
+ }
+ }
+
+ state->wq = alloc_ordered_workqueue("keycombo", 0);
+ if (!state->wq)
+ return -ENOMEM;
+
+ state->priv = pdata->priv;
+
+ if (pdata->key_down_fn)
+ state->key_down_fn = pdata->key_down_fn;
+ INIT_DELAYED_WORK(&state->key_down_work, do_key_down);
+
+ if (pdata->key_up_fn)
+ state->key_up_fn = pdata->key_up_fn;
+ INIT_WORK(&state->key_up_work, do_key_up);
+
+ wakeup_source_init(&state->combo_held_wake_source, "key combo");
+ wakeup_source_init(&state->combo_up_wake_source, "key combo up");
+ state->delay = msecs_to_jiffies(pdata->key_down_delay);
+
+ state->input_handler.event = keycombo_event;
+ state->input_handler.connect = keycombo_connect;
+ state->input_handler.disconnect = keycombo_disconnect;
+ state->input_handler.name = KEYCOMBO_NAME;
+ state->input_handler.id_table = keycombo_ids;
+ ret = input_register_handler(&state->input_handler);
+ if (ret) {
+ kfree(state);
+ return ret;
+ }
+ platform_set_drvdata(pdev, state);
+ return 0;
+}
+
+int keycombo_remove(struct platform_device *pdev)
+{
+ struct keycombo_state *state = platform_get_drvdata(pdev);
+ input_unregister_handler(&state->input_handler);
+ destroy_workqueue(state->wq);
+ kfree(state);
+ return 0;
+}
+
+
+struct platform_driver keycombo_driver = {
+ .driver.name = KEYCOMBO_NAME,
+ .probe = keycombo_probe,
+ .remove = keycombo_remove,
+};
+
+static int __init keycombo_init(void)
+{
+ return platform_driver_register(&keycombo_driver);
+}
+
+static void __exit keycombo_exit(void)
+{
+ return platform_driver_unregister(&keycombo_driver);
+}
+
+module_init(keycombo_init);
+module_exit(keycombo_exit);
diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c
new file mode 100644
index 000000000000..7fbf7247e65f
--- /dev/null
+++ b/drivers/input/keyreset.c
@@ -0,0 +1,145 @@
+/* drivers/input/keyreset.c
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/input.h>
+#include <linux/keyreset.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/keycombo.h>
+
+struct keyreset_state {
+ int restart_requested;
+ int (*reset_fn)(void);
+ struct platform_device *pdev_child;
+ struct work_struct restart_work;
+};
+
+static void do_restart(struct work_struct *unused)
+{
+ sys_sync();
+ kernel_restart(NULL);
+}
+
+static void do_reset_fn(void *priv)
+{
+ struct keyreset_state *state = priv;
+ if (state->restart_requested)
+ panic("keyboard reset failed, %d", state->restart_requested);
+ if (state->reset_fn) {
+ state->restart_requested = state->reset_fn();
+ } else {
+ pr_info("keyboard reset\n");
+ schedule_work(&state->restart_work);
+ state->restart_requested = 1;
+ }
+}
+
+static int keyreset_probe(struct platform_device *pdev)
+{
+ int ret = -ENOMEM;
+ struct keycombo_platform_data *pdata_child;
+ struct keyreset_platform_data *pdata = pdev->dev.platform_data;
+ int up_size = 0, down_size = 0, size;
+ int key, *keyp;
+ struct keyreset_state *state;
+
+ if (!pdata)
+ return -EINVAL;
+ state = devm_kzalloc(&pdev->dev, sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ state->pdev_child = platform_device_alloc(KEYCOMBO_NAME,
+ PLATFORM_DEVID_AUTO);
+ if (!state->pdev_child)
+ return -ENOMEM;
+ state->pdev_child->dev.parent = &pdev->dev;
+ INIT_WORK(&state->restart_work, do_restart);
+
+ keyp = pdata->keys_down;
+ while ((key = *keyp++)) {
+ if (key >= KEY_MAX)
+ continue;
+ down_size++;
+ }
+ if (pdata->keys_up) {
+ keyp = pdata->keys_up;
+ while ((key = *keyp++)) {
+ if (key >= KEY_MAX)
+ continue;
+ up_size++;
+ }
+ }
+ size = sizeof(struct keycombo_platform_data)
+ + sizeof(int) * (down_size + 1);
+ pdata_child = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!pdata_child)
+ goto error;
+ memcpy(pdata_child->keys_down, pdata->keys_down,
+ sizeof(int) * down_size);
+ if (up_size > 0) {
+ pdata_child->keys_up = devm_kzalloc(&pdev->dev, up_size + 1,
+ GFP_KERNEL);
+ if (!pdata_child->keys_up)
+ goto error;
+ memcpy(pdata_child->keys_up, pdata->keys_up,
+ sizeof(int) * up_size);
+ if (!pdata_child->keys_up)
+ goto error;
+ }
+ state->reset_fn = pdata->reset_fn;
+ pdata_child->key_down_fn = do_reset_fn;
+ pdata_child->priv = state;
+ pdata_child->key_down_delay = pdata->key_down_delay;
+ ret = platform_device_add_data(state->pdev_child, pdata_child, size);
+ if (ret)
+ goto error;
+ platform_set_drvdata(pdev, state);
+ return platform_device_add(state->pdev_child);
+error:
+ platform_device_put(state->pdev_child);
+ return ret;
+}
+
+int keyreset_remove(struct platform_device *pdev)
+{
+ struct keyreset_state *state = platform_get_drvdata(pdev);
+ platform_device_put(state->pdev_child);
+ return 0;
+}
+
+
+struct platform_driver keyreset_driver = {
+ .driver.name = KEYRESET_NAME,
+ .probe = keyreset_probe,
+ .remove = keyreset_remove,
+};
+
+static int __init keyreset_init(void)
+{
+ return platform_driver_register(&keyreset_driver);
+}
+
+static void __exit keyreset_exit(void)
+{
+ return platform_driver_unregister(&keyreset_driver);
+}
+
+module_init(keyreset_init);
+module_exit(keyreset_exit);
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index bb698e1f9e42..4abf046e30b1 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -299,6 +299,17 @@ config INPUT_ATI_REMOTE2
To compile this driver as a module, choose M here: the module will be
called ati_remote2.
+config INPUT_KEYCHORD
+ tristate "Key chord input driver support"
+ help
+ Say Y here if you want to enable the key chord driver
+ accessible at /dev/keychord. This driver can be used
+	  for receiving notifications when client-specified key
+ combinations are pressed.
+
+ To compile this driver as a module, choose M here: the
+ module will be called keychord.
+
config INPUT_KEYSPAN_REMOTE
tristate "Keyspan DMR USB remote control"
depends on USB_ARCH_HAS_HCD
@@ -434,6 +445,11 @@ config INPUT_SGI_BTNS
To compile this driver as a module, choose M here: the
module will be called sgi_btns.
+config INPUT_GPIO
+ tristate "GPIO driver support"
+ help
+	  Say Y here if you want to support GPIO-based keys, wheels, etc.
+
config HP_SDC_RTC
tristate "HP SDC Real Time Clock"
depends on (GSC || HP300) && SERIO
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index d7fc17f11d77..6b0e8a677725 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -28,9 +28,11 @@ obj-$(CONFIG_INPUT_DA9055_ONKEY) += da9055_onkey.o
obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o
obj-$(CONFIG_INPUT_GP2A) += gp2ap002a00f.o
obj-$(CONFIG_INPUT_GPIO_TILT_POLLED) += gpio_tilt_polled.o
+obj-$(CONFIG_INPUT_GPIO) += gpio_event.o gpio_matrix.o gpio_input.o gpio_output.o gpio_axis.o
obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o
obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o
obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o
+obj-$(CONFIG_INPUT_KEYCHORD) += keychord.o
obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o
obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o
obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o
diff --git a/drivers/input/misc/gpio_axis.c b/drivers/input/misc/gpio_axis.c
new file mode 100644
index 000000000000..0acf4a576f53
--- /dev/null
+++ b/drivers/input/misc/gpio_axis.c
@@ -0,0 +1,192 @@
+/* drivers/input/misc/gpio_axis.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+struct gpio_axis_state {
+ struct gpio_event_input_devs *input_devs;
+ struct gpio_event_axis_info *info;
+ uint32_t pos;
+};
+
+uint16_t gpio_axis_4bit_gray_map_table[] = {
+ [0x0] = 0x0, [0x1] = 0x1, /* 0000 0001 */
+ [0x3] = 0x2, [0x2] = 0x3, /* 0011 0010 */
+ [0x6] = 0x4, [0x7] = 0x5, /* 0110 0111 */
+ [0x5] = 0x6, [0x4] = 0x7, /* 0101 0100 */
+ [0xc] = 0x8, [0xd] = 0x9, /* 1100 1101 */
+ [0xf] = 0xa, [0xe] = 0xb, /* 1111 1110 */
+ [0xa] = 0xc, [0xb] = 0xd, /* 1010 1011 */
+ [0x9] = 0xe, [0x8] = 0xf, /* 1001 1000 */
+};
+uint16_t gpio_axis_4bit_gray_map(struct gpio_event_axis_info *info, uint16_t in)
+{
+ return gpio_axis_4bit_gray_map_table[in];
+}
+
+uint16_t gpio_axis_5bit_singletrack_map_table[] = {
+ [0x10] = 0x00, [0x14] = 0x01, [0x1c] = 0x02, /* 10000 10100 11100 */
+ [0x1e] = 0x03, [0x1a] = 0x04, [0x18] = 0x05, /* 11110 11010 11000 */
+ [0x08] = 0x06, [0x0a] = 0x07, [0x0e] = 0x08, /* 01000 01010 01110 */
+ [0x0f] = 0x09, [0x0d] = 0x0a, [0x0c] = 0x0b, /* 01111 01101 01100 */
+ [0x04] = 0x0c, [0x05] = 0x0d, [0x07] = 0x0e, /* 00100 00101 00111 */
+ [0x17] = 0x0f, [0x16] = 0x10, [0x06] = 0x11, /* 10111 10110 00110 */
+ [0x02] = 0x12, [0x12] = 0x13, [0x13] = 0x14, /* 00010 10010 10011 */
+ [0x1b] = 0x15, [0x0b] = 0x16, [0x03] = 0x17, /* 11011 01011 00011 */
+ [0x01] = 0x18, [0x09] = 0x19, [0x19] = 0x1a, /* 00001 01001 11001 */
+ [0x1d] = 0x1b, [0x15] = 0x1c, [0x11] = 0x1d, /* 11101 10101 10001 */
+};
+uint16_t gpio_axis_5bit_singletrack_map(
+ struct gpio_event_axis_info *info, uint16_t in)
+{
+ return gpio_axis_5bit_singletrack_map_table[in];
+}
+
+static void gpio_event_update_axis(struct gpio_axis_state *as, int report)
+{
+ struct gpio_event_axis_info *ai = as->info;
+ int i;
+ int change;
+ uint16_t state = 0;
+ uint16_t pos;
+ uint16_t old_pos = as->pos;
+ for (i = ai->count - 1; i >= 0; i--)
+ state = (state << 1) | gpio_get_value(ai->gpio[i]);
+ pos = ai->map(ai, state);
+ if (ai->flags & GPIOEAF_PRINT_RAW)
+ pr_info("axis %d-%d raw %x, pos %d -> %d\n",
+ ai->type, ai->code, state, old_pos, pos);
+ if (report && pos != old_pos) {
+ if (ai->type == EV_REL) {
+ change = (ai->decoded_size + pos - old_pos) %
+ ai->decoded_size;
+ if (change > ai->decoded_size / 2)
+ change -= ai->decoded_size;
+ if (change == ai->decoded_size / 2) {
+ if (ai->flags & GPIOEAF_PRINT_EVENT)
+ pr_info("axis %d-%d unknown direction, "
+ "pos %d -> %d\n", ai->type,
+ ai->code, old_pos, pos);
+ change = 0; /* no closest direction */
+ }
+ if (ai->flags & GPIOEAF_PRINT_EVENT)
+ pr_info("axis %d-%d change %d\n",
+ ai->type, ai->code, change);
+ input_report_rel(as->input_devs->dev[ai->dev],
+ ai->code, change);
+ } else {
+ if (ai->flags & GPIOEAF_PRINT_EVENT)
+ pr_info("axis %d-%d now %d\n",
+ ai->type, ai->code, pos);
+ input_event(as->input_devs->dev[ai->dev],
+ ai->type, ai->code, pos);
+ }
+ input_sync(as->input_devs->dev[ai->dev]);
+ }
+ as->pos = pos;
+}
+
+static irqreturn_t gpio_axis_irq_handler(int irq, void *dev_id)
+{
+ struct gpio_axis_state *as = dev_id;
+ gpio_event_update_axis(as, 1);
+ return IRQ_HANDLED;
+}
+
+int gpio_event_axis_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func)
+{
+ int ret;
+ int i;
+ int irq;
+ struct gpio_event_axis_info *ai;
+ struct gpio_axis_state *as;
+
+ ai = container_of(info, struct gpio_event_axis_info, info);
+ if (func == GPIO_EVENT_FUNC_SUSPEND) {
+ for (i = 0; i < ai->count; i++)
+ disable_irq(gpio_to_irq(ai->gpio[i]));
+ return 0;
+ }
+ if (func == GPIO_EVENT_FUNC_RESUME) {
+ for (i = 0; i < ai->count; i++)
+ enable_irq(gpio_to_irq(ai->gpio[i]));
+ return 0;
+ }
+
+ if (func == GPIO_EVENT_FUNC_INIT) {
+ *data = as = kmalloc(sizeof(*as), GFP_KERNEL);
+ if (as == NULL) {
+ ret = -ENOMEM;
+ goto err_alloc_axis_state_failed;
+ }
+ as->input_devs = input_devs;
+ as->info = ai;
+ if (ai->dev >= input_devs->count) {
+ pr_err("gpio_event_axis: bad device index %d >= %d "
+ "for %d:%d\n", ai->dev, input_devs->count,
+ ai->type, ai->code);
+ ret = -EINVAL;
+ goto err_bad_device_index;
+ }
+
+ input_set_capability(input_devs->dev[ai->dev],
+ ai->type, ai->code);
+ if (ai->type == EV_ABS) {
+ input_set_abs_params(input_devs->dev[ai->dev], ai->code,
+ 0, ai->decoded_size - 1, 0, 0);
+ }
+ for (i = 0; i < ai->count; i++) {
+ ret = gpio_request(ai->gpio[i], "gpio_event_axis");
+ if (ret < 0)
+ goto err_request_gpio_failed;
+ ret = gpio_direction_input(ai->gpio[i]);
+ if (ret < 0)
+ goto err_gpio_direction_input_failed;
+ ret = irq = gpio_to_irq(ai->gpio[i]);
+ if (ret < 0)
+ goto err_get_irq_num_failed;
+ ret = request_irq(irq, gpio_axis_irq_handler,
+ IRQF_TRIGGER_RISING |
+ IRQF_TRIGGER_FALLING,
+ "gpio_event_axis", as);
+ if (ret < 0)
+ goto err_request_irq_failed;
+ }
+ gpio_event_update_axis(as, 0);
+ return 0;
+ }
+
+ ret = 0;
+ as = *data;
+ for (i = ai->count - 1; i >= 0; i--) {
+ free_irq(gpio_to_irq(ai->gpio[i]), as);
+err_request_irq_failed:
+err_get_irq_num_failed:
+err_gpio_direction_input_failed:
+ gpio_free(ai->gpio[i]);
+err_request_gpio_failed:
+ ;
+ }
+err_bad_device_index:
+ kfree(as);
+ *data = NULL;
+err_alloc_axis_state_failed:
+ return ret;
+}
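The EV_REL branch of gpio_event_update_axis() above turns consecutive decoded positions into a signed, wrap-aware delta. A minimal sketch of that computation with hypothetical positions:

    /* wrap-aware relative delta as computed for EV_REL axes; positions are made up */
    #include <stdio.h>

    static int axis_delta(int old_pos, int pos, int decoded_size)
    {
            int change = (decoded_size + pos - old_pos) % decoded_size;

            if (change > decoded_size / 2)
                    change -= decoded_size; /* shorter path is backwards */
            if (change == decoded_size / 2)
                    change = 0;             /* ambiguous, no closest direction */
            return change;
    }

    int main(void)
    {
            /* the 4-bit gray map above decodes to 16 positions */
            printf("%d\n", axis_delta(15, 0, 16)); /* +1: wrapped forward one step */
            printf("%d\n", axis_delta(0, 15, 16)); /* -1: wrapped backward one step */
            return 0;
    }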
diff --git a/drivers/input/misc/gpio_event.c b/drivers/input/misc/gpio_event.c
new file mode 100644
index 000000000000..90f07eba3ce9
--- /dev/null
+++ b/drivers/input/misc/gpio_event.c
@@ -0,0 +1,228 @@
+/* drivers/input/misc/gpio_event.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/gpio_event.h>
+#include <linux/hrtimer.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+struct gpio_event {
+ struct gpio_event_input_devs *input_devs;
+ const struct gpio_event_platform_data *info;
+ void *state[0];
+};
+
+static int gpio_input_event(
+ struct input_dev *dev, unsigned int type, unsigned int code, int value)
+{
+ int i;
+ int devnr;
+ int ret = 0;
+ int tmp_ret;
+ struct gpio_event_info **ii;
+ struct gpio_event *ip = input_get_drvdata(dev);
+
+ for (devnr = 0; devnr < ip->input_devs->count; devnr++)
+ if (ip->input_devs->dev[devnr] == dev)
+ break;
+ if (devnr == ip->input_devs->count) {
+ pr_err("gpio_input_event: unknown device %p\n", dev);
+ return -EIO;
+ }
+
+ for (i = 0, ii = ip->info->info; i < ip->info->info_count; i++, ii++) {
+ if ((*ii)->event) {
+ tmp_ret = (*ii)->event(ip->input_devs, *ii,
+ &ip->state[i],
+ devnr, type, code, value);
+ if (tmp_ret)
+ ret = tmp_ret;
+ }
+ }
+ return ret;
+}
+
+static int gpio_event_call_all_func(struct gpio_event *ip, int func)
+{
+ int i;
+ int ret;
+ struct gpio_event_info **ii;
+
+ if (func == GPIO_EVENT_FUNC_INIT || func == GPIO_EVENT_FUNC_RESUME) {
+ ii = ip->info->info;
+ for (i = 0; i < ip->info->info_count; i++, ii++) {
+ if ((*ii)->func == NULL) {
+ ret = -ENODEV;
+ pr_err("gpio_event_probe: Incomplete pdata, "
+ "no function\n");
+ goto err_no_func;
+ }
+ if (func == GPIO_EVENT_FUNC_RESUME && (*ii)->no_suspend)
+ continue;
+ ret = (*ii)->func(ip->input_devs, *ii, &ip->state[i],
+ func);
+ if (ret) {
+ pr_err("gpio_event_probe: function failed\n");
+ goto err_func_failed;
+ }
+ }
+ return 0;
+ }
+
+ ret = 0;
+ i = ip->info->info_count;
+ ii = ip->info->info + i;
+ while (i > 0) {
+ i--;
+ ii--;
+ if ((func & ~1) == GPIO_EVENT_FUNC_SUSPEND && (*ii)->no_suspend)
+ continue;
+ (*ii)->func(ip->input_devs, *ii, &ip->state[i], func & ~1);
+err_func_failed:
+err_no_func:
+ ;
+ }
+ return ret;
+}
+
+static void __maybe_unused gpio_event_suspend(struct gpio_event *ip)
+{
+ gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_SUSPEND);
+ if (ip->info->power)
+ ip->info->power(ip->info, 0);
+}
+
+static void __maybe_unused gpio_event_resume(struct gpio_event *ip)
+{
+ if (ip->info->power)
+ ip->info->power(ip->info, 1);
+ gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_RESUME);
+}
+
+static int gpio_event_probe(struct platform_device *pdev)
+{
+ int err;
+ struct gpio_event *ip;
+ struct gpio_event_platform_data *event_info;
+ int dev_count = 1;
+ int i;
+ int registered = 0;
+
+ event_info = pdev->dev.platform_data;
+ if (event_info == NULL) {
+ pr_err("gpio_event_probe: No pdata\n");
+ return -ENODEV;
+ }
+ if ((!event_info->name && !event_info->names[0]) ||
+ !event_info->info || !event_info->info_count) {
+ pr_err("gpio_event_probe: Incomplete pdata\n");
+ return -ENODEV;
+ }
+ if (!event_info->name)
+ while (event_info->names[dev_count])
+ dev_count++;
+ ip = kzalloc(sizeof(*ip) +
+ sizeof(ip->state[0]) * event_info->info_count +
+ sizeof(*ip->input_devs) +
+ sizeof(ip->input_devs->dev[0]) * dev_count, GFP_KERNEL);
+ if (ip == NULL) {
+ err = -ENOMEM;
+ pr_err("gpio_event_probe: Failed to allocate private data\n");
+ goto err_kp_alloc_failed;
+ }
+ ip->input_devs = (void*)&ip->state[event_info->info_count];
+ platform_set_drvdata(pdev, ip);
+
+ for (i = 0; i < dev_count; i++) {
+ struct input_dev *input_dev = input_allocate_device();
+ if (input_dev == NULL) {
+ err = -ENOMEM;
+ pr_err("gpio_event_probe: "
+ "Failed to allocate input device\n");
+ goto err_input_dev_alloc_failed;
+ }
+ input_set_drvdata(input_dev, ip);
+ input_dev->name = event_info->name ?
+ event_info->name : event_info->names[i];
+ input_dev->event = gpio_input_event;
+ ip->input_devs->dev[i] = input_dev;
+ }
+ ip->input_devs->count = dev_count;
+ ip->info = event_info;
+ if (event_info->power)
+ ip->info->power(ip->info, 1);
+
+ err = gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_INIT);
+ if (err)
+ goto err_call_all_func_failed;
+
+ for (i = 0; i < dev_count; i++) {
+ err = input_register_device(ip->input_devs->dev[i]);
+ if (err) {
+ pr_err("gpio_event_probe: Unable to register %s "
+ "input device\n", ip->input_devs->dev[i]->name);
+ goto err_input_register_device_failed;
+ }
+ registered++;
+ }
+
+ return 0;
+
+err_input_register_device_failed:
+ gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT);
+err_call_all_func_failed:
+ if (event_info->power)
+ ip->info->power(ip->info, 0);
+ for (i = 0; i < registered; i++)
+ input_unregister_device(ip->input_devs->dev[i]);
+ for (i = dev_count - 1; i >= registered; i--) {
+ input_free_device(ip->input_devs->dev[i]);
+err_input_dev_alloc_failed:
+ ;
+ }
+ kfree(ip);
+err_kp_alloc_failed:
+ return err;
+}
+
+static int gpio_event_remove(struct platform_device *pdev)
+{
+ struct gpio_event *ip = platform_get_drvdata(pdev);
+ int i;
+
+ gpio_event_call_all_func(ip, GPIO_EVENT_FUNC_UNINIT);
+ if (ip->info->power)
+ ip->info->power(ip->info, 0);
+ for (i = 0; i < ip->input_devs->count; i++)
+ input_unregister_device(ip->input_devs->dev[i]);
+ kfree(ip);
+ return 0;
+}
+
+static struct platform_driver gpio_event_driver = {
+ .probe = gpio_event_probe,
+ .remove = gpio_event_remove,
+ .driver = {
+ .name = GPIO_EVENT_DEV_NAME,
+ },
+};
+
+module_platform_driver(gpio_event_driver);
+
+MODULE_DESCRIPTION("GPIO Event Driver");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/input/misc/gpio_input.c b/drivers/input/misc/gpio_input.c
new file mode 100644
index 000000000000..eefd02725aff
--- /dev/null
+++ b/drivers/input/misc/gpio_input.c
@@ -0,0 +1,390 @@
+/* drivers/input/misc/gpio_input.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+#include <linux/hrtimer.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/pm_wakeup.h>
+
+enum {
+ DEBOUNCE_UNSTABLE = BIT(0), /* Got irq, while debouncing */
+ DEBOUNCE_PRESSED = BIT(1),
+ DEBOUNCE_NOTPRESSED = BIT(2),
+ DEBOUNCE_WAIT_IRQ = BIT(3), /* Stable irq state */
+ DEBOUNCE_POLL = BIT(4), /* Stable polling state */
+
+ DEBOUNCE_UNKNOWN =
+ DEBOUNCE_PRESSED | DEBOUNCE_NOTPRESSED,
+};
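+
+/*
+ * Rough debounce flow (a sketch of the state machine implemented below):
+ * a stable interrupt-driven key sits in DEBOUNCE_WAIT_IRQ; an edge IRQ
+ * moves it to DEBOUNCE_UNKNOWN and kicks the timer; successive polls
+ * settle it to DEBOUNCE_PRESSED or DEBOUNCE_NOTPRESSED, after which the
+ * new value is reported and the key returns to DEBOUNCE_WAIT_IRQ (or
+ * DEBOUNCE_POLL when IRQs are not used).  An IRQ that fires while the key
+ * is still being debounced marks it DEBOUNCE_UNSTABLE and masks the IRQ
+ * until the next timer pass.
+ */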
+
+struct gpio_key_state {
+ struct gpio_input_state *ds;
+ uint8_t debounce;
+};
+
+struct gpio_input_state {
+ struct gpio_event_input_devs *input_devs;
+ const struct gpio_event_input_info *info;
+ struct hrtimer timer;
+ int use_irq;
+ int debounce_count;
+ spinlock_t irq_lock;
+ struct wakeup_source *ws;
+ struct gpio_key_state key_state[0];
+};
+
+static enum hrtimer_restart gpio_event_input_timer_func(struct hrtimer *timer)
+{
+ int i;
+ int pressed;
+ struct gpio_input_state *ds =
+ container_of(timer, struct gpio_input_state, timer);
+ unsigned gpio_flags = ds->info->flags;
+ unsigned npolarity;
+ int nkeys = ds->info->keymap_size;
+ const struct gpio_event_direct_entry *key_entry;
+ struct gpio_key_state *key_state;
+ unsigned long irqflags;
+ uint8_t debounce;
+ bool sync_needed;
+
+#if 0
+ key_entry = kp->keys_info->keymap;
+ key_state = kp->key_state;
+ for (i = 0; i < nkeys; i++, key_entry++, key_state++)
+ pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio,
+ gpio_read_detect_status(key_entry->gpio));
+#endif
+ key_entry = ds->info->keymap;
+ key_state = ds->key_state;
+ sync_needed = false;
+ spin_lock_irqsave(&ds->irq_lock, irqflags);
+ for (i = 0; i < nkeys; i++, key_entry++, key_state++) {
+ debounce = key_state->debounce;
+ if (debounce & DEBOUNCE_WAIT_IRQ)
+ continue;
+ if (key_state->debounce & DEBOUNCE_UNSTABLE) {
+ debounce = key_state->debounce = DEBOUNCE_UNKNOWN;
+ enable_irq(gpio_to_irq(key_entry->gpio));
+ if (gpio_flags & GPIOEDF_PRINT_KEY_UNSTABLE)
+ pr_info("gpio_keys_scan_keys: key %x-%x, %d "
+ "(%d) continue debounce\n",
+ ds->info->type, key_entry->code,
+ i, key_entry->gpio);
+ }
+ npolarity = !(gpio_flags & GPIOEDF_ACTIVE_HIGH);
+ pressed = gpio_get_value(key_entry->gpio) ^ npolarity;
+ if (debounce & DEBOUNCE_POLL) {
+ if (pressed == !(debounce & DEBOUNCE_PRESSED)) {
+ ds->debounce_count++;
+ key_state->debounce = DEBOUNCE_UNKNOWN;
+ if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+ pr_info("gpio_keys_scan_keys: key %x-"
+ "%x, %d (%d) start debounce\n",
+ ds->info->type, key_entry->code,
+ i, key_entry->gpio);
+ }
+ continue;
+ }
+ if (pressed && (debounce & DEBOUNCE_NOTPRESSED)) {
+ if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+ pr_info("gpio_keys_scan_keys: key %x-%x, %d "
+ "(%d) debounce pressed 1\n",
+ ds->info->type, key_entry->code,
+ i, key_entry->gpio);
+ key_state->debounce = DEBOUNCE_PRESSED;
+ continue;
+ }
+ if (!pressed && (debounce & DEBOUNCE_PRESSED)) {
+ if (gpio_flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+ pr_info("gpio_keys_scan_keys: key %x-%x, %d "
+ "(%d) debounce pressed 0\n",
+ ds->info->type, key_entry->code,
+ i, key_entry->gpio);
+ key_state->debounce = DEBOUNCE_NOTPRESSED;
+ continue;
+ }
+ /* key is stable */
+ ds->debounce_count--;
+ if (ds->use_irq)
+ key_state->debounce |= DEBOUNCE_WAIT_IRQ;
+ else
+ key_state->debounce |= DEBOUNCE_POLL;
+ if (gpio_flags & GPIOEDF_PRINT_KEYS)
+ pr_info("gpio_keys_scan_keys: key %x-%x, %d (%d) "
+ "changed to %d\n", ds->info->type,
+ key_entry->code, i, key_entry->gpio, pressed);
+ input_event(ds->input_devs->dev[key_entry->dev], ds->info->type,
+ key_entry->code, pressed);
+ sync_needed = true;
+ }
+ if (sync_needed) {
+ for (i = 0; i < ds->input_devs->count; i++)
+ input_sync(ds->input_devs->dev[i]);
+ }
+
+#if 0
+ key_entry = kp->keys_info->keymap;
+ key_state = kp->key_state;
+ for (i = 0; i < nkeys; i++, key_entry++, key_state++) {
+ pr_info("gpio_read_detect_status %d %d\n", key_entry->gpio,
+ gpio_read_detect_status(key_entry->gpio));
+ }
+#endif
+
+ if (ds->debounce_count)
+ hrtimer_start(timer, ds->info->debounce_time, HRTIMER_MODE_REL);
+ else if (!ds->use_irq)
+ hrtimer_start(timer, ds->info->poll_time, HRTIMER_MODE_REL);
+ else
+ __pm_relax(ds->ws);
+
+ spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+
+ return HRTIMER_NORESTART;
+}
+
+static irqreturn_t gpio_event_input_irq_handler(int irq, void *dev_id)
+{
+ struct gpio_key_state *ks = dev_id;
+ struct gpio_input_state *ds = ks->ds;
+ int keymap_index = ks - ds->key_state;
+ const struct gpio_event_direct_entry *key_entry;
+ unsigned long irqflags;
+ int pressed;
+
+ if (!ds->use_irq)
+ return IRQ_HANDLED;
+
+ key_entry = &ds->info->keymap[keymap_index];
+
+ if (ds->info->debounce_time.tv64) {
+ spin_lock_irqsave(&ds->irq_lock, irqflags);
+ if (ks->debounce & DEBOUNCE_WAIT_IRQ) {
+ ks->debounce = DEBOUNCE_UNKNOWN;
+ if (ds->debounce_count++ == 0) {
+ __pm_stay_awake(ds->ws);
+ hrtimer_start(
+ &ds->timer, ds->info->debounce_time,
+ HRTIMER_MODE_REL);
+ }
+ if (ds->info->flags & GPIOEDF_PRINT_KEY_DEBOUNCE)
+ pr_info("gpio_event_input_irq_handler: "
+ "key %x-%x, %d (%d) start debounce\n",
+ ds->info->type, key_entry->code,
+ keymap_index, key_entry->gpio);
+ } else {
+ disable_irq_nosync(irq);
+ ks->debounce = DEBOUNCE_UNSTABLE;
+ }
+ spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+ } else {
+ pressed = gpio_get_value(key_entry->gpio) ^
+ !(ds->info->flags & GPIOEDF_ACTIVE_HIGH);
+ if (ds->info->flags & GPIOEDF_PRINT_KEYS)
+ pr_info("gpio_event_input_irq_handler: key %x-%x, %d "
+ "(%d) changed to %d\n",
+ ds->info->type, key_entry->code, keymap_index,
+ key_entry->gpio, pressed);
+ input_event(ds->input_devs->dev[key_entry->dev], ds->info->type,
+ key_entry->code, pressed);
+ input_sync(ds->input_devs->dev[key_entry->dev]);
+ }
+ return IRQ_HANDLED;
+}
+
+static int gpio_event_input_request_irqs(struct gpio_input_state *ds)
+{
+ int i;
+ int err;
+ unsigned int irq;
+ unsigned long req_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING;
+
+ for (i = 0; i < ds->info->keymap_size; i++) {
+ err = irq = gpio_to_irq(ds->info->keymap[i].gpio);
+ if (err < 0)
+ goto err_gpio_get_irq_num_failed;
+ err = request_irq(irq, gpio_event_input_irq_handler,
+ req_flags, "gpio_keys", &ds->key_state[i]);
+ if (err) {
+ pr_err("gpio_event_input_request_irqs: request_irq "
+ "failed for input %d, irq %d\n",
+ ds->info->keymap[i].gpio, irq);
+ goto err_request_irq_failed;
+ }
+ if (ds->info->info.no_suspend) {
+ err = enable_irq_wake(irq);
+ if (err) {
+ pr_err("gpio_event_input_request_irqs: "
+ "enable_irq_wake failed for input %d, "
+ "irq %d\n",
+ ds->info->keymap[i].gpio, irq);
+ goto err_enable_irq_wake_failed;
+ }
+ }
+ }
+ return 0;
+
+ for (i = ds->info->keymap_size - 1; i >= 0; i--) {
+ irq = gpio_to_irq(ds->info->keymap[i].gpio);
+ if (ds->info->info.no_suspend)
+ disable_irq_wake(irq);
+err_enable_irq_wake_failed:
+ free_irq(irq, &ds->key_state[i]);
+err_request_irq_failed:
+err_gpio_get_irq_num_failed:
+ ;
+ }
+ return err;
+}
+
+int gpio_event_input_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func)
+{
+ int ret;
+ int i;
+ unsigned long irqflags;
+ struct gpio_event_input_info *di;
+ struct gpio_input_state *ds = *data;
+ char *wlname;
+
+ di = container_of(info, struct gpio_event_input_info, info);
+
+ if (func == GPIO_EVENT_FUNC_SUSPEND) {
+ if (ds->use_irq)
+ for (i = 0; i < di->keymap_size; i++)
+ disable_irq(gpio_to_irq(di->keymap[i].gpio));
+ hrtimer_cancel(&ds->timer);
+ return 0;
+ }
+ if (func == GPIO_EVENT_FUNC_RESUME) {
+ spin_lock_irqsave(&ds->irq_lock, irqflags);
+ if (ds->use_irq)
+ for (i = 0; i < di->keymap_size; i++)
+ enable_irq(gpio_to_irq(di->keymap[i].gpio));
+ hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+ spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+ return 0;
+ }
+
+ if (func == GPIO_EVENT_FUNC_INIT) {
+ if (ktime_to_ns(di->poll_time) <= 0)
+ di->poll_time = ktime_set(0, 20 * NSEC_PER_MSEC);
+
+ *data = ds = kzalloc(sizeof(*ds) + sizeof(ds->key_state[0]) *
+ di->keymap_size, GFP_KERNEL);
+ if (ds == NULL) {
+ ret = -ENOMEM;
+ pr_err("gpio_event_input_func: "
+ "Failed to allocate private data\n");
+ goto err_ds_alloc_failed;
+ }
+ ds->debounce_count = di->keymap_size;
+ ds->input_devs = input_devs;
+ ds->info = di;
+ wlname = kasprintf(GFP_KERNEL, "gpio_input:%s%s",
+ input_devs->dev[0]->name,
+ (input_devs->count > 1) ? "..." : "");
+
+ ds->ws = wakeup_source_register(wlname);
+ kfree(wlname);
+ if (!ds->ws) {
+ ret = -ENOMEM;
+ pr_err("gpio_event_input_func: "
+ "Failed to allocate wakeup source\n");
+ goto err_ws_failed;
+ }
+
+ spin_lock_init(&ds->irq_lock);
+
+ for (i = 0; i < di->keymap_size; i++) {
+ int dev = di->keymap[i].dev;
+ if (dev >= input_devs->count) {
+ pr_err("gpio_event_input_func: bad device "
+ "index %d >= %d for key code %d\n",
+ dev, input_devs->count,
+ di->keymap[i].code);
+ ret = -EINVAL;
+ goto err_bad_keymap;
+ }
+ input_set_capability(input_devs->dev[dev], di->type,
+ di->keymap[i].code);
+ ds->key_state[i].ds = ds;
+ ds->key_state[i].debounce = DEBOUNCE_UNKNOWN;
+ }
+
+ for (i = 0; i < di->keymap_size; i++) {
+ ret = gpio_request(di->keymap[i].gpio, "gpio_kp_in");
+ if (ret) {
+ pr_err("gpio_event_input_func: gpio_request "
+ "failed for %d\n", di->keymap[i].gpio);
+ goto err_gpio_request_failed;
+ }
+ ret = gpio_direction_input(di->keymap[i].gpio);
+ if (ret) {
+ pr_err("gpio_event_input_func: "
+ "gpio_direction_input failed for %d\n",
+ di->keymap[i].gpio);
+ goto err_gpio_configure_failed;
+ }
+ }
+
+ ret = gpio_event_input_request_irqs(ds);
+
+ spin_lock_irqsave(&ds->irq_lock, irqflags);
+ ds->use_irq = ret == 0;
+
+ pr_info("GPIO Input Driver: Start gpio inputs for %s%s in %s "
+ "mode\n", input_devs->dev[0]->name,
+ (input_devs->count > 1) ? "..." : "",
+ ret == 0 ? "interrupt" : "polling");
+
+ hrtimer_init(&ds->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ ds->timer.function = gpio_event_input_timer_func;
+ hrtimer_start(&ds->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+ spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+ return 0;
+ }
+
+ ret = 0;
+ spin_lock_irqsave(&ds->irq_lock, irqflags);
+ hrtimer_cancel(&ds->timer);
+ if (ds->use_irq) {
+ for (i = di->keymap_size - 1; i >= 0; i--) {
+ int irq = gpio_to_irq(di->keymap[i].gpio);
+ if (ds->info->info.no_suspend)
+ disable_irq_wake(irq);
+ free_irq(irq, &ds->key_state[i]);
+ }
+ }
+ spin_unlock_irqrestore(&ds->irq_lock, irqflags);
+
+ for (i = di->keymap_size - 1; i >= 0; i--) {
+err_gpio_configure_failed:
+ gpio_free(di->keymap[i].gpio);
+err_gpio_request_failed:
+ ;
+ }
+err_bad_keymap:
+ wakeup_source_unregister(ds->ws);
+err_ws_failed:
+ kfree(ds);
+err_ds_alloc_failed:
+ return ret;
+}
diff --git a/drivers/input/misc/gpio_matrix.c b/drivers/input/misc/gpio_matrix.c
new file mode 100644
index 000000000000..eaa9e89d473a
--- /dev/null
+++ b/drivers/input/misc/gpio_matrix.c
@@ -0,0 +1,441 @@
+/* drivers/input/misc/gpio_matrix.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/wakelock.h>
+
+struct gpio_kp {
+ struct gpio_event_input_devs *input_devs;
+ struct gpio_event_matrix_info *keypad_info;
+ struct hrtimer timer;
+ struct wake_lock wake_lock;
+ int current_output;
+ unsigned int use_irq:1;
+ unsigned int key_state_changed:1;
+ unsigned int last_key_state_changed:1;
+ unsigned int some_keys_pressed:2;
+ unsigned int disabled_irq:1;
+ unsigned long keys_pressed[0];
+};
+
+static void clear_phantom_key(struct gpio_kp *kp, int out, int in)
+{
+ struct gpio_event_matrix_info *mi = kp->keypad_info;
+ int key_index = out * mi->ninputs + in;
+ unsigned short keyentry = mi->keymap[key_index];
+ unsigned short keycode = keyentry & MATRIX_KEY_MASK;
+ unsigned short dev = keyentry >> MATRIX_CODE_BITS;
+
+ if (!test_bit(keycode, kp->input_devs->dev[dev]->key)) {
+ if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS)
+ pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) "
+ "cleared\n", keycode, out, in,
+ mi->output_gpios[out], mi->input_gpios[in]);
+ __clear_bit(key_index, kp->keys_pressed);
+ } else {
+ if (mi->flags & GPIOKPF_PRINT_PHANTOM_KEYS)
+ pr_info("gpiomatrix: phantom key %x, %d-%d (%d-%d) "
+ "not cleared\n", keycode, out, in,
+ mi->output_gpios[out], mi->input_gpios[in]);
+ }
+}
+
+static int restore_keys_for_input(struct gpio_kp *kp, int out, int in)
+{
+ int rv = 0;
+ int key_index;
+
+ key_index = out * kp->keypad_info->ninputs + in;
+ while (out < kp->keypad_info->noutputs) {
+ if (test_bit(key_index, kp->keys_pressed)) {
+ rv = 1;
+ clear_phantom_key(kp, out, in);
+ }
+ key_index += kp->keypad_info->ninputs;
+ out++;
+ }
+ return rv;
+}
+
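+/*
+ * A keypad matrix without isolation diodes can report "phantom" (ghost)
+ * keys once three or more keys are held: extra row/column intersections
+ * read as pressed.  Such entries are cleared below unless that keycode
+ * was already reported as down.
+ */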
+static void remove_phantom_keys(struct gpio_kp *kp)
+{
+ int out, in, inp;
+ int key_index;
+
+ if (kp->some_keys_pressed < 3)
+ return;
+
+ for (out = 0; out < kp->keypad_info->noutputs; out++) {
+ inp = -1;
+ key_index = out * kp->keypad_info->ninputs;
+ for (in = 0; in < kp->keypad_info->ninputs; in++, key_index++) {
+ if (test_bit(key_index, kp->keys_pressed)) {
+ if (inp == -1) {
+ inp = in;
+ continue;
+ }
+ if (inp >= 0) {
+ if (!restore_keys_for_input(kp, out + 1,
+ inp))
+ break;
+ clear_phantom_key(kp, out, inp);
+ inp = -2;
+ }
+ restore_keys_for_input(kp, out, in);
+ }
+ }
+ }
+}
+
+static void report_key(struct gpio_kp *kp, int key_index, int out, int in)
+{
+ struct gpio_event_matrix_info *mi = kp->keypad_info;
+ int pressed = test_bit(key_index, kp->keys_pressed);
+ unsigned short keyentry = mi->keymap[key_index];
+ unsigned short keycode = keyentry & MATRIX_KEY_MASK;
+ unsigned short dev = keyentry >> MATRIX_CODE_BITS;
+
+ if (pressed != test_bit(keycode, kp->input_devs->dev[dev]->key)) {
+ if (keycode == KEY_RESERVED) {
+ if (mi->flags & GPIOKPF_PRINT_UNMAPPED_KEYS)
+ pr_info("gpiomatrix: unmapped key, %d-%d "
+ "(%d-%d) changed to %d\n",
+ out, in, mi->output_gpios[out],
+ mi->input_gpios[in], pressed);
+ } else {
+ if (mi->flags & GPIOKPF_PRINT_MAPPED_KEYS)
+ pr_info("gpiomatrix: key %x, %d-%d (%d-%d) "
+ "changed to %d\n", keycode,
+ out, in, mi->output_gpios[out],
+ mi->input_gpios[in], pressed);
+ input_report_key(kp->input_devs->dev[dev], keycode, pressed);
+ }
+ }
+}
+
+static void report_sync(struct gpio_kp *kp)
+{
+ int i;
+
+ for (i = 0; i < kp->input_devs->count; i++)
+ input_sync(kp->input_devs->dev[i]);
+}
+
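+/*
+ * Matrix scan, roughly: each timer pass samples every input for the
+ * currently driven output line, then drives the next output and waits
+ * mi->settle_time before sampling again.  After the last output, optional
+ * debouncing and phantom-key removal run and any changed keys are
+ * reported.  The poll timer is re-armed while keys are held (or when IRQs
+ * are not used); otherwise all outputs are driven active, the per-input
+ * interrupts are re-enabled and the wake lock is released.
+ */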
+static enum hrtimer_restart gpio_keypad_timer_func(struct hrtimer *timer)
+{
+ int out, in;
+ int key_index;
+ int gpio;
+ struct gpio_kp *kp = container_of(timer, struct gpio_kp, timer);
+ struct gpio_event_matrix_info *mi = kp->keypad_info;
+ unsigned gpio_keypad_flags = mi->flags;
+ unsigned polarity = !!(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH);
+
+ out = kp->current_output;
+ if (out == mi->noutputs) {
+ out = 0;
+ kp->last_key_state_changed = kp->key_state_changed;
+ kp->key_state_changed = 0;
+ kp->some_keys_pressed = 0;
+ } else {
+ key_index = out * mi->ninputs;
+ for (in = 0; in < mi->ninputs; in++, key_index++) {
+ gpio = mi->input_gpios[in];
+ if (gpio_get_value(gpio) ^ !polarity) {
+ if (kp->some_keys_pressed < 3)
+ kp->some_keys_pressed++;
+ kp->key_state_changed |= !__test_and_set_bit(
+ key_index, kp->keys_pressed);
+ } else
+ kp->key_state_changed |= __test_and_clear_bit(
+ key_index, kp->keys_pressed);
+ }
+ gpio = mi->output_gpios[out];
+ if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+ gpio_set_value(gpio, !polarity);
+ else
+ gpio_direction_input(gpio);
+ out++;
+ }
+ kp->current_output = out;
+ if (out < mi->noutputs) {
+ gpio = mi->output_gpios[out];
+ if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+ gpio_set_value(gpio, polarity);
+ else
+ gpio_direction_output(gpio, polarity);
+ hrtimer_start(timer, mi->settle_time, HRTIMER_MODE_REL);
+ return HRTIMER_NORESTART;
+ }
+ if (gpio_keypad_flags & GPIOKPF_DEBOUNCE) {
+ if (kp->key_state_changed) {
+ hrtimer_start(&kp->timer, mi->debounce_delay,
+ HRTIMER_MODE_REL);
+ return HRTIMER_NORESTART;
+ }
+ kp->key_state_changed = kp->last_key_state_changed;
+ }
+ if (kp->key_state_changed) {
+ if (gpio_keypad_flags & GPIOKPF_REMOVE_SOME_PHANTOM_KEYS)
+ remove_phantom_keys(kp);
+ key_index = 0;
+ for (out = 0; out < mi->noutputs; out++)
+ for (in = 0; in < mi->ninputs; in++, key_index++)
+ report_key(kp, key_index, out, in);
+ report_sync(kp);
+ }
+ if (!kp->use_irq || kp->some_keys_pressed) {
+ hrtimer_start(timer, mi->poll_time, HRTIMER_MODE_REL);
+ return HRTIMER_NORESTART;
+ }
+
+ /* No keys are pressed, reenable interrupt */
+ for (out = 0; out < mi->noutputs; out++) {
+ if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+ gpio_set_value(mi->output_gpios[out], polarity);
+ else
+ gpio_direction_output(mi->output_gpios[out], polarity);
+ }
+ for (in = 0; in < mi->ninputs; in++)
+ enable_irq(gpio_to_irq(mi->input_gpios[in]));
+ wake_unlock(&kp->wake_lock);
+ return HRTIMER_NORESTART;
+}
+
+static irqreturn_t gpio_keypad_irq_handler(int irq_in, void *dev_id)
+{
+ int i;
+ struct gpio_kp *kp = dev_id;
+ struct gpio_event_matrix_info *mi = kp->keypad_info;
+ unsigned gpio_keypad_flags = mi->flags;
+
+ if (!kp->use_irq) {
+ /* ignore interrupt while registering the handler */
+ kp->disabled_irq = 1;
+ disable_irq_nosync(irq_in);
+ return IRQ_HANDLED;
+ }
+
+ for (i = 0; i < mi->ninputs; i++)
+ disable_irq_nosync(gpio_to_irq(mi->input_gpios[i]));
+ for (i = 0; i < mi->noutputs; i++) {
+ if (gpio_keypad_flags & GPIOKPF_DRIVE_INACTIVE)
+ gpio_set_value(mi->output_gpios[i],
+ !(gpio_keypad_flags & GPIOKPF_ACTIVE_HIGH));
+ else
+ gpio_direction_input(mi->output_gpios[i]);
+ }
+ wake_lock(&kp->wake_lock);
+ hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+ return IRQ_HANDLED;
+}
+
+static int gpio_keypad_request_irqs(struct gpio_kp *kp)
+{
+ int i;
+ int err;
+ unsigned int irq;
+ unsigned long request_flags;
+ struct gpio_event_matrix_info *mi = kp->keypad_info;
+
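+	/*
+	 * Pick the IRQ trigger from the keypad flags: edge triggered by
+	 * default (falling for active-low, rising for active-high), or the
+	 * matching level trigger when GPIOKPF_LEVEL_TRIGGERED_IRQ is set.
+	 */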
+ switch (mi->flags & (GPIOKPF_ACTIVE_HIGH|GPIOKPF_LEVEL_TRIGGERED_IRQ)) {
+ default:
+ request_flags = IRQF_TRIGGER_FALLING;
+ break;
+ case GPIOKPF_ACTIVE_HIGH:
+ request_flags = IRQF_TRIGGER_RISING;
+ break;
+ case GPIOKPF_LEVEL_TRIGGERED_IRQ:
+ request_flags = IRQF_TRIGGER_LOW;
+ break;
+ case GPIOKPF_LEVEL_TRIGGERED_IRQ | GPIOKPF_ACTIVE_HIGH:
+ request_flags = IRQF_TRIGGER_HIGH;
+ break;
+ }
+
+ for (i = 0; i < mi->ninputs; i++) {
+ err = irq = gpio_to_irq(mi->input_gpios[i]);
+ if (err < 0)
+ goto err_gpio_get_irq_num_failed;
+ err = request_irq(irq, gpio_keypad_irq_handler, request_flags,
+ "gpio_kp", kp);
+ if (err) {
+ pr_err("gpiomatrix: request_irq failed for input %d, "
+ "irq %d\n", mi->input_gpios[i], irq);
+ goto err_request_irq_failed;
+ }
+ err = enable_irq_wake(irq);
+ if (err) {
+ pr_err("gpiomatrix: set_irq_wake failed for input %d, "
+ "irq %d\n", mi->input_gpios[i], irq);
+ }
+ disable_irq(irq);
+ if (kp->disabled_irq) {
+ kp->disabled_irq = 0;
+ enable_irq(irq);
+ }
+ }
+ return 0;
+
+ for (i = mi->noutputs - 1; i >= 0; i--) {
+ free_irq(gpio_to_irq(mi->input_gpios[i]), kp);
+err_request_irq_failed:
+err_gpio_get_irq_num_failed:
+ ;
+ }
+ return err;
+}
+
+int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func)
+{
+ int i;
+ int err;
+ int key_count;
+ struct gpio_kp *kp;
+ struct gpio_event_matrix_info *mi;
+
+ mi = container_of(info, struct gpio_event_matrix_info, info);
+ if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME) {
+ /* TODO: disable scanning */
+ return 0;
+ }
+
+ if (func == GPIO_EVENT_FUNC_INIT) {
+ if (mi->keymap == NULL ||
+ mi->input_gpios == NULL ||
+ mi->output_gpios == NULL) {
+ err = -ENODEV;
+ pr_err("gpiomatrix: Incomplete pdata\n");
+ goto err_invalid_platform_data;
+ }
+ key_count = mi->ninputs * mi->noutputs;
+
+ *data = kp = kzalloc(sizeof(*kp) + sizeof(kp->keys_pressed[0]) *
+ BITS_TO_LONGS(key_count), GFP_KERNEL);
+ if (kp == NULL) {
+ err = -ENOMEM;
+ pr_err("gpiomatrix: Failed to allocate private data\n");
+ goto err_kp_alloc_failed;
+ }
+ kp->input_devs = input_devs;
+ kp->keypad_info = mi;
+ for (i = 0; i < key_count; i++) {
+ unsigned short keyentry = mi->keymap[i];
+ unsigned short keycode = keyentry & MATRIX_KEY_MASK;
+ unsigned short dev = keyentry >> MATRIX_CODE_BITS;
+ if (dev >= input_devs->count) {
+ pr_err("gpiomatrix: bad device index %d >= "
+ "%d for key code %d\n",
+ dev, input_devs->count, keycode);
+ err = -EINVAL;
+ goto err_bad_keymap;
+ }
+ if (keycode && keycode <= KEY_MAX)
+ input_set_capability(input_devs->dev[dev],
+ EV_KEY, keycode);
+ }
+
+ for (i = 0; i < mi->noutputs; i++) {
+ err = gpio_request(mi->output_gpios[i], "gpio_kp_out");
+ if (err) {
+ pr_err("gpiomatrix: gpio_request failed for "
+ "output %d\n", mi->output_gpios[i]);
+ goto err_request_output_gpio_failed;
+ }
+ if (gpio_cansleep(mi->output_gpios[i])) {
+ pr_err("gpiomatrix: unsupported output gpio %d,"
+ " can sleep\n", mi->output_gpios[i]);
+ err = -EINVAL;
+ goto err_output_gpio_configure_failed;
+ }
+ if (mi->flags & GPIOKPF_DRIVE_INACTIVE)
+ err = gpio_direction_output(mi->output_gpios[i],
+ !(mi->flags & GPIOKPF_ACTIVE_HIGH));
+ else
+ err = gpio_direction_input(mi->output_gpios[i]);
+ if (err) {
+ pr_err("gpiomatrix: gpio_configure failed for "
+ "output %d\n", mi->output_gpios[i]);
+ goto err_output_gpio_configure_failed;
+ }
+ }
+ for (i = 0; i < mi->ninputs; i++) {
+ err = gpio_request(mi->input_gpios[i], "gpio_kp_in");
+ if (err) {
+ pr_err("gpiomatrix: gpio_request failed for "
+ "input %d\n", mi->input_gpios[i]);
+ goto err_request_input_gpio_failed;
+ }
+ err = gpio_direction_input(mi->input_gpios[i]);
+ if (err) {
+ pr_err("gpiomatrix: gpio_direction_input failed"
+ " for input %d\n", mi->input_gpios[i]);
+ goto err_gpio_direction_input_failed;
+ }
+ }
+ kp->current_output = mi->noutputs;
+ kp->key_state_changed = 1;
+
+ hrtimer_init(&kp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kp->timer.function = gpio_keypad_timer_func;
+ wake_lock_init(&kp->wake_lock, WAKE_LOCK_SUSPEND, "gpio_kp");
+ err = gpio_keypad_request_irqs(kp);
+ kp->use_irq = err == 0;
+
+ pr_info("GPIO Matrix Keypad Driver: Start keypad matrix for "
+ "%s%s in %s mode\n", input_devs->dev[0]->name,
+ (input_devs->count > 1) ? "..." : "",
+ kp->use_irq ? "interrupt" : "polling");
+
+ if (kp->use_irq)
+ wake_lock(&kp->wake_lock);
+ hrtimer_start(&kp->timer, ktime_set(0, 0), HRTIMER_MODE_REL);
+
+ return 0;
+ }
+
+ err = 0;
+ kp = *data;
+
+ if (kp->use_irq)
+ for (i = mi->noutputs - 1; i >= 0; i--)
+ free_irq(gpio_to_irq(mi->input_gpios[i]), kp);
+
+ hrtimer_cancel(&kp->timer);
+ wake_lock_destroy(&kp->wake_lock);
+ for (i = mi->noutputs - 1; i >= 0; i--) {
+err_gpio_direction_input_failed:
+ gpio_free(mi->input_gpios[i]);
+err_request_input_gpio_failed:
+ ;
+ }
+ for (i = mi->noutputs - 1; i >= 0; i--) {
+err_output_gpio_configure_failed:
+ gpio_free(mi->output_gpios[i]);
+err_request_output_gpio_failed:
+ ;
+ }
+err_bad_keymap:
+ kfree(kp);
+err_kp_alloc_failed:
+err_invalid_platform_data:
+ return err;
+}
diff --git a/drivers/input/misc/gpio_output.c b/drivers/input/misc/gpio_output.c
new file mode 100644
index 000000000000..2aac2fad0a17
--- /dev/null
+++ b/drivers/input/misc/gpio_output.c
@@ -0,0 +1,97 @@
+/* drivers/input/misc/gpio_output.c
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_event.h>
+
+int gpio_event_output_event(
+ struct gpio_event_input_devs *input_devs, struct gpio_event_info *info,
+ void **data, unsigned int dev, unsigned int type,
+ unsigned int code, int value)
+{
+ int i;
+ struct gpio_event_output_info *oi;
+ oi = container_of(info, struct gpio_event_output_info, info);
+ if (type != oi->type)
+ return 0;
+ if (!(oi->flags & GPIOEDF_ACTIVE_HIGH))
+ value = !value;
+ for (i = 0; i < oi->keymap_size; i++)
+ if (dev == oi->keymap[i].dev && code == oi->keymap[i].code)
+ gpio_set_value(oi->keymap[i].gpio, value);
+ return 0;
+}
+
+int gpio_event_output_func(
+ struct gpio_event_input_devs *input_devs, struct gpio_event_info *info,
+ void **data, int func)
+{
+ int ret;
+ int i;
+ struct gpio_event_output_info *oi;
+ oi = container_of(info, struct gpio_event_output_info, info);
+
+ if (func == GPIO_EVENT_FUNC_SUSPEND || func == GPIO_EVENT_FUNC_RESUME)
+ return 0;
+
+ if (func == GPIO_EVENT_FUNC_INIT) {
+ int output_level = !(oi->flags & GPIOEDF_ACTIVE_HIGH);
+
+ for (i = 0; i < oi->keymap_size; i++) {
+ int dev = oi->keymap[i].dev;
+ if (dev >= input_devs->count) {
+ pr_err("gpio_event_output_func: bad device "
+ "index %d >= %d for key code %d\n",
+ dev, input_devs->count,
+ oi->keymap[i].code);
+ ret = -EINVAL;
+ goto err_bad_keymap;
+ }
+ input_set_capability(input_devs->dev[dev], oi->type,
+ oi->keymap[i].code);
+ }
+
+ for (i = 0; i < oi->keymap_size; i++) {
+ ret = gpio_request(oi->keymap[i].gpio,
+ "gpio_event_output");
+ if (ret) {
+ pr_err("gpio_event_output_func: gpio_request "
+ "failed for %d\n", oi->keymap[i].gpio);
+ goto err_gpio_request_failed;
+ }
+ ret = gpio_direction_output(oi->keymap[i].gpio,
+ output_level);
+ if (ret) {
+ pr_err("gpio_event_output_func: "
+ "gpio_direction_output failed for %d\n",
+ oi->keymap[i].gpio);
+ goto err_gpio_direction_output_failed;
+ }
+ }
+ return 0;
+ }
+
+ ret = 0;
+ for (i = oi->keymap_size - 1; i >= 0; i--) {
+err_gpio_direction_output_failed:
+ gpio_free(oi->keymap[i].gpio);
+err_gpio_request_failed:
+ ;
+ }
+err_bad_keymap:
+ return ret;
+}
+
diff --git a/drivers/input/misc/keychord.c b/drivers/input/misc/keychord.c
new file mode 100644
index 000000000000..a5ea27ad0e16
--- /dev/null
+++ b/drivers/input/misc/keychord.c
@@ -0,0 +1,391 @@
+/*
+ * drivers/input/misc/keychord.c
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/keychord.h>
+#include <linux/sched.h>
+
+#define KEYCHORD_NAME "keychord"
+#define BUFFER_SIZE 16
+
+MODULE_AUTHOR("Mike Lockwood <lockwood@android.com>");
+MODULE_DESCRIPTION("Key chord input driver");
+MODULE_SUPPORTED_DEVICE("keychord");
+MODULE_LICENSE("GPL");
+
+#define NEXT_KEYCHORD(kc) ((struct input_keychord *) \
+ ((char *)kc + sizeof(struct input_keychord) + \
+ kc->count * sizeof(kc->keycodes[0])))
+
+struct keychord_device {
+ struct input_handler input_handler;
+ int registered;
+
+ /* list of keychords to monitor */
+ struct input_keychord *keychords;
+ int keychord_count;
+
+ /* bitmask of keys contained in our keychords */
+ unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+ /* current state of the keys */
+ unsigned long keystate[BITS_TO_LONGS(KEY_CNT)];
+ /* number of keys that are currently pressed */
+ int key_down;
+
+ /* second input_device_id is needed for null termination */
+ struct input_device_id device_ids[2];
+
+ spinlock_t lock;
+ wait_queue_head_t waitq;
+ unsigned char head;
+ unsigned char tail;
+ __u16 buff[BUFFER_SIZE];
+};
+
+static int check_keychord(struct keychord_device *kdev,
+ struct input_keychord *keychord)
+{
+ int i;
+
+ if (keychord->count != kdev->key_down)
+ return 0;
+
+ for (i = 0; i < keychord->count; i++) {
+ if (!test_bit(keychord->keycodes[i], kdev->keystate))
+ return 0;
+ }
+
+ /* we have a match */
+ return 1;
+}
+
+static void keychord_event(struct input_handle *handle, unsigned int type,
+ unsigned int code, int value)
+{
+ struct keychord_device *kdev = handle->private;
+ struct input_keychord *keychord;
+ unsigned long flags;
+ int i, got_chord = 0;
+
+ if (type != EV_KEY || code >= KEY_MAX)
+ return;
+
+ spin_lock_irqsave(&kdev->lock, flags);
+ /* do nothing if key state did not change */
+ if (!test_bit(code, kdev->keystate) == !value)
+ goto done;
+ __change_bit(code, kdev->keystate);
+ if (value)
+ kdev->key_down++;
+ else
+ kdev->key_down--;
+
+ /* don't notify on key up */
+ if (!value)
+ goto done;
+ /* ignore this event if it is not one of the keys we are monitoring */
+ if (!test_bit(code, kdev->keybit))
+ goto done;
+
+ keychord = kdev->keychords;
+ if (!keychord)
+ goto done;
+
+ /* check to see if the keyboard state matches any keychords */
+ for (i = 0; i < kdev->keychord_count; i++) {
+ if (check_keychord(kdev, keychord)) {
+ kdev->buff[kdev->head] = keychord->id;
+ kdev->head = (kdev->head + 1) % BUFFER_SIZE;
+ got_chord = 1;
+ break;
+ }
+ /* skip to next keychord */
+ keychord = NEXT_KEYCHORD(keychord);
+ }
+
+done:
+ spin_unlock_irqrestore(&kdev->lock, flags);
+
+ if (got_chord) {
+ pr_info("keychord: got keychord id %d. Any tasks: %d\n",
+ keychord->id,
+ !list_empty_careful(&kdev->waitq.task_list));
+ wake_up_interruptible(&kdev->waitq);
+ }
+}
+
+static int keychord_connect(struct input_handler *handler,
+ struct input_dev *dev,
+ const struct input_device_id *id)
+{
+ int i, ret;
+ struct input_handle *handle;
+ struct keychord_device *kdev =
+ container_of(handler, struct keychord_device, input_handler);
+
+ /*
+ * ignore this input device if it does not contain any keycodes
+ * that we are monitoring
+ */
+ for (i = 0; i < KEY_MAX; i++) {
+ if (test_bit(i, kdev->keybit) && test_bit(i, dev->keybit))
+ break;
+ }
+ if (i == KEY_MAX)
+ return -ENODEV;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->dev = dev;
+ handle->handler = handler;
+ handle->name = KEYCHORD_NAME;
+ handle->private = kdev;
+
+ ret = input_register_handle(handle);
+ if (ret)
+ goto err_input_register_handle;
+
+ ret = input_open_device(handle);
+ if (ret)
+ goto err_input_open_device;
+
+ pr_info("keychord: using input dev %s for fevent\n", dev->name);
+
+ return 0;
+
+err_input_open_device:
+ input_unregister_handle(handle);
+err_input_register_handle:
+ kfree(handle);
+ return ret;
+}
+
+static void keychord_disconnect(struct input_handle *handle)
+{
+ input_close_device(handle);
+ input_unregister_handle(handle);
+ kfree(handle);
+}
+
+/*
+ * keychord_read is used to read keychord events from the driver
+ */
+static ssize_t keychord_read(struct file *file, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct keychord_device *kdev = file->private_data;
+ __u16 id;
+ int retval;
+ unsigned long flags;
+
+ if (count < sizeof(id))
+ return -EINVAL;
+ count = sizeof(id);
+
+ if (kdev->head == kdev->tail && (file->f_flags & O_NONBLOCK))
+ return -EAGAIN;
+
+ retval = wait_event_interruptible(kdev->waitq,
+ kdev->head != kdev->tail);
+ if (retval)
+ return retval;
+
+ spin_lock_irqsave(&kdev->lock, flags);
+ /* pop a keychord ID off the queue */
+ id = kdev->buff[kdev->tail];
+ kdev->tail = (kdev->tail + 1) % BUFFER_SIZE;
+ spin_unlock_irqrestore(&kdev->lock, flags);
+
+ if (copy_to_user(buffer, &id, count))
+ return -EFAULT;
+
+ return count;
+}
+
+/*
+ * keychord_write is used to configure the driver
+ */
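+/*
+ * The buffer written from userspace is a packed list of struct
+ * input_keychord entries, each immediately followed by its 'count'
+ * keycodes (see NEXT_KEYCHORD above).  A rough userspace sketch for a
+ * single two-key chord, assuming the caller picks id 1:
+ *
+ *	struct input_keychord *kc = buf;
+ *	kc->version = KEYCHORD_VERSION;
+ *	kc->id = 1;
+ *	kc->count = 2;
+ *	kc->keycodes[0] = KEY_VOLUMEDOWN;
+ *	kc->keycodes[1] = KEY_POWER;
+ *	write(fd, buf, sizeof(*kc) + 2 * sizeof(kc->keycodes[0]));
+ */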
+static ssize_t keychord_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct keychord_device *kdev = file->private_data;
+ struct input_keychord *keychords = 0;
+ struct input_keychord *keychord, *next, *end;
+ int ret, i, key;
+ unsigned long flags;
+
+ if (count < sizeof(struct input_keychord))
+ return -EINVAL;
+ keychords = kzalloc(count, GFP_KERNEL);
+ if (!keychords)
+ return -ENOMEM;
+
+ /* read list of keychords from userspace */
+ if (copy_from_user(keychords, buffer, count)) {
+ kfree(keychords);
+ return -EFAULT;
+ }
+
+ /* unregister handler before changing configuration */
+ if (kdev->registered) {
+ input_unregister_handler(&kdev->input_handler);
+ kdev->registered = 0;
+ }
+
+ spin_lock_irqsave(&kdev->lock, flags);
+ /* clear any existing configuration */
+ kfree(kdev->keychords);
+ kdev->keychords = 0;
+ kdev->keychord_count = 0;
+ kdev->key_down = 0;
+ memset(kdev->keybit, 0, sizeof(kdev->keybit));
+ memset(kdev->keystate, 0, sizeof(kdev->keystate));
+ kdev->head = kdev->tail = 0;
+
+ keychord = keychords;
+ end = (struct input_keychord *)((char *)keychord + count);
+
+ while (keychord < end) {
+ next = NEXT_KEYCHORD(keychord);
+ if (keychord->count <= 0 || next > end) {
+ pr_err("keychord: invalid keycode count %d\n",
+ keychord->count);
+ goto err_unlock_return;
+ }
+ if (keychord->version != KEYCHORD_VERSION) {
+ pr_err("keychord: unsupported version %d\n",
+ keychord->version);
+ goto err_unlock_return;
+ }
+
+ /* keep track of the keys we are monitoring in keybit */
+ for (i = 0; i < keychord->count; i++) {
+ key = keychord->keycodes[i];
+ if (key < 0 || key >= KEY_CNT) {
+ pr_err("keychord: keycode %d out of range\n",
+ key);
+ goto err_unlock_return;
+ }
+ __set_bit(key, kdev->keybit);
+ }
+
+ kdev->keychord_count++;
+ keychord = next;
+ }
+
+ kdev->keychords = keychords;
+ spin_unlock_irqrestore(&kdev->lock, flags);
+
+ ret = input_register_handler(&kdev->input_handler);
+ if (ret) {
+ kfree(keychords);
+ kdev->keychords = 0;
+ return ret;
+ }
+ kdev->registered = 1;
+
+ return count;
+
+err_unlock_return:
+ spin_unlock_irqrestore(&kdev->lock, flags);
+ kfree(keychords);
+ return -EINVAL;
+}
+
+static unsigned int keychord_poll(struct file *file, poll_table *wait)
+{
+ struct keychord_device *kdev = file->private_data;
+
+ poll_wait(file, &kdev->waitq, wait);
+
+ if (kdev->head != kdev->tail)
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static int keychord_open(struct inode *inode, struct file *file)
+{
+ struct keychord_device *kdev;
+
+ kdev = kzalloc(sizeof(struct keychord_device), GFP_KERNEL);
+ if (!kdev)
+ return -ENOMEM;
+
+ spin_lock_init(&kdev->lock);
+ init_waitqueue_head(&kdev->waitq);
+
+ kdev->input_handler.event = keychord_event;
+ kdev->input_handler.connect = keychord_connect;
+ kdev->input_handler.disconnect = keychord_disconnect;
+ kdev->input_handler.name = KEYCHORD_NAME;
+ kdev->input_handler.id_table = kdev->device_ids;
+
+ kdev->device_ids[0].flags = INPUT_DEVICE_ID_MATCH_EVBIT;
+ __set_bit(EV_KEY, kdev->device_ids[0].evbit);
+
+ file->private_data = kdev;
+
+ return 0;
+}
+
+static int keychord_release(struct inode *inode, struct file *file)
+{
+ struct keychord_device *kdev = file->private_data;
+
+ if (kdev->registered)
+ input_unregister_handler(&kdev->input_handler);
+ kfree(kdev);
+
+ return 0;
+}
+
+static const struct file_operations keychord_fops = {
+ .owner = THIS_MODULE,
+ .open = keychord_open,
+ .release = keychord_release,
+ .read = keychord_read,
+ .write = keychord_write,
+ .poll = keychord_poll,
+};
+
+static struct miscdevice keychord_misc = {
+ .fops = &keychord_fops,
+ .name = KEYCHORD_NAME,
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+static int __init keychord_init(void)
+{
+ return misc_register(&keychord_misc);
+}
+
+static void __exit keychord_exit(void)
+{
+ misc_deregister(&keychord_misc);
+}
+
+module_init(keychord_init);
+module_exit(keychord_exit);
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 9545c9f03809..ecd0d07eb303 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -6,6 +6,34 @@ menuconfig MAILBOX
signals. Say Y if your platform supports hardware mailboxes.
if MAILBOX
+config ARM_MHU_MBOX
+ tristate "ARM Message Handling Unit (MHU) Mailbox"
+ help
+	  This driver provides support for inter-processor communication
+	  (IPC) between two processors in a SoC, for example between a
+	  System Control Processor (SCP) built around a Cortex-M3 and the
+	  Application Processors (AP) on some ARM platforms.  The SCP
+	  controls most of the power management on the AP.
+
+config ARM_SCPI_PROTOCOL
+ tristate "ARM System Control and Power Interface (SCPI) Message Protocol"
+ depends on ARM_MHU_MBOX
+ help
+ System Control and Power Interface (SCPI) Message Protocol is
+ defined for the purpose of communication between the Application
+	  Cores (AP) and the System Control Processor (SCP). The MHU peripheral
+ provides a mechanism for inter-processor communication between SCP
+ and AP.
+
+	  SCP controls most of the power management on the Application
+ Processors. It offers control and management of: the core/cluster
+ power states, various power domain DVFS including the core/cluster,
+ certain system clocks configuration, thermal sensors and many
+ others.
+
+	  This protocol library provides an interface for all the client drivers
+ making use of the features offered by the SCP.
+
config PL320_MBOX
bool "ARM PL320 Mailbox"
depends on ARM_AMBA
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index fefef7ebcbec..22e948479254 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -2,4 +2,6 @@
obj-$(CONFIG_MAILBOX) += mailbox.o
+obj-$(CONFIG_ARM_MHU_MBOX) += arm_mhu.o
+obj-$(CONFIG_ARM_SCPI_PROTOCOL) += scpi_protocol.o
obj-$(CONFIG_PL320_MBOX) += pl320-ipc.o
diff --git a/drivers/mailbox/arm_mhu.c b/drivers/mailbox/arm_mhu.c
new file mode 100644
index 000000000000..a2d368201a4c
--- /dev/null
+++ b/drivers/mailbox/arm_mhu.c
@@ -0,0 +1,272 @@
+/*
+ * Driver for the Message Handling Unit (MHU) which is the peripheral
+ * in any SoC providing a mechanism for inter-processor communication
+ * between two processors, for example a System Control Processor (SCP)
+ * built around a Cortex-M3 and the Application Processors (AP). The SCP
+ * controls most of the power management on the AP.
+ *
+ * The MHU peripheral provides a mechanism to assert interrupt signals
+ * to facilitate inter-processor message passing between the SCP and the
+ * AP. The message payload can be deposited into main memory or on-chip
+ * memories and MHU expects the payload to be ready before asserting the
+ * signals. The payload is handled by protocol drivers and is out of
+ * scope of this controller driver.
+ *
+ * The MHU supports three bi-directional channels - low priority, high
+ * priority and secure (which can't be used in non-secure execution modes).
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * Author: Sudeep Holla <sudeep.holla@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define CONTROLLER_NAME "mhu_ctlr"
+#define CHANNEL_MAX 2
+
+/*
+ * +--------------------+-------+---------------+
+ * | Hardware Register | Offset| Driver View |
+ * +--------------------+-------+---------------+
+ * | SCP_INTR_L_STAT | 0x000 | RX_STATUS(L) |
+ * | SCP_INTR_L_SET | 0x008 | RX_SET(L) |
+ * | SCP_INTR_L_CLEAR | 0x010 | RX_CLEAR(L) |
+ * +--------------------+-------+---------------+
+ * | SCP_INTR_H_STAT | 0x020 | RX_STATUS(H) |
+ * | SCP_INTR_H_SET | 0x028 | RX_SET(H) |
+ * | SCP_INTR_H_CLEAR | 0x030 | RX_CLEAR(H) |
+ * +--------------------+-------+---------------+
+ * | CPU_INTR_L_STAT | 0x100 | TX_STATUS(L) |
+ * | CPU_INTR_L_SET | 0x108 | TX_SET(L) |
+ * | CPU_INTR_L_CLEAR | 0x110 | TX_CLEAR(L) |
+ * +--------------------+-------+---------------+
+ * | CPU_INTR_H_STAT | 0x120 | TX_STATUS(H) |
+ * | CPU_INTR_H_SET | 0x128 | TX_SET(H) |
+ * | CPU_INTR_H_CLEAR | 0x130 | TX_CLEAR(H) |
+ * +--------------------+-------+---------------+
+*/
+#define RX_OFFSET(chan) ((chan) * 0x20)
+#define TX_OFFSET(chan) (0x100 + (chan) * 0x20)
+
+#define REG_STATUS 0x00
+#define REG_SET 0x08
+#define REG_CLEAR 0x10
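+
+/*
+ * Example: for the high-priority channel (index 1), RX_OFFSET(1) is 0x20
+ * and TX_OFFSET(1) is 0x120, so rx_offset + REG_STATUS reads
+ * SCP_INTR_H_STAT and tx_offset + REG_SET writes CPU_INTR_H_SET in the
+ * table above.
+ */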
+
+struct mhu_chan {
+ void __iomem *tx_offset;
+ void __iomem *rx_offset;
+ int rx_irq;
+};
+
+struct mhu_ctlr {
+ void __iomem *mbox_base;
+ struct mbox_controller mbox_con;
+};
+
+static irqreturn_t mbox_rx_handler(int irq, void *p)
+{
+ struct mbox_chan *mchan = p;
+ struct mhu_chan *chan = mchan->con_priv;
+ u32 rx_status;
+
+ rx_status = readl_relaxed(chan->rx_offset + REG_STATUS);
+ if (rx_status) {
+ mbox_chan_received_data(mchan, &rx_status);
+ writel_relaxed(rx_status, chan->rx_offset + REG_CLEAR);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mhu_send_data(struct mbox_chan *mchan, void *msg)
+{
+ struct mhu_chan *chan = mchan->con_priv;
+ u32 cmd = *(u32 *)msg;
+
+ writel_relaxed(cmd, chan->tx_offset + REG_SET);
+ return 0;
+}
+
+static bool mhu_last_tx_done(struct mbox_chan *mchan)
+{
+ struct mhu_chan *chan = mchan->con_priv;
+
+ return readl_relaxed(chan->tx_offset + REG_STATUS) == 0;
+}
+
+static int mhu_startup(struct mbox_chan *mchan)
+{
+ return mhu_last_tx_done(mchan) ? 0 : -EBUSY;
+}
+
+static void mhu_shutdown(struct mbox_chan *mchan)
+{
+}
+
+static struct mbox_chan_ops mhu_ops = {
+ .send_data = mhu_send_data,
+ .startup = mhu_startup,
+ .shutdown = mhu_shutdown,
+ .last_tx_done = mhu_last_tx_done,
+};
+
+static int mhu_probe(struct platform_device *pdev)
+{
+ int idx, ret;
+ struct mhu_ctlr *ctlr;
+ struct mbox_chan *mbox_chans;
+ struct resource *res;
+ struct device *dev = &pdev->dev;
+ const char *const rx_irq_names[] = {
+ "mhu_lpri_rx",
+ "mhu_hpri_rx",
+ };
+
+ ctlr = devm_kzalloc(dev, sizeof(*ctlr), GFP_KERNEL);
+ if (!ctlr) {
+ dev_err(dev, "failed to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(dev, "failed to get mailbox memory resource\n");
+ return -ENXIO;
+ }
+
+ ctlr->mbox_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(ctlr->mbox_base)) {
+		dev_err(dev, "failed to request or ioremap mailbox control\n");
+		return PTR_ERR(ctlr->mbox_base);
+	}
+
+ mbox_chans = devm_kcalloc(dev, CHANNEL_MAX, sizeof(*mbox_chans),
+ GFP_KERNEL);
+ if (!mbox_chans) {
+ dev_err(dev, "failed to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ platform_set_drvdata(pdev, ctlr);
+
+ ctlr->mbox_con.dev = dev;
+ ctlr->mbox_con.ops = &mhu_ops;
+ ctlr->mbox_con.num_chans = CHANNEL_MAX;
+ ctlr->mbox_con.txdone_irq = false;
+ ctlr->mbox_con.txdone_poll = true;
+ ctlr->mbox_con.txpoll_period = 1;
+ ctlr->mbox_con.chans = mbox_chans;
+
+ for (idx = 0; idx < CHANNEL_MAX; idx++, mbox_chans++) {
+ struct mhu_chan *chan;
+
+ chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL);
+ if (!chan) {
+ dev_err(dev, "failed to allocate memory\n");
+ return -ENOMEM;
+ }
+
+ chan->tx_offset = ctlr->mbox_base + TX_OFFSET(idx);
+ chan->rx_offset = ctlr->mbox_base + RX_OFFSET(idx);
+
+ chan->rx_irq = platform_get_irq_byname(pdev, rx_irq_names[idx]);
+ if (chan->rx_irq < 0) {
+ dev_err(dev, "failed to get interrupt for %s\n",
+ rx_irq_names[idx]);
+ return -ENXIO;
+ }
+
+ ret = devm_request_threaded_irq(dev, chan->rx_irq, NULL,
+ mbox_rx_handler, IRQF_ONESHOT,
+ rx_irq_names[idx], mbox_chans);
+ if (ret) {
+ dev_err(dev, "failed to register '%s' irq\n",
+ rx_irq_names[idx]);
+ return ret;
+ }
+
+ mbox_chans->con_priv = chan;
+ }
+
+ if (mbox_controller_register(&ctlr->mbox_con)) {
+ dev_err(dev, "failed to register mailbox controller\n");
+ return -ENOMEM;
+ }
+ _dev_info(dev, "registered mailbox controller %s\n", CONTROLLER_NAME);
+ return 0;
+}
+
+static int mhu_remove(struct platform_device *pdev)
+{
+ int idx;
+ struct device *dev = &pdev->dev;
+ struct mhu_ctlr *ctlr = platform_get_drvdata(pdev);
+ struct mbox_chan *mbox_chans = ctlr->mbox_con.chans;
+
+ for (idx = 0; idx < CHANNEL_MAX; idx++, mbox_chans++) {
+ struct mhu_chan *chan = mbox_chans->con_priv;
+
+ devm_free_irq(dev, chan->rx_irq, mbox_chans);
+ devm_kfree(dev, chan);
+ }
+
+ mbox_controller_unregister(&ctlr->mbox_con);
+ _dev_info(dev, "unregistered mailbox controller %s\n",
+ CONTROLLER_NAME);
+ devm_kfree(dev, ctlr->mbox_con.chans);
+
+ devm_iounmap(dev, ctlr->mbox_base);
+
+ platform_set_drvdata(pdev, NULL);
+ devm_kfree(dev, ctlr);
+ return 0;
+}
+
+static const struct of_device_id mhu_of_match[] = {
+ {.compatible = "arm,mhu"},
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, mhu_of_match);
+
+static struct platform_driver mhu_driver = {
+ .probe = mhu_probe,
+ .remove = mhu_remove,
+ .driver = {
+ .name = CONTROLLER_NAME,
+ .of_match_table = mhu_of_match,
+ },
+};
+module_platform_driver(mhu_driver);
+
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+MODULE_DESCRIPTION("ARM MHU mailbox driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index afcb430508ec..4b4044f47b45 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -76,6 +76,8 @@ static void msg_submit(struct mbox_chan *chan)
data = chan->msg_data[idx];
+ if (chan->cl->tx_prepare)
+ chan->cl->tx_prepare(chan->cl, data);
/* Try to submit a message to the MBOX controller */
err = chan->mbox->ops->send_data(chan, data);
if (!err) {
diff --git a/drivers/mailbox/scpi_protocol.c b/drivers/mailbox/scpi_protocol.c
new file mode 100644
index 000000000000..dde7c274d093
--- /dev/null
+++ b/drivers/mailbox/scpi_protocol.c
@@ -0,0 +1,795 @@
+/*
+ * System Control and Power Interface (SCPI) Message Protocol driver
+ *
+ * SCPI Message Protocol is used between the System Control Processor (SCP)
+ * and the Application Processors (AP). The Message Handling Unit (MHU)
+ * provides a mechanism for inter-processor communication between SCP's
+ * Cortex M3 and AP.
+ *
+ * SCP offers control and management of the core/cluster power states,
+ * various power domain DVFS including the core/cluster, certain system
+ * clocks configuration, thermal sensors and many others.
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/printk.h>
+#include <linux/scpi_protocol.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define CMD_ID_SHIFT 0
+#define CMD_ID_MASK 0x7f
+#define CMD_TOKEN_ID_SHIFT 8
+#define CMD_TOKEN_ID_MASK 0xff
+#define CMD_DATA_SIZE_SHIFT 16
+#define CMD_DATA_SIZE_MASK 0x1ff
+#define PACK_SCPI_CMD(cmd_id, token, tx_sz) \
+ ((((cmd_id) & CMD_ID_MASK) << CMD_ID_SHIFT) | \
+ (((token) & CMD_TOKEN_ID_MASK) << CMD_TOKEN_ID_SHIFT) | \
+ (((tx_sz) & CMD_DATA_SIZE_MASK) << CMD_DATA_SIZE_SHIFT))
+
+#define CMD_SIZE(cmd) (((cmd) >> CMD_DATA_SIZE_SHIFT) & CMD_DATA_SIZE_MASK)
+#define CMD_UNIQ_MASK (CMD_TOKEN_ID_MASK << CMD_TOKEN_ID_SHIFT | CMD_ID_MASK)
+#define CMD_XTRACT_UNIQ(cmd) ((cmd) & CMD_UNIQ_MASK)
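+
+/*
+ * Worked example: PACK_SCPI_CMD(SCPI_CMD_GET_CLOCK_VALUE, 0x2a, 4) packs
+ * command id 0x10, token 0x2a and a 4-byte payload length into 0x00042a10;
+ * CMD_SIZE() recovers 4 and CMD_XTRACT_UNIQ() recovers 0x2a10, which is
+ * what scpi_process_cmd() matches against pending transfers.
+ */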
+
+#define SCPI_SLOT 0
+
+#define MAX_DVFS_DOMAINS 3
+#define MAX_DVFS_OPPS 8
+#define DVFS_LATENCY(hdr) (le32_to_cpu(hdr) >> 16)
+#define DVFS_OPP_COUNT(hdr) ((le32_to_cpu(hdr) >> 8) & 0xff)
+
+#define PROTOCOL_REV_MINOR_BITS 16
+#define PROTOCOL_REV_MINOR_MASK ((1U << PROTOCOL_REV_MINOR_BITS) - 1)
+#define PROTOCOL_REV_MAJOR(x) ((x) >> PROTOCOL_REV_MINOR_BITS)
+#define PROTOCOL_REV_MINOR(x) ((x) & PROTOCOL_REV_MINOR_MASK)
+
+#define FW_REV_MAJOR_BITS 24
+#define FW_REV_MINOR_BITS 16
+#define FW_REV_PATCH_MASK ((1U << FW_REV_MINOR_BITS) - 1)
+#define FW_REV_MINOR_MASK ((1U << FW_REV_MAJOR_BITS) - 1)
+#define FW_REV_MAJOR(x) ((x) >> FW_REV_MAJOR_BITS)
+#define FW_REV_MINOR(x) (((x) & FW_REV_MINOR_MASK) >> FW_REV_MINOR_BITS)
+#define FW_REV_PATCH(x) ((x) & FW_REV_PATCH_MASK)
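+
+/*
+ * Example: a firmware version word of 0x01020003 decodes as major 1
+ * (bits 31:24), minor 2 (bits 23:16) and patch 3 (bits 15:0).
+ */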
+
+#define MAX_RX_TIMEOUT (msecs_to_jiffies(30))
+
+enum scpi_error_codes {
+ SCPI_SUCCESS = 0, /* Success */
+ SCPI_ERR_PARAM = 1, /* Invalid parameter(s) */
+ SCPI_ERR_ALIGN = 2, /* Invalid alignment */
+ SCPI_ERR_SIZE = 3, /* Invalid size */
+ SCPI_ERR_HANDLER = 4, /* Invalid handler/callback */
+ SCPI_ERR_ACCESS = 5, /* Invalid access/permission denied */
+ SCPI_ERR_RANGE = 6, /* Value out of range */
+ SCPI_ERR_TIMEOUT = 7, /* Timeout has occurred */
+ SCPI_ERR_NOMEM = 8, /* Invalid memory area or pointer */
+ SCPI_ERR_PWRSTATE = 9, /* Invalid power state */
+ SCPI_ERR_SUPPORT = 10, /* Not supported or disabled */
+ SCPI_ERR_DEVICE = 11, /* Device error */
+ SCPI_ERR_MAX
+};
+
+enum scpi_std_cmd {
+ SCPI_CMD_INVALID = 0x00,
+ SCPI_CMD_SCPI_READY = 0x01,
+ SCPI_CMD_SCPI_CAPABILITIES = 0x02,
+ SCPI_CMD_SET_CSS_PWR_STATE = 0x03,
+ SCPI_CMD_GET_CSS_PWR_STATE = 0x04,
+ SCPI_CMD_SET_SYS_PWR_STATE = 0x05,
+ SCPI_CMD_SET_CPU_TIMER = 0x06,
+ SCPI_CMD_CANCEL_CPU_TIMER = 0x07,
+ SCPI_CMD_DVFS_CAPABILITIES = 0x08,
+ SCPI_CMD_GET_DVFS_INFO = 0x09,
+ SCPI_CMD_SET_DVFS = 0x0a,
+ SCPI_CMD_GET_DVFS = 0x0b,
+ SCPI_CMD_GET_DVFS_STAT = 0x0c,
+ SCPI_CMD_CLOCK_CAPABILITIES = 0x0d,
+ SCPI_CMD_GET_CLOCK_INFO = 0x0e,
+ SCPI_CMD_SET_CLOCK_VALUE = 0x0f,
+ SCPI_CMD_GET_CLOCK_VALUE = 0x10,
+ SCPI_CMD_PSU_CAPABILITIES = 0x11,
+ SCPI_CMD_GET_PSU_INFO = 0x12,
+ SCPI_CMD_SET_PSU = 0x13,
+ SCPI_CMD_GET_PSU = 0x14,
+ SCPI_CMD_SENSOR_CAPABILITIES = 0x15,
+ SCPI_CMD_SENSOR_INFO = 0x16,
+ SCPI_CMD_SENSOR_VALUE = 0x17,
+ SCPI_CMD_SENSOR_CFG_PERIODIC = 0x18,
+ SCPI_CMD_SENSOR_CFG_BOUNDS = 0x19,
+ SCPI_CMD_SENSOR_ASYNC_VALUE = 0x1a,
+ SCPI_CMD_SET_DEVICE_PWR_STATE = 0x1b,
+ SCPI_CMD_GET_DEVICE_PWR_STATE = 0x1c,
+ SCPI_CMD_COUNT
+};
+
+struct scpi_xfer {
+ u32 slot; /* has to be first element */
+ u32 cmd;
+ u32 status;
+ const void *tx_buf;
+ void *rx_buf;
+ unsigned int tx_len;
+ struct list_head node;
+ struct completion done;
+};
+
+struct scpi_chan {
+ struct mbox_client cl;
+ struct mbox_chan *chan;
+ void __iomem *tx_payload;
+ void __iomem *rx_payload;
+ struct list_head rx_pending;
+ struct list_head xfers_list;
+ struct scpi_xfer *xfers;
+ spinlock_t rx_lock; /* locking for the rx pending list */
+ struct mutex xfers_lock;
+ atomic_t token;
+};
+
+struct scpi_drvinfo {
+ u32 protocol_version;
+ u32 firmware_version;
+ int num_chans;
+ atomic_t next_chan;
+ struct scpi_ops *scpi_ops;
+ struct scpi_chan *channels;
+ struct scpi_dvfs_info *dvfs[MAX_DVFS_DOMAINS];
+};
+
+/*
+ * The SCP firmware only executes in little-endian mode, so any buffers
+ * shared through SCPI should have their contents converted to little-endian
+ */
+struct scpi_shared_mem {
+ __le32 command;
+ __le32 status;
+ u8 payload[0];
+} __packed;
+
+struct scp_capabilities {
+ __le32 protocol_version;
+ __le32 event_version;
+ __le32 platform_version;
+ __le32 commands[4];
+} __packed;
+
+struct clk_get_info {
+ __le16 id;
+ __le16 flags;
+ __le32 min_rate;
+ __le32 max_rate;
+ u8 name[20];
+} __packed;
+
+struct clk_get_value {
+ __le32 rate;
+} __packed;
+
+struct clk_set_value {
+ __le16 id;
+ __le16 reserved;
+ __le32 rate;
+} __packed;
+
+struct dvfs_info {
+ __le32 header;
+ struct {
+ __le32 freq;
+ __le32 m_volt;
+ } opps[MAX_DVFS_OPPS];
+} __packed;
+
+struct dvfs_get {
+ u8 index;
+} __packed;
+
+struct dvfs_set {
+ u8 domain;
+ u8 index;
+} __packed;
+
+struct sensor_capabilities {
+ __le16 sensors;
+} __packed;
+
+struct sensor_info {
+ __le16 id;
+ u8 class;
+ u8 triggers;
+ u8 name[20];
+} __packed;
+
+struct sensor_value {
+ __le32 value;
+} __packed;
+
+static struct scpi_drvinfo *scpi_info;
+
+static int scpi_linux_errmap[SCPI_ERR_MAX] = {
+ /* better than switch case as long as return value is continuous */
+ 0, /* SCPI_SUCCESS */
+ -EINVAL, /* SCPI_ERR_PARAM */
+ -ENOEXEC, /* SCPI_ERR_ALIGN */
+ -EMSGSIZE, /* SCPI_ERR_SIZE */
+ -EINVAL, /* SCPI_ERR_HANDLER */
+ -EACCES, /* SCPI_ERR_ACCESS */
+ -ERANGE, /* SCPI_ERR_RANGE */
+ -ETIMEDOUT, /* SCPI_ERR_TIMEOUT */
+ -ENOMEM, /* SCPI_ERR_NOMEM */
+ -EINVAL, /* SCPI_ERR_PWRSTATE */
+ -EOPNOTSUPP, /* SCPI_ERR_SUPPORT */
+ -EIO, /* SCPI_ERR_DEVICE */
+};
+
+static inline int scpi_to_linux_errno(int errno)
+{
+ if (errno >= SCPI_SUCCESS && errno < SCPI_ERR_MAX)
+ return scpi_linux_errmap[errno];
+ return -EIO;
+}
+
+/**
+ * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups
+ * @kobj: The kobject to create the group on
+ * @groups: The attribute groups to create, NULL terminated
+ *
+ * This function creates a bunch of attribute groups. If an error occurs when
+ * creating a group, all previously created groups will be removed, unwinding
+ * everything back to the original state when this function was called.
+ * It will explicitly warn and error if any of the attribute files being
+ * created already exist.
+ *
+ * Returns 0 on success or error code from sysfs_create_group on error.
+ */
+static int sysfs_create_groups(struct kobject *kobj,
+ const struct attribute_group **groups)
+{
+ int error = 0;
+ int i;
+
+ if (!groups)
+ return 0;
+
+ for (i = 0; groups[i]; i++) {
+ error = sysfs_create_group(kobj, groups[i]);
+ if (error) {
+ while (--i >= 0)
+ sysfs_remove_group(kobj, groups[i]);
+ break;
+ }
+ }
+ return error;
+}
+
+/**
+ * sysfs_remove_groups - remove a list of groups
+ *
+ * @kobj: The kobject for the groups to be removed from
+ * @groups: NULL terminated list of groups to be removed
+ *
+ * If @groups is not NULL, all groups will be removed from the kobject.
+ */
+static void sysfs_remove_groups(struct kobject *kobj,
+ const struct attribute_group **groups)
+{
+ int i;
+
+ if (!groups)
+ return;
+ for (i = 0; groups[i]; i++)
+ sysfs_remove_group(kobj, groups[i]);
+}
+
+static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd)
+{
+ unsigned long flags;
+ struct scpi_xfer *t, *match = NULL;
+
+ spin_lock_irqsave(&ch->rx_lock, flags);
+ if (list_empty(&ch->rx_pending)) {
+ spin_unlock_irqrestore(&ch->rx_lock, flags);
+ return;
+ }
+
+ list_for_each_entry(t, &ch->rx_pending, node)
+ if (CMD_XTRACT_UNIQ(t->cmd) == CMD_XTRACT_UNIQ(cmd)) {
+ list_del(&t->node);
+ match = t;
+ break;
+ }
+ /* check if wait_for_completion is in progress or timed-out */
+ if (match && !completion_done(&match->done)) {
+ struct scpi_shared_mem *mem = ch->rx_payload;
+
+ match->status = le32_to_cpu(mem->status);
+ memcpy_fromio(match->rx_buf, mem->payload, CMD_SIZE(cmd));
+ complete(&match->done);
+ }
+ spin_unlock_irqrestore(&ch->rx_lock, flags);
+}
+
+static void scpi_handle_remote_msg(struct mbox_client *c, void *msg)
+{
+ struct scpi_chan *ch = container_of(c, struct scpi_chan, cl);
+ struct scpi_shared_mem *mem = ch->rx_payload;
+ u32 cmd = le32_to_cpu(mem->command);
+
+ scpi_process_cmd(ch, cmd);
+}
+
+static void scpi_tx_prepare(struct mbox_client *c, void *msg)
+{
+ unsigned long flags;
+ struct scpi_xfer *t = msg;
+ struct scpi_chan *ch = container_of(c, struct scpi_chan, cl);
+ struct scpi_shared_mem *mem = (struct scpi_shared_mem *)ch->tx_payload;
+
+ mem->command = cpu_to_le32(t->cmd);
+ if (t->tx_buf)
+ memcpy_toio(mem->payload, t->tx_buf, t->tx_len);
+ if (t->rx_buf) {
+ spin_lock_irqsave(&ch->rx_lock, flags);
+ list_add_tail(&t->node, &ch->rx_pending);
+ spin_unlock_irqrestore(&ch->rx_lock, flags);
+ }
+}
+
+static struct scpi_xfer *get_scpi_xfer(struct scpi_chan *ch)
+{
+ struct scpi_xfer *t;
+
+ mutex_lock(&ch->xfers_lock);
+ if (list_empty(&ch->xfers_list)) {
+ mutex_unlock(&ch->xfers_lock);
+ return NULL;
+ }
+ t = list_first_entry(&ch->xfers_list, struct scpi_xfer, node);
+ list_del(&t->node);
+ mutex_unlock(&ch->xfers_lock);
+ return t;
+}
+
+static void put_scpi_xfer(struct scpi_xfer *t, struct scpi_chan *ch)
+{
+ mutex_lock(&ch->xfers_lock);
+ list_add_tail(&t->node, &ch->xfers_list);
+ mutex_unlock(&ch->xfers_lock);
+}
+
+static int
+scpi_send_message(u8 cmd, void *tx_buf, unsigned int len, void *rx_buf)
+{
+ int ret;
+ u8 token, chan;
+ struct scpi_xfer *msg;
+ struct scpi_chan *scpi_chan;
+
+ chan = atomic_inc_return(&scpi_info->next_chan) % scpi_info->num_chans;
+ scpi_chan = scpi_info->channels + chan;
+
+ msg = get_scpi_xfer(scpi_chan);
+ if (!msg)
+ return -ENOMEM;
+
+ token = atomic_inc_return(&scpi_chan->token) & CMD_TOKEN_ID_MASK;
+
+ msg->slot = BIT(SCPI_SLOT);
+ msg->cmd = PACK_SCPI_CMD(cmd, token, len);
+ msg->tx_buf = tx_buf;
+ msg->tx_len = len;
+ msg->rx_buf = rx_buf;
+ init_completion(&msg->done);
+
+ ret = mbox_send_message(scpi_chan->chan, msg);
+ if (ret < 0 || !rx_buf)
+ goto out;
+
+ if (!wait_for_completion_timeout(&msg->done, MAX_RX_TIMEOUT))
+ ret = -ETIMEDOUT;
+ else
+ /* first status word */
+ ret = le32_to_cpu(msg->status);
+out:
+ if (ret < 0 && rx_buf) /* remove entry from the list if timed-out */
+ scpi_process_cmd(scpi_chan, msg->cmd);
+
+ put_scpi_xfer(msg, scpi_chan);
+ /* SCPI error codes are > 0; translate them to Linux error codes */
+ return ret > 0 ? scpi_to_linux_errno(ret) : ret;
+}
+
+static u32 scpi_get_version(void)
+{
+ return scpi_info->protocol_version;
+}
+
+static int
+scpi_clk_get_range(u16 clk_id, unsigned long *min, unsigned long *max)
+{
+ int ret;
+ struct clk_get_info clk;
+ __le16 le_clk_id = cpu_to_le16(clk_id);
+
+ ret = scpi_send_message(SCPI_CMD_GET_CLOCK_INFO,
+ &le_clk_id, sizeof(le_clk_id), &clk);
+ if (!ret) {
+ *min = le32_to_cpu(clk.min_rate);
+ *max = le32_to_cpu(clk.max_rate);
+ }
+ return ret;
+}
+
+static unsigned long scpi_clk_get_val(u16 clk_id)
+{
+ int ret;
+ struct clk_get_value clk;
+ __le16 le_clk_id = cpu_to_le16(clk_id);
+
+ ret = scpi_send_message(SCPI_CMD_GET_CLOCK_VALUE,
+ &le_clk_id, sizeof(le_clk_id), &clk);
+ return ret ? ret : le32_to_cpu(clk.rate);
+}
+
+static int scpi_clk_set_val(u16 clk_id, unsigned long rate)
+{
+ int stat;
+ struct clk_set_value clk = { cpu_to_le16(clk_id), 0, cpu_to_le32(rate) };
+
+ return scpi_send_message(SCPI_CMD_SET_CLOCK_VALUE,
+ &clk, sizeof(clk), &stat);
+}
+
+static int scpi_dvfs_get_idx(u8 domain)
+{
+ int ret;
+ struct dvfs_get dvfs;
+
+ ret = scpi_send_message(SCPI_CMD_GET_DVFS,
+ &domain, sizeof(domain), &dvfs);
+ return ret ? ret : dvfs.index;
+}
+
+static int scpi_dvfs_set_idx(u8 domain, u8 index)
+{
+ int stat;
+ struct dvfs_set dvfs = {domain, index};
+
+ return scpi_send_message(SCPI_CMD_SET_DVFS,
+ &dvfs, sizeof(dvfs), &stat);
+}
+
+static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain)
+{
+ struct scpi_dvfs_info *info;
+ struct scpi_opp *opp;
+ struct dvfs_info buf;
+ int ret, i;
+
+ if (domain >= MAX_DVFS_DOMAINS)
+ return ERR_PTR(-EINVAL);
+
+ if (scpi_info->dvfs[domain]) /* data already populated */
+ return scpi_info->dvfs[domain];
+
+ ret = scpi_send_message(SCPI_CMD_GET_DVFS_INFO, &domain,
+ sizeof(domain), &buf);
+
+ if (ret)
+ return ERR_PTR(ret);
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->count = DVFS_OPP_COUNT(buf.header);
+ info->latency = DVFS_LATENCY(buf.header) * 1000; /* us to ns */
+
+ info->opps = kcalloc(info->count, sizeof(*opp), GFP_KERNEL);
+ if (!info->opps) {
+ kfree(info);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0, opp = info->opps; i < info->count; i++, opp++) {
+ opp->freq = le32_to_cpu(buf.opps[i].freq);
+ opp->m_volt = le32_to_cpu(buf.opps[i].m_volt);
+ }
+
+ scpi_info->dvfs[domain] = info;
+ return info;
+}
+
+static int scpi_sensor_get_id(char *name)
+{
+ struct sensor_capabilities caps;
+ u16 sensors, sensor_id;
+ int ret;
+
+ ret = scpi_send_message(SCPI_CMD_SENSOR_CAPABILITIES,
+ NULL, 0, &caps);
+ if (ret)
+ return ret;
+ sensors = le16_to_cpu(caps.sensors);
+
+ for (sensor_id = 0; sensor_id < sensors; sensor_id++) {
+
+ struct sensor_info info;
+ __le16 le_sensor_id = cpu_to_le16(sensor_id);
+ ret = scpi_send_message(SCPI_CMD_SENSOR_INFO, &le_sensor_id,
+ sizeof(le_sensor_id), &info);
+ if (ret)
+ return ret;
+
+ if (strcmp(name, info.name) == 0)
+ return sensor_id;
+ }
+
+ return -ENODEV;
+}
+
+static int scpi_sensor_get_value(u16 sensor_id, u32 *value)
+{
+ struct sensor_value val;
+ __le16 le_sensor_id = cpu_to_le16(sensor_id);
+ int ret;
+
+ ret = scpi_send_message(SCPI_CMD_SENSOR_VALUE, &le_sensor_id,
+ sizeof(le_sensor_id), &val);
+ if (ret == 0)
+ *value = le32_to_cpu(val.value);
+
+ return ret;
+}
+
+static struct scpi_ops scpi_ops = {
+ .get_version = scpi_get_version,
+ .clk_get_range = scpi_clk_get_range,
+ .clk_get_val = scpi_clk_get_val,
+ .clk_set_val = scpi_clk_set_val,
+ .dvfs_get_idx = scpi_dvfs_get_idx,
+ .dvfs_set_idx = scpi_dvfs_set_idx,
+ .dvfs_get_info = scpi_dvfs_get_info,
+ .sensor_get_id = scpi_sensor_get_id,
+ .sensor_get_value = scpi_sensor_get_value,
+};
+
+struct scpi_ops *get_scpi_ops(void)
+{
+ return scpi_info ? scpi_info->scpi_ops : NULL;
+}
+EXPORT_SYMBOL_GPL(get_scpi_ops);
+
+static int scpi_init_versions(struct scpi_drvinfo *info)
+{
+ int ret;
+ struct scp_capabilities caps;
+
+ ret = scpi_send_message(SCPI_CMD_SCPI_CAPABILITIES, NULL, 0, &caps);
+ if (!ret) {
+ info->protocol_version = le32_to_cpu(caps.protocol_version);
+ info->firmware_version = le32_to_cpu(caps.platform_version);
+ }
+ return ret;
+}
+
+static ssize_t protocol_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d.%d\n",
+ PROTOCOL_REV_MAJOR(scpi_info->protocol_version),
+ PROTOCOL_REV_MINOR(scpi_info->protocol_version));
+}
+static DEVICE_ATTR_RO(protocol_version);
+
+static ssize_t firmware_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d.%d.%d\n",
+ FW_REV_MAJOR(scpi_info->firmware_version),
+ FW_REV_MINOR(scpi_info->firmware_version),
+ FW_REV_PATCH(scpi_info->firmware_version));
+}
+static DEVICE_ATTR_RO(firmware_version);
+
+static struct attribute *versions_attrs[] = {
+ &dev_attr_firmware_version.attr,
+ &dev_attr_protocol_version.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(versions);
+
+static void
+scpi_free_channels(struct device *dev, struct scpi_chan *pchan, int count)
+{
+ int i;
+
+ for (i = 0; i < count && pchan->chan; i++, pchan++) {
+ mbox_free_channel(pchan->chan);
+ devm_kfree(dev, pchan->xfers);
+ devm_iounmap(dev, pchan->rx_payload);
+ }
+}
+
+static int scpi_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct scpi_drvinfo *scpi_info = platform_get_drvdata(pdev);
+
+ of_platform_depopulate(dev);
+ sysfs_remove_groups(&dev->kobj, versions_groups);
+ scpi_free_channels(dev, scpi_info->channels, scpi_info->num_chans);
+ platform_set_drvdata(pdev, NULL);
+
+ devm_kfree(dev, scpi_info->channels);
+ devm_kfree(dev, scpi_info);
+ scpi_info = NULL;
+
+ return 0;
+}
+
+#define MAX_SCPI_XFERS 10
+static int scpi_alloc_xfer_list(struct device *dev, struct scpi_chan *ch)
+{
+ int i;
+ struct scpi_xfer *xfers;
+
+ xfers = devm_kzalloc(dev, MAX_SCPI_XFERS * sizeof(*xfers), GFP_KERNEL);
+ if (!xfers)
+ return -ENOMEM;
+
+ ch->xfers = xfers;
+ for (i = 0; i < MAX_SCPI_XFERS; i++, xfers++)
+ list_add_tail(&xfers->node, &ch->xfers_list);
+ return 0;
+}
+
+static int scpi_probe(struct platform_device *pdev)
+{
+ int count, idx, ret;
+ struct resource res;
+ struct scpi_chan *scpi_chan;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+
+ scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL);
+ if (!scpi_info) {
+ dev_err(dev, "failed to allocate memory for scpi drvinfo\n");
+ return -ENOMEM;
+ }
+
+ count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
+ if (count < 0) {
+ dev_err(dev, "no mboxes property in '%s'\n", np->full_name);
+ return -ENODEV;
+ }
+
+ scpi_chan = devm_kcalloc(dev, count, sizeof(*scpi_chan), GFP_KERNEL);
+ if (!scpi_chan) {
+ dev_err(dev, "failed to allocate memory for scpi chaninfo\n");
+ return -ENOMEM;
+ }
+
+ for (idx = 0; idx < count; idx++) {
+ resource_size_t size;
+ struct scpi_chan *pchan = scpi_chan + idx;
+ struct mbox_client *cl = &pchan->cl;
+ struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
+
+ if (of_address_to_resource(shmem, 0, &res)) {
+ dev_err(dev, "failed to get SCPI payload mem resource\n");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ size = resource_size(&res);
+ pchan->rx_payload = devm_ioremap(dev, res.start, size);
+ if (!pchan->rx_payload) {
+ dev_err(dev, "failed to ioremap SCPI payload\n");
+ ret = -EADDRNOTAVAIL;
+ goto err;
+ }
+ pchan->tx_payload = pchan->rx_payload + (size >> 1);
+
+ cl->dev = dev;
+ cl->rx_callback = scpi_handle_remote_msg;
+ cl->tx_prepare = scpi_tx_prepare;
+ cl->tx_block = true;
+ cl->tx_tout = 50;
+ cl->knows_txdone = false; /* controller can ack */
+
+ INIT_LIST_HEAD(&pchan->rx_pending);
+ INIT_LIST_HEAD(&pchan->xfers_list);
+ spin_lock_init(&pchan->rx_lock);
+ mutex_init(&pchan->xfers_lock);
+
+ ret = scpi_alloc_xfer_list(dev, pchan);
+ if (!ret) {
+ pchan->chan = mbox_request_channel(cl, idx);
+ if (!IS_ERR(pchan->chan))
+ continue;
+ ret = -EPROBE_DEFER;
+ dev_err(dev, "failed to acquire channel#%d\n", idx);
+ }
+err:
+ scpi_free_channels(dev, scpi_chan, idx);
+ scpi_info = NULL;
+ return ret;
+ }
+
+ scpi_info->channels = scpi_chan;
+ scpi_info->num_chans = count;
+ platform_set_drvdata(pdev, scpi_info);
+
+ ret = scpi_init_versions(scpi_info);
+ if (ret) {
+ dev_err(dev, "incorrect or no SCP firmware found\n");
+ scpi_remove(pdev);
+ return ret;
+ }
+
+ _dev_info(dev, "SCP Protocol %d.%d Firmware %d.%d.%d version\n",
+ PROTOCOL_REV_MAJOR(scpi_info->protocol_version),
+ PROTOCOL_REV_MINOR(scpi_info->protocol_version),
+ FW_REV_MAJOR(scpi_info->firmware_version),
+ FW_REV_MINOR(scpi_info->firmware_version),
+ FW_REV_PATCH(scpi_info->firmware_version));
+ scpi_info->scpi_ops = &scpi_ops;
+
+ ret = sysfs_create_groups(&dev->kobj, versions_groups);
+ if (ret)
+ dev_err(dev, "unable to create sysfs version group\n");
+
+ return of_platform_populate(dev->of_node, NULL, NULL, dev);
+}
+
+static const struct of_device_id scpi_of_match[] = {
+ {.compatible = "arm,scpi"},
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, scpi_of_match);
+
+static struct platform_driver scpi_driver = {
+ .driver = {
+ .name = "scpi_protocol",
+ .of_match_table = scpi_of_match,
+ },
+ .probe = scpi_probe,
+ .remove = scpi_remove,
+};
+module_platform_driver(scpi_driver);
+
+MODULE_AUTHOR("Sudeep Holla <sudeep.holla@arm.com>");
+MODULE_DESCRIPTION("ARM SCPI mailbox protocol driver");
+MODULE_LICENSE("GPL");
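
A minimal sketch of how a client driver might consume the scpi_ops interface registered above, assuming the struct scpi_ops and get_scpi_ops() declarations are exported through a shared header (shown here as linux/scpi_protocol.h) and using a made-up sensor name:

    /* Hypothetical SCPI client: look up one SCP sensor by name and read it. */
    #include <linux/errno.h>
    #include <linux/printk.h>
    #include <linux/types.h>
    #include <linux/scpi_protocol.h>	/* assumed home of struct scpi_ops / get_scpi_ops() */

    static int example_read_scp_sensor(void)
    {
    	struct scpi_ops *ops = get_scpi_ops();
    	char name[] = "soc_temp";	/* made-up sensor name */
    	u32 value;
    	int id, ret;

    	if (!ops)			/* SCPI driver not probed yet */
    		return -EPROBE_DEFER;

    	id = ops->sensor_get_id(name);	/* walks the SCP sensor list by name */
    	if (id < 0)
    		return id;

    	ret = ops->sensor_get_value(id, &value);
    	if (ret)
    		return ret;

    	pr_info("SCP sensor '%s' (id %d) reads %u\n", name, id, value);
    	return 0;
    }

Returning -EPROBE_DEFER while get_scpi_ops() is still NULL lets such a consumer retry after the SCPI protocol driver has finished probing.
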
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 0d2e812d424b..fe07d915e20b 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -18,20 +18,36 @@
#include <linux/module.h>
#include <linux/device-mapper.h>
+#include <linux/reboot.h>
#include <crypto/hash.h>
#define DM_MSG_PREFIX "verity"
+#define DM_VERITY_ENV_LENGTH 42
+#define DM_VERITY_ENV_VAR_NAME "VERITY_ERR_BLOCK_NR"
+
#define DM_VERITY_IO_VEC_INLINE 16
#define DM_VERITY_MEMPOOL_SIZE 4
#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
#define DM_VERITY_MAX_LEVELS 63
+#define DM_VERITY_MAX_CORRUPTED_ERRS 100
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
+enum verity_mode {
+ DM_VERITY_MODE_EIO = 0,
+ DM_VERITY_MODE_LOGGING = 1,
+ DM_VERITY_MODE_RESTART = 2
+};
+
+enum verity_block_type {
+ DM_VERITY_BLOCK_TYPE_DATA,
+ DM_VERITY_BLOCK_TYPE_METADATA
+};
+
struct dm_verity {
struct dm_dev *data_dev;
struct dm_dev *hash_dev;
@@ -54,6 +70,8 @@ struct dm_verity {
unsigned digest_size; /* digest size for the current hash algorithm */
unsigned shash_descsize;/* the size of temporary space for crypto */
int hash_failed; /* set to 1 if hash of any block failed */
+ enum verity_mode mode; /* mode for handling verification errors */
+ unsigned corrupted_errs;/* Number of errors for corrupted blocks */
mempool_t *vec_mempool; /* mempool of bio vector */
@@ -180,6 +198,54 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
}
/*
+ * Handle verification errors.
+ */
+static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
+ unsigned long long block)
+{
+ char verity_env[DM_VERITY_ENV_LENGTH];
+ char *envp[] = { verity_env, NULL };
+ const char *type_str = "";
+ struct mapped_device *md = dm_table_get_md(v->ti->table);
+
+ if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
+ goto out;
+
+ ++v->corrupted_errs;
+
+ switch (type) {
+ case DM_VERITY_BLOCK_TYPE_DATA:
+ type_str = "data";
+ break;
+ case DM_VERITY_BLOCK_TYPE_METADATA:
+ type_str = "metadata";
+ break;
+ default:
+ BUG();
+ }
+
+ DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name,
+ type_str, block);
+
+ if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
+ DMERR("%s: reached maximum errors", v->data_dev->name);
+
+ snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
+ DM_VERITY_ENV_VAR_NAME, type, block);
+
+ kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
+
+out:
+ if (v->mode == DM_VERITY_MODE_LOGGING)
+ return 0;
+
+ if (v->mode == DM_VERITY_MODE_RESTART)
+ kernel_restart("dm-verity device corrupted");
+
+ return 1;
+}
+
+/*
* Verify hash of a metadata block pertaining to the specified data block
* ("block" argument) at a specified level ("level" argument).
*
@@ -256,11 +322,13 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
goto release_ret_r;
}
if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- DMERR_LIMIT("metadata block %llu is corrupted",
- (unsigned long long)hash_block);
v->hash_failed = 1;
- r = -EIO;
- goto release_ret_r;
+
+ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block)) {
+ r = -EIO;
+ goto release_ret_r;
+ }
} else
aux->hash_verified = 1;
}
@@ -377,10 +445,11 @@ test_block_hash:
return r;
}
if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- DMERR_LIMIT("data block %llu is corrupted",
- (unsigned long long)(io->block + b));
v->hash_failed = 1;
- return -EIO;
+
+ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b))
+ return -EIO;
}
}
BUG_ON(vector != io->io_vec_size);
@@ -689,8 +758,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- if (argc != 10) {
- ti->error = "Invalid argument count: exactly 10 arguments required";
+ if (argc < 10 || argc > 11) {
+ ti->error = "Invalid argument count: 10-11 arguments required";
r = -EINVAL;
goto bad;
}
@@ -811,6 +880,17 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
}
+ if (argc > 10) {
+ if (sscanf(argv[10], "%d%c", &num, &dummy) != 1 ||
+ num < DM_VERITY_MODE_EIO ||
+ num > DM_VERITY_MODE_RESTART) {
+ ti->error = "Invalid mode";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->mode = num;
+ }
+
v->hash_per_block_bits =
fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1;
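
Taken together, the hunks above make the error-handling policy selectable per target: verity_handle_err() always rate-limits the log message and emits a KOBJ_CHANGE uevent whose payload is VERITY_ERR_BLOCK_NR=<type>,<block> (type 0 for data, 1 for metadata, capped at DM_VERITY_MAX_CORRUPTED_ERRS events), and the new optional 11th table argument then decides what happens to the I/O: 0 keeps the original fail-with-EIO behaviour, 1 logs and lets the request complete, and 2 calls kernel_restart(). Assuming the ten mandatory arguments stay as documented in Documentation/device-mapper/verity.txt, selecting restart mode is simply a matter of appending "2" (or "1" for log-only) to an existing table line.
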
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index c002d8660e30..181fc59fec04 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -424,6 +424,10 @@ config TI_DAC7512
This driver can also be built as a module. If so, the module
will be called ti_dac7512.
+config UID_STAT
+ bool "UID based statistics tracking exported to /proc/uid_stat"
+ default n
+
config VMWARE_BALLOON
tristate "VMware Balloon Driver"
depends on X86 && HYPERVISOR_GUEST
@@ -527,6 +531,12 @@ config SRAM
the genalloc API. It is supposed to be used for small on-chip SRAM
areas found on many SoCs.
+config UID_CPUTIME
+ tristate "Per-UID cpu time statistics"
+ depends on PROFILING
+ help
+ Per-UID CPU time statistics exported to /proc/uid_cputime
+
source "drivers/misc/c2port/Kconfig"
source "drivers/misc/eeprom/Kconfig"
source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c235d5b68311..89435c9a182c 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o
obj-$(CONFIG_EP93XX_PWM) += ep93xx_pwm.o
obj-$(CONFIG_DS1682) += ds1682.o
obj-$(CONFIG_TI_DAC7512) += ti_dac7512.o
+obj-$(CONFIG_UID_STAT) += uid_stat.o
obj-$(CONFIG_C2PORT) += c2port/
obj-$(CONFIG_HMC6352) += hmc6352.o
obj-y += eeprom/
@@ -53,3 +54,4 @@ obj-$(CONFIG_INTEL_MEI) += mei/
obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/
obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o
obj-$(CONFIG_SRAM) += sram.o
+obj-$(CONFIG_UID_CPUTIME) += uid_cputime.o
diff --git a/drivers/misc/uid_cputime.c b/drivers/misc/uid_cputime.c
new file mode 100644
index 000000000000..acd7046ce497
--- /dev/null
+++ b/drivers/misc/uid_cputime.c
@@ -0,0 +1,237 @@
+/* drivers/misc/uid_cputime.c
+ *
+ * Copyright (C) 2014 - 2015 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/hashtable.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#define UID_HASH_BITS 10
+DECLARE_HASHTABLE(hash_table, UID_HASH_BITS);
+
+static DEFINE_MUTEX(uid_lock);
+static struct proc_dir_entry *parent;
+
+struct uid_entry {
+ uid_t uid;
+ cputime_t utime;
+ cputime_t stime;
+ cputime_t active_utime;
+ cputime_t active_stime;
+ struct hlist_node hash;
+};
+
+static struct uid_entry *find_uid_entry(uid_t uid)
+{
+ struct uid_entry *uid_entry;
+ hash_for_each_possible(hash_table, uid_entry, hash, uid) {
+ if (uid_entry->uid == uid)
+ return uid_entry;
+ }
+ return NULL;
+}
+
+static struct uid_entry *find_or_register_uid(uid_t uid)
+{
+ struct uid_entry *uid_entry;
+
+ uid_entry = find_uid_entry(uid);
+ if (uid_entry)
+ return uid_entry;
+
+ uid_entry = kzalloc(sizeof(struct uid_entry), GFP_ATOMIC);
+ if (!uid_entry)
+ return NULL;
+
+ uid_entry->uid = uid;
+
+ hash_add(hash_table, &uid_entry->hash, uid);
+
+ return uid_entry;
+}
+
+static int uid_stat_show(struct seq_file *m, void *v)
+{
+ struct uid_entry *uid_entry;
+ struct task_struct *task;
+ cputime_t utime;
+ cputime_t stime;
+ unsigned long bkt;
+
+ mutex_lock(&uid_lock);
+
+ hash_for_each(hash_table, bkt, uid_entry, hash) {
+ uid_entry->active_stime = 0;
+ uid_entry->active_utime = 0;
+ }
+
+ read_lock(&tasklist_lock);
+ for_each_process(task) {
+ uid_entry = find_or_register_uid(from_kuid_munged(
+ current_user_ns(), task_uid(task)));
+ if (!uid_entry) {
+ read_unlock(&tasklist_lock);
+ mutex_unlock(&uid_lock);
+ pr_err("%s: failed to find the uid_entry for uid %d\n",
+ __func__, from_kuid_munged(current_user_ns(),
+ task_uid(task)));
+ return -ENOMEM;
+ }
+ task_cputime_adjusted(task, &utime, &stime);
+ uid_entry->active_utime += utime;
+ uid_entry->active_stime += stime;
+ }
+ read_unlock(&tasklist_lock);
+
+ hash_for_each(hash_table, bkt, uid_entry, hash) {
+ cputime_t total_utime = uid_entry->utime +
+ uid_entry->active_utime;
+ cputime_t total_stime = uid_entry->stime +
+ uid_entry->active_stime;
+ seq_printf(m, "%d: %u %u\n", uid_entry->uid,
+ cputime_to_usecs(total_utime),
+ cputime_to_usecs(total_stime));
+ }
+
+ mutex_unlock(&uid_lock);
+ return 0;
+}
+
+static int uid_stat_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, uid_stat_show, PDE_DATA(inode));
+}
+
+static const struct file_operations uid_stat_fops = {
+ .open = uid_stat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int uid_remove_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, NULL, NULL);
+}
+
+static ssize_t uid_remove_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
+{
+ struct uid_entry *uid_entry;
+ struct hlist_node *tmp;
+ char uids[128];
+ char *start_uid, *end_uid = NULL;
+ long int uid_start = 0, uid_end = 0;
+
+ if (count >= sizeof(uids))
+ count = sizeof(uids) - 1;
+
+ if (copy_from_user(uids, buffer, count))
+ return -EFAULT;
+
+ uids[count] = '\0';
+ end_uid = uids;
+ start_uid = strsep(&end_uid, "-");
+
+ if (!start_uid || !end_uid)
+ return -EINVAL;
+
+ if (kstrtol(start_uid, 10, &uid_start) != 0 ||
+ kstrtol(end_uid, 10, &uid_end) != 0) {
+ return -EINVAL;
+ }
+
+ mutex_lock(&uid_lock);
+
+ for (; uid_start <= uid_end; uid_start++) {
+ hash_for_each_possible_safe(hash_table, uid_entry, tmp,
+ hash, uid_start) {
+ hash_del(&uid_entry->hash);
+ kfree(uid_entry);
+ }
+ }
+
+ mutex_unlock(&uid_lock);
+ return count;
+}
+
+static const struct file_operations uid_remove_fops = {
+ .open = uid_remove_open,
+ .release = single_release,
+ .write = uid_remove_write,
+};
+
+static int process_notifier(struct notifier_block *self,
+ unsigned long cmd, void *v)
+{
+ struct task_struct *task = v;
+ struct uid_entry *uid_entry;
+ cputime_t utime, stime;
+ uid_t uid;
+
+ if (!task)
+ return NOTIFY_OK;
+
+ mutex_lock(&uid_lock);
+ uid = from_kuid_munged(current_user_ns(), task_uid(task));
+ uid_entry = find_or_register_uid(uid);
+ if (!uid_entry) {
+ pr_err("%s: failed to find uid %d\n", __func__, uid);
+ goto exit;
+ }
+
+ task_cputime_adjusted(task, &utime, &stime);
+ uid_entry->utime += utime;
+ uid_entry->stime += stime;
+
+exit:
+ mutex_unlock(&uid_lock);
+ return NOTIFY_OK;
+}
+
+static struct notifier_block process_notifier_block = {
+ .notifier_call = process_notifier,
+};
+
+static int __init proc_uid_cputime_init(void)
+{
+ hash_init(hash_table);
+
+ parent = proc_mkdir("uid_cputime", NULL);
+ if (!parent) {
+ pr_err("%s: failed to create proc entry\n", __func__);
+ return -ENOMEM;
+ }
+
+ proc_create_data("remove_uid_range", S_IWUGO, parent, &uid_remove_fops,
+ NULL);
+
+ proc_create_data("show_uid_stat", S_IRUGO, parent, &uid_stat_fops,
+ NULL);
+
+ profile_event_register(PROFILE_TASK_EXIT, &process_notifier_block);
+
+ return 0;
+}
+
+early_initcall(proc_uid_cputime_init);
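
For a concrete view of the interface created above, here is a small illustrative user-space snippet that reads the per-UID totals and then clears an arbitrary UID range; the range string follows the "start-end" form parsed by uid_remove_write(), and the UIDs themselves are only examples:

    /* Dump per-UID CPU times, then drop accounting for an example UID range. */
    #include <stdio.h>

    int main(void)
    {
    	char line[128];
    	FILE *f = fopen("/proc/uid_cputime/show_uid_stat", "r");

    	if (f) {
    		/* Each line is "<uid>: <user usecs> <system usecs>". */
    		while (fgets(line, sizeof(line), f))
    			fputs(line, stdout);
    		fclose(f);
    	}

    	f = fopen("/proc/uid_cputime/remove_uid_range", "w");
    	if (f) {
    		fputs("10000-10010", f);	/* example range, e.g. an uninstalled app's UIDs */
    		fclose(f);
    	}
    	return 0;
    }
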
diff --git a/drivers/misc/uid_stat.c b/drivers/misc/uid_stat.c
new file mode 100644
index 000000000000..4766c1f83b94
--- /dev/null
+++ b/drivers/misc/uid_stat.c
@@ -0,0 +1,152 @@
+/* drivers/misc/uid_stat.c
+ *
+ * Copyright (C) 2008 - 2009 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <asm/atomic.h>
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/uid_stat.h>
+#include <net/activity_stats.h>
+
+static DEFINE_SPINLOCK(uid_lock);
+static LIST_HEAD(uid_list);
+static struct proc_dir_entry *parent;
+
+struct uid_stat {
+ struct list_head link;
+ uid_t uid;
+ atomic_t tcp_rcv;
+ atomic_t tcp_snd;
+};
+
+static struct uid_stat *find_uid_stat(uid_t uid) {
+ struct uid_stat *entry;
+
+ list_for_each_entry(entry, &uid_list, link) {
+ if (entry->uid == uid) {
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+static int uid_stat_atomic_int_show(struct seq_file *m, void *v)
+{
+ unsigned int bytes;
+ atomic_t *counter = m->private;
+
+ bytes = (unsigned int) (atomic_read(counter) + INT_MIN);
+ return seq_printf(m, "%u\n", bytes);
+}
+
+static int uid_stat_read_atomic_int_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, uid_stat_atomic_int_show, PDE_DATA(inode));
+}
+
+static const struct file_operations uid_stat_read_atomic_int_fops = {
+ .open = uid_stat_read_atomic_int_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/* Create a new entry for tracking the specified uid. */
+static struct uid_stat *create_stat(uid_t uid) {
+ struct uid_stat *new_uid;
+ /* Create the uid stat struct and append it to the list. */
+ new_uid = kmalloc(sizeof(struct uid_stat), GFP_ATOMIC);
+ if (!new_uid)
+ return NULL;
+
+ new_uid->uid = uid;
+ /* Counters start at INT_MIN, so we can track 4GB of network traffic. */
+ atomic_set(&new_uid->tcp_rcv, INT_MIN);
+ atomic_set(&new_uid->tcp_snd, INT_MIN);
+
+ list_add_tail(&new_uid->link, &uid_list);
+ return new_uid;
+}
+
+static void create_stat_proc(struct uid_stat *new_uid)
+{
+ char uid_s[32];
+ struct proc_dir_entry *entry;
+ sprintf(uid_s, "%d", new_uid->uid);
+ entry = proc_mkdir(uid_s, parent);
+
+ /* Keep reference to uid_stat so we know what uid to read stats from. */
+ proc_create_data("tcp_snd", S_IRUGO, entry,
+ &uid_stat_read_atomic_int_fops, &new_uid->tcp_snd);
+
+ proc_create_data("tcp_rcv", S_IRUGO, entry,
+ &uid_stat_read_atomic_int_fops, &new_uid->tcp_rcv);
+}
+
+static struct uid_stat *find_or_create_uid_stat(uid_t uid)
+{
+ struct uid_stat *entry;
+ unsigned long flags;
+ spin_lock_irqsave(&uid_lock, flags);
+ entry = find_uid_stat(uid);
+ if (entry) {
+ spin_unlock_irqrestore(&uid_lock, flags);
+ return entry;
+ }
+ entry = create_stat(uid);
+ spin_unlock_irqrestore(&uid_lock, flags);
+ if (entry)
+ create_stat_proc(entry);
+ return entry;
+}
+
+int uid_stat_tcp_snd(uid_t uid, int size) {
+ struct uid_stat *entry;
+ activity_stats_update();
+ entry = find_or_create_uid_stat(uid);
+ if (!entry)
+ return -1;
+ atomic_add(size, &entry->tcp_snd);
+ return 0;
+}
+
+int uid_stat_tcp_rcv(uid_t uid, int size) {
+ struct uid_stat *entry;
+ activity_stats_update();
+ entry = find_or_create_uid_stat(uid);
+ if (!entry)
+ return -1;
+ atomic_add(size, &entry->tcp_rcv);
+ return 0;
+}
+
+static int __init uid_stat_init(void)
+{
+ parent = proc_mkdir("uid_stat", NULL);
+ if (!parent) {
+ pr_err("uid_stat: failed to create proc entry\n");
+ return -1;
+ }
+ return 0;
+}
+
+__initcall(uid_stat_init);
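
The INT_MIN initialisation above is what lets a 32-bit counter report up to 4 GB of traffic: atomic_add() wraps modulo 2^32, and uid_stat_atomic_int_show() adds INT_MIN once more to recover the byte count. A standalone illustration of the arithmetic, using unsigned variables so the wraparound is well defined:

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
    	unsigned int counter = (unsigned int)INT_MIN;	/* 0x80000000, as set by create_stat() */
    	unsigned int traffic = 3000000000u;		/* ~3 GB of bytes, more than INT_MAX */

    	counter += traffic;				/* wraps modulo 2^32, like atomic_add() */

    	/* Same recovery step as uid_stat_atomic_int_show(). */
    	printf("%u\n", counter + (unsigned int)INT_MIN);	/* prints 3000000000 */
    	return 0;
    }
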
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index 5562308699bc..79d82124413f 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -50,6 +50,15 @@ config MMC_BLOCK_BOUNCE
If unsure, say Y here.
+config MMC_BLOCK_DEFERRED_RESUME
+ bool "Defer MMC layer resume until I/O is requested"
+ depends on MMC_BLOCK
+ default n
+ help
+ Say Y here to enable deferred MMC resume until I/O
+ is requested. This will reduce overall resume latency and
+ save power when there's an SD card inserted but not being used.
+
config SDIO_UART
tristate "SDIO UART/GPS class support"
depends on TTY
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 56998eca1a8d..d813cb9a3666 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -35,6 +35,9 @@
#include <linux/capability.h>
#include <linux/compat.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mmc.h>
+
#include <linux/mmc/ioctl.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
@@ -163,11 +166,7 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
static inline int mmc_get_devidx(struct gendisk *disk)
{
- int devmaj = MAJOR(disk_devt(disk));
- int devidx = MINOR(disk_devt(disk)) / perdev_minors;
-
- if (!devmaj)
- devidx = disk->first_minor / perdev_minors;
+ int devidx = disk->first_minor / perdev_minors;
return devidx;
}
@@ -728,18 +727,22 @@ static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
req->rq_disk->disk_name, "timed out", name, status);
/* If the status cmd initially failed, retry the r/w cmd */
- if (!status_valid)
+ if (!status_valid) {
+ pr_err("%s: status not valid, retrying timeout\n", req->rq_disk->disk_name);
return ERR_RETRY;
-
+ }
/*
* If it was a r/w cmd crc error, or illegal command
* (eg, issued in wrong state) then retry - we should
* have corrected the state problem above.
*/
- if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+ if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) {
+ pr_err("%s: command error, retrying timeout\n", req->rq_disk->disk_name);
return ERR_RETRY;
+ }
/* Otherwise abort the command */
+ pr_err("%s: not retrying timeout\n", req->rq_disk->disk_name);
return ERR_ABORT;
default:
@@ -1031,9 +1034,12 @@ retry:
goto out;
}
- if (mmc_can_sanitize(card))
+ if (mmc_can_sanitize(card)) {
+ trace_mmc_blk_erase_start(EXT_CSD_SANITIZE_START, 0, 0);
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_SANITIZE_START, 1, 0);
+ trace_mmc_blk_erase_end(EXT_CSD_SANITIZE_START, 0, 0);
+ }
out_retry:
if (err && !mmc_blk_reset(md, card->host, type))
goto retry;
@@ -1945,6 +1951,11 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
unsigned long flags;
unsigned int cmd_flags = req ? req->cmd_flags : 0;
+#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
+ if (mmc_bus_needs_resume(card->host))
+ mmc_resume_bus(card->host);
+#endif
+
if (req && !mq->mqrq_prev->req)
/* claim host only for the first request */
mmc_claim_host(card->host);
@@ -2067,6 +2078,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
md->disk->queue = md->queue.queue;
md->disk->driverfs_dev = parent;
set_disk_ro(md->disk, md->read_only || default_ro);
+ md->disk->flags = GENHD_FL_EXT_DEVT;
if (area_type & MMC_BLK_DATA_AREA_RPMB)
md->disk->flags |= GENHD_FL_NO_PART_SCAN;
@@ -2381,6 +2393,9 @@ static int mmc_blk_probe(struct mmc_card *card)
mmc_set_drvdata(card, md);
mmc_fixup_device(card, blk_fixups);
+#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
+ mmc_set_bus_resume_policy(card->host, 1);
+#endif
if (mmc_add_disk(md))
goto out;
@@ -2406,6 +2421,9 @@ static void mmc_blk_remove(struct mmc_card *card)
mmc_release_host(card->host);
mmc_blk_remove_req(md);
mmc_set_drvdata(card, NULL);
+#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
+ mmc_set_bus_resume_policy(card->host, 0);
+#endif
}
#ifdef CONFIG_PM
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index 269d072ef55e..ae10a372af0f 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -26,3 +26,18 @@ config MMC_CLKGATE
support handling this in order for it to be of any use.
If unsure, say N.
+
+config MMC_EMBEDDED_SDIO
+ boolean "MMC embedded SDIO device support (EXPERIMENTAL)"
+ help
+ If you say Y here, support will be added for embedded SDIO
+ devices which do not contain the necessary enumeration
+ support in hardware to be properly detected.
+
+config MMC_PARANOID_SD_INIT
+ bool "Enable paranoid SD card initialization (EXPERIMENTAL)"
+ help
+ If you say Y here, the MMC layer will be extra paranoid
+ about re-trying SD init requests. This can be a useful
+ work-around for buggy controllers and hardware. Enable
+ if you are experiencing issues with SD detection.
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index c40396f23202..6a83f4ccc108 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -27,6 +27,9 @@
#include <linux/fault-inject.h>
#include <linux/random.h>
#include <linux/slab.h>
+#include <linux/wakelock.h>
+
+#include <trace/events/mmc.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
@@ -172,6 +175,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
pr_debug("%s: %d bytes transferred: %d\n",
mmc_hostname(host),
mrq->data->bytes_xfered, mrq->data->error);
+ trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data);
}
if (mrq->stop) {
@@ -536,8 +540,12 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
mmc_start_bkops(host->card, true);
}
- if (!err && areq)
+ if (!err && areq) {
+ trace_mmc_blk_rw_start(areq->mrq->cmd->opcode,
+ areq->mrq->cmd->arg,
+ areq->mrq->data);
start_err = __mmc_start_data_req(host, areq->mrq);
+ }
if (host->areq)
mmc_post_req(host, host->areq->mrq, 0);
@@ -1591,6 +1599,36 @@ static inline void mmc_bus_put(struct mmc_host *host)
spin_unlock_irqrestore(&host->lock, flags);
}
+int mmc_resume_bus(struct mmc_host *host)
+{
+ unsigned long flags;
+
+ if (!mmc_bus_needs_resume(host))
+ return -EINVAL;
+
+ printk("%s: Starting deferred resume\n", mmc_hostname(host));
+ spin_lock_irqsave(&host->lock, flags);
+ host->bus_resume_flags &= ~MMC_BUSRESUME_NEEDS_RESUME;
+ host->rescan_disable = 0;
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ mmc_bus_get(host);
+ if (host->bus_ops && !host->bus_dead) {
+ mmc_power_up(host);
+ BUG_ON(!host->bus_ops->resume);
+ host->bus_ops->resume(host);
+ }
+
+ if (host->bus_ops->detect && !host->bus_dead)
+ host->bus_ops->detect(host);
+
+ mmc_bus_put(host);
+ printk("%s: Deferred resume completed\n", mmc_hostname(host));
+ return 0;
+}
+
+EXPORT_SYMBOL(mmc_resume_bus);
+
/*
* Assign a mmc bus handler to a host. Only one bus handler may control a
* host at any given time.
@@ -1656,6 +1694,8 @@ void mmc_detect_change(struct mmc_host *host, unsigned long delay)
spin_unlock_irqrestore(&host->lock, flags);
#endif
host->detect_change = 1;
+
+ wake_lock(&host->detect_wake_lock);
mmc_schedule_delayed_work(&host->detect, delay);
}
@@ -1815,8 +1855,13 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
struct mmc_command cmd = {0};
unsigned int qty = 0;
unsigned long timeout;
+ unsigned int fr, nr;
int err;
+ fr = from;
+ nr = to - from + 1;
+ trace_mmc_blk_erase_start(arg, fr, nr);
+
/*
* qty is used to calculate the erase timeout which depends on how many
* erase groups (or allocation units in SD terminology) are affected.
@@ -1920,6 +1965,8 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
} while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
(R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG));
out:
+
+ trace_mmc_blk_erase_end(arg, fr, nr);
return err;
}
@@ -2351,6 +2398,7 @@ void mmc_rescan(struct work_struct *work)
struct mmc_host *host =
container_of(work, struct mmc_host, detect.work);
int i;
+ bool extend_wakelock = false;
if (host->rescan_disable)
return;
@@ -2372,6 +2420,12 @@ void mmc_rescan(struct work_struct *work)
host->detect_change = 0;
+ /* If the card was removed the bus will be marked
+ * as dead - extend the wakelock so userspace
+ * can respond */
+ if (host->bus_dead)
+ extend_wakelock = 1;
+
/*
* Let mmc_bus_put() free the bus/bus_ops if we've found that
* the card is no longer present.
@@ -2400,16 +2454,24 @@ void mmc_rescan(struct work_struct *work)
mmc_claim_host(host);
for (i = 0; i < ARRAY_SIZE(freqs); i++) {
- if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min)))
+ if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min))) {
+ extend_wakelock = true;
break;
+ }
if (freqs[i] <= host->f_min)
break;
}
mmc_release_host(host);
out:
- if (host->caps & MMC_CAP_NEEDS_POLL)
+ if (extend_wakelock)
+ wake_lock_timeout(&host->detect_wake_lock, HZ / 2);
+ else
+ wake_unlock(&host->detect_wake_lock);
+ if (host->caps & MMC_CAP_NEEDS_POLL) {
+ wake_lock(&host->detect_wake_lock);
mmc_schedule_delayed_work(&host->detect, HZ);
+ }
}
void mmc_start_host(struct mmc_host *host)
@@ -2433,7 +2495,8 @@ void mmc_stop_host(struct mmc_host *host)
#endif
host->rescan_disable = 1;
- cancel_delayed_work_sync(&host->detect);
+ if (cancel_delayed_work_sync(&host->detect))
+ wake_unlock(&host->detect_wake_lock);
mmc_flush_scheduled_work();
/* clear pm flags now and let card drivers set them as needed */
@@ -2628,7 +2691,11 @@ int mmc_suspend_host(struct mmc_host *host)
{
int err = 0;
- cancel_delayed_work(&host->detect);
+ if (mmc_bus_needs_resume(host))
+ return 0;
+
+ if (cancel_delayed_work(&host->detect))
+ wake_unlock(&host->detect_wake_lock);
mmc_flush_scheduled_work();
mmc_bus_get(host);
@@ -2679,6 +2746,12 @@ int mmc_resume_host(struct mmc_host *host)
int err = 0;
mmc_bus_get(host);
+ if (mmc_bus_manual_resume(host)) {
+ host->bus_resume_flags |= MMC_BUSRESUME_NEEDS_RESUME;
+ mmc_bus_put(host);
+ return 0;
+ }
+
if (host->bus_ops && !host->bus_dead) {
if (!mmc_card_keep_power(host)) {
mmc_power_up(host);
@@ -2739,9 +2812,14 @@ int mmc_pm_notify(struct notifier_block *notify_block,
}
spin_lock_irqsave(&host->lock, flags);
+ if (mmc_bus_needs_resume(host)) {
+ spin_unlock_irqrestore(&host->lock, flags);
+ break;
+ }
host->rescan_disable = 1;
spin_unlock_irqrestore(&host->lock, flags);
- cancel_delayed_work_sync(&host->detect);
+ if (cancel_delayed_work_sync(&host->detect))
+ wake_unlock(&host->detect_wake_lock);
if (!host->bus_ops || host->bus_ops->suspend)
break;
@@ -2762,6 +2840,10 @@ int mmc_pm_notify(struct notifier_block *notify_block,
case PM_POST_RESTORE:
spin_lock_irqsave(&host->lock, flags);
+ if (mmc_bus_manual_resume(host)) {
+ spin_unlock_irqrestore(&host->lock, flags);
+ break;
+ }
host->rescan_disable = 0;
spin_unlock_irqrestore(&host->lock, flags);
mmc_detect_change(host, 0);
@@ -2789,6 +2871,22 @@ void mmc_init_context_info(struct mmc_host *host)
init_waitqueue_head(&host->context_info.wait);
}
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+void mmc_set_embedded_sdio_data(struct mmc_host *host,
+ struct sdio_cis *cis,
+ struct sdio_cccr *cccr,
+ struct sdio_embedded_func *funcs,
+ int num_funcs)
+{
+ host->embedded_sdio_data.cis = cis;
+ host->embedded_sdio_data.cccr = cccr;
+ host->embedded_sdio_data.funcs = funcs;
+ host->embedded_sdio_data.num_funcs = num_funcs;
+}
+
+EXPORT_SYMBOL(mmc_set_embedded_sdio_data);
+#endif
+
static int __init mmc_init(void)
{
int ret;
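
Nothing in this file shows a caller of mmc_set_embedded_sdio_data(), so the following is only a rough sketch of how a host or platform driver might wire it up for a soldered-down SDIO device; the identifiers, class code and field values are illustrative, and struct sdio_embedded_func is assumed to expose the f_class/f_maxblksize members referenced later in sdio.c:

    /* Sketch: platform glue for a hard-wired, two-function SDIO WLAN chip. */
    #include <linux/kernel.h>
    #include <linux/mmc/card.h>		/* struct sdio_cis, struct sdio_cccr */
    #include <linux/mmc/host.h>
    #include <linux/mmc/sdio_func.h>	/* assumed home of struct sdio_embedded_func */
    #include <linux/mmc/sdio_ids.h>		/* SDIO_CLASS_WLAN */

    static struct sdio_cis example_cis = {
    	.vendor	= 0x1234,		/* made-up manufacturer id */
    	.device	= 0x5678,		/* made-up card id */
    };

    static struct sdio_cccr example_cccr;	/* left zeroed here; fill in what the chip needs */

    static struct sdio_embedded_func example_funcs[] = {
    	{ .f_class = SDIO_CLASS_WLAN, .f_maxblksize = 512 },
    	{ .f_class = SDIO_CLASS_WLAN, .f_maxblksize = 512 },
    };

    static void example_wire_up_embedded_sdio(struct mmc_host *host)
    {
    	/* Lets mmc_attach_sdio() skip the CCCR/CIS reads and use the data above. */
    	mmc_set_embedded_sdio_data(host, &example_cis, &example_cccr,
    				   example_funcs, ARRAY_SIZE(example_funcs));
    }
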
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 2a3593d9f87d..56dadd667ec1 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -459,6 +459,8 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
spin_lock_init(&host->lock);
init_waitqueue_head(&host->wq);
+ wake_lock_init(&host->detect_wake_lock, WAKE_LOCK_SUSPEND,
+ kasprintf(GFP_KERNEL, "%s_detect", mmc_hostname(host)));
INIT_DELAYED_WORK(&host->detect, mmc_rescan);
#ifdef CONFIG_PM
host->pm_notify.notifier_call = mmc_pm_notify;
@@ -511,7 +513,8 @@ int mmc_add_host(struct mmc_host *host)
mmc_host_clk_sysfs_init(host);
mmc_start_host(host);
- register_pm_notifier(&host->pm_notify);
+ if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
+ register_pm_notifier(&host->pm_notify);
return 0;
}
@@ -528,7 +531,9 @@ EXPORT_SYMBOL(mmc_add_host);
*/
void mmc_remove_host(struct mmc_host *host)
{
- unregister_pm_notifier(&host->pm_notify);
+ if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
+ unregister_pm_notifier(&host->pm_notify);
+
mmc_stop_host(host);
#ifdef CONFIG_DEBUG_FS
@@ -555,6 +560,7 @@ void mmc_free_host(struct mmc_host *host)
spin_lock(&mmc_host_lock);
idr_remove(&mmc_host_idr, host->index);
spin_unlock(&mmc_host_lock);
+ wake_lock_destroy(&host->detect_wake_lock);
put_device(&host->class_dev);
}
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 9e645e19cec6..f008318c5c4d 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -805,6 +805,9 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
bool reinit)
{
int err;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ int retries;
+#endif
if (!reinit) {
/*
@@ -831,7 +834,26 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
/*
* Fetch switch information from card.
*/
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ for (retries = 1; retries <= 3; retries++) {
+ err = mmc_read_switch(card);
+ if (!err) {
+ if (retries > 1) {
+ printk(KERN_WARNING
+ "%s: recovered\n",
+ mmc_hostname(host));
+ }
+ break;
+ } else {
+ printk(KERN_WARNING
+ "%s: read switch failed (attempt %d)\n",
+ mmc_hostname(host), retries);
+ }
+ }
+#else
err = mmc_read_switch(card);
+#endif
+
if (err)
return err;
}
@@ -1032,7 +1054,10 @@ static int mmc_sd_alive(struct mmc_host *host)
*/
static void mmc_sd_detect(struct mmc_host *host)
{
- int err;
+ int err = 0;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ int retries = 5;
+#endif
BUG_ON(!host);
BUG_ON(!host->card);
@@ -1042,7 +1067,23 @@ static void mmc_sd_detect(struct mmc_host *host)
/*
* Just check if our card has been removed.
*/
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ while(retries) {
+ err = mmc_send_status(host->card, NULL);
+ if (err) {
+ retries--;
+ udelay(5);
+ continue;
+ }
+ break;
+ }
+ if (!retries) {
+ printk(KERN_ERR "%s(%s): Unable to re-detect card (%d)\n",
+ __func__, mmc_hostname(host), err);
+ }
+#else
err = _mmc_detect_card_removed(host);
+#endif
mmc_release_host(host);
@@ -1084,12 +1125,31 @@ static int mmc_sd_suspend(struct mmc_host *host)
static int mmc_sd_resume(struct mmc_host *host)
{
int err;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ int retries;
+#endif
BUG_ON(!host);
BUG_ON(!host->card);
mmc_claim_host(host);
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ retries = 5;
+ while (retries) {
+ err = mmc_sd_init_card(host, host->ocr, host->card);
+
+ if (err) {
+ printk(KERN_ERR "%s: Re-init card rc = %d (retries = %d)\n",
+ mmc_hostname(host), err, retries);
+ mdelay(5);
+ retries--;
+ continue;
+ }
+ break;
+ }
+#else
err = mmc_sd_init_card(host, host->ocr, host->card);
+#endif
mmc_release_host(host);
return err;
@@ -1143,6 +1203,9 @@ int mmc_attach_sd(struct mmc_host *host)
{
int err;
u32 ocr;
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ int retries;
+#endif
BUG_ON(!host);
WARN_ON(!host->claimed);
@@ -1198,9 +1261,27 @@ int mmc_attach_sd(struct mmc_host *host)
/*
* Detect and init the card.
*/
+#ifdef CONFIG_MMC_PARANOID_SD_INIT
+ retries = 5;
+ while (retries) {
+ err = mmc_sd_init_card(host, host->ocr, NULL);
+ if (err) {
+ retries--;
+ continue;
+ }
+ break;
+ }
+
+ if (!retries) {
+ printk(KERN_ERR "%s: mmc_sd_init_card() failure (err = %d)\n",
+ mmc_hostname(host), err);
+ goto err;
+ }
+#else
err = mmc_sd_init_card(host, host->ocr, NULL);
if (err)
goto err;
+#endif
mmc_release_host(host);
err = mmc_add_card(host->card);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 6889a821c1da..46e68f125ff2 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -10,6 +10,7 @@
*/
#include <linux/err.h>
+#include <linux/module.h>
#include <linux/pm_runtime.h>
#include <linux/mmc/host.h>
@@ -28,6 +29,10 @@
#include "sdio_ops.h"
#include "sdio_cis.h"
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+#include <linux/mmc/sdio_ids.h>
+#endif
+
static int sdio_read_fbr(struct sdio_func *func)
{
int ret;
@@ -728,19 +733,35 @@ try_again:
goto finish;
}
- /*
- * Read the common registers.
- */
- err = sdio_read_cccr(card, ocr);
- if (err)
- goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ if (host->embedded_sdio_data.cccr)
+ memcpy(&card->cccr, host->embedded_sdio_data.cccr, sizeof(struct sdio_cccr));
+ else {
+#endif
+ /*
+ * Read the common registers.
+ */
+ err = sdio_read_cccr(card, ocr);
+ if (err)
+ goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ }
+#endif
- /*
- * Read the common CIS tuples.
- */
- err = sdio_read_common_cis(card);
- if (err)
- goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ if (host->embedded_sdio_data.cis)
+ memcpy(&card->cis, host->embedded_sdio_data.cis, sizeof(struct sdio_cis));
+ else {
+#endif
+ /*
+ * Read the common CIS tuples.
+ */
+ err = sdio_read_common_cis(card);
+ if (err)
+ goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ }
+#endif
if (oldcard) {
int same = (card->cis.vendor == oldcard->cis.vendor &&
@@ -1147,14 +1168,36 @@ int mmc_attach_sdio(struct mmc_host *host)
funcs = (ocr & 0x70000000) >> 28;
card->sdio_funcs = 0;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ if (host->embedded_sdio_data.funcs)
+ card->sdio_funcs = funcs = host->embedded_sdio_data.num_funcs;
+#endif
+
/*
* Initialize (but don't add) all present functions.
*/
for (i = 0; i < funcs; i++, card->sdio_funcs++) {
- err = sdio_init_func(host->card, i + 1);
- if (err)
- goto remove;
-
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ if (host->embedded_sdio_data.funcs) {
+ struct sdio_func *tmp;
+
+ tmp = sdio_alloc_func(host->card);
+ if (IS_ERR(tmp))
+ goto remove;
+ tmp->num = (i + 1);
+ card->sdio_func[i] = tmp;
+ tmp->class = host->embedded_sdio_data.funcs[i].f_class;
+ tmp->max_blksize = host->embedded_sdio_data.funcs[i].f_maxblksize;
+ tmp->vendor = card->cis.vendor;
+ tmp->device = card->cis.device;
+ } else {
+#endif
+ err = sdio_init_func(host->card, i + 1);
+ if (err)
+ goto remove;
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ }
+#endif
/*
* Enable Runtime PM for this func (if supported)
*/
@@ -1202,3 +1245,39 @@ err:
return err;
}
+int sdio_reset_comm(struct mmc_card *card)
+{
+ struct mmc_host *host = card->host;
+ u32 ocr;
+ int err;
+
+ printk("%s():\n", __func__);
+ mmc_claim_host(host);
+
+ mmc_go_idle(host);
+
+ mmc_set_clock(host, host->f_min);
+
+ err = mmc_send_io_op_cond(host, 0, &ocr);
+ if (err)
+ goto err;
+
+ host->ocr = mmc_select_voltage(host, ocr);
+ if (!host->ocr) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = mmc_sdio_init_card(host, host->ocr, card, 0);
+ if (err)
+ goto err;
+
+ mmc_release_host(host);
+ return 0;
+err:
+ printk("%s: Error resetting SDIO communications (%d)\n",
+ mmc_hostname(host), err);
+ mmc_release_host(host);
+ return err;
+}
+EXPORT_SYMBOL(sdio_reset_comm);
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 546c67c2bbbf..c012cf59b7d6 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -25,6 +25,10 @@
#include "sdio_cis.h"
#include "sdio_bus.h"
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+#include <linux/mmc/host.h>
+#endif
+
/* show configuration fields */
#define sdio_config_attr(field, format_string) \
static ssize_t \
@@ -270,7 +274,14 @@ static void sdio_release_func(struct device *dev)
{
struct sdio_func *func = dev_to_sdio_func(dev);
- sdio_free_func_cis(func);
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ /*
+ * If this device is embedded then we never allocated
+ * cis tables for this func
+ */
+ if (!func->card->host->embedded_sdio_data.funcs)
+#endif
+ sdio_free_func_cis(func);
kfree(func->info);
diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index 78cb4d5d9d58..8fdeb07723a6 100644..100755
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -384,6 +384,39 @@ u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret)
EXPORT_SYMBOL_GPL(sdio_readb);
/**
+ * sdio_readb_ext - read a single byte from a SDIO function
+ * @func: SDIO function to access
+ * @addr: address to read
+ * @err_ret: optional status value from transfer
+ * @in: value to add to argument
+ *
+ * Reads a single byte from the address space of a given SDIO
+ * function. If there is a problem reading the address, 0xff
+ * is returned and @err_ret will contain the error code.
+ */
+unsigned char sdio_readb_ext(struct sdio_func *func, unsigned int addr,
+ int *err_ret, unsigned in)
+{
+ int ret;
+ unsigned char val;
+
+ BUG_ON(!func);
+
+ if (err_ret)
+ *err_ret = 0;
+
+ ret = mmc_io_rw_direct(func->card, 0, func->num, addr, (u8)in, &val);
+ if (ret) {
+ if (err_ret)
+ *err_ret = ret;
+ return 0xFF;
+ }
+
+ return val;
+}
+EXPORT_SYMBOL_GPL(sdio_readb_ext);
+
+/**
* sdio_writeb - write a single byte to a SDIO function
* @func: SDIO function to access
* @b: byte to write
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 50543f166215..f2ab08c2c5fb 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -1,3 +1,10 @@
+config MTD_NAND_IDS
+ tristate "Include chip ids for known NAND devices."
+ depends on MTD
+ help
+ Useful for NAND drivers that do not use the NAND subsystem but
+ would still like to take advantage of the known chip information.
+
config MTD_NAND_ECC
tristate
@@ -133,9 +140,6 @@ config BCH_CONST_T
default 8 if MTD_NAND_OMAP_BCH8
endif
-config MTD_NAND_IDS
- tristate
-
config MTD_NAND_RICOH
tristate "Ricoh xD card reader"
default n
diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig
index 1373c6d7278d..282aec4860eb 100644
--- a/drivers/net/ppp/Kconfig
+++ b/drivers/net/ppp/Kconfig
@@ -149,6 +149,23 @@ config PPPOL2TP
tunnels. L2TP is replacing PPTP for VPN uses.
if TTY
+config PPPOLAC
+ tristate "PPP on L2TP Access Concentrator"
+ depends on PPP && INET
+ help
+ L2TP (RFC 2661) is a tunneling protocol widely used in virtual private
+ networks. This driver handles L2TP data packets between a UDP socket
+ and a PPP channel, but only permits one session per socket. Thus it is
+ fairly simple and suited for clients.
+
+config PPPOPNS
+ tristate "PPP on PPTP Network Server"
+ depends on PPP && INET
+ help
+ PPTP (RFC 2637) is a tunneling protocol widely used in virtual private
+ networks. This driver handles PPTP data packets between a RAW socket
+ and a PPP channel. It is fairly simple and easy to use.
+
config PPP_ASYNC
tristate "PPP support for async serial ports"
depends on PPP
diff --git a/drivers/net/ppp/Makefile b/drivers/net/ppp/Makefile
index a6b6297b0066..d283d03c4683 100644
--- a/drivers/net/ppp/Makefile
+++ b/drivers/net/ppp/Makefile
@@ -11,3 +11,5 @@ obj-$(CONFIG_PPP_SYNC_TTY) += ppp_synctty.o
obj-$(CONFIG_PPPOE) += pppox.o pppoe.o
obj-$(CONFIG_PPPOL2TP) += pppox.o
obj-$(CONFIG_PPTP) += pppox.o pptp.o
+obj-$(CONFIG_PPPOLAC) += pppox.o pppolac.o
+obj-$(CONFIG_PPPOPNS) += pppox.o pppopns.o
diff --git a/drivers/net/ppp/pppolac.c b/drivers/net/ppp/pppolac.c
new file mode 100644
index 000000000000..a5d3d634fd9a
--- /dev/null
+++ b/drivers/net/ppp/pppolac.c
@@ -0,0 +1,449 @@
+/* drivers/net/pppolac.c
+ *
+ * Driver for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/* This driver handles L2TP data packets between a UDP socket and a PPP channel.
+ * The socket must stay connected, and only one session per socket is permitted.
+ * Sequencing of outgoing packets is controlled by LNS. Incoming packets with
+ * sequences are reordered within a sliding window of one second. Currently
+ * reordering only happens when a packet is received. It is done for simplicity
+ * since no additional locks or threads are required. This driver only works on
+ * IPv4 due to the lack of UDP encapsulation support in IPv6. */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/udp.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_channel.h>
+#include <net/tcp_states.h>
+#include <asm/uaccess.h>
+
+#define L2TP_CONTROL_BIT 0x80
+#define L2TP_LENGTH_BIT 0x40
+#define L2TP_SEQUENCE_BIT 0x08
+#define L2TP_OFFSET_BIT 0x02
+#define L2TP_VERSION 0x02
+#define L2TP_VERSION_MASK 0x0F
+
+#define PPP_ADDR 0xFF
+#define PPP_CTRL 0x03
+
+union unaligned {
+ __u32 u32;
+} __attribute__((packed));
+
+static inline union unaligned *unaligned(void *ptr)
+{
+ return (union unaligned *)ptr;
+}
+
+struct meta {
+ __u32 sequence;
+ __u32 timestamp;
+};
+
+static inline struct meta *skb_meta(struct sk_buff *skb)
+{
+ return (struct meta *)skb->cb;
+}
+
+/******************************************************************************/
+
+static int pppolac_recv_core(struct sock *sk_udp, struct sk_buff *skb)
+{
+ struct sock *sk = (struct sock *)sk_udp->sk_user_data;
+ struct pppolac_opt *opt = &pppox_sk(sk)->proto.lac;
+ struct meta *meta = skb_meta(skb);
+ __u32 now = jiffies;
+ __u8 bits;
+ __u8 *ptr;
+
+ /* Drop the packet if L2TP header is missing. */
+ if (skb->len < sizeof(struct udphdr) + 6)
+ goto drop;
+
+ /* Put it back if it is a control packet. */
+ if (skb->data[sizeof(struct udphdr)] & L2TP_CONTROL_BIT)
+ return opt->backlog_rcv(sk_udp, skb);
+
+ /* Skip UDP header. */
+ skb_pull(skb, sizeof(struct udphdr));
+
+ /* Check the version. */
+ if ((skb->data[1] & L2TP_VERSION_MASK) != L2TP_VERSION)
+ goto drop;
+ bits = skb->data[0];
+ ptr = &skb->data[2];
+
+ /* Check the length if it is present. */
+ if (bits & L2TP_LENGTH_BIT) {
+ if ((ptr[0] << 8 | ptr[1]) != skb->len)
+ goto drop;
+ ptr += 2;
+ }
+
+ /* Skip all fields including optional ones. */
+ if (!skb_pull(skb, 6 + (bits & L2TP_SEQUENCE_BIT ? 4 : 0) +
+ (bits & L2TP_LENGTH_BIT ? 2 : 0) +
+ (bits & L2TP_OFFSET_BIT ? 2 : 0)))
+ goto drop;
+
+ /* Skip the offset padding if it is present. */
+ if (bits & L2TP_OFFSET_BIT &&
+ !skb_pull(skb, skb->data[-2] << 8 | skb->data[-1]))
+ goto drop;
+
+ /* Check the tunnel and the session. */
+ if (unaligned(ptr)->u32 != opt->local)
+ goto drop;
+
+ /* Check the sequence if it is present. */
+ if (bits & L2TP_SEQUENCE_BIT) {
+ meta->sequence = ptr[4] << 8 | ptr[5];
+ if ((__s16)(meta->sequence - opt->recv_sequence) < 0)
+ goto drop;
+ }
+
+ /* Skip PPP address and control if they are present. */
+ if (skb->len >= 2 && skb->data[0] == PPP_ADDR &&
+ skb->data[1] == PPP_CTRL)
+ skb_pull(skb, 2);
+
+ /* Fix PPP protocol if it is compressed. */
+ if (skb->len >= 1 && skb->data[0] & 1)
+ skb_push(skb, 1)[0] = 0;
+
+ /* Drop the packet if PPP protocol is missing. */
+ if (skb->len < 2)
+ goto drop;
+
+ /* Perform reordering if sequencing is enabled. */
+ atomic_set(&opt->sequencing, bits & L2TP_SEQUENCE_BIT);
+ if (bits & L2TP_SEQUENCE_BIT) {
+ struct sk_buff *skb1;
+
+ /* Insert the packet into receive queue in order. */
+ skb_set_owner_r(skb, sk);
+ skb_queue_walk(&sk->sk_receive_queue, skb1) {
+ struct meta *meta1 = skb_meta(skb1);
+ __s16 order = meta->sequence - meta1->sequence;
+ if (order == 0)
+ goto drop;
+ if (order < 0) {
+ meta->timestamp = meta1->timestamp;
+ skb_insert(skb1, skb, &sk->sk_receive_queue);
+ skb = NULL;
+ break;
+ }
+ }
+ if (skb) {
+ meta->timestamp = now;
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ }
+
+ /* Remove packets from receive queue as long as
+ * 1. the receive buffer is full,
+ * 2. they are queued longer than one second, or
+ * 3. there are no missing packets before them. */
+ skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) {
+ meta = skb_meta(skb);
+ if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+ now - meta->timestamp < HZ &&
+ meta->sequence != opt->recv_sequence)
+ break;
+ skb_unlink(skb, &sk->sk_receive_queue);
+ opt->recv_sequence = (__u16)(meta->sequence + 1);
+ skb_orphan(skb);
+ ppp_input(&pppox_sk(sk)->chan, skb);
+ }
+ return NET_RX_SUCCESS;
+ }
+
+ /* Flush receive queue if sequencing is disabled. */
+ skb_queue_purge(&sk->sk_receive_queue);
+ skb_orphan(skb);
+ ppp_input(&pppox_sk(sk)->chan, skb);
+ return NET_RX_SUCCESS;
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+static int pppolac_recv(struct sock *sk_udp, struct sk_buff *skb)
+{
+ sock_hold(sk_udp);
+ sk_receive_skb(sk_udp, skb, 0);
+ return 0;
+}
+
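+/* Outgoing packets are queued here and transmitted by a work item in
+ * process context, since sendmsg() on the UDP socket may sleep. */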
+static struct sk_buff_head delivery_queue;
+
+static void pppolac_xmit_core(struct work_struct *delivery_work)
+{
+ mm_segment_t old_fs = get_fs();
+ struct sk_buff *skb;
+
+ set_fs(KERNEL_DS);
+ while ((skb = skb_dequeue(&delivery_queue))) {
+ struct sock *sk_udp = skb->sk;
+ struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len};
+ struct msghdr msg = {
+ .msg_iov = (struct iovec *)&iov,
+ .msg_iovlen = 1,
+ .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT,
+ };
+ sk_udp->sk_prot->sendmsg(NULL, sk_udp, &msg, skb->len);
+ kfree_skb(skb);
+ }
+ set_fs(old_fs);
+}
+
+static DECLARE_WORK(delivery_work, pppolac_xmit_core);
+
+static int pppolac_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+ struct sock *sk_udp = (struct sock *)chan->private;
+ struct pppolac_opt *opt = &pppox_sk(sk_udp->sk_user_data)->proto.lac;
+
+ /* Install PPP address and control. */
+ skb_push(skb, 2);
+ skb->data[0] = PPP_ADDR;
+ skb->data[1] = PPP_CTRL;
+
+ /* Install L2TP header. */
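+ /* 6-byte fixed header, plus Ns and Nr (2 bytes each) when
+ * sequencing is enabled. */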
+ if (atomic_read(&opt->sequencing)) {
+ skb_push(skb, 10);
+ skb->data[0] = L2TP_SEQUENCE_BIT;
+ skb->data[6] = opt->xmit_sequence >> 8;
+ skb->data[7] = opt->xmit_sequence;
+ skb->data[8] = 0;
+ skb->data[9] = 0;
+ opt->xmit_sequence++;
+ } else {
+ skb_push(skb, 6);
+ skb->data[0] = 0;
+ }
+ skb->data[1] = L2TP_VERSION;
+ unaligned(&skb->data[2])->u32 = opt->remote;
+
+ /* Now send the packet via the delivery queue. */
+ skb_set_owner_w(skb, sk_udp);
+ skb_queue_tail(&delivery_queue, skb);
+ schedule_work(&delivery_work);
+ return 1;
+}
+
+/******************************************************************************/
+
+static struct ppp_channel_ops pppolac_channel_ops = {
+ .start_xmit = pppolac_xmit,
+};
+
+static int pppolac_connect(struct socket *sock, struct sockaddr *useraddr,
+ int addrlen, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct pppox_sock *po = pppox_sk(sk);
+ struct sockaddr_pppolac *addr = (struct sockaddr_pppolac *)useraddr;
+ struct socket *sock_udp = NULL;
+ struct sock *sk_udp;
+ int error;
+
+ if (addrlen != sizeof(struct sockaddr_pppolac) ||
+ !addr->local.tunnel || !addr->local.session ||
+ !addr->remote.tunnel || !addr->remote.session) {
+ return -EINVAL;
+ }
+
+ lock_sock(sk);
+ error = -EALREADY;
+ if (sk->sk_state != PPPOX_NONE)
+ goto out;
+
+ sock_udp = sockfd_lookup(addr->udp_socket, &error);
+ if (!sock_udp)
+ goto out;
+ sk_udp = sock_udp->sk;
+ lock_sock(sk_udp);
+
+ /* Remove this check when IPv6 supports UDP encapsulation. */
+ error = -EAFNOSUPPORT;
+ if (sk_udp->sk_family != AF_INET)
+ goto out;
+ error = -EPROTONOSUPPORT;
+ if (sk_udp->sk_protocol != IPPROTO_UDP)
+ goto out;
+ error = -EDESTADDRREQ;
+ if (sk_udp->sk_state != TCP_ESTABLISHED)
+ goto out;
+ error = -EBUSY;
+ if (udp_sk(sk_udp)->encap_type || sk_udp->sk_user_data)
+ goto out;
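+ /* Pin the tunnel to the device currently used to reach the peer. */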
+ if (!sk_udp->sk_bound_dev_if) {
+ struct dst_entry *dst = sk_dst_get(sk_udp);
+ error = -ENODEV;
+ if (!dst)
+ goto out;
+ sk_udp->sk_bound_dev_if = dst->dev->ifindex;
+ dst_release(dst);
+ }
+
+ po->chan.hdrlen = 12;
+ po->chan.private = sk_udp;
+ po->chan.ops = &pppolac_channel_ops;
+ po->chan.mtu = PPP_MRU - 80;
+ po->proto.lac.local = unaligned(&addr->local)->u32;
+ po->proto.lac.remote = unaligned(&addr->remote)->u32;
+ atomic_set(&po->proto.lac.sequencing, 1);
+ po->proto.lac.backlog_rcv = sk_udp->sk_backlog_rcv;
+
+ error = ppp_register_channel(&po->chan);
+ if (error)
+ goto out;
+
+ sk->sk_state = PPPOX_CONNECTED;
+ udp_sk(sk_udp)->encap_type = UDP_ENCAP_L2TPINUDP;
+ udp_sk(sk_udp)->encap_rcv = pppolac_recv;
+ sk_udp->sk_backlog_rcv = pppolac_recv_core;
+ sk_udp->sk_user_data = sk;
+out:
+ if (sock_udp) {
+ release_sock(sk_udp);
+ if (error)
+ sockfd_put(sock_udp);
+ }
+ release_sock(sk);
+ return error;
+}
+
+static int pppolac_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 0;
+
+ lock_sock(sk);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ release_sock(sk);
+ return -EBADF;
+ }
+
+ if (sk->sk_state != PPPOX_NONE) {
+ struct sock *sk_udp = (struct sock *)pppox_sk(sk)->chan.private;
+ lock_sock(sk_udp);
+ skb_queue_purge(&sk->sk_receive_queue);
+ pppox_unbind_sock(sk);
+ udp_sk(sk_udp)->encap_type = 0;
+ udp_sk(sk_udp)->encap_rcv = NULL;
+ sk_udp->sk_backlog_rcv = pppox_sk(sk)->proto.lac.backlog_rcv;
+ sk_udp->sk_user_data = NULL;
+ release_sock(sk_udp);
+ sockfd_put(sk_udp->sk_socket);
+ }
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+ release_sock(sk);
+ sock_put(sk);
+ return 0;
+}
+
+/******************************************************************************/
+
+static struct proto pppolac_proto = {
+ .name = "PPPOLAC",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct pppox_sock),
+};
+
+static struct proto_ops pppolac_proto_ops = {
+ .family = PF_PPPOX,
+ .owner = THIS_MODULE,
+ .release = pppolac_release,
+ .bind = sock_no_bind,
+ .connect = pppolac_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = sock_no_poll,
+ .ioctl = pppox_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = sock_no_recvmsg,
+ .mmap = sock_no_mmap,
+};
+
+static int pppolac_create(struct net *net, struct socket *sock)
+{
+ struct sock *sk;
+
+ sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppolac_proto);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+ sock->state = SS_UNCONNECTED;
+ sock->ops = &pppolac_proto_ops;
+ sk->sk_protocol = PX_PROTO_OLAC;
+ sk->sk_state = PPPOX_NONE;
+ return 0;
+}
+
+/******************************************************************************/
+
+static struct pppox_proto pppolac_pppox_proto = {
+ .create = pppolac_create,
+ .owner = THIS_MODULE,
+};
+
+static int __init pppolac_init(void)
+{
+ int error;
+
+ error = proto_register(&pppolac_proto, 0);
+ if (error)
+ return error;
+
+ error = register_pppox_proto(PX_PROTO_OLAC, &pppolac_pppox_proto);
+ if (error)
+ proto_unregister(&pppolac_proto);
+ else
+ skb_queue_head_init(&delivery_queue);
+ return error;
+}
+
+static void __exit pppolac_exit(void)
+{
+ unregister_pppox_proto(PX_PROTO_OLAC);
+ proto_unregister(&pppolac_proto);
+}
+
+module_init(pppolac_init);
+module_exit(pppolac_exit);
+
+MODULE_DESCRIPTION("PPP on L2TP Access Concentrator (PPPoLAC)");
+MODULE_AUTHOR("Chia-chi Yeh <chiachi@android.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ppp/pppopns.c b/drivers/net/ppp/pppopns.c
new file mode 100644
index 000000000000..6016d29c0660
--- /dev/null
+++ b/drivers/net/ppp/pppopns.c
@@ -0,0 +1,428 @@
+/* drivers/net/pppopns.c
+ *
+ * Driver for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/* This driver handles PPTP data packets between a RAW socket and a PPP channel.
+ * The socket is created in kernel space and connected to the same address as
+ * the control socket. Outgoing packets are always sent with sequences but
+ * without acknowledgements. Incoming packets with sequences are reordered
+ * within a sliding window of one second. Currently reordering only happens
+ * when a packet is received. It is done for simplicity since no additional
+ * locks or threads are required. This driver should work on both IPv4 and
+ * IPv6. */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/ppp_defs.h>
+#include <linux/if.h>
+#include <linux/if_ppp.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_channel.h>
+#include <asm/uaccess.h>
+
+#define GRE_HEADER_SIZE 8
+
+#define PPTP_GRE_BITS htons(0x2001)
+#define PPTP_GRE_BITS_MASK htons(0xEF7F)
+#define PPTP_GRE_SEQ_BIT htons(0x1000)
+#define PPTP_GRE_ACK_BIT htons(0x0080)
+#define PPTP_GRE_TYPE htons(0x880B)
+
+#define PPP_ADDR 0xFF
+#define PPP_CTRL 0x03
+
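+/* Enhanced GRE header used by PPTP (RFC 2637). The first eight bytes are
+ * always present; the sequence field is valid only when PPTP_GRE_SEQ_BIT
+ * is set. */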
+struct header {
+ __u16 bits;
+ __u16 type;
+ __u16 length;
+ __u16 call;
+ __u32 sequence;
+} __attribute__((packed));
+
+struct meta {
+ __u32 sequence;
+ __u32 timestamp;
+};
+
+static inline struct meta *skb_meta(struct sk_buff *skb)
+{
+ return (struct meta *)skb->cb;
+}
+
+/******************************************************************************/
+
+static int pppopns_recv_core(struct sock *sk_raw, struct sk_buff *skb)
+{
+ struct sock *sk = (struct sock *)sk_raw->sk_user_data;
+ struct pppopns_opt *opt = &pppox_sk(sk)->proto.pns;
+ struct meta *meta = skb_meta(skb);
+ __u32 now = jiffies;
+ struct header *hdr;
+
+ /* Skip transport header */
+ skb_pull(skb, skb_transport_header(skb) - skb->data);
+
+ /* Drop the packet if GRE header is missing. */
+ if (skb->len < GRE_HEADER_SIZE)
+ goto drop;
+ hdr = (struct header *)skb->data;
+
+ /* Check the header. */
+ if (hdr->type != PPTP_GRE_TYPE || hdr->call != opt->local ||
+ (hdr->bits & PPTP_GRE_BITS_MASK) != PPTP_GRE_BITS)
+ goto drop;
+
+ /* Skip all fields including optional ones. */
+ if (!skb_pull(skb, GRE_HEADER_SIZE +
+ (hdr->bits & PPTP_GRE_SEQ_BIT ? 4 : 0) +
+ (hdr->bits & PPTP_GRE_ACK_BIT ? 4 : 0)))
+ goto drop;
+
+ /* Check the length. */
+ if (skb->len != ntohs(hdr->length))
+ goto drop;
+
+ /* Check the sequence if it is present. */
+ if (hdr->bits & PPTP_GRE_SEQ_BIT) {
+ meta->sequence = ntohl(hdr->sequence);
+ if ((__s32)(meta->sequence - opt->recv_sequence) < 0)
+ goto drop;
+ }
+
+ /* Skip PPP address and control if they are present. */
+ if (skb->len >= 2 && skb->data[0] == PPP_ADDR &&
+ skb->data[1] == PPP_CTRL)
+ skb_pull(skb, 2);
+
+ /* Fix PPP protocol if it is compressed. */
+ if (skb->len >= 1 && skb->data[0] & 1)
+ skb_push(skb, 1)[0] = 0;
+
+ /* Drop the packet if PPP protocol is missing. */
+ if (skb->len < 2)
+ goto drop;
+
+ /* Perform reordering if sequencing is enabled. */
+ if (hdr->bits & PPTP_GRE_SEQ_BIT) {
+ struct sk_buff *skb1;
+
+ /* Insert the packet into receive queue in order. */
+ skb_set_owner_r(skb, sk);
+ skb_queue_walk(&sk->sk_receive_queue, skb1) {
+ struct meta *meta1 = skb_meta(skb1);
+ __s32 order = meta->sequence - meta1->sequence;
+ if (order == 0)
+ goto drop;
+ if (order < 0) {
+ meta->timestamp = meta1->timestamp;
+ skb_insert(skb1, skb, &sk->sk_receive_queue);
+ skb = NULL;
+ break;
+ }
+ }
+ if (skb) {
+ meta->timestamp = now;
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ }
+
+ /* Remove packets from receive queue as long as
+ * 1. the receive buffer is full,
+ * 2. they are queued longer than one second, or
+ * 3. there are no missing packets before them. */
+ skb_queue_walk_safe(&sk->sk_receive_queue, skb, skb1) {
+ meta = skb_meta(skb);
+ if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
+ now - meta->timestamp < HZ &&
+ meta->sequence != opt->recv_sequence)
+ break;
+ skb_unlink(skb, &sk->sk_receive_queue);
+ opt->recv_sequence = meta->sequence + 1;
+ skb_orphan(skb);
+ ppp_input(&pppox_sk(sk)->chan, skb);
+ }
+ return NET_RX_SUCCESS;
+ }
+
+ /* Flush receive queue if sequencing is disabled. */
+ skb_queue_purge(&sk->sk_receive_queue);
+ skb_orphan(skb);
+ ppp_input(&pppox_sk(sk)->chan, skb);
+ return NET_RX_SUCCESS;
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+static void pppopns_recv(struct sock *sk_raw, int length)
+{
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue(&sk_raw->sk_receive_queue))) {
+ sock_hold(sk_raw);
+ sk_receive_skb(sk_raw, skb, 0);
+ }
+}
+
+static struct sk_buff_head delivery_queue;
+
+static void pppopns_xmit_core(struct work_struct *delivery_work)
+{
+ mm_segment_t old_fs = get_fs();
+ struct sk_buff *skb;
+
+ set_fs(KERNEL_DS);
+ while ((skb = skb_dequeue(&delivery_queue))) {
+ struct sock *sk_raw = skb->sk;
+ struct kvec iov = {.iov_base = skb->data, .iov_len = skb->len};
+ struct msghdr msg = {
+ .msg_iov = (struct iovec *)&iov,
+ .msg_iovlen = 1,
+ .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT,
+ };
+ sk_raw->sk_prot->sendmsg(NULL, sk_raw, &msg, skb->len);
+ kfree_skb(skb);
+ }
+ set_fs(old_fs);
+}
+
+static DECLARE_WORK(delivery_work, pppopns_xmit_core);
+
+static int pppopns_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+ struct sock *sk_raw = (struct sock *)chan->private;
+ struct pppopns_opt *opt = &pppox_sk(sk_raw->sk_user_data)->proto.pns;
+ struct header *hdr;
+ __u16 length;
+
+ /* Install PPP address and control. */
+ skb_push(skb, 2);
+ skb->data[0] = PPP_ADDR;
+ skb->data[1] = PPP_CTRL;
+ length = skb->len;
+
+ /* Install PPTP GRE header. */
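+ /* 8-byte GRE header plus a 4-byte sequence number. */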
+ hdr = (struct header *)skb_push(skb, 12);
+ hdr->bits = PPTP_GRE_BITS | PPTP_GRE_SEQ_BIT;
+ hdr->type = PPTP_GRE_TYPE;
+ hdr->length = htons(length);
+ hdr->call = opt->remote;
+ hdr->sequence = htonl(opt->xmit_sequence);
+ opt->xmit_sequence++;
+
+ /* Now send the packet via the delivery queue. */
+ skb_set_owner_w(skb, sk_raw);
+ skb_queue_tail(&delivery_queue, skb);
+ schedule_work(&delivery_work);
+ return 1;
+}
+
+/******************************************************************************/
+
+static struct ppp_channel_ops pppopns_channel_ops = {
+ .start_xmit = pppopns_xmit,
+};
+
+static int pppopns_connect(struct socket *sock, struct sockaddr *useraddr,
+ int addrlen, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct pppox_sock *po = pppox_sk(sk);
+ struct sockaddr_pppopns *addr = (struct sockaddr_pppopns *)useraddr;
+ struct sockaddr_storage ss;
+ struct socket *sock_tcp = NULL;
+ struct socket *sock_raw = NULL;
+ struct sock *sk_tcp;
+ struct sock *sk_raw;
+ int error;
+
+ if (addrlen != sizeof(struct sockaddr_pppopns))
+ return -EINVAL;
+
+ lock_sock(sk);
+ error = -EALREADY;
+ if (sk->sk_state != PPPOX_NONE)
+ goto out;
+
+ sock_tcp = sockfd_lookup(addr->tcp_socket, &error);
+ if (!sock_tcp)
+ goto out;
+ sk_tcp = sock_tcp->sk;
+ error = -EPROTONOSUPPORT;
+ if (sk_tcp->sk_protocol != IPPROTO_TCP)
+ goto out;
+ addrlen = sizeof(struct sockaddr_storage);
+ error = kernel_getpeername(sock_tcp, (struct sockaddr *)&ss, &addrlen);
+ if (error)
+ goto out;
+ if (!sk_tcp->sk_bound_dev_if) {
+ struct dst_entry *dst = sk_dst_get(sk_tcp);
+ error = -ENODEV;
+ if (!dst)
+ goto out;
+ sk_tcp->sk_bound_dev_if = dst->dev->ifindex;
+ dst_release(dst);
+ }
+
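+ /* Create a raw GRE socket on the same device as the control
+ * connection and connect it to the PPTP peer's address. */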
+ error = sock_create(ss.ss_family, SOCK_RAW, IPPROTO_GRE, &sock_raw);
+ if (error)
+ goto out;
+ sk_raw = sock_raw->sk;
+ sk_raw->sk_bound_dev_if = sk_tcp->sk_bound_dev_if;
+ error = kernel_connect(sock_raw, (struct sockaddr *)&ss, addrlen, 0);
+ if (error)
+ goto out;
+
+ po->chan.hdrlen = 14;
+ po->chan.private = sk_raw;
+ po->chan.ops = &pppopns_channel_ops;
+ po->chan.mtu = PPP_MRU - 80;
+ po->proto.pns.local = addr->local;
+ po->proto.pns.remote = addr->remote;
+ po->proto.pns.data_ready = sk_raw->sk_data_ready;
+ po->proto.pns.backlog_rcv = sk_raw->sk_backlog_rcv;
+
+ error = ppp_register_channel(&po->chan);
+ if (error)
+ goto out;
+
+ sk->sk_state = PPPOX_CONNECTED;
+ lock_sock(sk_raw);
+ sk_raw->sk_data_ready = pppopns_recv;
+ sk_raw->sk_backlog_rcv = pppopns_recv_core;
+ sk_raw->sk_user_data = sk;
+ release_sock(sk_raw);
+out:
+ if (sock_tcp)
+ sockfd_put(sock_tcp);
+ if (error && sock_raw)
+ sock_release(sock_raw);
+ release_sock(sk);
+ return error;
+}
+
+static int pppopns_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 0;
+
+ lock_sock(sk);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ release_sock(sk);
+ return -EBADF;
+ }
+
+ if (sk->sk_state != PPPOX_NONE) {
+ struct sock *sk_raw = (struct sock *)pppox_sk(sk)->chan.private;
+ lock_sock(sk_raw);
+ skb_queue_purge(&sk->sk_receive_queue);
+ pppox_unbind_sock(sk);
+ sk_raw->sk_data_ready = pppox_sk(sk)->proto.pns.data_ready;
+ sk_raw->sk_backlog_rcv = pppox_sk(sk)->proto.pns.backlog_rcv;
+ sk_raw->sk_user_data = NULL;
+ release_sock(sk_raw);
+ sock_release(sk_raw->sk_socket);
+ }
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+ release_sock(sk);
+ sock_put(sk);
+ return 0;
+}
+
+/******************************************************************************/
+
+static struct proto pppopns_proto = {
+ .name = "PPPOPNS",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct pppox_sock),
+};
+
+static struct proto_ops pppopns_proto_ops = {
+ .family = PF_PPPOX,
+ .owner = THIS_MODULE,
+ .release = pppopns_release,
+ .bind = sock_no_bind,
+ .connect = pppopns_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = sock_no_poll,
+ .ioctl = pppox_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = sock_no_recvmsg,
+ .mmap = sock_no_mmap,
+};
+
+static int pppopns_create(struct net *net, struct socket *sock)
+{
+ struct sock *sk;
+
+ sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppopns_proto);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+ sock->state = SS_UNCONNECTED;
+ sock->ops = &pppopns_proto_ops;
+ sk->sk_protocol = PX_PROTO_OPNS;
+ sk->sk_state = PPPOX_NONE;
+ return 0;
+}
+
+/******************************************************************************/
+
+static struct pppox_proto pppopns_pppox_proto = {
+ .create = pppopns_create,
+ .owner = THIS_MODULE,
+};
+
+static int __init pppopns_init(void)
+{
+ int error;
+
+ error = proto_register(&pppopns_proto, 0);
+ if (error)
+ return error;
+
+ error = register_pppox_proto(PX_PROTO_OPNS, &pppopns_pppox_proto);
+ if (error)
+ proto_unregister(&pppopns_proto);
+ else
+ skb_queue_head_init(&delivery_queue);
+ return error;
+}
+
+static void __exit pppopns_exit(void)
+{
+ unregister_pppox_proto(PX_PROTO_OPNS);
+ proto_unregister(&pppopns_proto);
+}
+
+module_init(pppopns_init);
+module_exit(pppopns_exit);
+
+MODULE_DESCRIPTION("PPP on PPTP Network Server (PPPoPNS)");
+MODULE_AUTHOR("Chia-chi Yeh <chiachi@android.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 582497103fe8..aeff706f1634 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1898,6 +1898,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
int vnet_hdr_sz;
int ret;
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+ if (cmd != TUNGETIFF && !capable(CAP_NET_ADMIN)) {
+ return -EPERM;
+ }
+#endif
+
if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
if (copy_from_user(&ifr, argp, ifreq_len))
return -EFAULT;
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index f8f0156dff4e..5b0a49cf1dea 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -264,6 +264,11 @@ config MWL8K
To compile this driver as a module, choose M here: the module
will be called mwl8k. If unsure, say N.
+config WIFI_CONTROL_FUNC
+ bool "Enable WiFi control function abstraction"
+ help
+ Enables Power/Reset/Carddetect function abstraction
+
source "drivers/net/wireless/ath/Kconfig"
source "drivers/net/wireless/b43/Kconfig"
source "drivers/net/wireless/b43legacy/Kconfig"
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 42fcf572ebfe..137c31f5bb05 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -703,36 +703,66 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
return 0;
}
+/*
+ * Convert configs to something easy to use in C code
+ */
+#if defined(CONFIG_CMDLINE_FORCE)
+static const int overwrite_incoming_cmdline = 1;
+static const int read_dt_cmdline;
+static const int concat_cmdline;
+#elif defined(CONFIG_CMDLINE_EXTEND)
+static const int overwrite_incoming_cmdline;
+static const int read_dt_cmdline = 1;
+static const int concat_cmdline = 1;
+#else /* CMDLINE_FROM_BOOTLOADER */
+static const int overwrite_incoming_cmdline;
+static const int read_dt_cmdline = 1;
+static const int concat_cmdline;
+#endif
+
+#ifdef CONFIG_CMDLINE
+static const char *config_cmdline = CONFIG_CMDLINE;
+#else
+static const char *config_cmdline = "";
+#endif
+
int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
int depth, void *data)
{
int l;
const char *p;
+ char *cmdline = data;
pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
- if (depth != 1 || !data ||
+ if (depth != 1 || !cmdline ||
(strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
return 0;
early_init_dt_check_for_initrd(node);
- /* Retrieve command line */
- p = of_get_flat_dt_prop(node, "bootargs", &l);
- if (p != NULL && l > 0)
- strlcpy(data, p, min((int)l, COMMAND_LINE_SIZE));
-
- /*
- * CONFIG_CMDLINE is meant to be a default in case nothing else
- * managed to set the command line, unless CONFIG_CMDLINE_FORCE
- * is set in which case we override whatever was found earlier.
- */
-#ifdef CONFIG_CMDLINE
-#ifndef CONFIG_CMDLINE_FORCE
- if (!((char *)data)[0])
-#endif
- strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif /* CONFIG_CMDLINE */
+ /* Put CONFIG_CMDLINE in if forced or if data had nothing in it to start */
+ if (overwrite_incoming_cmdline || !cmdline[0])
+ strlcpy(cmdline, config_cmdline, COMMAND_LINE_SIZE);
+
+ /* Retrieve command line unless forcing */
+ if (read_dt_cmdline)
+ p = of_get_flat_dt_prop(node, "bootargs", &l);
+
+ if (p != NULL && l > 0) {
+ if (concat_cmdline) {
+ int cmdline_len;
+ int copy_len;
+ strlcat(cmdline, " ", COMMAND_LINE_SIZE);
+ cmdline_len = strlen(cmdline);
+ copy_len = COMMAND_LINE_SIZE - cmdline_len - 1;
+ copy_len = min((int)l, copy_len);
+ strncpy(cmdline + cmdline_len, p, copy_len);
+ cmdline[cmdline_len + copy_len] = '\0';
+ } else {
+ strlcpy(cmdline, p, min((int)l, COMMAND_LINE_SIZE));
+ }
+ }
pr_debug("Command line is: %s\n", (char*)data);
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 41bff21a0829..715b4bab2943 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -216,6 +216,8 @@ struct platform_device *of_platform_device_create_pdata(
dev->archdata.dma_mask = 0xffffffffUL;
#endif
dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+ if (!dev->dev.dma_mask)
+ dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
dev->dev.bus = &platform_bus_type;
dev->dev.platform_data = platform_data;
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index f94467c05225..7f21f75baa59 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -330,7 +330,8 @@ static int acerhdf_bind(struct thermal_zone_device *thermal,
return 0;
if (thermal_zone_bind_cooling_device(thermal, 0, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT)) {
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT)) {
pr_err("error binding cooling dev\n");
return -EINVAL;
}
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 1c517c34e4be..082d3c2714e9 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -67,23 +67,40 @@ static int __power_supply_changed_work(struct device *dev, void *data)
static void power_supply_changed_work(struct work_struct *work)
{
+ unsigned long flags;
struct power_supply *psy = container_of(work, struct power_supply,
changed_work);
dev_dbg(psy->dev, "%s\n", __func__);
- class_for_each_device(power_supply_class, NULL, psy,
- __power_supply_changed_work);
+ spin_lock_irqsave(&psy->changed_lock, flags);
+ if (psy->changed) {
+ psy->changed = false;
+ spin_unlock_irqrestore(&psy->changed_lock, flags);
- power_supply_update_leds(psy);
+ class_for_each_device(power_supply_class, NULL, psy,
+ __power_supply_changed_work);
- kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
+ power_supply_update_leds(psy);
+
+ kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
+ spin_lock_irqsave(&psy->changed_lock, flags);
+ }
+ if (!psy->changed)
+ pm_relax(psy->dev);
+ spin_unlock_irqrestore(&psy->changed_lock, flags);
}
void power_supply_changed(struct power_supply *psy)
{
+ unsigned long flags;
+
dev_dbg(psy->dev, "%s\n", __func__);
+ spin_lock_irqsave(&psy->changed_lock, flags);
+ psy->changed = true;
+ pm_stay_awake(psy->dev);
+ spin_unlock_irqrestore(&psy->changed_lock, flags);
schedule_work(&psy->changed_work);
}
EXPORT_SYMBOL_GPL(power_supply_changed);
@@ -504,6 +521,11 @@ int power_supply_register(struct device *parent, struct power_supply *psy)
if (rc)
goto device_add_failed;
+ spin_lock_init(&psy->changed_lock);
+ rc = device_init_wakeup(dev, true);
+ if (rc)
+ goto wakeup_init_failed;
+
rc = psy_register_thermal(psy);
if (rc)
goto register_thermal_failed;
@@ -525,6 +547,7 @@ create_triggers_failed:
register_cooler_failed:
psy_unregister_thermal(psy);
register_thermal_failed:
+wakeup_init_failed:
device_del(dev);
kobject_set_name_failed:
device_add_failed:
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 29178f78d73c..1f7d79b03725 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -105,7 +105,10 @@ static ssize_t power_supply_show_property(struct device *dev,
else if (off >= POWER_SUPPLY_PROP_MODEL_NAME)
return sprintf(buf, "%s\n", value.strval);
- return sprintf(buf, "%d\n", value.intval);
+ if (off == POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT)
+ return sprintf(buf, "%lld\n", value.int64val);
+ else
+ return sprintf(buf, "%d\n", value.intval);
}
static ssize_t power_supply_store_property(struct device *dev,
@@ -189,6 +192,12 @@ static struct device_attribute power_supply_attrs[] = {
POWER_SUPPLY_ATTR(time_to_full_avg),
POWER_SUPPLY_ATTR(type),
POWER_SUPPLY_ATTR(scope),
+ /* Local extensions */
+ POWER_SUPPLY_ATTR(usb_hc),
+ POWER_SUPPLY_ATTR(usb_otg),
+ POWER_SUPPLY_ATTR(charge_enabled),
+ /* Local extensions of type int64_t */
+ POWER_SUPPLY_ATTR(charge_counter_ext),
/* Properties of type `const char *' */
POWER_SUPPLY_ATTR(model_name),
POWER_SUPPLY_ATTR(manufacturer),
diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c
index 2bea4f0b684a..503594e5f76d 100644
--- a/drivers/scsi/scsi_trace.c
+++ b/drivers/scsi/scsi_trace.c
@@ -28,7 +28,7 @@ scsi_trace_misc(struct trace_seq *, unsigned char *, int);
static const char *
scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
sector_t lba = 0, txlen = 0;
lba |= ((cdb[1] & 0x1F) << 16);
@@ -46,7 +46,7 @@ scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
static const char *
scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
sector_t lba = 0, txlen = 0;
lba |= (cdb[2] << 24);
@@ -71,7 +71,7 @@ scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
static const char *
scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
sector_t lba = 0, txlen = 0;
lba |= (cdb[2] << 24);
@@ -94,7 +94,7 @@ scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
static const char *
scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
sector_t lba = 0, txlen = 0;
lba |= ((u64)cdb[2] << 56);
@@ -125,7 +125,7 @@ scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
static const char *
scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len, *cmd;
+ const char *ret = trace_seq_buffer_ptr(p), *cmd;
sector_t lba = 0, txlen = 0;
u32 ei_lbrt = 0;
@@ -180,7 +180,7 @@ out:
static const char *
scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
unsigned int regions = cdb[7] << 8 | cdb[8];
trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
@@ -192,7 +192,7 @@ scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
static const char *
scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len, *cmd;
+ const char *ret = trace_seq_buffer_ptr(p), *cmd;
sector_t lba = 0;
u32 alloc_len = 0;
@@ -247,7 +247,7 @@ scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
static const char *
scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
{
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
trace_seq_printf(p, "-");
trace_seq_putc(p, 0);
diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index c0c95be0f969..b3c054e4fd58 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -8,17 +8,6 @@ config ANDROID
if ANDROID
-config ANDROID_BINDER_IPC
- bool "Android Binder IPC Driver"
- default n
- ---help---
- Binder is used in Android for both communication between processes,
- and remote method invocation.
-
- This means one Android process can call a method/routine in another
- Android process, using Binder to identify, invoke and pass arguments
- between said processes.
-
config ASHMEM
bool "Enable the Anonymous Shared Memory Subsystem"
default n
@@ -31,23 +20,6 @@ config ASHMEM
It is, in theory, a good memory allocator for low-memory devices,
because it can discard shared memory units when under memory pressure.
-config ANDROID_LOGGER
- tristate "Android log driver"
- default n
- ---help---
- This adds support for system-wide logging using four log buffers.
-
- These are:
-
- 1: main
- 2: events
- 3: radio
- 4: system
-
- Log reading and writing is performed via normal Linux reads and
- optimized writes. This optimization avoids logging having too
- much overhead in the system.
-
config ANDROID_TIMED_OUTPUT
bool "Timed output class driver"
default y
@@ -63,14 +35,14 @@ config ANDROID_LOW_MEMORY_KILLER
---help---
Registers processes to be killed when memory is low
-config ANDROID_INTF_ALARM_DEV
- bool "Android alarm driver"
- depends on RTC_CLASS
- default n
+config ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
+ bool "Android Low Memory Killer: detect oom_adj values"
+ depends on ANDROID_LOW_MEMORY_KILLER
+ default y
---help---
- Provides non-wakeup and rtc backed wakeup alarms based on rtc or
- elapsed realtime, and a non-wakeup alarm on the monotonic clock.
- Also exports the alarm interface to user-space.
+ Detect oom_adj values written to
+ /sys/module/lowmemorykiller/parameters/adj and convert them
+ to oom_score_adj values.
config SYNC
bool "Synchronization framework"
@@ -99,6 +71,10 @@ config SW_SYNC_USER
*WARNING* improper use of this can result in deadlocking kernel
drivers from userspace.
+source "drivers/staging/android/ion/Kconfig"
+
+source "drivers/staging/android/fiq_debugger/Kconfig"
+
endif # if ANDROID
endmenu
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index c136299e05af..30ba2f0cb084 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -1,11 +1,11 @@
ccflags-y += -I$(src) # needed for trace events
-obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o
+obj-y += ion/
+obj-$(CONFIG_FIQ_DEBUGGER) += fiq_debugger/
+
obj-$(CONFIG_ASHMEM) += ashmem.o
-obj-$(CONFIG_ANDROID_LOGGER) += logger.o
obj-$(CONFIG_ANDROID_TIMED_OUTPUT) += timed_output.o
obj-$(CONFIG_ANDROID_TIMED_GPIO) += timed_gpio.o
obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER) += lowmemorykiller.o
-obj-$(CONFIG_ANDROID_INTF_ALARM_DEV) += alarm-dev.o
obj-$(CONFIG_SYNC) += sync.o
obj-$(CONFIG_SW_SYNC) += sw_sync.o
diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO
deleted file mode 100644
index b15fb0d6b152..000000000000
--- a/drivers/staging/android/TODO
+++ /dev/null
@@ -1,10 +0,0 @@
-TODO:
- - checkpatch.pl cleanups
- - sparse fixes
- - rename files to be not so "generic"
- - make sure things build as modules properly
- - add proper arch dependencies as needed
- - audit userspace interfaces to make sure they are sane
-
-Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc:
-Brian Swetland <swetland@google.com>
diff --git a/drivers/staging/android/alarm-dev.c b/drivers/staging/android/alarm-dev.c
deleted file mode 100644
index 6dc27dac679d..000000000000
--- a/drivers/staging/android/alarm-dev.c
+++ /dev/null
@@ -1,443 +0,0 @@
-/* drivers/rtc/alarm-dev.c
- *
- * Copyright (C) 2007-2009 Google, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/time.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/platform_device.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
-#include <linux/alarmtimer.h>
-#include "android_alarm.h"
-
-#define ANDROID_ALARM_PRINT_INFO (1U << 0)
-#define ANDROID_ALARM_PRINT_IO (1U << 1)
-#define ANDROID_ALARM_PRINT_INT (1U << 2)
-
-static int debug_mask = ANDROID_ALARM_PRINT_INFO;
-module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP);
-
-#define alarm_dbg(debug_level_mask, fmt, ...) \
-do { \
- if (debug_mask & ANDROID_ALARM_PRINT_##debug_level_mask) \
- pr_info(fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define ANDROID_ALARM_WAKEUP_MASK ( \
- ANDROID_ALARM_RTC_WAKEUP_MASK | \
- ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK)
-
-static int alarm_opened;
-static DEFINE_SPINLOCK(alarm_slock);
-static struct wakeup_source alarm_wake_lock;
-static DECLARE_WAIT_QUEUE_HEAD(alarm_wait_queue);
-static uint32_t alarm_pending;
-static uint32_t alarm_enabled;
-static uint32_t wait_pending;
-
-struct devalarm {
- union {
- struct hrtimer hrt;
- struct alarm alrm;
- } u;
- enum android_alarm_type type;
-};
-
-static struct devalarm alarms[ANDROID_ALARM_TYPE_COUNT];
-
-
-static int is_wakeup(enum android_alarm_type type)
-{
- return (type == ANDROID_ALARM_RTC_WAKEUP ||
- type == ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP);
-}
-
-
-static void devalarm_start(struct devalarm *alrm, ktime_t exp)
-{
- if (is_wakeup(alrm->type))
- alarm_start(&alrm->u.alrm, exp);
- else
- hrtimer_start(&alrm->u.hrt, exp, HRTIMER_MODE_ABS);
-}
-
-
-static int devalarm_try_to_cancel(struct devalarm *alrm)
-{
- if (is_wakeup(alrm->type))
- return alarm_try_to_cancel(&alrm->u.alrm);
- return hrtimer_try_to_cancel(&alrm->u.hrt);
-}
-
-static void devalarm_cancel(struct devalarm *alrm)
-{
- if (is_wakeup(alrm->type))
- alarm_cancel(&alrm->u.alrm);
- else
- hrtimer_cancel(&alrm->u.hrt);
-}
-
-static void alarm_clear(enum android_alarm_type alarm_type)
-{
- uint32_t alarm_type_mask = 1U << alarm_type;
- unsigned long flags;
-
- spin_lock_irqsave(&alarm_slock, flags);
- alarm_dbg(IO, "alarm %d clear\n", alarm_type);
- devalarm_try_to_cancel(&alarms[alarm_type]);
- if (alarm_pending) {
- alarm_pending &= ~alarm_type_mask;
- if (!alarm_pending && !wait_pending)
- __pm_relax(&alarm_wake_lock);
- }
- alarm_enabled &= ~alarm_type_mask;
- spin_unlock_irqrestore(&alarm_slock, flags);
-
-}
-
-static void alarm_set(enum android_alarm_type alarm_type,
- struct timespec *ts)
-{
- uint32_t alarm_type_mask = 1U << alarm_type;
- unsigned long flags;
-
- spin_lock_irqsave(&alarm_slock, flags);
- alarm_dbg(IO, "alarm %d set %ld.%09ld\n",
- alarm_type, ts->tv_sec, ts->tv_nsec);
- alarm_enabled |= alarm_type_mask;
- devalarm_start(&alarms[alarm_type], timespec_to_ktime(*ts));
- spin_unlock_irqrestore(&alarm_slock, flags);
-}
-
-static int alarm_wait(void)
-{
- unsigned long flags;
- int rv = 0;
-
- spin_lock_irqsave(&alarm_slock, flags);
- alarm_dbg(IO, "alarm wait\n");
- if (!alarm_pending && wait_pending) {
- __pm_relax(&alarm_wake_lock);
- wait_pending = 0;
- }
- spin_unlock_irqrestore(&alarm_slock, flags);
-
- rv = wait_event_interruptible(alarm_wait_queue, alarm_pending);
- if (rv)
- return rv;
-
- spin_lock_irqsave(&alarm_slock, flags);
- rv = alarm_pending;
- wait_pending = 1;
- alarm_pending = 0;
- spin_unlock_irqrestore(&alarm_slock, flags);
-
- return rv;
-}
-
-static int alarm_set_rtc(struct timespec *ts)
-{
- struct rtc_time new_rtc_tm;
- struct rtc_device *rtc_dev;
- unsigned long flags;
- int rv = 0;
-
- rtc_time_to_tm(ts->tv_sec, &new_rtc_tm);
- rtc_dev = alarmtimer_get_rtcdev();
- rv = do_settimeofday(ts);
- if (rv < 0)
- return rv;
- if (rtc_dev)
- rv = rtc_set_time(rtc_dev, &new_rtc_tm);
-
- spin_lock_irqsave(&alarm_slock, flags);
- alarm_pending |= ANDROID_ALARM_TIME_CHANGE_MASK;
- wake_up(&alarm_wait_queue);
- spin_unlock_irqrestore(&alarm_slock, flags);
-
- return rv;
-}
-
-static int alarm_get_time(enum android_alarm_type alarm_type,
- struct timespec *ts)
-{
- int rv = 0;
-
- switch (alarm_type) {
- case ANDROID_ALARM_RTC_WAKEUP:
- case ANDROID_ALARM_RTC:
- getnstimeofday(ts);
- break;
- case ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP:
- case ANDROID_ALARM_ELAPSED_REALTIME:
- get_monotonic_boottime(ts);
- break;
- case ANDROID_ALARM_SYSTEMTIME:
- ktime_get_ts(ts);
- break;
- default:
- rv = -EINVAL;
- }
- return rv;
-}
-
-static long alarm_do_ioctl(struct file *file, unsigned int cmd,
- struct timespec *ts)
-{
- int rv = 0;
- unsigned long flags;
- enum android_alarm_type alarm_type = ANDROID_ALARM_IOCTL_TO_TYPE(cmd);
-
- if (alarm_type >= ANDROID_ALARM_TYPE_COUNT)
- return -EINVAL;
-
- if (ANDROID_ALARM_BASE_CMD(cmd) != ANDROID_ALARM_GET_TIME(0)) {
- if ((file->f_flags & O_ACCMODE) == O_RDONLY)
- return -EPERM;
- if (file->private_data == NULL &&
- cmd != ANDROID_ALARM_SET_RTC) {
- spin_lock_irqsave(&alarm_slock, flags);
- if (alarm_opened) {
- spin_unlock_irqrestore(&alarm_slock, flags);
- return -EBUSY;
- }
- alarm_opened = 1;
- file->private_data = (void *)1;
- spin_unlock_irqrestore(&alarm_slock, flags);
- }
- }
-
- switch (ANDROID_ALARM_BASE_CMD(cmd)) {
- case ANDROID_ALARM_CLEAR(0):
- alarm_clear(alarm_type);
- break;
- case ANDROID_ALARM_SET(0):
- alarm_set(alarm_type, ts);
- break;
- case ANDROID_ALARM_SET_AND_WAIT(0):
- alarm_set(alarm_type, ts);
- /* fall though */
- case ANDROID_ALARM_WAIT:
- rv = alarm_wait();
- break;
- case ANDROID_ALARM_SET_RTC:
- rv = alarm_set_rtc(ts);
- break;
- case ANDROID_ALARM_GET_TIME(0):
- rv = alarm_get_time(alarm_type, ts);
- break;
-
- default:
- rv = -EINVAL;
- }
- return rv;
-}
-
-static long alarm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-
- struct timespec ts;
- int rv;
-
- switch (ANDROID_ALARM_BASE_CMD(cmd)) {
- case ANDROID_ALARM_SET_AND_WAIT(0):
- case ANDROID_ALARM_SET(0):
- case ANDROID_ALARM_SET_RTC:
- if (copy_from_user(&ts, (void __user *)arg, sizeof(ts)))
- return -EFAULT;
- break;
- }
-
- rv = alarm_do_ioctl(file, cmd, &ts);
- if (rv)
- return rv;
-
- switch (ANDROID_ALARM_BASE_CMD(cmd)) {
- case ANDROID_ALARM_GET_TIME(0):
- if (copy_to_user((void __user *)arg, &ts, sizeof(ts)))
- return -EFAULT;
- break;
- }
-
- return 0;
-}
-#ifdef CONFIG_COMPAT
-static long alarm_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
-
- struct timespec ts;
- int rv;
-
- switch (ANDROID_ALARM_BASE_CMD(cmd)) {
- case ANDROID_ALARM_SET_AND_WAIT_COMPAT(0):
- case ANDROID_ALARM_SET_COMPAT(0):
- case ANDROID_ALARM_SET_RTC_COMPAT:
- if (compat_get_timespec(&ts, (void __user *)arg))
- return -EFAULT;
- /* fall through */
- case ANDROID_ALARM_GET_TIME_COMPAT(0):
- cmd = ANDROID_ALARM_COMPAT_TO_NORM(cmd);
- break;
- }
-
- rv = alarm_do_ioctl(file, cmd, &ts);
- if (rv)
- return rv;
-
- switch (ANDROID_ALARM_BASE_CMD(cmd)) {
- case ANDROID_ALARM_GET_TIME(0): /* NOTE: we modified cmd above */
- if (compat_put_timespec(&ts, (void __user *)arg))
- return -EFAULT;
- break;
- }
-
- return 0;
-}
-#endif
-
-static int alarm_open(struct inode *inode, struct file *file)
-{
- file->private_data = NULL;
- return 0;
-}
-
-static int alarm_release(struct inode *inode, struct file *file)
-{
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&alarm_slock, flags);
- if (file->private_data) {
- for (i = 0; i < ANDROID_ALARM_TYPE_COUNT; i++) {
- uint32_t alarm_type_mask = 1U << i;
- if (alarm_enabled & alarm_type_mask) {
- alarm_dbg(INFO,
- "%s: clear alarm, pending %d\n",
- __func__,
- !!(alarm_pending & alarm_type_mask));
- alarm_enabled &= ~alarm_type_mask;
- }
- spin_unlock_irqrestore(&alarm_slock, flags);
- devalarm_cancel(&alarms[i]);
- spin_lock_irqsave(&alarm_slock, flags);
- }
- if (alarm_pending | wait_pending) {
- if (alarm_pending)
- alarm_dbg(INFO, "%s: clear pending alarms %x\n",
- __func__, alarm_pending);
- __pm_relax(&alarm_wake_lock);
- wait_pending = 0;
- alarm_pending = 0;
- }
- alarm_opened = 0;
- }
- spin_unlock_irqrestore(&alarm_slock, flags);
- return 0;
-}
-
-static void devalarm_triggered(struct devalarm *alarm)
-{
- unsigned long flags;
- uint32_t alarm_type_mask = 1U << alarm->type;
-
- alarm_dbg(INT, "%s: type %d\n", __func__, alarm->type);
- spin_lock_irqsave(&alarm_slock, flags);
- if (alarm_enabled & alarm_type_mask) {
- __pm_wakeup_event(&alarm_wake_lock, 5000); /* 5secs */
- alarm_enabled &= ~alarm_type_mask;
- alarm_pending |= alarm_type_mask;
- wake_up(&alarm_wait_queue);
- }
- spin_unlock_irqrestore(&alarm_slock, flags);
-}
-
-
-static enum hrtimer_restart devalarm_hrthandler(struct hrtimer *hrt)
-{
- struct devalarm *devalrm = container_of(hrt, struct devalarm, u.hrt);
-
- devalarm_triggered(devalrm);
- return HRTIMER_NORESTART;
-}
-
-static enum alarmtimer_restart devalarm_alarmhandler(struct alarm *alrm,
- ktime_t now)
-{
- struct devalarm *devalrm = container_of(alrm, struct devalarm, u.alrm);
-
- devalarm_triggered(devalrm);
- return ALARMTIMER_NORESTART;
-}
-
-
-static const struct file_operations alarm_fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = alarm_ioctl,
- .open = alarm_open,
- .release = alarm_release,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = alarm_compat_ioctl,
-#endif
-};
-
-static struct miscdevice alarm_device = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "alarm",
- .fops = &alarm_fops,
-};
-
-static int __init alarm_dev_init(void)
-{
- int err;
- int i;
-
- err = misc_register(&alarm_device);
- if (err)
- return err;
-
- alarm_init(&alarms[ANDROID_ALARM_RTC_WAKEUP].u.alrm,
- ALARM_REALTIME, devalarm_alarmhandler);
- hrtimer_init(&alarms[ANDROID_ALARM_RTC].u.hrt,
- CLOCK_REALTIME, HRTIMER_MODE_ABS);
- alarm_init(&alarms[ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP].u.alrm,
- ALARM_BOOTTIME, devalarm_alarmhandler);
- hrtimer_init(&alarms[ANDROID_ALARM_ELAPSED_REALTIME].u.hrt,
- CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
- hrtimer_init(&alarms[ANDROID_ALARM_SYSTEMTIME].u.hrt,
- CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-
- for (i = 0; i < ANDROID_ALARM_TYPE_COUNT; i++) {
- alarms[i].type = i;
- if (!is_wakeup(i))
- alarms[i].u.hrt.function = devalarm_hrthandler;
- }
-
- wakeup_source_init(&alarm_wake_lock, "alarm");
- return 0;
-}
-
-static void __exit alarm_dev_exit(void)
-{
- misc_deregister(&alarm_device);
- wakeup_source_trash(&alarm_wake_lock);
-}
-
-module_init(alarm_dev_init);
-module_exit(alarm_dev_exit);
-
diff --git a/drivers/staging/android/android_alarm.h b/drivers/staging/android/android_alarm.h
deleted file mode 100644
index 4fd32f337f9c..000000000000
--- a/drivers/staging/android/android_alarm.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/* include/linux/android_alarm.h
- *
- * Copyright (C) 2006-2007 Google, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _LINUX_ANDROID_ALARM_H
-#define _LINUX_ANDROID_ALARM_H
-
-#include <linux/ioctl.h>
-#include <linux/time.h>
-#include <linux/compat.h>
-
-enum android_alarm_type {
- /* return code bit numbers or set alarm arg */
- ANDROID_ALARM_RTC_WAKEUP,
- ANDROID_ALARM_RTC,
- ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP,
- ANDROID_ALARM_ELAPSED_REALTIME,
- ANDROID_ALARM_SYSTEMTIME,
-
- ANDROID_ALARM_TYPE_COUNT,
-
- /* return code bit numbers */
- /* ANDROID_ALARM_TIME_CHANGE = 16 */
-};
-
-enum android_alarm_return_flags {
- ANDROID_ALARM_RTC_WAKEUP_MASK = 1U << ANDROID_ALARM_RTC_WAKEUP,
- ANDROID_ALARM_RTC_MASK = 1U << ANDROID_ALARM_RTC,
- ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP_MASK =
- 1U << ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP,
- ANDROID_ALARM_ELAPSED_REALTIME_MASK =
- 1U << ANDROID_ALARM_ELAPSED_REALTIME,
- ANDROID_ALARM_SYSTEMTIME_MASK = 1U << ANDROID_ALARM_SYSTEMTIME,
- ANDROID_ALARM_TIME_CHANGE_MASK = 1U << 16
-};
-
-/* Disable alarm */
-#define ANDROID_ALARM_CLEAR(type) _IO('a', 0 | ((type) << 4))
-
-/* Ack last alarm and wait for next */
-#define ANDROID_ALARM_WAIT _IO('a', 1)
-
-#define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size)
-/* Set alarm */
-#define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec)
-#define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec)
-#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec)
-#define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec)
-#define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0)))
-#define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4)
-
-
-#ifdef CONFIG_COMPAT
-#define ANDROID_ALARM_SET_COMPAT(type) ALARM_IOW(2, type, \
- struct compat_timespec)
-#define ANDROID_ALARM_SET_AND_WAIT_COMPAT(type) ALARM_IOW(3, type, \
- struct compat_timespec)
-#define ANDROID_ALARM_GET_TIME_COMPAT(type) ALARM_IOW(4, type, \
- struct compat_timespec)
-#define ANDROID_ALARM_SET_RTC_COMPAT _IOW('a', 5, \
- struct compat_timespec)
-#define ANDROID_ALARM_IOCTL_NR(cmd) (_IOC_NR(cmd) & ((1<<4)-1))
-#define ANDROID_ALARM_COMPAT_TO_NORM(cmd) \
- ALARM_IOW(ANDROID_ALARM_IOCTL_NR(cmd), \
- ANDROID_ALARM_IOCTL_TO_TYPE(cmd), \
- struct timespec)
-
-#endif
-
-#endif
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index e681bdd9aa5f..ccaef8b48eba 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -224,21 +224,29 @@ static ssize_t ashmem_read(struct file *file, char __user *buf,
/* If size is not set, or set to 0, always return EOF. */
if (asma->size == 0)
- goto out;
+ goto out_unlock;
if (!asma->file) {
ret = -EBADF;
- goto out;
+ goto out_unlock;
}
- ret = asma->file->f_op->read(asma->file, buf, len, pos);
- if (ret < 0)
- goto out;
+ mutex_unlock(&ashmem_mutex);
- /** Update backing file pos, since f_ops->read() doesn't */
- asma->file->f_pos = *pos;
+ /*
+ * asma and asma->file are used outside the lock here. We assume
+ * once asma->file is set it will never be changed, and will not
+ * be destroyed until all references to the file are dropped and
+ * ashmem_release is called.
+ */
+ ret = asma->file->f_op->read(asma->file, buf, len, pos);
+ if (ret >= 0) {
+ /** Update backing file pos, since f_ops->read() doesn't */
+ asma->file->f_pos = *pos;
+ }
+ return ret;
-out:
+out_unlock:
mutex_unlock(&ashmem_mutex);
return ret;
}
@@ -317,22 +325,14 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
}
get_file(asma->file);
- /*
- * XXX - Reworked to use shmem_zero_setup() instead of
- * shmem_set_file while we're in staging. -jstultz
- */
- if (vma->vm_flags & VM_SHARED) {
- ret = shmem_zero_setup(vma);
- if (ret) {
- fput(asma->file);
- goto out;
- }
+ if (vma->vm_flags & VM_SHARED)
+ shmem_set_file(vma, asma->file);
+ else {
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ vma->vm_file = asma->file;
}
- if (vma->vm_file)
- fput(vma->vm_file);
- vma->vm_file = asma->file;
-
out:
mutex_unlock(&ashmem_mutex);
return ret;
@@ -363,7 +363,9 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
if (!sc->nr_to_scan)
return lru_count;
- mutex_lock(&ashmem_mutex);
+ if (!mutex_trylock(&ashmem_mutex))
+ return -1;
+
list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
loff_t start = range->pgstart * PAGE_SIZE;
loff_t end = (range->pgend + 1) * PAGE_SIZE;
@@ -413,6 +415,7 @@ out:
static int set_name(struct ashmem_area *asma, void __user *name)
{
+ int len;
int ret = 0;
char local_name[ASHMEM_NAME_LEN];
@@ -425,21 +428,19 @@ static int set_name(struct ashmem_area *asma, void __user *name)
* variable that does not need protection and later copy the local
* variable to the structure member with lock held.
*/
- if (copy_from_user(local_name, name, ASHMEM_NAME_LEN))
- return -EFAULT;
-
+ len = strncpy_from_user(local_name, name, ASHMEM_NAME_LEN);
+ if (len < 0)
+ return len;
+ if (len == ASHMEM_NAME_LEN)
+ local_name[ASHMEM_NAME_LEN - 1] = '\0';
mutex_lock(&ashmem_mutex);
/* cannot change an existing mapping's name */
- if (unlikely(asma->file)) {
+ if (unlikely(asma->file))
ret = -EINVAL;
- goto out;
- }
- memcpy(asma->name + ASHMEM_NAME_PREFIX_LEN,
- local_name, ASHMEM_NAME_LEN);
- asma->name[ASHMEM_FULL_NAME_LEN-1] = '\0';
-out:
- mutex_unlock(&ashmem_mutex);
+ else
+ strcpy(asma->name + ASHMEM_NAME_PREFIX_LEN, local_name);
+ mutex_unlock(&ashmem_mutex);
return ret;
}
diff --git a/drivers/staging/android/ashmem.h b/drivers/staging/android/ashmem.h
index 8dc0f0d3adf3..5abcfd7aa706 100644
--- a/drivers/staging/android/ashmem.h
+++ b/drivers/staging/android/ashmem.h
@@ -16,35 +16,7 @@
#include <linux/ioctl.h>
#include <linux/compat.h>
-#define ASHMEM_NAME_LEN 256
-
-#define ASHMEM_NAME_DEF "dev/ashmem"
-
-/* Return values from ASHMEM_PIN: Was the mapping purged while unpinned? */
-#define ASHMEM_NOT_PURGED 0
-#define ASHMEM_WAS_PURGED 1
-
-/* Return values from ASHMEM_GET_PIN_STATUS: Is the mapping pinned? */
-#define ASHMEM_IS_UNPINNED 0
-#define ASHMEM_IS_PINNED 1
-
-struct ashmem_pin {
- __u32 offset; /* offset into region, in bytes, page-aligned */
- __u32 len; /* length forward from offset, in bytes, page-aligned */
-};
-
-#define __ASHMEMIOC 0x77
-
-#define ASHMEM_SET_NAME _IOW(__ASHMEMIOC, 1, char[ASHMEM_NAME_LEN])
-#define ASHMEM_GET_NAME _IOR(__ASHMEMIOC, 2, char[ASHMEM_NAME_LEN])
-#define ASHMEM_SET_SIZE _IOW(__ASHMEMIOC, 3, size_t)
-#define ASHMEM_GET_SIZE _IO(__ASHMEMIOC, 4)
-#define ASHMEM_SET_PROT_MASK _IOW(__ASHMEMIOC, 5, unsigned long)
-#define ASHMEM_GET_PROT_MASK _IO(__ASHMEMIOC, 6)
-#define ASHMEM_PIN _IOW(__ASHMEMIOC, 7, struct ashmem_pin)
-#define ASHMEM_UNPIN _IOW(__ASHMEMIOC, 8, struct ashmem_pin)
-#define ASHMEM_GET_PIN_STATUS _IO(__ASHMEMIOC, 9)
-#define ASHMEM_PURGE_ALL_CACHES _IO(__ASHMEMIOC, 10)
+#include "uapi/ashmem.h"
/* support of 32bit userspace on 64bit platforms */
#ifdef CONFIG_COMPAT
diff --git a/drivers/staging/android/fiq_debugger/Kconfig b/drivers/staging/android/fiq_debugger/Kconfig
new file mode 100644
index 000000000000..56f7f999377e
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/Kconfig
@@ -0,0 +1,49 @@
+config FIQ_DEBUGGER
+ bool "FIQ Mode Serial Debugger"
+ default n
+ depends on ARM || ARM64
+ help
+ The FIQ serial debugger can accept commands even when the
+ kernel is unresponsive due to being stuck with interrupts
+ disabled.
+
+config FIQ_DEBUGGER_NO_SLEEP
+ bool "Keep serial debugger active"
+ depends on FIQ_DEBUGGER
+ default n
+ help
+ Enables the serial debugger at boot. Passing
+ fiq_debugger.no_sleep on the kernel commandline will
+ override this config option.
+
+config FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON
+ bool "Don't disable wakeup IRQ when debugger is active"
+ depends on FIQ_DEBUGGER
+ default n
+ help
+ Don't disable the wakeup irq when enabling the uart clock. This will
+ cause extra interrupts, but it makes the serial debugger usable on
+ some MSM radio builds that ignore the uart clock request in power
+ collapse.
+
+config FIQ_DEBUGGER_CONSOLE
+ bool "Console on FIQ Serial Debugger port"
+ depends on FIQ_DEBUGGER
+ default n
+ help
+ Enables a console so that printk messages are displayed on
+ the debugger serial port as they occur.
+
+config FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE
+ bool "Put the FIQ debugger into console mode by default"
+ depends on FIQ_DEBUGGER_CONSOLE
+ default n
+ help
+ If enabled, this puts the fiq debugger into console mode by default.
+ Otherwise, the fiq debugger will start out in debug mode.
+
+config FIQ_WATCHDOG
+ bool
+ select FIQ_DEBUGGER
+ select PSTORE_RAM
+ default n
diff --git a/drivers/staging/android/fiq_debugger/Makefile b/drivers/staging/android/fiq_debugger/Makefile
new file mode 100644
index 000000000000..a7ca4871cad3
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/Makefile
@@ -0,0 +1,4 @@
+obj-y += fiq_debugger.o
+obj-$(CONFIG_ARM) += fiq_debugger_arm.o
+obj-$(CONFIG_ARM64) += fiq_debugger_arm64.o
+obj-$(CONFIG_FIQ_WATCHDOG) += fiq_watchdog.o
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.c b/drivers/staging/android/fiq_debugger/fiq_debugger.c
new file mode 100644
index 000000000000..1d733624d70a
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger.c
@@ -0,0 +1,1213 @@
+/*
+ * drivers/staging/android/fiq_debugger.c
+ *
+ * Serial Debugger Interface accessed through an FIQ interrupt.
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/console.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kmsg_dump.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/timer.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/wakelock.h>
+
+#ifdef CONFIG_FIQ_GLUE
+#include <asm/fiq_glue.h>
+#endif
+
+#include <linux/uaccess.h>
+
+#include "fiq_debugger.h"
+#include "fiq_debugger_priv.h"
+#include "fiq_debugger_ringbuf.h"
+
+#define DEBUG_MAX 64
+#define MAX_UNHANDLED_FIQ_COUNT 1000000
+
+#define MAX_FIQ_DEBUGGER_PORTS 4
+
+struct fiq_debugger_state {
+#ifdef CONFIG_FIQ_GLUE
+ struct fiq_glue_handler handler;
+#endif
+ struct fiq_debugger_output output;
+
+ int fiq;
+ int uart_irq;
+ int signal_irq;
+ int wakeup_irq;
+ bool wakeup_irq_no_set_wake;
+ struct clk *clk;
+ struct fiq_debugger_pdata *pdata;
+ struct platform_device *pdev;
+
+ char debug_cmd[DEBUG_MAX];
+ int debug_busy;
+ int debug_abort;
+
+ char debug_buf[DEBUG_MAX];
+ int debug_count;
+
+ bool no_sleep;
+ bool debug_enable;
+ bool ignore_next_wakeup_irq;
+ struct timer_list sleep_timer;
+ spinlock_t sleep_timer_lock;
+ bool uart_enabled;
+ struct wake_lock debugger_wake_lock;
+ bool console_enable;
+ int current_cpu;
+ atomic_t unhandled_fiq_count;
+ bool in_fiq;
+
+ struct work_struct work;
+ spinlock_t work_lock;
+ char work_cmd[DEBUG_MAX];
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+ spinlock_t console_lock;
+ struct console console;
+ struct tty_port tty_port;
+ struct fiq_debugger_ringbuf *tty_rbuf;
+ bool syslog_dumping;
+#endif
+
+ unsigned int last_irqs[NR_IRQS];
+ unsigned int last_local_timer_irqs[NR_CPUS];
+};
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+struct tty_driver *fiq_tty_driver;
+#endif
+
+#ifdef CONFIG_FIQ_DEBUGGER_NO_SLEEP
+static bool initial_no_sleep = true;
+#else
+static bool initial_no_sleep;
+#endif
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE_DEFAULT_ENABLE
+static bool initial_debug_enable = true;
+static bool initial_console_enable = true;
+#else
+static bool initial_debug_enable;
+static bool initial_console_enable;
+#endif
+
+static bool fiq_kgdb_enable;
+
+module_param_named(no_sleep, initial_no_sleep, bool, 0644);
+module_param_named(debug_enable, initial_debug_enable, bool, 0644);
+module_param_named(console_enable, initial_console_enable, bool, 0644);
+module_param_named(kgdb_enable, fiq_kgdb_enable, bool, 0644);
+
+#ifdef CONFIG_FIQ_DEBUGGER_WAKEUP_IRQ_ALWAYS_ON
+static inline
+void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state) {}
+static inline
+void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state) {}
+#else
+static inline
+void fiq_debugger_enable_wakeup_irq(struct fiq_debugger_state *state)
+{
+ if (state->wakeup_irq < 0)
+ return;
+ enable_irq(state->wakeup_irq);
+ if (!state->wakeup_irq_no_set_wake)
+ enable_irq_wake(state->wakeup_irq);
+}
+static inline
+void fiq_debugger_disable_wakeup_irq(struct fiq_debugger_state *state)
+{
+ if (state->wakeup_irq < 0)
+ return;
+ disable_irq_nosync(state->wakeup_irq);
+ if (!state->wakeup_irq_no_set_wake)
+ disable_irq_wake(state->wakeup_irq);
+}
+#endif
+
+static inline bool fiq_debugger_have_fiq(struct fiq_debugger_state *state)
+{
+ return (state->fiq >= 0);
+}
+
+#ifdef CONFIG_FIQ_GLUE
+static void fiq_debugger_force_irq(struct fiq_debugger_state *state)
+{
+ unsigned int irq = state->signal_irq;
+
+ if (WARN_ON(!fiq_debugger_have_fiq(state)))
+ return;
+ if (state->pdata->force_irq) {
+ state->pdata->force_irq(state->pdev, irq);
+ } else {
+ struct irq_chip *chip = irq_get_chip(irq);
+ if (chip && chip->irq_retrigger)
+ chip->irq_retrigger(irq_get_irq_data(irq));
+ }
+}
+#endif
+
+static void fiq_debugger_uart_enable(struct fiq_debugger_state *state)
+{
+ if (state->clk)
+ clk_enable(state->clk);
+ if (state->pdata->uart_enable)
+ state->pdata->uart_enable(state->pdev);
+}
+
+static void fiq_debugger_uart_disable(struct fiq_debugger_state *state)
+{
+ if (state->pdata->uart_disable)
+ state->pdata->uart_disable(state->pdev);
+ if (state->clk)
+ clk_disable(state->clk);
+}
+
+static void fiq_debugger_uart_flush(struct fiq_debugger_state *state)
+{
+ if (state->pdata->uart_flush)
+ state->pdata->uart_flush(state->pdev);
+}
+
+static void fiq_debugger_putc(struct fiq_debugger_state *state, char c)
+{
+ state->pdata->uart_putc(state->pdev, c);
+}
+
+static void fiq_debugger_puts(struct fiq_debugger_state *state, char *s)
+{
+ unsigned c;
+ while ((c = *s++)) {
+ if (c == '\n')
+ fiq_debugger_putc(state, '\r');
+ fiq_debugger_putc(state, c);
+ }
+}
+
+static void fiq_debugger_prompt(struct fiq_debugger_state *state)
+{
+ fiq_debugger_puts(state, "debug> ");
+}
+
+static void fiq_debugger_dump_kernel_log(struct fiq_debugger_state *state)
+{
+ char buf[512];
+ size_t len;
+ struct kmsg_dumper dumper = { .active = true };
+
+
+ kmsg_dump_rewind_nolock(&dumper);
+ while (kmsg_dump_get_line_nolock(&dumper, true, buf,
+ sizeof(buf) - 1, &len)) {
+ buf[len] = 0;
+ fiq_debugger_puts(state, buf);
+ }
+}
+
+static void fiq_debugger_printf(struct fiq_debugger_output *output,
+ const char *fmt, ...)
+{
+ struct fiq_debugger_state *state;
+ char buf[256];
+ va_list ap;
+
+ state = container_of(output, struct fiq_debugger_state, output);
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ fiq_debugger_puts(state, buf);
+}
+
+/* Safe outside fiq context */
+static int fiq_debugger_printf_nfiq(void *cookie, const char *fmt, ...)
+{
+ struct fiq_debugger_state *state = cookie;
+ char buf[256];
+ va_list ap;
+ unsigned long irq_flags;
+
+ va_start(ap, fmt);
+ vsnprintf(buf, 128, fmt, ap);
+ va_end(ap);
+
+ local_irq_save(irq_flags);
+ fiq_debugger_puts(state, buf);
+ fiq_debugger_uart_flush(state);
+ local_irq_restore(irq_flags);
+ return state->debug_abort;
+}
+
+static void fiq_debugger_dump_irqs(struct fiq_debugger_state *state)
+{
+ int n;
+ struct irq_desc *desc;
+
+ fiq_debugger_printf(&state->output,
+ "irqnr total since-last status name\n");
+ for_each_irq_desc(n, desc) {
+ struct irqaction *act = desc->action;
+ if (!act && !kstat_irqs(n))
+ continue;
+ fiq_debugger_printf(&state->output, "%5d: %10u %11u %8x %s\n", n,
+ kstat_irqs(n),
+ kstat_irqs(n) - state->last_irqs[n],
+ desc->status_use_accessors,
+ (act && act->name) ? act->name : "???");
+ state->last_irqs[n] = kstat_irqs(n);
+ }
+}
+
+static void fiq_debugger_do_ps(struct fiq_debugger_state *state)
+{
+ struct task_struct *g;
+ struct task_struct *p;
+ unsigned task_state;
+ static const char stat_nam[] = "RSDTtZX";
+
+ fiq_debugger_printf(&state->output, "pid ppid prio task pc\n");
+ read_lock(&tasklist_lock);
+ do_each_thread(g, p) {
+ task_state = p->state ? __ffs(p->state) + 1 : 0;
+ fiq_debugger_printf(&state->output,
+ "%5d %5d %4d ", p->pid, p->parent->pid, p->prio);
+ fiq_debugger_printf(&state->output, "%-13.13s %c", p->comm,
+ task_state >= sizeof(stat_nam) ? '?' : stat_nam[task_state]);
+ if (task_state == TASK_RUNNING)
+ fiq_debugger_printf(&state->output, " running\n");
+ else
+ fiq_debugger_printf(&state->output, " %08lx\n",
+ thread_saved_pc(p));
+ } while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
+}
+
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state)
+{
+ state->syslog_dumping = true;
+}
+
+static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state)
+{
+ state->syslog_dumping = false;
+}
+#else
+extern int do_syslog(int type, char __user *buf, int count);
+static void fiq_debugger_begin_syslog_dump(struct fiq_debugger_state *state)
+{
+ do_syslog(5 /* clear */, NULL, 0);
+}
+
+static void fiq_debugger_end_syslog_dump(struct fiq_debugger_state *state)
+{
+ fiq_debugger_dump_kernel_log(state);
+}
+#endif
+
+static void fiq_debugger_do_sysrq(struct fiq_debugger_state *state, char rq)
+{
+ if ((rq == 'g' || rq == 'G') && !fiq_kgdb_enable) {
+ fiq_debugger_printf(&state->output, "sysrq-g blocked\n");
+ return;
+ }
+ fiq_debugger_begin_syslog_dump(state);
+ handle_sysrq(rq);
+ fiq_debugger_end_syslog_dump(state);
+}
+
+#ifdef CONFIG_KGDB
+static void fiq_debugger_do_kgdb(struct fiq_debugger_state *state)
+{
+ if (!fiq_kgdb_enable) {
+ fiq_debugger_printf(&state->output, "kgdb through fiq debugger not enabled\n");
+ return;
+ }
+
+ fiq_debugger_printf(&state->output, "enabling console and triggering kgdb\n");
+ state->console_enable = true;
+ handle_sysrq('g');
+}
+#endif
+
+static void fiq_debugger_schedule_work(struct fiq_debugger_state *state,
+ char *cmd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&state->work_lock, flags);
+ if (state->work_cmd[0] != '\0') {
+ fiq_debugger_printf(&state->output, "work command processor busy\n");
+ spin_unlock_irqrestore(&state->work_lock, flags);
+ return;
+ }
+
+ strlcpy(state->work_cmd, cmd, sizeof(state->work_cmd));
+ spin_unlock_irqrestore(&state->work_lock, flags);
+
+ schedule_work(&state->work);
+}
+
+static void fiq_debugger_work(struct work_struct *work)
+{
+ struct fiq_debugger_state *state;
+ char work_cmd[DEBUG_MAX];
+ char *cmd;
+ unsigned long flags;
+
+ state = container_of(work, struct fiq_debugger_state, work);
+
+ spin_lock_irqsave(&state->work_lock, flags);
+
+ strlcpy(work_cmd, state->work_cmd, sizeof(work_cmd));
+ state->work_cmd[0] = '\0';
+
+ spin_unlock_irqrestore(&state->work_lock, flags);
+
+ cmd = work_cmd;
+ if (!strncmp(cmd, "reboot", 6)) {
+ cmd += 6;
+ while (*cmd == ' ')
+ cmd++;
+ if (*cmd != '\0')
+ kernel_restart(cmd);
+ else
+ kernel_restart(NULL);
+ } else {
+ fiq_debugger_printf(&state->output, "unknown work command '%s'\n",
+ work_cmd);
+ }
+}
+
+/* This function CANNOT be called in FIQ context */
+static void fiq_debugger_irq_exec(struct fiq_debugger_state *state, char *cmd)
+{
+ if (!strcmp(cmd, "ps"))
+ fiq_debugger_do_ps(state);
+ if (!strcmp(cmd, "sysrq"))
+ fiq_debugger_do_sysrq(state, 'h');
+ if (!strncmp(cmd, "sysrq ", 6))
+ fiq_debugger_do_sysrq(state, cmd[6]);
+#ifdef CONFIG_KGDB
+ if (!strcmp(cmd, "kgdb"))
+ fiq_debugger_do_kgdb(state);
+#endif
+ if (!strncmp(cmd, "reboot", 6))
+ fiq_debugger_schedule_work(state, cmd);
+}
+
+static void fiq_debugger_help(struct fiq_debugger_state *state)
+{
+ fiq_debugger_printf(&state->output,
+ "FIQ Debugger commands:\n"
+ " pc PC status\n"
+ " regs Register dump\n"
+ " allregs Extended Register dump\n"
+ " bt Stack trace\n");
+ fiq_debugger_printf(&state->output,
+ " reboot [<c>] Reboot with command <c>\n"
+ " reset [<c>] Hard reset with command <c>\n"
+ " irqs Interupt status\n"
+ " kmsg Kernel log\n"
+ " version Kernel version\n");
+ fiq_debugger_printf(&state->output,
+ " sleep Allow sleep while in FIQ\n"
+ " nosleep Disable sleep while in FIQ\n"
+ " console Switch terminal to console\n"
+ " cpu Current CPU\n"
+ " cpu <number> Switch to CPU<number>\n");
+ fiq_debugger_printf(&state->output,
+ " ps Process list\n"
+ " sysrq sysrq options\n"
+ " sysrq <param> Execute sysrq with <param>\n");
+#ifdef CONFIG_KGDB
+ fiq_debugger_printf(&state->output,
+ " kgdb Enter kernel debugger\n");
+#endif
+}
+
+static void fiq_debugger_take_affinity(void *info)
+{
+ struct fiq_debugger_state *state = info;
+ struct cpumask cpumask;
+
+ cpumask_clear(&cpumask);
+ cpumask_set_cpu(get_cpu(), &cpumask);
+
+ irq_set_affinity(state->uart_irq, &cpumask);
+}
+
+static void fiq_debugger_switch_cpu(struct fiq_debugger_state *state, int cpu)
+{
+ if (!fiq_debugger_have_fiq(state))
+ smp_call_function_single(cpu, fiq_debugger_take_affinity, state,
+ false);
+ state->current_cpu = cpu;
+}
+
+static bool fiq_debugger_fiq_exec(struct fiq_debugger_state *state,
+ const char *cmd, const struct pt_regs *regs,
+ void *svc_sp)
+{
+ bool signal_helper = false;
+
+ if (!strcmp(cmd, "help") || !strcmp(cmd, "?")) {
+ fiq_debugger_help(state);
+ } else if (!strcmp(cmd, "pc")) {
+ fiq_debugger_dump_pc(&state->output, regs);
+ } else if (!strcmp(cmd, "regs")) {
+ fiq_debugger_dump_regs(&state->output, regs);
+ } else if (!strcmp(cmd, "allregs")) {
+ fiq_debugger_dump_allregs(&state->output, regs);
+ } else if (!strcmp(cmd, "bt")) {
+ fiq_debugger_dump_stacktrace(&state->output, regs, 100, svc_sp);
+ } else if (!strncmp(cmd, "reset", 5)) {
+ cmd += 5;
+ while (*cmd == ' ')
+ cmd++;
+ if (*cmd) {
+ char tmp_cmd[32];
+ strlcpy(tmp_cmd, cmd, sizeof(tmp_cmd));
+ machine_restart(tmp_cmd);
+ } else {
+ machine_restart(NULL);
+ }
+ } else if (!strcmp(cmd, "irqs")) {
+ fiq_debugger_dump_irqs(state);
+ } else if (!strcmp(cmd, "kmsg")) {
+ fiq_debugger_dump_kernel_log(state);
+ } else if (!strcmp(cmd, "version")) {
+ fiq_debugger_printf(&state->output, "%s\n", linux_banner);
+ } else if (!strcmp(cmd, "sleep")) {
+ state->no_sleep = false;
+ fiq_debugger_printf(&state->output, "enabling sleep\n");
+ } else if (!strcmp(cmd, "nosleep")) {
+ state->no_sleep = true;
+ fiq_debugger_printf(&state->output, "disabling sleep\n");
+ } else if (!strcmp(cmd, "console")) {
+ fiq_debugger_printf(&state->output, "console mode\n");
+ fiq_debugger_uart_flush(state);
+ state->console_enable = true;
+ } else if (!strcmp(cmd, "cpu")) {
+ fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu);
+ } else if (!strncmp(cmd, "cpu ", 4)) {
+ unsigned long cpu = 0;
+ if (strict_strtoul(cmd + 4, 10, &cpu) == 0)
+ fiq_debugger_switch_cpu(state, cpu);
+ else
+ fiq_debugger_printf(&state->output, "invalid cpu\n");
+ fiq_debugger_printf(&state->output, "cpu %d\n", state->current_cpu);
+ } else {
+ if (state->debug_busy) {
+ fiq_debugger_printf(&state->output,
+ "command processor busy. trying to abort.\n");
+ state->debug_abort = -1;
+ } else {
+ strcpy(state->debug_cmd, cmd);
+ state->debug_busy = 1;
+ }
+
+ return true;
+ }
+ if (!state->console_enable)
+ fiq_debugger_prompt(state);
+
+ return signal_helper;
+}
+
+static void fiq_debugger_sleep_timer_expired(unsigned long data)
+{
+ struct fiq_debugger_state *state = (struct fiq_debugger_state *)data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&state->sleep_timer_lock, flags);
+ if (state->uart_enabled && !state->no_sleep) {
+ if (state->debug_enable && !state->console_enable) {
+ state->debug_enable = false;
+ fiq_debugger_printf_nfiq(state,
+ "suspending fiq debugger\n");
+ }
+ state->ignore_next_wakeup_irq = true;
+ fiq_debugger_uart_disable(state);
+ state->uart_enabled = false;
+ fiq_debugger_enable_wakeup_irq(state);
+ }
+ wake_unlock(&state->debugger_wake_lock);
+ spin_unlock_irqrestore(&state->sleep_timer_lock, flags);
+}
+
+static void fiq_debugger_handle_wakeup(struct fiq_debugger_state *state)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&state->sleep_timer_lock, flags);
+ if (state->wakeup_irq >= 0 && state->ignore_next_wakeup_irq) {
+ state->ignore_next_wakeup_irq = false;
+ } else if (!state->uart_enabled) {
+ wake_lock(&state->debugger_wake_lock);
+ fiq_debugger_uart_enable(state);
+ state->uart_enabled = true;
+ fiq_debugger_disable_wakeup_irq(state);
+ mod_timer(&state->sleep_timer, jiffies + HZ / 2);
+ }
+ spin_unlock_irqrestore(&state->sleep_timer_lock, flags);
+}
+
+static irqreturn_t fiq_debugger_wakeup_irq_handler(int irq, void *dev)
+{
+ struct fiq_debugger_state *state = dev;
+
+ if (!state->no_sleep)
+ fiq_debugger_puts(state, "WAKEUP\n");
+ fiq_debugger_handle_wakeup(state);
+
+ return IRQ_HANDLED;
+}
+
+static
+void fiq_debugger_handle_console_irq_context(struct fiq_debugger_state *state)
+{
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+ if (state->tty_port.ops) {
+ int i;
+ int count = fiq_debugger_ringbuf_level(state->tty_rbuf);
+ for (i = 0; i < count; i++) {
+ int c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0);
+ tty_insert_flip_char(&state->tty_port, c, TTY_NORMAL);
+ if (!fiq_debugger_ringbuf_consume(state->tty_rbuf, 1))
+ pr_warn("fiq tty failed to consume byte\n");
+ }
+ tty_flip_buffer_push(&state->tty_port);
+ }
+#endif
+}
+
+static void fiq_debugger_handle_irq_context(struct fiq_debugger_state *state)
+{
+ if (!state->no_sleep) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&state->sleep_timer_lock, flags);
+ wake_lock(&state->debugger_wake_lock);
+ mod_timer(&state->sleep_timer, jiffies + HZ * 5);
+ spin_unlock_irqrestore(&state->sleep_timer_lock, flags);
+ }
+ fiq_debugger_handle_console_irq_context(state);
+ if (state->debug_busy) {
+ fiq_debugger_irq_exec(state, state->debug_cmd);
+ if (!state->console_enable)
+ fiq_debugger_prompt(state);
+ state->debug_busy = 0;
+ }
+}
+
+static int fiq_debugger_getc(struct fiq_debugger_state *state)
+{
+ return state->pdata->uart_getc(state->pdev);
+}
+
+static bool fiq_debugger_handle_uart_interrupt(struct fiq_debugger_state *state,
+ int this_cpu, const struct pt_regs *regs, void *svc_sp)
+{
+ int c;
+ static int last_c;
+ int count = 0;
+ bool signal_helper = false;
+
+ if (this_cpu != state->current_cpu) {
+ if (state->in_fiq)
+ return false;
+
+ if (atomic_inc_return(&state->unhandled_fiq_count) !=
+ MAX_UNHANDLED_FIQ_COUNT)
+ return false;
+
+ fiq_debugger_printf(&state->output,
+ "fiq_debugger: cpu %d not responding, "
+ "reverting to cpu %d\n", state->current_cpu,
+ this_cpu);
+
+ atomic_set(&state->unhandled_fiq_count, 0);
+ fiq_debugger_switch_cpu(state, this_cpu);
+ return false;
+ }
+
+ state->in_fiq = true;
+
+ while ((c = fiq_debugger_getc(state)) != FIQ_DEBUGGER_NO_CHAR) {
+ count++;
+ if (!state->debug_enable) {
+ if ((c == 13) || (c == 10)) {
+ state->debug_enable = true;
+ state->debug_count = 0;
+ fiq_debugger_prompt(state);
+ }
+ } else if (c == FIQ_DEBUGGER_BREAK) {
+ state->console_enable = false;
+ fiq_debugger_puts(state, "fiq debugger mode\n");
+ state->debug_count = 0;
+ fiq_debugger_prompt(state);
+#ifdef CONFIG_FIQ_DEBUGGER_CONSOLE
+ } else if (state->console_enable && state->tty_rbuf) {
+ fiq_debugger_ringbuf_push(state->tty_rbuf, c);
+ signal_helper = true;
+#endif
+ } else if ((c >= ' ') && (c < 127)) {
+ if (state->debug_count < (DEBUG_MAX - 1)) {
+ state->debug_buf[state->debug_count++] = c;
+ fiq_debugger_putc(state, c);
+ }
+ } else if ((c == 8) || (c == 127)) {
+ if (state->debug_count > 0) {
+ state->debug_count--;
+ fiq_debugger_putc(state, 8);
+ fiq_debugger_putc(state, ' ');
+ fiq_debugger_putc(state, 8);
+ }
+ } else if ((c == 13) || (c == 10)) {
+ if (c == '\r' || (c == '\n' && last_c != '\r')) {
+ fiq_debugger_putc(state, '\r');
+ fiq_debugger_putc(state, '\n');
+ }
+ if (state->debug_count) {
+ state->debug_buf[state->debug_count] = 0;
+ state->debug_count = 0;
+ signal_helper |=
+ fiq_debugger_fiq_exec(state,
+ state->debug_buf,
+ regs, svc_sp);
+ } else {
+ fiq_debugger_prompt(state);
+ }
+ }
+ last_c = c;
+ }
+ if (!state->console_enable)
+ fiq_debugger_uart_flush(state);
+ if (state->pdata->fiq_ack)
+ state->pdata->fiq_ack(state->pdev, state->fiq);
+
+ /* poke sleep timer if necessary */
+ if (state->debug_enable && !state->no_sleep)
+ signal_helper = true;
+
+ atomic_set(&state->unhandled_fiq_count, 0);
+ state->in_fiq = false;
+
+ return signal_helper;
+}
+
+#ifdef CONFIG_FIQ_GLUE
+static void fiq_debugger_fiq(struct fiq_glue_handler *h,
+ const struct pt_regs *regs, void *svc_sp)
+{
+ struct fiq_debugger_state *state =
+ container_of(h, struct fiq_debugger_state, handler);
+ unsigned int this_cpu = THREAD_INFO(svc_sp)->cpu;
+ bool need_irq;
+
+ need_irq = fiq_debugger_handle_uart_interrupt(state, this_cpu, regs,
+ svc_sp);
+ if (need_irq)
+ fiq_debugger_force_irq(state);
+}
+#endif
+
+/*
+ * When not using FIQs, we only use this single interrupt as an entry point.
+ * This just effectively takes over the UART interrupt and does all the work
+ * in this context.
+ */
+static irqreturn_t fiq_debugger_uart_irq(int irq, void *dev)
+{
+ struct fiq_debugger_state *state = dev;
+ bool not_done;
+
+ fiq_debugger_handle_wakeup(state);
+
+ /* handle the debugger irq in regular context */
+ not_done = fiq_debugger_handle_uart_interrupt(state, smp_processor_id(),
+ get_irq_regs(),
+ current_thread_info());
+ if (not_done)
+ fiq_debugger_handle_irq_context(state);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * If FIQs are used, not everything can happen in fiq context.
+ * FIQ handler does what it can and then signals this interrupt to finish the
+ * job in irq context.
+ */
+static irqreturn_t fiq_debugger_signal_irq(int irq, void *dev)
+{
+ struct fiq_debugger_state *state = dev;
+
+ if (state->pdata->force_irq_ack)
+ state->pdata->force_irq_ack(state->pdev, state->signal_irq);
+
+ fiq_debugger_handle_irq_context(state);
+
+ return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_FIQ_GLUE
+static void fiq_debugger_resume(struct fiq_glue_handler *h)
+{
+ struct fiq_debugger_state *state =
+ container_of(h, struct fiq_debugger_state, handler);
+ if (state->pdata->uart_resume)
+ state->pdata->uart_resume(state->pdev);
+}
+#endif
+
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+struct tty_driver *fiq_debugger_console_device(struct console *co, int *index)
+{
+ *index = co->index;
+ return fiq_tty_driver;
+}
+
+static void fiq_debugger_console_write(struct console *co,
+ const char *s, unsigned int count)
+{
+ struct fiq_debugger_state *state;
+ unsigned long flags;
+
+ state = container_of(co, struct fiq_debugger_state, console);
+
+ if (!state->console_enable && !state->syslog_dumping)
+ return;
+
+ fiq_debugger_uart_enable(state);
+ spin_lock_irqsave(&state->console_lock, flags);
+ while (count--) {
+ if (*s == '\n')
+ fiq_debugger_putc(state, '\r');
+ fiq_debugger_putc(state, *s++);
+ }
+ fiq_debugger_uart_flush(state);
+ spin_unlock_irqrestore(&state->console_lock, flags);
+ fiq_debugger_uart_disable(state);
+}
+
+static struct console fiq_debugger_console = {
+ .name = "ttyFIQ",
+ .device = fiq_debugger_console_device,
+ .write = fiq_debugger_console_write,
+ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_ENABLED,
+};
+
+int fiq_tty_open(struct tty_struct *tty, struct file *filp)
+{
+ int line = tty->index;
+ struct fiq_debugger_state **states = tty->driver->driver_state;
+ struct fiq_debugger_state *state = states[line];
+
+ return tty_port_open(&state->tty_port, tty, filp);
+}
+
+void fiq_tty_close(struct tty_struct *tty, struct file *filp)
+{
+ tty_port_close(tty->port, tty, filp);
+}
+
+int fiq_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+ int i;
+ int line = tty->index;
+ struct fiq_debugger_state **states = tty->driver->driver_state;
+ struct fiq_debugger_state *state = states[line];
+
+ if (!state->console_enable)
+ return count;
+
+ fiq_debugger_uart_enable(state);
+ spin_lock_irq(&state->console_lock);
+ for (i = 0; i < count; i++)
+ fiq_debugger_putc(state, *buf++);
+ spin_unlock_irq(&state->console_lock);
+ fiq_debugger_uart_disable(state);
+
+ return count;
+}
+
+int fiq_tty_write_room(struct tty_struct *tty)
+{
+ return 16;
+}
+
+#ifdef CONFIG_CONSOLE_POLL
+static int fiq_tty_poll_init(struct tty_driver *driver, int line, char *options)
+{
+ return 0;
+}
+
+static int fiq_tty_poll_get_char(struct tty_driver *driver, int line)
+{
+ struct fiq_debugger_state **states = driver->driver_state;
+ struct fiq_debugger_state *state = states[line];
+ int c = NO_POLL_CHAR;
+
+ fiq_debugger_uart_enable(state);
+ if (fiq_debugger_have_fiq(state)) {
+ int count = fiq_debugger_ringbuf_level(state->tty_rbuf);
+ if (count > 0) {
+ c = fiq_debugger_ringbuf_peek(state->tty_rbuf, 0);
+ fiq_debugger_ringbuf_consume(state->tty_rbuf, 1);
+ }
+ } else {
+ c = fiq_debugger_getc(state);
+ if (c == FIQ_DEBUGGER_NO_CHAR)
+ c = NO_POLL_CHAR;
+ }
+ fiq_debugger_uart_disable(state);
+
+ return c;
+}
+
+static void fiq_tty_poll_put_char(struct tty_driver *driver, int line, char ch)
+{
+ struct fiq_debugger_state **states = driver->driver_state;
+ struct fiq_debugger_state *state = states[line];
+ fiq_debugger_uart_enable(state);
+ fiq_debugger_putc(state, ch);
+ fiq_debugger_uart_disable(state);
+}
+#endif
+
+static const struct tty_port_operations fiq_tty_port_ops;
+
+static const struct tty_operations fiq_tty_driver_ops = {
+ .write = fiq_tty_write,
+ .write_room = fiq_tty_write_room,
+ .open = fiq_tty_open,
+ .close = fiq_tty_close,
+#ifdef CONFIG_CONSOLE_POLL
+ .poll_init = fiq_tty_poll_init,
+ .poll_get_char = fiq_tty_poll_get_char,
+ .poll_put_char = fiq_tty_poll_put_char,
+#endif
+};
+
+static int fiq_debugger_tty_init(void)
+{
+ int ret;
+ struct fiq_debugger_state **states = NULL;
+
+ states = kzalloc(sizeof(*states) * MAX_FIQ_DEBUGGER_PORTS, GFP_KERNEL);
+ if (!states) {
+ pr_err("Failed to allocate fiq debugger state structres\n");
+ return -ENOMEM;
+ }
+
+ fiq_tty_driver = alloc_tty_driver(MAX_FIQ_DEBUGGER_PORTS);
+ if (!fiq_tty_driver) {
+ pr_err("Failed to allocate fiq debugger tty\n");
+ ret = -ENOMEM;
+ goto err_free_state;
+ }
+
+ fiq_tty_driver->owner = THIS_MODULE;
+ fiq_tty_driver->driver_name = "fiq-debugger";
+ fiq_tty_driver->name = "ttyFIQ";
+ fiq_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
+ fiq_tty_driver->subtype = SERIAL_TYPE_NORMAL;
+ fiq_tty_driver->init_termios = tty_std_termios;
+ fiq_tty_driver->flags = TTY_DRIVER_REAL_RAW |
+ TTY_DRIVER_DYNAMIC_DEV;
+ fiq_tty_driver->driver_state = states;
+
+ fiq_tty_driver->init_termios.c_cflag =
+ B115200 | CS8 | CREAD | HUPCL | CLOCAL;
+ fiq_tty_driver->init_termios.c_ispeed = 115200;
+ fiq_tty_driver->init_termios.c_ospeed = 115200;
+
+ tty_set_operations(fiq_tty_driver, &fiq_tty_driver_ops);
+
+ ret = tty_register_driver(fiq_tty_driver);
+ if (ret) {
+ pr_err("Failed to register fiq tty: %d\n", ret);
+ goto err_free_tty;
+ }
+
+ pr_info("Registered FIQ tty driver\n");
+ return 0;
+
+err_free_tty:
+ put_tty_driver(fiq_tty_driver);
+ fiq_tty_driver = NULL;
+err_free_state:
+ kfree(states);
+ return ret;
+}
+
+static int fiq_debugger_tty_init_one(struct fiq_debugger_state *state)
+{
+ int ret;
+ struct device *tty_dev;
+ struct fiq_debugger_state **states = fiq_tty_driver->driver_state;
+
+ states[state->pdev->id] = state;
+
+ state->tty_rbuf = fiq_debugger_ringbuf_alloc(1024);
+ if (!state->tty_rbuf) {
+ pr_err("Failed to allocate fiq debugger ringbuf\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ tty_port_init(&state->tty_port);
+ state->tty_port.ops = &fiq_tty_port_ops;
+
+ tty_dev = tty_port_register_device(&state->tty_port, fiq_tty_driver,
+ state->pdev->id, &state->pdev->dev);
+ if (IS_ERR(tty_dev)) {
+ pr_err("Failed to register fiq debugger tty device\n");
+ ret = PTR_ERR(tty_dev);
+ goto err;
+ }
+
+ device_set_wakeup_capable(tty_dev, 1);
+
+ pr_info("Registered fiq debugger ttyFIQ%d\n", state->pdev->id);
+
+ return 0;
+
+err:
+ fiq_debugger_ringbuf_free(state->tty_rbuf);
+ state->tty_rbuf = NULL;
+ return ret;
+}
+#endif
+
+static int fiq_debugger_dev_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct fiq_debugger_state *state = platform_get_drvdata(pdev);
+
+ if (state->pdata->uart_dev_suspend)
+ return state->pdata->uart_dev_suspend(pdev);
+ return 0;
+}
+
+static int fiq_debugger_dev_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct fiq_debugger_state *state = platform_get_drvdata(pdev);
+
+ if (state->pdata->uart_dev_resume)
+ return state->pdata->uart_dev_resume(pdev);
+ return 0;
+}
+
+static int fiq_debugger_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct fiq_debugger_pdata *pdata = dev_get_platdata(&pdev->dev);
+ struct fiq_debugger_state *state;
+ int fiq;
+ int uart_irq;
+
+ if (pdev->id >= MAX_FIQ_DEBUGGER_PORTS)
+ return -EINVAL;
+
+ if (!pdata->uart_getc || !pdata->uart_putc)
+ return -EINVAL;
+ if ((pdata->uart_enable && !pdata->uart_disable) ||
+ (!pdata->uart_enable && pdata->uart_disable))
+ return -EINVAL;
+
+ fiq = platform_get_irq_byname(pdev, "fiq");
+ uart_irq = platform_get_irq_byname(pdev, "uart_irq");
+
+ /* uart_irq mode and fiq mode are mutually exclusive, but one of them
+ * is required */
+ if ((uart_irq < 0 && fiq < 0) || (uart_irq >= 0 && fiq >= 0))
+ return -EINVAL;
+ if (fiq >= 0 && !pdata->fiq_enable)
+ return -EINVAL;
+
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ state->output.printf = fiq_debugger_printf;
+ setup_timer(&state->sleep_timer, fiq_debugger_sleep_timer_expired,
+ (unsigned long)state);
+ state->pdata = pdata;
+ state->pdev = pdev;
+ state->no_sleep = initial_no_sleep;
+ state->debug_enable = initial_debug_enable;
+ state->console_enable = initial_console_enable;
+
+ state->fiq = fiq;
+ state->uart_irq = uart_irq;
+ state->signal_irq = platform_get_irq_byname(pdev, "signal");
+ state->wakeup_irq = platform_get_irq_byname(pdev, "wakeup");
+
+ INIT_WORK(&state->work, fiq_debugger_work);
+ spin_lock_init(&state->work_lock);
+
+ platform_set_drvdata(pdev, state);
+
+ spin_lock_init(&state->sleep_timer_lock);
+
+ if (state->wakeup_irq < 0 && fiq_debugger_have_fiq(state))
+ state->no_sleep = true;
+ state->ignore_next_wakeup_irq = !state->no_sleep;
+
+ wake_lock_init(&state->debugger_wake_lock,
+ WAKE_LOCK_SUSPEND, "serial-debug");
+
+ state->clk = clk_get(&pdev->dev, NULL);
+ if (IS_ERR(state->clk))
+ state->clk = NULL;
+
+ /* do not call pdata->uart_enable here since uart_init may still
+ * need to do some initialization before uart_enable can work.
+ * So, only try to manage the clock during init.
+ */
+ if (state->clk)
+ clk_enable(state->clk);
+
+ if (pdata->uart_init) {
+ ret = pdata->uart_init(pdev);
+ if (ret)
+ goto err_uart_init;
+ }
+
+ fiq_debugger_printf_nfiq(state,
+ "<hit enter %sto activate fiq debugger>\n",
+ state->no_sleep ? "" : "twice ");
+
+#ifdef CONFIG_FIQ_GLUE
+ if (fiq_debugger_have_fiq(state)) {
+ state->handler.fiq = fiq_debugger_fiq;
+ state->handler.resume = fiq_debugger_resume;
+ ret = fiq_glue_register_handler(&state->handler);
+ if (ret) {
+ pr_err("%s: could not install fiq handler\n", __func__);
+ goto err_register_irq;
+ }
+
+ pdata->fiq_enable(pdev, state->fiq, 1);
+ } else
+#endif
+ {
+ ret = request_irq(state->uart_irq, fiq_debugger_uart_irq,
+ IRQF_NO_SUSPEND, "debug", state);
+ if (ret) {
+ pr_err("%s: could not install irq handler\n", __func__);
+ goto err_register_irq;
+ }
+
+ /* for irq-only mode, we want this irq to wake us up, if it
+ * can.
+ */
+ enable_irq_wake(state->uart_irq);
+ }
+
+ if (state->clk)
+ clk_disable(state->clk);
+
+ if (state->signal_irq >= 0) {
+ ret = request_irq(state->signal_irq, fiq_debugger_signal_irq,
+ IRQF_TRIGGER_RISING, "debug-signal", state);
+ if (ret)
+ pr_err("serial_debugger: could not install signal_irq");
+ }
+
+ if (state->wakeup_irq >= 0) {
+ ret = request_irq(state->wakeup_irq,
+ fiq_debugger_wakeup_irq_handler,
+ IRQF_TRIGGER_FALLING | IRQF_DISABLED,
+ "debug-wakeup", state);
+ if (ret) {
+ pr_err("serial_debugger: "
+ "could not install wakeup irq\n");
+ state->wakeup_irq = -1;
+ } else {
+ ret = enable_irq_wake(state->wakeup_irq);
+ if (ret) {
+ pr_err("serial_debugger: "
+ "could not enable wakeup\n");
+ state->wakeup_irq_no_set_wake = true;
+ }
+ }
+ }
+ if (state->no_sleep)
+ fiq_debugger_handle_wakeup(state);
+
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+ spin_lock_init(&state->console_lock);
+ state->console = fiq_debugger_console;
+ state->console.index = pdev->id;
+ if (!console_set_on_cmdline)
+ add_preferred_console(state->console.name,
+ state->console.index, NULL);
+ register_console(&state->console);
+ fiq_debugger_tty_init_one(state);
+#endif
+ return 0;
+
+err_register_irq:
+ if (pdata->uart_free)
+ pdata->uart_free(pdev);
+err_uart_init:
+ if (state->clk)
+ clk_disable(state->clk);
+ if (state->clk)
+ clk_put(state->clk);
+ wake_lock_destroy(&state->debugger_wake_lock);
+ platform_set_drvdata(pdev, NULL);
+ kfree(state);
+ return ret;
+}
+
+static const struct dev_pm_ops fiq_debugger_dev_pm_ops = {
+ .suspend = fiq_debugger_dev_suspend,
+ .resume = fiq_debugger_dev_resume,
+};
+
+static struct platform_driver fiq_debugger_driver = {
+ .probe = fiq_debugger_probe,
+ .driver = {
+ .name = "fiq_debugger",
+ .pm = &fiq_debugger_dev_pm_ops,
+ },
+};
+
+static int __init fiq_debugger_init(void)
+{
+#if defined(CONFIG_FIQ_DEBUGGER_CONSOLE)
+ fiq_debugger_tty_init();
+#endif
+ return platform_driver_register(&fiq_debugger_driver);
+}
+
+postcore_initcall(fiq_debugger_init);
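
For reference, the module parameters registered above suggest two common ways to bring the debugger up from the kernel command line. These are illustrative boot arguments only, assuming the code is built in under the fiq_debugger object name so the parameters take the fiq_debugger. prefix:

    console=ttyFIQ0 fiq_debugger.console_enable=1
    fiq_debugger.no_sleep=1 fiq_debugger.debug_enable=1 fiq_debugger.kgdb_enable=1

The first routes the kernel console to ttyFIQ0 (the driver also calls add_preferred_console() itself when no console= was given on the command line); the second keeps the UART awake and permits the sysrq-g/kgdb path.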
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger.h b/drivers/staging/android/fiq_debugger/fiq_debugger.h
new file mode 100644
index 000000000000..c9ec4f8db086
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger.h
@@ -0,0 +1,64 @@
+/*
+ * drivers/staging/android/fiq_debugger/fiq_debugger.h
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FIQ_DEBUGGER_H_
+#define _FIQ_DEBUGGER_H_
+
+#include <linux/serial_core.h>
+
+#define FIQ_DEBUGGER_NO_CHAR NO_POLL_CHAR
+#define FIQ_DEBUGGER_BREAK 0x00ff0100
+
+#define FIQ_DEBUGGER_FIQ_IRQ_NAME "fiq"
+#define FIQ_DEBUGGER_SIGNAL_IRQ_NAME "signal"
+#define FIQ_DEBUGGER_WAKEUP_IRQ_NAME "wakeup"
+
+/**
+ * struct fiq_debugger_pdata - fiq debugger platform data
+ * @uart_resume: used to restore uart state right before enabling
+ * the fiq.
+ * @uart_enable: Do the work necessary to communicate with the uart
+ * hw (enable clocks, etc.). This must be ref-counted.
+ * @uart_disable: Do the work necessary to disable the uart hw
+ * (disable clocks, etc.). This must be ref-counted.
+ * @uart_dev_suspend: called during PM suspend, generally not needed
+ * for real fiq mode debugger.
+ * @uart_dev_resume: called during PM resume, generally not needed
+ * for real fiq mode debugger.
+ */
+struct fiq_debugger_pdata {
+ int (*uart_init)(struct platform_device *pdev);
+ void (*uart_free)(struct platform_device *pdev);
+ int (*uart_resume)(struct platform_device *pdev);
+ int (*uart_getc)(struct platform_device *pdev);
+ void (*uart_putc)(struct platform_device *pdev, unsigned int c);
+ void (*uart_flush)(struct platform_device *pdev);
+ void (*uart_enable)(struct platform_device *pdev);
+ void (*uart_disable)(struct platform_device *pdev);
+
+ int (*uart_dev_suspend)(struct platform_device *pdev);
+ int (*uart_dev_resume)(struct platform_device *pdev);
+
+ void (*fiq_enable)(struct platform_device *pdev, unsigned int fiq,
+ bool enable);
+ void (*fiq_ack)(struct platform_device *pdev, unsigned int fiq);
+
+ void (*force_irq)(struct platform_device *pdev, unsigned int irq);
+ void (*force_irq_ack)(struct platform_device *pdev, unsigned int irq);
+};
+
+#endif
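
The pdata callbacks above are supplied by board or SoC glue code. A minimal, purely illustrative registration for irq-only mode could look like the sketch below; the my_uart_* helpers and the IRQ number are hypothetical, but the required pieces (uart_getc/uart_putc plus a resource named "uart_irq", or "fiq" for FIQ mode) mirror the checks in fiq_debugger_probe().

    /* Hypothetical board glue, not part of this patch. */
    #include <linux/ioport.h>
    #include <linux/platform_device.h>
    #include "fiq_debugger.h"

    static int my_uart_getc(struct platform_device *pdev)
    {
    	/* return one received character, or FIQ_DEBUGGER_NO_CHAR if the FIFO is empty */
    	return FIQ_DEBUGGER_NO_CHAR;
    }

    static void my_uart_putc(struct platform_device *pdev, unsigned int c)
    {
    	/* write one character to the UART transmit register */
    }

    static struct fiq_debugger_pdata my_fiq_pdata = {
    	.uart_getc = my_uart_getc,
    	.uart_putc = my_uart_putc,
    };

    static struct resource my_fiq_resources[] = {
    	{
    		.name	= "uart_irq",	/* use "fiq" instead for FIQ mode */
    		.start	= 42,		/* hypothetical UART interrupt number */
    		.end	= 42,
    		.flags	= IORESOURCE_IRQ,
    	},
    };

    static struct platform_device my_fiq_debugger_dev = {
    	.name		= "fiq_debugger",
    	.id		= 0,
    	.resource	= my_fiq_resources,
    	.num_resources	= ARRAY_SIZE(my_fiq_resources),
    	.dev		= { .platform_data = &my_fiq_pdata },
    };

Registered with platform_device_register(&my_fiq_debugger_dev), this is enough for the probe path to take the request_irq() branch rather than the fiq_glue one.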
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c
new file mode 100644
index 000000000000..8b3e0137be1a
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ptrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/stacktrace.h>
+
+#include "fiq_debugger_priv.h"
+
+static char *mode_name(unsigned cpsr)
+{
+ switch (cpsr & MODE_MASK) {
+ case USR_MODE: return "USR";
+ case FIQ_MODE: return "FIQ";
+ case IRQ_MODE: return "IRQ";
+ case SVC_MODE: return "SVC";
+ case ABT_MODE: return "ABT";
+ case UND_MODE: return "UND";
+ case SYSTEM_MODE: return "SYS";
+ default: return "???";
+ }
+}
+
+void fiq_debugger_dump_pc(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ output->printf(output, " pc %08x cpsr %08x mode %s\n",
+ regs->ARM_pc, regs->ARM_cpsr, mode_name(regs->ARM_cpsr));
+}
+
+void fiq_debugger_dump_regs(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ output->printf(output,
+ " r0 %08x r1 %08x r2 %08x r3 %08x\n",
+ regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+ output->printf(output,
+ " r4 %08x r5 %08x r6 %08x r7 %08x\n",
+ regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7);
+ output->printf(output,
+ " r8 %08x r9 %08x r10 %08x r11 %08x mode %s\n",
+ regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp,
+ mode_name(regs->ARM_cpsr));
+ output->printf(output,
+ " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n",
+ regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc,
+ regs->ARM_cpsr);
+}
+
+struct mode_regs {
+ unsigned long sp_svc;
+ unsigned long lr_svc;
+ unsigned long spsr_svc;
+
+ unsigned long sp_abt;
+ unsigned long lr_abt;
+ unsigned long spsr_abt;
+
+ unsigned long sp_und;
+ unsigned long lr_und;
+ unsigned long spsr_und;
+
+ unsigned long sp_irq;
+ unsigned long lr_irq;
+ unsigned long spsr_irq;
+
+ unsigned long r8_fiq;
+ unsigned long r9_fiq;
+ unsigned long r10_fiq;
+ unsigned long r11_fiq;
+ unsigned long r12_fiq;
+ unsigned long sp_fiq;
+ unsigned long lr_fiq;
+ unsigned long spsr_fiq;
+};
+
+static void __naked get_mode_regs(struct mode_regs *regs)
+{
+ asm volatile (
+ "mrs r1, cpsr\n"
+ "msr cpsr_c, #0xd3 @(SVC_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+ "stmia r0!, {r13 - r14}\n"
+ "mrs r2, spsr\n"
+ "msr cpsr_c, #0xd7 @(ABT_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+ "stmia r0!, {r2, r13 - r14}\n"
+ "mrs r2, spsr\n"
+ "msr cpsr_c, #0xdb @(UND_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+ "stmia r0!, {r2, r13 - r14}\n"
+ "mrs r2, spsr\n"
+ "msr cpsr_c, #0xd2 @(IRQ_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+ "stmia r0!, {r2, r13 - r14}\n"
+ "mrs r2, spsr\n"
+ "msr cpsr_c, #0xd1 @(FIQ_MODE | PSR_I_BIT | PSR_F_BIT)\n"
+ "stmia r0!, {r2, r8 - r14}\n"
+ "mrs r2, spsr\n"
+ "stmia r0!, {r2}\n"
+ "msr cpsr_c, r1\n"
+ "bx lr\n");
+}
+
+
+void fiq_debugger_dump_allregs(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ struct mode_regs mode_regs;
+ unsigned long mode = regs->ARM_cpsr & MODE_MASK;
+
+ fiq_debugger_dump_regs(output, regs);
+ get_mode_regs(&mode_regs);
+
+ output->printf(output,
+ "%csvc: sp %08x lr %08x spsr %08x\n",
+ mode == SVC_MODE ? '*' : ' ',
+ mode_regs.sp_svc, mode_regs.lr_svc, mode_regs.spsr_svc);
+ output->printf(output,
+ "%cabt: sp %08x lr %08x spsr %08x\n",
+ mode == ABT_MODE ? '*' : ' ',
+ mode_regs.sp_abt, mode_regs.lr_abt, mode_regs.spsr_abt);
+ output->printf(output,
+ "%cund: sp %08x lr %08x spsr %08x\n",
+ mode == UND_MODE ? '*' : ' ',
+ mode_regs.sp_und, mode_regs.lr_und, mode_regs.spsr_und);
+ output->printf(output,
+ "%cirq: sp %08x lr %08x spsr %08x\n",
+ mode == IRQ_MODE ? '*' : ' ',
+ mode_regs.sp_irq, mode_regs.lr_irq, mode_regs.spsr_irq);
+ output->printf(output,
+ "%cfiq: r8 %08x r9 %08x r10 %08x r11 %08x r12 %08x\n",
+ mode == FIQ_MODE ? '*' : ' ',
+ mode_regs.r8_fiq, mode_regs.r9_fiq, mode_regs.r10_fiq,
+ mode_regs.r11_fiq, mode_regs.r12_fiq);
+ output->printf(output,
+ " fiq: sp %08x lr %08x spsr %08x\n",
+ mode_regs.sp_fiq, mode_regs.lr_fiq, mode_regs.spsr_fiq);
+}
+
+struct stacktrace_state {
+ struct fiq_debugger_output *output;
+ unsigned int depth;
+};
+
+static int report_trace(struct stackframe *frame, void *d)
+{
+ struct stacktrace_state *sts = d;
+
+ if (sts->depth) {
+ sts->output->printf(sts->output,
+ " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n",
+ frame->pc, frame->pc, frame->lr, frame->lr,
+ frame->sp, frame->fp);
+ sts->depth--;
+ return 0;
+ }
+ sts->output->printf(sts->output, " ...\n");
+
+ return sts->depth == 0;
+}
+
+struct frame_tail {
+ struct frame_tail *fp;
+ unsigned long sp;
+ unsigned long lr;
+} __attribute__((packed));
+
+static struct frame_tail *user_backtrace(struct fiq_debugger_output *output,
+ struct frame_tail *tail)
+{
+ struct frame_tail buftail[2];
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) {
+ output->printf(output, " invalid frame pointer %p\n",
+ tail);
+ return NULL;
+ }
+ if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail))) {
+ output->printf(output,
+ " failed to copy frame pointer %p\n", tail);
+ return NULL;
+ }
+
+ output->printf(output, " %p\n", buftail[0].lr);
+
+ /* frame pointers should strictly progress back up the stack
+ * (towards higher addresses) */
+ if (tail >= buftail[0].fp)
+ return NULL;
+
+ return buftail[0].fp-1;
+}
+
+void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output,
+ const struct pt_regs *regs, unsigned int depth, void *ssp)
+{
+ struct frame_tail *tail;
+ struct thread_info *real_thread_info = THREAD_INFO(ssp);
+ struct stacktrace_state sts;
+
+ sts.depth = depth;
+ sts.output = output;
+ *current_thread_info() = *real_thread_info;
+
+ if (!current)
+ output->printf(output, "current NULL\n");
+ else
+ output->printf(output, "pid: %d comm: %s\n",
+ current->pid, current->comm);
+ fiq_debugger_dump_regs(output, regs);
+
+ if (!user_mode(regs)) {
+ struct stackframe frame;
+ frame.fp = regs->ARM_fp;
+ frame.sp = regs->ARM_sp;
+ frame.lr = regs->ARM_lr;
+ frame.pc = regs->ARM_pc;
+ output->printf(output,
+ " pc: %p (%pF), lr %p (%pF), sp %p, fp %p\n",
+ regs->ARM_pc, regs->ARM_pc, regs->ARM_lr, regs->ARM_lr,
+ regs->ARM_sp, regs->ARM_fp);
+ walk_stackframe(&frame, report_trace, &sts);
+ return;
+ }
+
+ tail = ((struct frame_tail *) regs->ARM_fp) - 1;
+ while (depth-- && tail && !((unsigned long) tail & 3))
+ tail = user_backtrace(output, tail);
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c
new file mode 100644
index 000000000000..99c6584fcfa5
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_arm64.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ptrace.h>
+#include <asm/stacktrace.h>
+
+#include "fiq_debugger_priv.h"
+
+static char *mode_name(const struct pt_regs *regs)
+{
+ if (compat_user_mode(regs)) {
+ return "USR";
+ } else {
+ switch (processor_mode(regs)) {
+ case PSR_MODE_EL0t: return "EL0t";
+ case PSR_MODE_EL1t: return "EL1t";
+ case PSR_MODE_EL1h: return "EL1h";
+ case PSR_MODE_EL2t: return "EL2t";
+ case PSR_MODE_EL2h: return "EL2h";
+ default: return "???";
+ }
+ }
+}
+
+void fiq_debugger_dump_pc(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ output->printf(output, " pc %016lx cpsr %08lx mode %s\n",
+ regs->pc, regs->pstate, mode_name(regs));
+}
+
+void fiq_debugger_dump_regs_aarch32(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ output->printf(output, " r0 %08x r1 %08x r2 %08x r3 %08x\n",
+ regs->compat_usr(0), regs->compat_usr(1),
+ regs->compat_usr(2), regs->compat_usr(3));
+ output->printf(output, " r4 %08x r5 %08x r6 %08x r7 %08x\n",
+ regs->compat_usr(4), regs->compat_usr(5),
+ regs->compat_usr(6), regs->compat_usr(7));
+ output->printf(output, " r8 %08x r9 %08x r10 %08x r11 %08x\n",
+ regs->compat_usr(8), regs->compat_usr(9),
+ regs->compat_usr(10), regs->compat_usr(11));
+ output->printf(output, " ip %08x sp %08x lr %08x pc %08x\n",
+ regs->compat_usr(12), regs->compat_sp,
+ regs->compat_lr, regs->pc);
+ output->printf(output, " cpsr %08x (%s)\n",
+ regs->pstate, mode_name(regs));
+}
+
+void fiq_debugger_dump_regs_aarch64(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+
+ output->printf(output, " x0 %016lx x1 %016lx\n",
+ regs->regs[0], regs->regs[1]);
+ output->printf(output, " x2 %016lx x3 %016lx\n",
+ regs->regs[2], regs->regs[3]);
+ output->printf(output, " x4 %016lx x5 %016lx\n",
+ regs->regs[4], regs->regs[5]);
+ output->printf(output, " x6 %016lx x7 %016lx\n",
+ regs->regs[6], regs->regs[7]);
+ output->printf(output, " x8 %016lx x9 %016lx\n",
+ regs->regs[8], regs->regs[9]);
+ output->printf(output, " x10 %016lx x11 %016lx\n",
+ regs->regs[10], regs->regs[11]);
+ output->printf(output, " x12 %016lx x13 %016lx\n",
+ regs->regs[12], regs->regs[13]);
+ output->printf(output, " x14 %016lx x15 %016lx\n",
+ regs->regs[14], regs->regs[15]);
+ output->printf(output, " x16 %016lx x17 %016lx\n",
+ regs->regs[16], regs->regs[17]);
+ output->printf(output, " x18 %016lx x19 %016lx\n",
+ regs->regs[18], regs->regs[19]);
+ output->printf(output, " x20 %016lx x21 %016lx\n",
+ regs->regs[20], regs->regs[21]);
+ output->printf(output, " x22 %016lx x23 %016lx\n",
+ regs->regs[22], regs->regs[23]);
+ output->printf(output, " x24 %016lx x25 %016lx\n",
+ regs->regs[24], regs->regs[25]);
+ output->printf(output, " x26 %016lx x27 %016lx\n",
+ regs->regs[26], regs->regs[27]);
+ output->printf(output, " x28 %016lx x29 %016lx\n",
+ regs->regs[28], regs->regs[29]);
+ output->printf(output, " x30 %016lx sp %016lx\n",
+ regs->regs[30], regs->sp);
+ output->printf(output, " pc %016lx cpsr %08x (%s)\n",
+ regs->pc, regs->pstate, mode_name(regs));
+}
+
+void fiq_debugger_dump_regs(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ if (compat_user_mode(regs))
+ fiq_debugger_dump_regs_aarch32(output, regs);
+ else
+ fiq_debugger_dump_regs_aarch64(output, regs);
+}
+
+#define READ_SPECIAL_REG(x) ({ \
+ u64 val; \
+ asm volatile ("mrs %0, " # x : "=r"(val)); \
+ val; \
+})
+
+void fiq_debugger_dump_allregs(struct fiq_debugger_output *output,
+ const struct pt_regs *regs)
+{
+ u32 pstate = READ_SPECIAL_REG(CurrentEl);
+ bool in_el2 = (pstate & PSR_MODE_MASK) >= PSR_MODE_EL2t;
+
+ fiq_debugger_dump_regs(output, regs);
+
+ output->printf(output, " sp_el0 %016lx\n",
+ READ_SPECIAL_REG(sp_el0));
+
+ if (in_el2)
+ output->printf(output, " sp_el1 %016lx\n",
+ READ_SPECIAL_REG(sp_el1));
+
+ output->printf(output, " elr_el1 %016lx\n",
+ READ_SPECIAL_REG(elr_el1));
+
+ output->printf(output, " spsr_el1 %08lx\n",
+ READ_SPECIAL_REG(spsr_el1));
+
+ if (in_el2) {
+ output->printf(output, " spsr_irq %08lx\n",
+ READ_SPECIAL_REG(spsr_irq));
+ output->printf(output, " spsr_abt %08lx\n",
+ READ_SPECIAL_REG(spsr_abt));
+ output->printf(output, " spsr_und %08lx\n",
+ READ_SPECIAL_REG(spsr_und));
+ output->printf(output, " spsr_fiq %08lx\n",
+ READ_SPECIAL_REG(spsr_fiq));
+ output->printf(output, " spsr_el2 %08lx\n",
+ READ_SPECIAL_REG(elr_el2));
+ output->printf(output, " spsr_el2 %08lx\n",
+ READ_SPECIAL_REG(spsr_el2));
+ }
+}
+
+struct stacktrace_state {
+ struct fiq_debugger_output *output;
+ unsigned int depth;
+};
+
+static int report_trace(struct stackframe *frame, void *d)
+{
+ struct stacktrace_state *sts = d;
+
+ if (sts->depth) {
+ sts->output->printf(sts->output, "%pF:\n", frame->pc);
+ sts->output->printf(sts->output,
+ " pc %016lx sp %016lx fp %016lx\n",
+ frame->pc, frame->sp, frame->fp);
+ sts->depth--;
+ return 0;
+ }
+ sts->output->printf(sts->output, " ...\n");
+
+ return sts->depth == 0;
+}
+
+void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output,
+ const struct pt_regs *regs, unsigned int depth, void *ssp)
+{
+ struct thread_info *real_thread_info = THREAD_INFO(ssp);
+ struct stacktrace_state sts;
+
+ sts.depth = depth;
+ sts.output = output;
+ *current_thread_info() = *real_thread_info;
+
+ if (!current)
+ output->printf(output, "current NULL\n");
+ else
+ output->printf(output, "pid: %d comm: %s\n",
+ current->pid, current->comm);
+ fiq_debugger_dump_regs(output, regs);
+
+ if (!user_mode(regs)) {
+ struct stackframe frame;
+ frame.fp = regs->regs[29];
+ frame.sp = regs->sp;
+ frame.pc = regs->pc;
+ output->printf(output, "\n");
+ walk_stackframe(&frame, report_trace, &sts);
+ }
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h
new file mode 100644
index 000000000000..d5d051f727a8
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_priv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FIQ_DEBUGGER_PRIV_H_
+#define _FIQ_DEBUGGER_PRIV_H_
+
+#define THREAD_INFO(sp) ((struct thread_info *) \
+ ((unsigned long)(sp) & ~(THREAD_SIZE - 1)))
+
+struct fiq_debugger_output {
+ void (*printf)(struct fiq_debugger_output *output, const char *fmt, ...);
+};
+
+struct pt_regs;
+
+void fiq_debugger_dump_pc(struct fiq_debugger_output *output,
+ const struct pt_regs *regs);
+void fiq_debugger_dump_regs(struct fiq_debugger_output *output,
+ const struct pt_regs *regs);
+void fiq_debugger_dump_allregs(struct fiq_debugger_output *output,
+ const struct pt_regs *regs);
+void fiq_debugger_dump_stacktrace(struct fiq_debugger_output *output,
+ const struct pt_regs *regs, unsigned int depth, void *ssp);
+
+#endif
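
THREAD_INFO() relies on the convention that struct thread_info sits at the bottom of the THREAD_SIZE-aligned kernel stack, so masking any in-stack pointer recovers it. A small worked example with hypothetical values:

    /* Assume THREAD_SIZE = 0x2000 (8 KiB) and an SVC stack pointer inside that stack. */
    /* sp                 = 0xc1234d60                                                 */
    /* sp & ~(0x2000 - 1) = 0xc1234000  ->  base of the stack, i.e. the thread_info    */

This is why the FIQ handlers and the per-arch dump code can pass the banked SVC stack pointer (svc_sp) around and still locate the interrupted task's thread_info and CPU number.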
diff --git a/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
new file mode 100644
index 000000000000..10c3c5d09098
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
@@ -0,0 +1,94 @@
+/*
+ * drivers/staging/android/fiq_debugger/fiq_debugger_ringbuf.h
+ *
+ * simple lockless ringbuffer
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+struct fiq_debugger_ringbuf {
+ int len;
+ int head;
+ int tail;
+ u8 buf[];
+};
+
+
+static inline struct fiq_debugger_ringbuf *fiq_debugger_ringbuf_alloc(int len)
+{
+ struct fiq_debugger_ringbuf *rbuf;
+
+ rbuf = kzalloc(sizeof(*rbuf) + len, GFP_KERNEL);
+ if (rbuf == NULL)
+ return NULL;
+
+ rbuf->len = len;
+ rbuf->head = 0;
+ rbuf->tail = 0;
+ smp_mb();
+
+ return rbuf;
+}
+
+static inline void fiq_debugger_ringbuf_free(struct fiq_debugger_ringbuf *rbuf)
+{
+ kfree(rbuf);
+}
+
+static inline int fiq_debugger_ringbuf_level(struct fiq_debugger_ringbuf *rbuf)
+{
+ int level = rbuf->head - rbuf->tail;
+
+ if (level < 0)
+ level = rbuf->len + level;
+
+ return level;
+}
+
+static inline int fiq_debugger_ringbuf_room(struct fiq_debugger_ringbuf *rbuf)
+{
+ return rbuf->len - fiq_debugger_ringbuf_level(rbuf) - 1;
+}
+
+static inline u8
+fiq_debugger_ringbuf_peek(struct fiq_debugger_ringbuf *rbuf, int i)
+{
+ return rbuf->buf[(rbuf->tail + i) % rbuf->len];
+}
+
+static inline int
+fiq_debugger_ringbuf_consume(struct fiq_debugger_ringbuf *rbuf, int count)
+{
+ count = min(count, fiq_debugger_ringbuf_level(rbuf));
+
+ rbuf->tail = (rbuf->tail + count) % rbuf->len;
+ smp_mb();
+
+ return count;
+}
+
+static inline int
+fiq_debugger_ringbuf_push(struct fiq_debugger_ringbuf *rbuf, u8 datum)
+{
+ if (fiq_debugger_ringbuf_room(rbuf) == 0)
+ return 0;
+
+ rbuf->buf[rbuf->head] = datum;
+ smp_mb();
+ rbuf->head = (rbuf->head + 1) % rbuf->len;
+ smp_mb();
+
+ return 1;
+}
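
The buffer is written by a single producer (the FIQ or UART interrupt handler, via fiq_debugger_ringbuf_push()) and drained by a single consumer in IRQ context, with the smp_mb() calls ordering head/tail updates against the data. A condensed, hypothetical consumer mirroring fiq_debugger_handle_console_irq_context():

    #include "fiq_debugger_ringbuf.h"

    /* Drain everything currently queued into a caller-provided sink,
     * e.g. tty_insert_flip_char() in the real driver. */
    static void drain_ringbuf(struct fiq_debugger_ringbuf *rbuf,
    			  void (*sink)(u8 c))
    {
    	int i;
    	int count = fiq_debugger_ringbuf_level(rbuf);

    	for (i = 0; i < count; i++) {
    		sink(fiq_debugger_ringbuf_peek(rbuf, 0));
    		fiq_debugger_ringbuf_consume(rbuf, 1);
    	}
    }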
diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.c b/drivers/staging/android/fiq_debugger/fiq_watchdog.c
new file mode 100644
index 000000000000..194b54138417
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/pstore_ram.h>
+
+#include "fiq_watchdog.h"
+#include "fiq_debugger_priv.h"
+
+static DEFINE_RAW_SPINLOCK(fiq_watchdog_lock);
+
+static void fiq_watchdog_printf(struct fiq_debugger_output *output,
+ const char *fmt, ...)
+{
+ char buf[256];
+ va_list ap;
+ int len;
+
+ va_start(ap, fmt);
+ len = vscnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ ramoops_console_write_buf(buf, len);
+}
+
+struct fiq_debugger_output fiq_watchdog_output = {
+ .printf = fiq_watchdog_printf,
+};
+
+void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp)
+{
+ char msg[24];
+ int len;
+
+ raw_spin_lock(&fiq_watchdog_lock);
+
+ len = scnprintf(msg, sizeof(msg), "watchdog fiq cpu %d\n",
+ THREAD_INFO(svc_sp)->cpu);
+ ramoops_console_write_buf(msg, len);
+
+ fiq_debugger_dump_stacktrace(&fiq_watchdog_output, regs, 100, svc_sp);
+
+ raw_spin_unlock(&fiq_watchdog_lock);
+}
diff --git a/drivers/staging/android/fiq_debugger/fiq_watchdog.h b/drivers/staging/android/fiq_debugger/fiq_watchdog.h
new file mode 100644
index 000000000000..c6b507f8d976
--- /dev/null
+++ b/drivers/staging/android/fiq_debugger/fiq_watchdog.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FIQ_WATCHDOG_H_
+#define _FIQ_WATCHDOG_H_
+
+void fiq_watchdog_triggered(const struct pt_regs *regs, void *svc_sp);
+
+#endif
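
fiq_watchdog_triggered() is intended to be called from a watchdog driver's FIQ path, typically a fiq_glue handler wired to a hardware pre-timeout interrupt. A hypothetical sketch of such glue (the handler name and registration point are illustrative):

    #include <asm/fiq_glue.h>
    #include "fiq_watchdog.h"

    static void my_wdt_fiq(struct fiq_glue_handler *h,
    		       const struct pt_regs *regs, void *svc_sp)
    {
    	/* logs "watchdog fiq cpu N" plus a stack trace into the ramoops buffer */
    	fiq_watchdog_triggered(regs, svc_sp);
    }

    static struct fiq_glue_handler my_wdt_handler = {
    	.fiq = my_wdt_fiq,
    };
    /* at init time: fiq_glue_register_handler(&my_wdt_handler); */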
diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig
new file mode 100644
index 000000000000..0f8fec1f84e5
--- /dev/null
+++ b/drivers/staging/android/ion/Kconfig
@@ -0,0 +1,35 @@
+menuconfig ION
+ bool "Ion Memory Manager"
+ depends on HAVE_MEMBLOCK
+ select GENERIC_ALLOCATOR
+ select DMA_SHARED_BUFFER
+ ---help---
+ Choose this option to enable the ION Memory Manager,
+ used by Android to efficiently allocate buffers
+ from userspace that can be shared between drivers.
+ If you're not using Android it's probably safe to
+ say N here.
+
+config ION_TEST
+ tristate "Ion Test Device"
+ depends on ION
+ help
+ Choose this option to create a device that can be used to test the
+ kernel and device side ION functions.
+
+config ION_DUMMY
+ bool "Dummy Ion driver"
+ depends on ION
+ help
+ Provides a dummy ION driver that registers the
+ /dev/ion device and some basic heaps. This can
+ be used for testing the ION infrastructure if
+ one doesn't have access to hardware drivers that
+ use ION.
+
+config ION_TEGRA
+ tristate "Ion for Tegra"
+ depends on ARCH_TEGRA && ION
+ help
+ Choose this option if you wish to use Ion on an NVIDIA Tegra.
+
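
For a quick functional test of these options, a defconfig fragment might enable the core, the dummy heaps and the test device (ION_TEGRA only applies to Tegra platforms):

    CONFIG_ION=y
    CONFIG_ION_DUMMY=y
    CONFIG_ION_TEST=m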
diff --git a/drivers/staging/android/ion/Makefile b/drivers/staging/android/ion/Makefile
new file mode 100644
index 000000000000..b56fd2bf2b4f
--- /dev/null
+++ b/drivers/staging/android/ion/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_ION) += ion.o ion_heap.o ion_page_pool.o ion_system_heap.o \
+ ion_carveout_heap.o ion_chunk_heap.o ion_cma_heap.o
+obj-$(CONFIG_ION_TEST) += ion_test.o
+ifdef CONFIG_COMPAT
+obj-$(CONFIG_ION) += compat_ion.o
+endif
+
+obj-$(CONFIG_ION_DUMMY) += ion_dummy_driver.o
+obj-$(CONFIG_ION_TEGRA) += tegra/
+
diff --git a/drivers/staging/android/ion/compat_ion.c b/drivers/staging/android/ion/compat_ion.c
new file mode 100644
index 000000000000..a5e8c408fd78
--- /dev/null
+++ b/drivers/staging/android/ion/compat_ion.c
@@ -0,0 +1,195 @@
+/*
+ * drivers/gpu/ion/compat_ion.c
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/compat.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+
+#include "ion.h"
+#include "compat_ion.h"
+
+/* See drivers/staging/android/uapi/ion.h for the definition of these structs */
+struct compat_ion_allocation_data {
+ compat_size_t len;
+ compat_size_t align;
+ compat_uint_t heap_id_mask;
+ compat_uint_t flags;
+ compat_int_t handle;
+};
+
+struct compat_ion_custom_data {
+ compat_uint_t cmd;
+ compat_ulong_t arg;
+};
+
+struct compat_ion_handle_data {
+ compat_int_t handle;
+};
+
+#define COMPAT_ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \
+ struct compat_ion_allocation_data)
+#define COMPAT_ION_IOC_FREE _IOWR(ION_IOC_MAGIC, 1, \
+ struct compat_ion_handle_data)
+#define COMPAT_ION_IOC_CUSTOM _IOWR(ION_IOC_MAGIC, 6, \
+ struct compat_ion_custom_data)
+
+static int compat_get_ion_allocation_data(
+ struct compat_ion_allocation_data __user *data32,
+ struct ion_allocation_data __user *data)
+{
+ compat_size_t s;
+ compat_uint_t u;
+ compat_int_t i;
+ int err;
+
+ err = get_user(s, &data32->len);
+ err |= put_user(s, &data->len);
+ err |= get_user(s, &data32->align);
+ err |= put_user(s, &data->align);
+ err |= get_user(u, &data32->heap_id_mask);
+ err |= put_user(u, &data->heap_id_mask);
+ err |= get_user(u, &data32->flags);
+ err |= put_user(u, &data->flags);
+ err |= get_user(i, &data32->handle);
+ err |= put_user(i, &data->handle);
+
+ return err;
+}
+
+static int compat_get_ion_handle_data(
+ struct compat_ion_handle_data __user *data32,
+ struct ion_handle_data __user *data)
+{
+ compat_int_t i;
+ int err;
+
+ err = get_user(i, &data32->handle);
+ err |= put_user(i, &data->handle);
+
+ return err;
+}
+
+static int compat_put_ion_allocation_data(
+ struct compat_ion_allocation_data __user *data32,
+ struct ion_allocation_data __user *data)
+{
+ compat_size_t s;
+ compat_uint_t u;
+ compat_int_t i;
+ int err;
+
+ err = get_user(s, &data->len);
+ err |= put_user(s, &data32->len);
+ err |= get_user(s, &data->align);
+ err |= put_user(s, &data32->align);
+ err |= get_user(u, &data->heap_id_mask);
+ err |= put_user(u, &data32->heap_id_mask);
+ err |= get_user(u, &data->flags);
+ err |= put_user(u, &data32->flags);
+ err |= get_user(i, &data->handle);
+ err |= put_user(i, &data32->handle);
+
+ return err;
+}
+
+static int compat_get_ion_custom_data(
+ struct compat_ion_custom_data __user *data32,
+ struct ion_custom_data __user *data)
+{
+ compat_uint_t cmd;
+ compat_ulong_t arg;
+ int err;
+
+ err = get_user(cmd, &data32->cmd);
+ err |= put_user(cmd, &data->cmd);
+ err |= get_user(arg, &data32->arg);
+ err |= put_user(arg, &data->arg);
+
+ return err;
+};
+
+long compat_ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+
+ if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+ return -ENOTTY;
+
+ switch (cmd) {
+ case COMPAT_ION_IOC_ALLOC:
+ {
+ struct compat_ion_allocation_data __user *data32;
+ struct ion_allocation_data __user *data;
+ int err;
+
+ data32 = compat_ptr(arg);
+ data = compat_alloc_user_space(sizeof(*data));
+ if (data == NULL)
+ return -EFAULT;
+
+ err = compat_get_ion_allocation_data(data32, data);
+ if (err)
+ return err;
+ ret = filp->f_op->unlocked_ioctl(filp, ION_IOC_ALLOC,
+ (unsigned long)data);
+ err = compat_put_ion_allocation_data(data32, data);
+ return ret ? ret : err;
+ }
+ case COMPAT_ION_IOC_FREE:
+ {
+ struct compat_ion_handle_data __user *data32;
+ struct ion_handle_data __user *data;
+ int err;
+
+ data32 = compat_ptr(arg);
+ data = compat_alloc_user_space(sizeof(*data));
+ if (data == NULL)
+ return -EFAULT;
+
+ err = compat_get_ion_handle_data(data32, data);
+ if (err)
+ return err;
+
+ return filp->f_op->unlocked_ioctl(filp, ION_IOC_FREE,
+ (unsigned long)data);
+ }
+ case COMPAT_ION_IOC_CUSTOM: {
+ struct compat_ion_custom_data __user *data32;
+ struct ion_custom_data __user *data;
+ int err;
+
+ data32 = compat_ptr(arg);
+ data = compat_alloc_user_space(sizeof(*data));
+ if (data == NULL)
+ return -EFAULT;
+
+ err = compat_get_ion_custom_data(data32, data);
+ if (err)
+ return err;
+
+ return filp->f_op->unlocked_ioctl(filp, ION_IOC_CUSTOM,
+ (unsigned long)data);
+ }
+ case ION_IOC_SHARE:
+ case ION_IOC_MAP:
+ case ION_IOC_IMPORT:
+ case ION_IOC_SYNC:
+ return filp->f_op->unlocked_ioctl(filp, cmd,
+ (unsigned long)compat_ptr(arg));
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
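As a hedged illustration of what this translation layer serves (not part of the patch): a 32-bit process on a 64-bit kernel simply issues the regular ioctls against /dev/ion and the kernel routes them through compat_ion_ioctl() above. The helper below is hypothetical and assumes the installed uapi header.

/*
 * Illustrative userspace sketch (assumption): allocate one buffer and
 * free it again through the ordinary ioctls; on a 64-bit kernel a
 * 32-bit caller is transparently handled by compat_ion_ioctl().
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ion.h>		/* installed uapi header (assumption) */

static int ion_alloc_then_free(void)
{
	struct ion_allocation_data alloc = {
		.len = 4096,
		.align = 0,
		.heap_id_mask = ~0u,	/* any heap */
		.flags = 0,
	};
	struct ion_handle_data free_data;
	int fd, ret;

	fd = open("/dev/ion", O_RDWR);
	if (fd < 0)
		return -1;

	ret = ioctl(fd, ION_IOC_ALLOC, &alloc);
	if (ret == 0) {
		/* release the handle returned by the allocation */
		free_data.handle = alloc.handle;
		ret = ioctl(fd, ION_IOC_FREE, &free_data);
	}

	close(fd);
	return ret;
}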
diff --git a/drivers/staging/android/ion/compat_ion.h b/drivers/staging/android/ion/compat_ion.h
new file mode 100644
index 000000000000..3a9c8c08c240
--- /dev/null
+++ b/drivers/staging/android/ion/compat_ion.h
@@ -0,0 +1,30 @@
+/*
+ * drivers/staging/android/ion/compat_ion.h
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_COMPAT_ION_H
+#define _LINUX_COMPAT_ION_H
+
+#if IS_ENABLED(CONFIG_COMPAT)
+
+long compat_ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+
+#else
+
+#define compat_ion_ioctl NULL
+
+#endif /* CONFIG_COMPAT */
+#endif /* _LINUX_COMPAT_ION_H */
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
new file mode 100644
index 000000000000..bfee3e08071e
--- /dev/null
+++ b/drivers/staging/android/ion/ion.c
@@ -0,0 +1,1646 @@
+/*
+ * drivers/staging/android/ion/ion.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/freezer.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/memblock.h>
+#include <linux/miscdevice.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/dma-buf.h>
+#include <linux/idr.h>
+
+#include "ion.h"
+#include "ion_priv.h"
+#include "compat_ion.h"
+
+/**
+ * struct ion_device - the metadata of the ion device node
+ * @dev: the actual misc device
+ * @buffers: an rb tree of all the existing buffers
+ * @buffer_lock: lock protecting the tree of buffers
+ * @lock: rwsem protecting the tree of heaps and clients
+ * @heaps: list of all the heaps in the system
+ * @clients: an rb tree of all the existing clients
+ */
+struct ion_device {
+ struct miscdevice dev;
+ struct rb_root buffers;
+ struct mutex buffer_lock;
+ struct rw_semaphore lock;
+ struct plist_head heaps;
+ long (*custom_ioctl) (struct ion_client *client, unsigned int cmd,
+ unsigned long arg);
+ struct rb_root clients;
+ struct dentry *debug_root;
+ struct dentry *heaps_debug_root;
+ struct dentry *clients_debug_root;
+};
+
+/**
+ * struct ion_client - a process/hw block local address space
+ * @node: node in the tree of all clients
+ * @dev: backpointer to ion device
+ * @handles: an rb tree of all the handles in this client
+ * @idr: an idr space for allocating handle ids
+ * @lock: lock protecting the tree of handles
+ * @name: used for debugging
+ * @display_name: used for debugging (unique version of @name)
+ * @display_serial: used for debugging (to make display_name unique)
+ * @task: used for debugging
+ *
+ * A client represents a list of buffers this client may access.
+ * The mutex stored here is used to protect both the tree of handles
+ * and the handles themselves, and should be held while modifying either.
+ */
+struct ion_client {
+ struct rb_node node;
+ struct ion_device *dev;
+ struct rb_root handles;
+ struct idr idr;
+ struct mutex lock;
+ const char *name;
+ char *display_name;
+ int display_serial;
+ struct task_struct *task;
+ pid_t pid;
+ struct dentry *debug_root;
+};
+
+/**
+ * ion_handle - a client local reference to a buffer
+ * @ref: reference count
+ * @client: back pointer to the client the buffer resides in
+ * @buffer: pointer to the buffer
+ * @node: node in the client's handle rbtree
+ * @kmap_cnt: count of times this client has mapped to kernel
+ * @id: client-unique id allocated by client->idr
+ *
+ * Modifications to node and kmap_cnt should be protected by the
+ * lock in the client. Other fields are never changed after initialization.
+ */
+struct ion_handle {
+ struct kref ref;
+ struct ion_client *client;
+ struct ion_buffer *buffer;
+ struct rb_node node;
+ unsigned int kmap_cnt;
+ int id;
+};
+
+bool ion_buffer_fault_user_mappings(struct ion_buffer *buffer)
+{
+ return (buffer->flags & ION_FLAG_CACHED) &&
+ !(buffer->flags & ION_FLAG_CACHED_NEEDS_SYNC);
+}
+
+bool ion_buffer_cached(struct ion_buffer *buffer)
+{
+ return !!(buffer->flags & ION_FLAG_CACHED);
+}
+
+static inline struct page *ion_buffer_page(struct page *page)
+{
+ return (struct page *)((unsigned long)page & ~(1UL));
+}
+
+static inline bool ion_buffer_page_is_dirty(struct page *page)
+{
+ return !!((unsigned long)page & 1UL);
+}
+
+static inline void ion_buffer_page_dirty(struct page **page)
+{
+ *page = (struct page *)((unsigned long)(*page) | 1UL);
+}
+
+static inline void ion_buffer_page_clean(struct page **page)
+{
+ *page = (struct page *)((unsigned long)(*page) & ~(1UL));
+}
+
+/* this function should only be called while dev->lock is held */
+static void ion_buffer_add(struct ion_device *dev,
+ struct ion_buffer *buffer)
+{
+ struct rb_node **p = &dev->buffers.rb_node;
+ struct rb_node *parent = NULL;
+ struct ion_buffer *entry;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct ion_buffer, node);
+
+ if (buffer < entry) {
+ p = &(*p)->rb_left;
+ } else if (buffer > entry) {
+ p = &(*p)->rb_right;
+ } else {
+ pr_err("%s: buffer already found.", __func__);
+ BUG();
+ }
+ }
+
+ rb_link_node(&buffer->node, parent, p);
+ rb_insert_color(&buffer->node, &dev->buffers);
+}
+
+/* this function should only be called while dev->lock is held */
+static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
+ struct ion_device *dev,
+ unsigned long len,
+ unsigned long align,
+ unsigned long flags)
+{
+ struct ion_buffer *buffer;
+ struct sg_table *table;
+ struct scatterlist *sg;
+ int i, ret;
+
+ buffer = kzalloc(sizeof(struct ion_buffer), GFP_KERNEL);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+
+ buffer->heap = heap;
+ buffer->flags = flags;
+ kref_init(&buffer->ref);
+
+ ret = heap->ops->allocate(heap, buffer, len, align, flags);
+
+ if (ret) {
+ if (!(heap->flags & ION_HEAP_FLAG_DEFER_FREE))
+ goto err2;
+
+ ion_heap_freelist_drain(heap, 0);
+ ret = heap->ops->allocate(heap, buffer, len, align,
+ flags);
+ if (ret)
+ goto err2;
+ }
+
+ buffer->dev = dev;
+ buffer->size = len;
+
+ table = heap->ops->map_dma(heap, buffer);
+ if (WARN_ONCE(table == NULL,
+ "heap->ops->map_dma should return ERR_PTR on error"))
+ table = ERR_PTR(-EINVAL);
+ if (IS_ERR(table)) {
+ heap->ops->free(buffer);
+ kfree(buffer);
+ return ERR_PTR(PTR_ERR(table));
+ }
+ buffer->sg_table = table;
+ if (ion_buffer_fault_user_mappings(buffer)) {
+ int num_pages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+ struct scatterlist *sg;
+ int i, j, k = 0;
+
+ buffer->pages = vmalloc(sizeof(struct page *) * num_pages);
+ if (!buffer->pages) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ struct page *page = sg_page(sg);
+
+ for (j = 0; j < sg->length / PAGE_SIZE; j++)
+ buffer->pages[k++] = page++;
+ }
+
+ if (ret)
+ goto err;
+ }
+
+ buffer->dev = dev;
+ buffer->size = len;
+ INIT_LIST_HEAD(&buffer->vmas);
+ mutex_init(&buffer->lock);
+ /* this will set up dma addresses for the sglist -- it is not
+ technically correct as per the dma api -- a specific
+ device isn't really taking ownership here. However, in practice on
+ our systems the only dma_address space is physical addresses.
+ Additionally, we can't afford the overhead of invalidating every
+ allocation via dma_map_sg. The implicit contract here is that
+ memory coming from the heaps is ready for dma, i.e. if it has a
+ cached mapping that mapping has been invalidated */
+ for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
+ sg_dma_address(sg) = sg_phys(sg);
+ sg_dma_len(sg) = sg->length;
+ }
+ mutex_lock(&dev->buffer_lock);
+ ion_buffer_add(dev, buffer);
+ mutex_unlock(&dev->buffer_lock);
+ return buffer;
+
+err:
+ heap->ops->unmap_dma(heap, buffer);
+ heap->ops->free(buffer);
+err1:
+ if (buffer->pages)
+ vfree(buffer->pages);
+err2:
+ kfree(buffer);
+ return ERR_PTR(ret);
+}
+
+void ion_buffer_destroy(struct ion_buffer *buffer)
+{
+ if (WARN_ON(buffer->kmap_cnt > 0))
+ buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
+ buffer->heap->ops->unmap_dma(buffer->heap, buffer);
+ buffer->heap->ops->free(buffer);
+ if (buffer->pages)
+ vfree(buffer->pages);
+ kfree(buffer);
+}
+
+static void _ion_buffer_destroy(struct kref *kref)
+{
+ struct ion_buffer *buffer = container_of(kref, struct ion_buffer, ref);
+ struct ion_heap *heap = buffer->heap;
+ struct ion_device *dev = buffer->dev;
+
+ mutex_lock(&dev->buffer_lock);
+ rb_erase(&buffer->node, &dev->buffers);
+ mutex_unlock(&dev->buffer_lock);
+
+ if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
+ ion_heap_freelist_add(heap, buffer);
+ else
+ ion_buffer_destroy(buffer);
+}
+
+static void ion_buffer_get(struct ion_buffer *buffer)
+{
+ kref_get(&buffer->ref);
+}
+
+static int ion_buffer_put(struct ion_buffer *buffer)
+{
+ return kref_put(&buffer->ref, _ion_buffer_destroy);
+}
+
+static void ion_buffer_add_to_handle(struct ion_buffer *buffer)
+{
+ mutex_lock(&buffer->lock);
+ buffer->handle_count++;
+ mutex_unlock(&buffer->lock);
+}
+
+static void ion_buffer_remove_from_handle(struct ion_buffer *buffer)
+{
+ /*
+ * when a buffer is removed from a handle, if it is not in
+ * any other handles, copy the taskcomm and the pid of the
+ * process it's being removed from into the buffer. At this
+ * point there will be no way to track what processes this buffer is
+ * being used by; it only exists as a dma_buf file descriptor.
+ * The taskcomm and pid can provide a debug hint as to where this fd
+ * is in the system.
+ */
+ mutex_lock(&buffer->lock);
+ buffer->handle_count--;
+ BUG_ON(buffer->handle_count < 0);
+ if (!buffer->handle_count) {
+ struct task_struct *task;
+
+ task = current->group_leader;
+ get_task_comm(buffer->task_comm, task);
+ buffer->pid = task_pid_nr(task);
+ }
+ mutex_unlock(&buffer->lock);
+}
+
+static struct ion_handle *ion_handle_create(struct ion_client *client,
+ struct ion_buffer *buffer)
+{
+ struct ion_handle *handle;
+
+ handle = kzalloc(sizeof(struct ion_handle), GFP_KERNEL);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+ kref_init(&handle->ref);
+ RB_CLEAR_NODE(&handle->node);
+ handle->client = client;
+ ion_buffer_get(buffer);
+ ion_buffer_add_to_handle(buffer);
+ handle->buffer = buffer;
+
+ return handle;
+}
+
+static void ion_handle_kmap_put(struct ion_handle *);
+
+static void ion_handle_destroy(struct kref *kref)
+{
+ struct ion_handle *handle = container_of(kref, struct ion_handle, ref);
+ struct ion_client *client = handle->client;
+ struct ion_buffer *buffer = handle->buffer;
+
+ mutex_lock(&buffer->lock);
+ while (handle->kmap_cnt)
+ ion_handle_kmap_put(handle);
+ mutex_unlock(&buffer->lock);
+
+ idr_remove(&client->idr, handle->id);
+ if (!RB_EMPTY_NODE(&handle->node))
+ rb_erase(&handle->node, &client->handles);
+
+ ion_buffer_remove_from_handle(buffer);
+ ion_buffer_put(buffer);
+
+ kfree(handle);
+}
+
+struct ion_buffer *ion_handle_buffer(struct ion_handle *handle)
+{
+ return handle->buffer;
+}
+
+static void ion_handle_get(struct ion_handle *handle)
+{
+ kref_get(&handle->ref);
+}
+
+static int ion_handle_put(struct ion_handle *handle)
+{
+ struct ion_client *client = handle->client;
+ int ret;
+
+ mutex_lock(&client->lock);
+ ret = kref_put(&handle->ref, ion_handle_destroy);
+ mutex_unlock(&client->lock);
+
+ return ret;
+}
+
+static struct ion_handle *ion_handle_lookup(struct ion_client *client,
+ struct ion_buffer *buffer)
+{
+ struct rb_node *n = client->handles.rb_node;
+
+ while (n) {
+ struct ion_handle *entry = rb_entry(n, struct ion_handle, node);
+
+ if (buffer < entry->buffer)
+ n = n->rb_left;
+ else if (buffer > entry->buffer)
+ n = n->rb_right;
+ else
+ return entry;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
+ int id)
+{
+ struct ion_handle *handle;
+
+ mutex_lock(&client->lock);
+ handle = idr_find(&client->idr, id);
+ if (handle)
+ ion_handle_get(handle);
+ mutex_unlock(&client->lock);
+
+ return handle ? handle : ERR_PTR(-EINVAL);
+}
+
+static bool ion_handle_validate(struct ion_client *client,
+ struct ion_handle *handle)
+{
+ WARN_ON(!mutex_is_locked(&client->lock));
+ return (idr_find(&client->idr, handle->id) == handle);
+}
+
+static int ion_handle_add(struct ion_client *client, struct ion_handle *handle)
+{
+ int id;
+ struct rb_node **p = &client->handles.rb_node;
+ struct rb_node *parent = NULL;
+ struct ion_handle *entry;
+
+ id = idr_alloc(&client->idr, handle, 1, 0, GFP_KERNEL);
+ if (id < 0)
+ return id;
+
+ handle->id = id;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct ion_handle, node);
+
+ if (handle->buffer < entry->buffer)
+ p = &(*p)->rb_left;
+ else if (handle->buffer > entry->buffer)
+ p = &(*p)->rb_right;
+ else
+ WARN(1, "%s: buffer already found.", __func__);
+ }
+
+ rb_link_node(&handle->node, parent, p);
+ rb_insert_color(&handle->node, &client->handles);
+
+ return 0;
+}
+
+struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
+ size_t align, unsigned int heap_id_mask,
+ unsigned int flags)
+{
+ struct ion_handle *handle;
+ struct ion_device *dev = client->dev;
+ struct ion_buffer *buffer = NULL;
+ struct ion_heap *heap;
+ int ret;
+
+ pr_debug("%s: len %zu align %zu heap_id_mask %u flags %x\n", __func__,
+ len, align, heap_id_mask, flags);
+ /*
+ * traverse the list of heaps available in this system in priority
+ * order. If the heap type is supported by the client and matches the
+ * request of the caller, allocate from it. Repeat until an allocation
+ * succeeds or all heaps have been tried.
+ */
+ len = PAGE_ALIGN(len);
+
+ if (!len)
+ return ERR_PTR(-EINVAL);
+
+ down_read(&dev->lock);
+ plist_for_each_entry(heap, &dev->heaps, node) {
+ /* if the caller didn't specify this heap id */
+ if (!((1 << heap->id) & heap_id_mask))
+ continue;
+ buffer = ion_buffer_create(heap, dev, len, align, flags);
+ if (!IS_ERR(buffer))
+ break;
+ }
+ up_read(&dev->lock);
+
+ if (buffer == NULL)
+ return ERR_PTR(-ENODEV);
+
+ if (IS_ERR(buffer))
+ return ERR_PTR(PTR_ERR(buffer));
+
+ handle = ion_handle_create(client, buffer);
+
+ /*
+ * ion_buffer_create will create a buffer with a ref_cnt of 1,
+ * and ion_handle_create will take a second reference, drop one here
+ */
+ ion_buffer_put(buffer);
+
+ if (IS_ERR(handle))
+ return handle;
+
+ mutex_lock(&client->lock);
+ ret = ion_handle_add(client, handle);
+ mutex_unlock(&client->lock);
+ if (ret) {
+ ion_handle_put(handle);
+ handle = ERR_PTR(ret);
+ }
+
+ return handle;
+}
+EXPORT_SYMBOL(ion_alloc);
+
+void ion_free(struct ion_client *client, struct ion_handle *handle)
+{
+ bool valid_handle;
+
+ BUG_ON(client != handle->client);
+
+ mutex_lock(&client->lock);
+ valid_handle = ion_handle_validate(client, handle);
+
+ if (!valid_handle) {
+ WARN(1, "%s: invalid handle passed to free.\n", __func__);
+ mutex_unlock(&client->lock);
+ return;
+ }
+ mutex_unlock(&client->lock);
+ ion_handle_put(handle);
+}
+EXPORT_SYMBOL(ion_free);
+
+int ion_phys(struct ion_client *client, struct ion_handle *handle,
+ ion_phys_addr_t *addr, size_t *len)
+{
+ struct ion_buffer *buffer;
+ int ret;
+
+ mutex_lock(&client->lock);
+ if (!ion_handle_validate(client, handle)) {
+ mutex_unlock(&client->lock);
+ return -EINVAL;
+ }
+
+ buffer = handle->buffer;
+
+ if (!buffer->heap->ops->phys) {
+ pr_err("%s: ion_phys is not implemented by this heap.\n",
+ __func__);
+ mutex_unlock(&client->lock);
+ return -ENODEV;
+ }
+ mutex_unlock(&client->lock);
+ ret = buffer->heap->ops->phys(buffer->heap, buffer, addr, len);
+ return ret;
+}
+EXPORT_SYMBOL(ion_phys);
+
+static void *ion_buffer_kmap_get(struct ion_buffer *buffer)
+{
+ void *vaddr;
+
+ if (buffer->kmap_cnt) {
+ buffer->kmap_cnt++;
+ return buffer->vaddr;
+ }
+ vaddr = buffer->heap->ops->map_kernel(buffer->heap, buffer);
+ if (WARN_ONCE(vaddr == NULL,
+ "heap->ops->map_kernel should return ERR_PTR on error"))
+ return ERR_PTR(-EINVAL);
+ if (IS_ERR(vaddr))
+ return vaddr;
+ buffer->vaddr = vaddr;
+ buffer->kmap_cnt++;
+ return vaddr;
+}
+
+static void *ion_handle_kmap_get(struct ion_handle *handle)
+{
+ struct ion_buffer *buffer = handle->buffer;
+ void *vaddr;
+
+ if (handle->kmap_cnt) {
+ handle->kmap_cnt++;
+ return buffer->vaddr;
+ }
+ vaddr = ion_buffer_kmap_get(buffer);
+ if (IS_ERR(vaddr))
+ return vaddr;
+ handle->kmap_cnt++;
+ return vaddr;
+}
+
+static void ion_buffer_kmap_put(struct ion_buffer *buffer)
+{
+ buffer->kmap_cnt--;
+ if (!buffer->kmap_cnt) {
+ buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
+ buffer->vaddr = NULL;
+ }
+}
+
+static void ion_handle_kmap_put(struct ion_handle *handle)
+{
+ struct ion_buffer *buffer = handle->buffer;
+
+ handle->kmap_cnt--;
+ if (!handle->kmap_cnt)
+ ion_buffer_kmap_put(buffer);
+}
+
+void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle)
+{
+ struct ion_buffer *buffer;
+ void *vaddr;
+
+ mutex_lock(&client->lock);
+ if (!ion_handle_validate(client, handle)) {
+ pr_err("%s: invalid handle passed to map_kernel.\n",
+ __func__);
+ mutex_unlock(&client->lock);
+ return ERR_PTR(-EINVAL);
+ }
+
+ buffer = handle->buffer;
+
+ if (!handle->buffer->heap->ops->map_kernel) {
+ pr_err("%s: map_kernel is not implemented by this heap.\n",
+ __func__);
+ mutex_unlock(&client->lock);
+ return ERR_PTR(-ENODEV);
+ }
+
+ mutex_lock(&buffer->lock);
+ vaddr = ion_handle_kmap_get(handle);
+ mutex_unlock(&buffer->lock);
+ mutex_unlock(&client->lock);
+ return vaddr;
+}
+EXPORT_SYMBOL(ion_map_kernel);
+
+void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle)
+{
+ struct ion_buffer *buffer;
+
+ mutex_lock(&client->lock);
+ buffer = handle->buffer;
+ mutex_lock(&buffer->lock);
+ ion_handle_kmap_put(handle);
+ mutex_unlock(&buffer->lock);
+ mutex_unlock(&client->lock);
+}
+EXPORT_SYMBOL(ion_unmap_kernel);
+
+static int ion_debug_client_show(struct seq_file *s, void *unused)
+{
+ struct ion_client *client = s->private;
+ struct rb_node *n;
+ size_t sizes[ION_NUM_HEAP_IDS] = {0};
+ const char *names[ION_NUM_HEAP_IDS] = {NULL};
+ int i;
+
+ mutex_lock(&client->lock);
+ for (n = rb_first(&client->handles); n; n = rb_next(n)) {
+ struct ion_handle *handle = rb_entry(n, struct ion_handle,
+ node);
+ unsigned int id = handle->buffer->heap->id;
+
+ if (!names[id])
+ names[id] = handle->buffer->heap->name;
+ sizes[id] += handle->buffer->size;
+ }
+ mutex_unlock(&client->lock);
+
+ seq_printf(s, "%16.16s: %16.16s\n", "heap_name", "size_in_bytes");
+ for (i = 0; i < ION_NUM_HEAP_IDS; i++) {
+ if (!names[i])
+ continue;
+ seq_printf(s, "%16.16s: %16zu\n", names[i], sizes[i]);
+ }
+ return 0;
+}
+
+static int ion_debug_client_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ion_debug_client_show, inode->i_private);
+}
+
+static const struct file_operations debug_client_fops = {
+ .open = ion_debug_client_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int ion_get_client_serial(const struct rb_root *root,
+ const unsigned char *name)
+{
+ int serial = -1;
+ struct rb_node *node;
+
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ struct ion_client *client = rb_entry(node, struct ion_client,
+ node);
+
+ if (strcmp(client->name, name))
+ continue;
+ serial = max(serial, client->display_serial);
+ }
+ return serial + 1;
+}
+
+struct ion_client *ion_client_create(struct ion_device *dev,
+ const char *name)
+{
+ struct ion_client *client;
+ struct task_struct *task;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct ion_client *entry;
+ pid_t pid;
+
+ if (!name) {
+ pr_err("%s: Name cannot be null\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+
+ get_task_struct(current->group_leader);
+ task_lock(current->group_leader);
+ pid = task_pid_nr(current->group_leader);
+ /* don't bother to store task struct for kernel threads,
+ they can't be killed anyway */
+ if (current->group_leader->flags & PF_KTHREAD) {
+ put_task_struct(current->group_leader);
+ task = NULL;
+ } else {
+ task = current->group_leader;
+ }
+ task_unlock(current->group_leader);
+
+ client = kzalloc(sizeof(struct ion_client), GFP_KERNEL);
+ if (!client)
+ goto err_put_task_struct;
+
+ client->dev = dev;
+ client->handles = RB_ROOT;
+ idr_init(&client->idr);
+ mutex_init(&client->lock);
+ client->task = task;
+ client->pid = pid;
+ client->name = kstrdup(name, GFP_KERNEL);
+ if (!client->name)
+ goto err_free_client;
+
+ down_write(&dev->lock);
+ client->display_serial = ion_get_client_serial(&dev->clients, name);
+ client->display_name = kasprintf(
+ GFP_KERNEL, "%s-%d", name, client->display_serial);
+ if (!client->display_name) {
+ up_write(&dev->lock);
+ goto err_free_client_name;
+ }
+ p = &dev->clients.rb_node;
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct ion_client, node);
+
+ if (client < entry)
+ p = &(*p)->rb_left;
+ else if (client > entry)
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&client->node, parent, p);
+ rb_insert_color(&client->node, &dev->clients);
+
+ client->debug_root = debugfs_create_file(client->display_name, 0664,
+ dev->clients_debug_root,
+ client, &debug_client_fops);
+ if (!client->debug_root) {
+ char buf[256], *path;
+ path = dentry_path(dev->clients_debug_root, buf, 256);
+ pr_err("Failed to create client debugfs at %s/%s\n",
+ path, client->display_name);
+ }
+
+ up_write(&dev->lock);
+
+ return client;
+
+err_free_client_name:
+ kfree(client->name);
+err_free_client:
+ kfree(client);
+err_put_task_struct:
+ if (task)
+ put_task_struct(current->group_leader);
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(ion_client_create);
+
+void ion_client_destroy(struct ion_client *client)
+{
+ struct ion_device *dev = client->dev;
+ struct rb_node *n;
+
+ pr_debug("%s: %d\n", __func__, __LINE__);
+ while ((n = rb_first(&client->handles))) {
+ struct ion_handle *handle = rb_entry(n, struct ion_handle,
+ node);
+ ion_handle_destroy(&handle->ref);
+ }
+
+ idr_destroy(&client->idr);
+
+ down_write(&dev->lock);
+ if (client->task)
+ put_task_struct(client->task);
+ rb_erase(&client->node, &dev->clients);
+ debugfs_remove_recursive(client->debug_root);
+ up_write(&dev->lock);
+
+ kfree(client->display_name);
+ kfree(client->name);
+ kfree(client);
+}
+EXPORT_SYMBOL(ion_client_destroy);
+
+struct sg_table *ion_sg_table(struct ion_client *client,
+ struct ion_handle *handle)
+{
+ struct ion_buffer *buffer;
+ struct sg_table *table;
+
+ mutex_lock(&client->lock);
+ if (!ion_handle_validate(client, handle)) {
+ pr_err("%s: invalid handle passed to map_dma.\n",
+ __func__);
+ mutex_unlock(&client->lock);
+ return ERR_PTR(-EINVAL);
+ }
+ buffer = handle->buffer;
+ table = buffer->sg_table;
+ mutex_unlock(&client->lock);
+ return table;
+}
+EXPORT_SYMBOL(ion_sg_table);
+
+static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
+ struct device *dev,
+ enum dma_data_direction direction);
+
+static struct sg_table *ion_map_dma_buf(struct dma_buf_attachment *attachment,
+ enum dma_data_direction direction)
+{
+ struct dma_buf *dmabuf = attachment->dmabuf;
+ struct ion_buffer *buffer = dmabuf->priv;
+
+ ion_buffer_sync_for_device(buffer, attachment->dev, direction);
+ return buffer->sg_table;
+}
+
+static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment,
+ struct sg_table *table,
+ enum dma_data_direction direction)
+{
+}
+
+void ion_pages_sync_for_device(struct device *dev, struct page *page,
+ size_t size, enum dma_data_direction dir)
+{
+ struct scatterlist sg;
+
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, page, size, 0);
+ /*
+ * This is not correct - sg_dma_address needs a dma_addr_t that is valid
+ * for the targeted device, but this works on the currently targeted
+ * hardware.
+ */
+ sg_dma_address(&sg) = page_to_phys(page);
+ dma_sync_sg_for_device(dev, &sg, 1, dir);
+}
+
+struct ion_vma_list {
+ struct list_head list;
+ struct vm_area_struct *vma;
+};
+
+static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
+ struct device *dev,
+ enum dma_data_direction dir)
+{
+ struct ion_vma_list *vma_list;
+ int pages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+ int i;
+
+ pr_debug("%s: syncing for device %s\n", __func__,
+ dev ? dev_name(dev) : "null");
+
+ if (!ion_buffer_fault_user_mappings(buffer))
+ return;
+
+ mutex_lock(&buffer->lock);
+ for (i = 0; i < pages; i++) {
+ struct page *page = buffer->pages[i];
+
+ if (ion_buffer_page_is_dirty(page))
+ ion_pages_sync_for_device(dev, ion_buffer_page(page),
+ PAGE_SIZE, dir);
+
+ ion_buffer_page_clean(buffer->pages + i);
+ }
+ list_for_each_entry(vma_list, &buffer->vmas, list) {
+ struct vm_area_struct *vma = vma_list->vma;
+
+ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start,
+ NULL);
+ }
+ mutex_unlock(&buffer->lock);
+}
+
+static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+ unsigned long pfn;
+ int ret;
+
+ mutex_lock(&buffer->lock);
+ ion_buffer_page_dirty(buffer->pages + vmf->pgoff);
+ BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]);
+
+ pfn = page_to_pfn(ion_buffer_page(buffer->pages[vmf->pgoff]));
+ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+ mutex_unlock(&buffer->lock);
+ if (ret)
+ return VM_FAULT_ERROR;
+
+ return VM_FAULT_NOPAGE;
+}
+
+static void ion_vm_open(struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+ struct ion_vma_list *vma_list;
+
+ vma_list = kmalloc(sizeof(struct ion_vma_list), GFP_KERNEL);
+ if (!vma_list)
+ return;
+ vma_list->vma = vma;
+ mutex_lock(&buffer->lock);
+ list_add(&vma_list->list, &buffer->vmas);
+ mutex_unlock(&buffer->lock);
+ pr_debug("%s: adding %p\n", __func__, vma);
+}
+
+static void ion_vm_close(struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+ struct ion_vma_list *vma_list, *tmp;
+
+ pr_debug("%s\n", __func__);
+ mutex_lock(&buffer->lock);
+ list_for_each_entry_safe(vma_list, tmp, &buffer->vmas, list) {
+ if (vma_list->vma != vma)
+ continue;
+ list_del(&vma_list->list);
+ kfree(vma_list);
+ pr_debug("%s: deleting %p\n", __func__, vma);
+ break;
+ }
+ mutex_unlock(&buffer->lock);
+}
+
+static struct vm_operations_struct ion_vma_ops = {
+ .open = ion_vm_open,
+ .close = ion_vm_close,
+ .fault = ion_vm_fault,
+};
+
+static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = dmabuf->priv;
+ int ret = 0;
+
+ if (!buffer->heap->ops->map_user) {
+ pr_err("%s: this heap does not define a method for mapping "
+ "to userspace\n", __func__);
+ return -EINVAL;
+ }
+
+ if (ion_buffer_fault_user_mappings(buffer)) {
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND |
+ VM_DONTDUMP;
+ vma->vm_private_data = buffer;
+ vma->vm_ops = &ion_vma_ops;
+ ion_vm_open(vma);
+ return 0;
+ }
+
+ if (!(buffer->flags & ION_FLAG_CACHED))
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ mutex_lock(&buffer->lock);
+ /* now map it to userspace */
+ ret = buffer->heap->ops->map_user(buffer->heap, buffer, vma);
+ mutex_unlock(&buffer->lock);
+
+ if (ret)
+ pr_err("%s: failure mapping buffer to userspace\n",
+ __func__);
+
+ return ret;
+}
+
+static void ion_dma_buf_release(struct dma_buf *dmabuf)
+{
+ struct ion_buffer *buffer = dmabuf->priv;
+
+ ion_buffer_put(buffer);
+}
+
+static void *ion_dma_buf_kmap(struct dma_buf *dmabuf, unsigned long offset)
+{
+ struct ion_buffer *buffer = dmabuf->priv;
+
+ return buffer->vaddr + offset * PAGE_SIZE;
+}
+
+static void ion_dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long offset,
+ void *ptr)
+{
+ return;
+}
+
+static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start,
+ size_t len,
+ enum dma_data_direction direction)
+{
+ struct ion_buffer *buffer = dmabuf->priv;
+ void *vaddr;
+
+ if (!buffer->heap->ops->map_kernel) {
+ pr_err("%s: map kernel is not implemented by this heap.\n",
+ __func__);
+ return -ENODEV;
+ }
+
+ mutex_lock(&buffer->lock);
+ vaddr = ion_buffer_kmap_get(buffer);
+ mutex_unlock(&buffer->lock);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+ return 0;
+}
+
+static void ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start,
+ size_t len,
+ enum dma_data_direction direction)
+{
+ struct ion_buffer *buffer = dmabuf->priv;
+
+ mutex_lock(&buffer->lock);
+ ion_buffer_kmap_put(buffer);
+ mutex_unlock(&buffer->lock);
+}
+
+static struct dma_buf_ops dma_buf_ops = {
+ .map_dma_buf = ion_map_dma_buf,
+ .unmap_dma_buf = ion_unmap_dma_buf,
+ .mmap = ion_mmap,
+ .release = ion_dma_buf_release,
+ .begin_cpu_access = ion_dma_buf_begin_cpu_access,
+ .end_cpu_access = ion_dma_buf_end_cpu_access,
+ .kmap_atomic = ion_dma_buf_kmap,
+ .kunmap_atomic = ion_dma_buf_kunmap,
+ .kmap = ion_dma_buf_kmap,
+ .kunmap = ion_dma_buf_kunmap,
+};
+
+struct dma_buf *ion_share_dma_buf(struct ion_client *client,
+ struct ion_handle *handle)
+{
+ struct ion_buffer *buffer;
+ struct dma_buf *dmabuf;
+ bool valid_handle;
+
+ mutex_lock(&client->lock);
+ valid_handle = ion_handle_validate(client, handle);
+ if (!valid_handle) {
+ WARN(1, "%s: invalid handle passed to share.\n", __func__);
+ mutex_unlock(&client->lock);
+ return ERR_PTR(-EINVAL);
+ }
+ buffer = handle->buffer;
+ ion_buffer_get(buffer);
+ mutex_unlock(&client->lock);
+
+ dmabuf = dma_buf_export(buffer, &dma_buf_ops, buffer->size, O_RDWR);
+ if (IS_ERR(dmabuf)) {
+ ion_buffer_put(buffer);
+ return dmabuf;
+ }
+
+ return dmabuf;
+}
+EXPORT_SYMBOL(ion_share_dma_buf);
+
+int ion_share_dma_buf_fd(struct ion_client *client, struct ion_handle *handle)
+{
+ struct dma_buf *dmabuf;
+ int fd;
+
+ dmabuf = ion_share_dma_buf(client, handle);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ fd = dma_buf_fd(dmabuf, O_CLOEXEC);
+ if (fd < 0)
+ dma_buf_put(dmabuf);
+
+ return fd;
+}
+EXPORT_SYMBOL(ion_share_dma_buf_fd);
+
+struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd)
+{
+ struct dma_buf *dmabuf;
+ struct ion_buffer *buffer;
+ struct ion_handle *handle;
+ int ret;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR(dmabuf))
+ return ERR_PTR(PTR_ERR(dmabuf));
+ /* if this memory came from ion */
+
+ if (dmabuf->ops != &dma_buf_ops) {
+ pr_err("%s: can not import dmabuf from another exporter\n",
+ __func__);
+ dma_buf_put(dmabuf);
+ return ERR_PTR(-EINVAL);
+ }
+ buffer = dmabuf->priv;
+
+ mutex_lock(&client->lock);
+ /* if a handle exists for this buffer just take a reference to it */
+ handle = ion_handle_lookup(client, buffer);
+ if (!IS_ERR(handle)) {
+ ion_handle_get(handle);
+ mutex_unlock(&client->lock);
+ goto end;
+ }
+ mutex_unlock(&client->lock);
+
+ handle = ion_handle_create(client, buffer);
+ if (IS_ERR(handle))
+ goto end;
+
+ mutex_lock(&client->lock);
+ ret = ion_handle_add(client, handle);
+ mutex_unlock(&client->lock);
+ if (ret) {
+ ion_handle_put(handle);
+ handle = ERR_PTR(ret);
+ }
+
+end:
+ dma_buf_put(dmabuf);
+ return handle;
+}
+EXPORT_SYMBOL(ion_import_dma_buf);
+
+static int ion_sync_for_device(struct ion_client *client, int fd)
+{
+ struct dma_buf *dmabuf;
+ struct ion_buffer *buffer;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ /* if this memory came from ion */
+ if (dmabuf->ops != &dma_buf_ops) {
+ pr_err("%s: can not sync dmabuf from another exporter\n",
+ __func__);
+ dma_buf_put(dmabuf);
+ return -EINVAL;
+ }
+ buffer = dmabuf->priv;
+
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_BIDIRECTIONAL);
+ dma_buf_put(dmabuf);
+ return 0;
+}
+
+/* fix up the cases where the ioctl direction bits are incorrect */
+static unsigned int ion_ioctl_dir(unsigned int cmd)
+{
+ switch (cmd) {
+ case ION_IOC_SYNC:
+ case ION_IOC_FREE:
+ case ION_IOC_CUSTOM:
+ return _IOC_WRITE;
+ default:
+ return _IOC_DIR(cmd);
+ }
+}
+
+static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct ion_client *client = filp->private_data;
+ struct ion_device *dev = client->dev;
+ struct ion_handle *cleanup_handle = NULL;
+ int ret = 0;
+ unsigned int dir;
+
+ union {
+ struct ion_fd_data fd;
+ struct ion_allocation_data allocation;
+ struct ion_handle_data handle;
+ struct ion_custom_data custom;
+ } data;
+
+ dir = ion_ioctl_dir(cmd);
+
+ if (_IOC_SIZE(cmd) > sizeof(data))
+ return -EINVAL;
+
+ if (dir & _IOC_WRITE)
+ if (copy_from_user(&data, (void __user *)arg, _IOC_SIZE(cmd)))
+ return -EFAULT;
+
+ switch (cmd) {
+ case ION_IOC_ALLOC:
+ {
+ struct ion_handle *handle;
+
+ handle = ion_alloc(client, data.allocation.len,
+ data.allocation.align,
+ data.allocation.heap_id_mask,
+ data.allocation.flags);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ data.allocation.handle = handle->id;
+
+ cleanup_handle = handle;
+ break;
+ }
+ case ION_IOC_FREE:
+ {
+ struct ion_handle *handle;
+
+ handle = ion_handle_get_by_id(client, data.handle.handle);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ion_free(client, handle);
+ ion_handle_put(handle);
+ break;
+ }
+ case ION_IOC_SHARE:
+ case ION_IOC_MAP:
+ {
+ struct ion_handle *handle;
+
+ handle = ion_handle_get_by_id(client, data.handle.handle);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ data.fd.fd = ion_share_dma_buf_fd(client, handle);
+ ion_handle_put(handle);
+ if (data.fd.fd < 0)
+ ret = data.fd.fd;
+ break;
+ }
+ case ION_IOC_IMPORT:
+ {
+ struct ion_handle *handle;
+
+ handle = ion_import_dma_buf(client, data.fd.fd);
+ if (IS_ERR(handle))
+ ret = PTR_ERR(handle);
+ else
+ data.handle.handle = handle->id;
+ break;
+ }
+ case ION_IOC_SYNC:
+ {
+ ret = ion_sync_for_device(client, data.fd.fd);
+ break;
+ }
+ case ION_IOC_CUSTOM:
+ {
+ if (!dev->custom_ioctl)
+ return -ENOTTY;
+ ret = dev->custom_ioctl(client, data.custom.cmd,
+ data.custom.arg);
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+
+ if (dir & _IOC_READ) {
+ if (copy_to_user((void __user *)arg, &data, _IOC_SIZE(cmd))) {
+ if (cleanup_handle)
+ ion_free(client, cleanup_handle);
+ return -EFAULT;
+ }
+ }
+ return ret;
+}
+
+static int ion_release(struct inode *inode, struct file *file)
+{
+ struct ion_client *client = file->private_data;
+
+ pr_debug("%s: %d\n", __func__, __LINE__);
+ ion_client_destroy(client);
+ return 0;
+}
+
+static int ion_open(struct inode *inode, struct file *file)
+{
+ struct miscdevice *miscdev = file->private_data;
+ struct ion_device *dev = container_of(miscdev, struct ion_device, dev);
+ struct ion_client *client;
+ char debug_name[64];
+
+ pr_debug("%s: %d\n", __func__, __LINE__);
+ snprintf(debug_name, 64, "%u", task_pid_nr(current->group_leader));
+ client = ion_client_create(dev, debug_name);
+ if (IS_ERR(client))
+ return PTR_ERR(client);
+ file->private_data = client;
+
+ return 0;
+}
+
+static const struct file_operations ion_fops = {
+ .owner = THIS_MODULE,
+ .open = ion_open,
+ .release = ion_release,
+ .unlocked_ioctl = ion_ioctl,
+ .compat_ioctl = compat_ion_ioctl,
+};
+
+static size_t ion_debug_heap_total(struct ion_client *client,
+ unsigned int id)
+{
+ size_t size = 0;
+ struct rb_node *n;
+
+ mutex_lock(&client->lock);
+ for (n = rb_first(&client->handles); n; n = rb_next(n)) {
+ struct ion_handle *handle = rb_entry(n,
+ struct ion_handle,
+ node);
+ if (handle->buffer->heap->id == id)
+ size += handle->buffer->size;
+ }
+ mutex_unlock(&client->lock);
+ return size;
+}
+
+static int ion_debug_heap_show(struct seq_file *s, void *unused)
+{
+ struct ion_heap *heap = s->private;
+ struct ion_device *dev = heap->dev;
+ struct rb_node *n;
+ size_t total_size = 0;
+ size_t total_orphaned_size = 0;
+
+ seq_printf(s, "%16.s %16.s %16.s\n", "client", "pid", "size");
+ seq_printf(s, "----------------------------------------------------\n");
+
+ for (n = rb_first(&dev->clients); n; n = rb_next(n)) {
+ struct ion_client *client = rb_entry(n, struct ion_client,
+ node);
+ size_t size = ion_debug_heap_total(client, heap->id);
+
+ if (!size)
+ continue;
+ if (client->task) {
+ char task_comm[TASK_COMM_LEN];
+
+ get_task_comm(task_comm, client->task);
+ seq_printf(s, "%16.s %16u %16zu\n", task_comm,
+ client->pid, size);
+ } else {
+ seq_printf(s, "%16.s %16u %16zu\n", client->name,
+ client->pid, size);
+ }
+ }
+ seq_printf(s, "----------------------------------------------------\n");
+ seq_printf(s, "orphaned allocations (info is from last known client):"
+ "\n");
+ mutex_lock(&dev->buffer_lock);
+ for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
+ struct ion_buffer *buffer = rb_entry(n, struct ion_buffer,
+ node);
+ if (buffer->heap->id != heap->id)
+ continue;
+ total_size += buffer->size;
+ if (!buffer->handle_count) {
+ seq_printf(s, "%16.s %16u %16zu %d %d\n",
+ buffer->task_comm, buffer->pid,
+ buffer->size, buffer->kmap_cnt,
+ atomic_read(&buffer->ref.refcount));
+ total_orphaned_size += buffer->size;
+ }
+ }
+ mutex_unlock(&dev->buffer_lock);
+ seq_printf(s, "----------------------------------------------------\n");
+ seq_printf(s, "%16.s %16zu\n", "total orphaned",
+ total_orphaned_size);
+ seq_printf(s, "%16.s %16zu\n", "total ", total_size);
+ if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
+ seq_printf(s, "%16.s %16zu\n", "deferred free",
+ heap->free_list_size);
+ seq_printf(s, "----------------------------------------------------\n");
+
+ if (heap->debug_show)
+ heap->debug_show(heap, s, unused);
+
+ return 0;
+}
+
+static int ion_debug_heap_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ion_debug_heap_show, inode->i_private);
+}
+
+static const struct file_operations debug_heap_fops = {
+ .open = ion_debug_heap_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#ifdef DEBUG_HEAP_SHRINKER
+static int debug_shrink_set(void *data, u64 val)
+{
+ struct ion_heap *heap = data;
+ struct shrink_control sc;
+ int objs;
+
+ sc.gfp_mask = -1;
+ sc.nr_to_scan = 0;
+
+ if (!val)
+ return 0;
+
+ objs = heap->shrinker.shrink(&heap->shrinker, &sc);
+ sc.nr_to_scan = objs;
+
+ heap->shrinker.shrink(&heap->shrinker, &sc);
+ return 0;
+}
+
+static int debug_shrink_get(void *data, u64 *val)
+{
+ struct ion_heap *heap = data;
+ struct shrink_control sc;
+ int objs;
+
+ sc.gfp_mask = -1;
+ sc.nr_to_scan = 0;
+
+ objs = heap->shrinker.shrink(&heap->shrinker, &sc);
+ *val = objs;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(debug_shrink_fops, debug_shrink_get,
+ debug_shrink_set, "%llu\n");
+#endif
+
+void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap)
+{
+ struct dentry *debug_file;
+
+ if (!heap->ops->allocate || !heap->ops->free || !heap->ops->map_dma ||
+ !heap->ops->unmap_dma)
+ pr_err("%s: can not add heap with invalid ops struct.\n",
+ __func__);
+
+ if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
+ ion_heap_init_deferred_free(heap);
+
+ if ((heap->flags & ION_HEAP_FLAG_DEFER_FREE) || heap->ops->shrink)
+ ion_heap_init_shrinker(heap);
+
+ heap->dev = dev;
+ down_write(&dev->lock);
+ /* use negative heap->id to reverse the priority -- when traversing
+ the list later attempt higher id numbers first */
+ plist_node_init(&heap->node, -heap->id);
+ plist_add(&heap->node, &dev->heaps);
+ debug_file = debugfs_create_file(heap->name, 0664,
+ dev->heaps_debug_root, heap,
+ &debug_heap_fops);
+
+ if (!debug_file) {
+ char buf[256], *path;
+
+ path = dentry_path(dev->heaps_debug_root, buf, 256);
+ pr_err("Failed to create heap debugfs at %s/%s\n",
+ path, heap->name);
+ }
+
+#ifdef DEBUG_HEAP_SHRINKER
+ if (heap->shrinker.shrink) {
+ char debug_name[64];
+
+ snprintf(debug_name, 64, "%s_shrink", heap->name);
+ debug_file = debugfs_create_file(
+ debug_name, 0644, dev->heaps_debug_root, heap,
+ &debug_shrink_fops);
+ if (!debug_file) {
+ char buf[256], *path;
+
+ path = dentry_path(dev->heaps_debug_root, buf, 256);
+ pr_err("Failed to create heap shrinker debugfs at %s/%s\n",
+ path, debug_name);
+ }
+ }
+#endif
+ up_write(&dev->lock);
+}
+
+struct ion_device *ion_device_create(long (*custom_ioctl)
+ (struct ion_client *client,
+ unsigned int cmd,
+ unsigned long arg))
+{
+ struct ion_device *idev;
+ int ret;
+
+ idev = kzalloc(sizeof(struct ion_device), GFP_KERNEL);
+ if (!idev)
+ return ERR_PTR(-ENOMEM);
+
+ idev->dev.minor = MISC_DYNAMIC_MINOR;
+ idev->dev.name = "ion";
+ idev->dev.fops = &ion_fops;
+ idev->dev.parent = NULL;
+ ret = misc_register(&idev->dev);
+ if (ret) {
+ pr_err("ion: failed to register misc device.\n");
+ return ERR_PTR(ret);
+ }
+
+ idev->debug_root = debugfs_create_dir("ion", NULL);
+ if (!idev->debug_root) {
+ pr_err("ion: failed to create debugfs root directory.\n");
+ goto debugfs_done;
+ }
+ idev->heaps_debug_root = debugfs_create_dir("heaps", idev->debug_root);
+ if (!idev->heaps_debug_root) {
+ pr_err("ion: failed to create debugfs heaps directory.\n");
+ goto debugfs_done;
+ }
+ idev->clients_debug_root = debugfs_create_dir("clients",
+ idev->debug_root);
+ if (!idev->clients_debug_root)
+ pr_err("ion: failed to create debugfs clients directory.\n");
+
+debugfs_done:
+
+ idev->custom_ioctl = custom_ioctl;
+ idev->buffers = RB_ROOT;
+ mutex_init(&idev->buffer_lock);
+ init_rwsem(&idev->lock);
+ plist_head_init(&idev->heaps);
+ idev->clients = RB_ROOT;
+ return idev;
+}
+
+void ion_device_destroy(struct ion_device *dev)
+{
+ misc_deregister(&dev->dev);
+ debugfs_remove_recursive(dev->debug_root);
+ /* XXX need to free the heaps and clients ? */
+ kfree(dev);
+}
+
+void __init ion_reserve(struct ion_platform_data *data)
+{
+ int i;
+
+ for (i = 0; i < data->nr; i++) {
+ if (data->heaps[i].size == 0)
+ continue;
+
+ if (data->heaps[i].base == 0) {
+ phys_addr_t paddr;
+
+ paddr = memblock_alloc_base(data->heaps[i].size,
+ data->heaps[i].align,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!paddr) {
+ pr_err("%s: error allocating memblock for "
+ "heap %d\n",
+ __func__, i);
+ continue;
+ }
+ data->heaps[i].base = paddr;
+ } else {
+ int ret = memblock_reserve(data->heaps[i].base,
+ data->heaps[i].size);
+ if (ret)
+ pr_err("memblock reserve of %zx@%lx failed\n",
+ data->heaps[i].size,
+ data->heaps[i].base);
+ }
+ pr_info("%s: %s reserved base %lx size %zu\n", __func__,
+ data->heaps[i].name,
+ data->heaps[i].base,
+ data->heaps[i].size);
+ }
+}
diff --git a/drivers/staging/android/ion/ion.h b/drivers/staging/android/ion/ion.h
new file mode 100644
index 000000000000..dcd2a0cdb192
--- /dev/null
+++ b/drivers/staging/android/ion/ion.h
@@ -0,0 +1,204 @@
+/*
+ * drivers/staging/android/ion/ion.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_ION_H
+#define _LINUX_ION_H
+
+#include <linux/types.h>
+
+#include "../uapi/ion.h"
+
+struct ion_handle;
+struct ion_device;
+struct ion_heap;
+struct ion_mapper;
+struct ion_client;
+struct ion_buffer;
+
+/* This should be removed some day when phys_addr_t's are fully
+ plumbed in the kernel, and all instances of ion_phys_addr_t should
+ be converted to phys_addr_t. For the time being many kernel interfaces
+ that would need to take one do not yet accept phys_addr_t's. */
+#define ion_phys_addr_t unsigned long
+
+/**
+ * struct ion_platform_heap - defines a heap in the given platform
+ * @type: type of the heap from ion_heap_type enum
+ * @id: unique identifier for heap. When allocating, heaps with higher
+ * id numbers are tried first. At allocation these are passed
+ * as a bit mask and therefore cannot exceed ION_NUM_HEAP_IDS.
+ * @name: used for debug purposes
+ * @base: base address of heap in physical memory if applicable
+ * @size: size of the heap in bytes if applicable
+ * @align: required alignment in physical memory if applicable
+ * @priv: private info passed from the board file
+ *
+ * Provided by the board file.
+ */
+struct ion_platform_heap {
+ enum ion_heap_type type;
+ unsigned int id;
+ const char *name;
+ ion_phys_addr_t base;
+ size_t size;
+ ion_phys_addr_t align;
+ void *priv;
+};
+
+/**
+ * struct ion_platform_data - array of platform heaps passed from board file
+ * @nr: number of structures in the array
+ * @heaps: array of platform_heap structures
+ *
+ * Provided by the board file in the form of platform data to a platform device.
+ */
+struct ion_platform_data {
+ int nr;
+ struct ion_platform_heap *heaps;
+};
+
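A hedged board-file sketch of how these two structures are typically filled in (illustrative only; the heap id, base address and size below are invented, and ARRAY_SIZE comes from the usual kernel headers).

/*
 * Illustrative board data (assumption, not part of this patch): one
 * physically carved-out heap described to the ion platform device.
 */
static struct ion_platform_heap example_heaps[] = {
	{
		.type	= ION_HEAP_TYPE_CARVEOUT,
		.id	= 1,
		.name	= "carveout",
		.base	= 0x80000000,		/* made-up address */
		.size	= 16 * 1024 * 1024,	/* made-up size */
	},
};

static struct ion_platform_data example_ion_pdata = {
	.nr	= ARRAY_SIZE(example_heaps),
	.heaps	= example_heaps,
};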
+/**
+ * ion_reserve() - reserve memory for ion heaps if applicable
+ * @data: platform data specifying starting physical address and
+ * size
+ *
+ * Calls memblock reserve to set aside memory for heaps that are
+ * located at specific memory addresses or of specific sizes not
+ * managed by the kernel.
+ */
+void ion_reserve(struct ion_platform_data *data);
+
+/**
+ * ion_client_create() - allocate a client and return it
+ * @dev: the global ion device
+ * @name: used for debugging
+ */
+struct ion_client *ion_client_create(struct ion_device *dev,
+ const char *name);
+
+/**
+ * ion_client_destroy() - frees a client and all its handles
+ * @client: the client
+ *
+ * Free the provided client and all its resources including
+ * any handles it is holding.
+ */
+void ion_client_destroy(struct ion_client *client);
+
+/**
+ * ion_alloc - allocate ion memory
+ * @client: the client
+ * @len: size of the allocation
+ * @align: requested allocation alignment, lots of hardware blocks
+ * have alignment requirements of some kind
+ * @heap_id_mask: mask of heaps to allocate from, if multiple bits are set
+ * heaps will be tried in order from highest to lowest
+ * id
+ * @flags: heap flags, the low 16 bits are consumed by ion, the
+ * high 16 bits are passed on to the respective heap and
+ * can be heap custom
+ *
+ * Allocate memory in one of the heaps provided in heap mask and return
+ * an opaque handle to it.
+ */
+struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
+ size_t align, unsigned int heap_id_mask,
+ unsigned int flags);
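A minimal in-kernel usage sketch of the client API declared in this header (illustrative only; the helper is hypothetical and the device pointer is assumed to come from ion_device_create() elsewhere).

/*
 * Sketch: allocate one page from any heap, map it into the kernel,
 * then release everything.
 */
static int example_ion_use(struct ion_device *idev)
{
	struct ion_client *client;
	struct ion_handle *handle;
	void *vaddr;

	client = ion_client_create(idev, "example");
	if (IS_ERR(client))
		return PTR_ERR(client);

	/* one page, any heap (all id bits set), default (uncached) flags */
	handle = ion_alloc(client, PAGE_SIZE, 0, ~0u, 0);
	if (IS_ERR(handle)) {
		ion_client_destroy(client);
		return PTR_ERR(handle);
	}

	vaddr = ion_map_kernel(client, handle);
	if (!IS_ERR(vaddr)) {
		memset(vaddr, 0, PAGE_SIZE);
		ion_unmap_kernel(client, handle);
	}

	ion_free(client, handle);
	ion_client_destroy(client);
	return 0;
}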
+
+/**
+ * ion_free - free a handle
+ * @client: the client
+ * @handle: the handle to free
+ *
+ * Free the provided handle.
+ */
+void ion_free(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_phys - returns the physical address and len of a handle
+ * @client: the client
+ * @handle: the handle
+ * @addr: a pointer to put the address in
+ * @len: a pointer to put the length in
+ *
+ * This function queries the heap for a particular handle to get the
+ * handle's physical address. Its output is only correct if
+ * a heap returns physically contiguous memory -- in other cases
+ * this api should not be implemented -- ion_sg_table should be used
+ * instead. Returns -EINVAL if the handle is invalid. This has
+ * no implications on the reference counting of the handle --
+ * the returned value may not be valid if the caller is not
+ * holding a reference.
+ */
+int ion_phys(struct ion_client *client, struct ion_handle *handle,
+ ion_phys_addr_t *addr, size_t *len);
+
+/**
+ * ion_map_dma - return an sg_table describing a handle
+ * @client: the client
+ * @handle: the handle
+ *
+ * This function returns the sg_table describing
+ * a particular ion handle.
+ */
+struct sg_table *ion_sg_table(struct ion_client *client,
+ struct ion_handle *handle);
+
+/**
+ * ion_map_kernel - create mapping for the given handle
+ * @client: the client
+ * @handle: handle to map
+ *
+ * Map the given handle into the kernel and return a kernel address that
+ * can be used to access this address.
+ */
+void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_unmap_kernel() - destroy a kernel mapping for a handle
+ * @client: the client
+ * @handle: handle to unmap
+ */
+void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_share_dma_buf() - share buffer as dma-buf
+ * @client: the client
+ * @handle: the handle
+ */
+struct dma_buf *ion_share_dma_buf(struct ion_client *client,
+ struct ion_handle *handle);
+
+/**
+ * ion_share_dma_buf_fd() - given an ion client, create a dma-buf fd
+ * @client: the client
+ * @handle: the handle
+ */
+int ion_share_dma_buf_fd(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_import_dma_buf() - given a dma-buf fd from the ion exporter, get a handle
+ * @client: the client
+ * @fd: the dma-buf fd
+ *
+ * Given a dma-buf fd that was allocated through ion via ion_share_dma_buf,
+ * import that fd and return a handle representing it. If a dma-buf from
+ * another exporter is passed in, this function will return ERR_PTR(-EINVAL).
+ */
+struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd);
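A hedged sketch of the export/import round trip (illustrative only; the helper is hypothetical, and in practice the fd is usually handed to userspace rather than re-imported inside the kernel).

/*
 * Sketch: turn a handle into a dma-buf fd in one client and re-import
 * it through a second client.
 */
static int example_ion_share(struct ion_client *exporter,
			     struct ion_client *importer,
			     struct ion_handle *handle)
{
	struct ion_handle *imported;
	int fd;

	fd = ion_share_dma_buf_fd(exporter, handle);
	if (fd < 0)
		return fd;

	imported = ion_import_dma_buf(importer, fd);
	if (IS_ERR(imported))
		return PTR_ERR(imported);

	/* both handles now reference the same underlying ion_buffer */
	ion_free(importer, imported);
	return 0;
}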
+
+#endif /* _LINUX_ION_H */
diff --git a/drivers/staging/android/ion/ion_carveout_heap.c b/drivers/staging/android/ion/ion_carveout_heap.c
new file mode 100644
index 000000000000..5165de2ce343
--- /dev/null
+++ b/drivers/staging/android/ion/ion_carveout_heap.c
@@ -0,0 +1,194 @@
+/*
+ * drivers/staging/android/ion/ion_carveout_heap.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "ion.h"
+#include "ion_priv.h"
+
+struct ion_carveout_heap {
+ struct ion_heap heap;
+ struct gen_pool *pool;
+ ion_phys_addr_t base;
+};
+
+ion_phys_addr_t ion_carveout_allocate(struct ion_heap *heap,
+ unsigned long size,
+ unsigned long align)
+{
+ struct ion_carveout_heap *carveout_heap =
+ container_of(heap, struct ion_carveout_heap, heap);
+ unsigned long offset = gen_pool_alloc(carveout_heap->pool, size);
+
+ if (!offset)
+ return ION_CARVEOUT_ALLOCATE_FAIL;
+
+ return offset;
+}
+
+void ion_carveout_free(struct ion_heap *heap, ion_phys_addr_t addr,
+ unsigned long size)
+{
+ struct ion_carveout_heap *carveout_heap =
+ container_of(heap, struct ion_carveout_heap, heap);
+
+ if (addr == ION_CARVEOUT_ALLOCATE_FAIL)
+ return;
+ gen_pool_free(carveout_heap->pool, addr, size);
+}
+
+static int ion_carveout_heap_phys(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ ion_phys_addr_t *addr, size_t *len)
+{
+ struct sg_table *table = buffer->priv_virt;
+ struct page *page = sg_page(table->sgl);
+ ion_phys_addr_t paddr = PFN_PHYS(page_to_pfn(page));
+
+ *addr = paddr;
+ *len = buffer->size;
+ return 0;
+}
+
+static int ion_carveout_heap_allocate(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long size, unsigned long align,
+ unsigned long flags)
+{
+ struct sg_table *table;
+ ion_phys_addr_t paddr;
+ int ret;
+
+ if (align > PAGE_SIZE)
+ return -EINVAL;
+
+ table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+ ret = sg_alloc_table(table, 1, GFP_KERNEL);
+ if (ret)
+ goto err_free;
+
+ paddr = ion_carveout_allocate(heap, size, align);
+ if (paddr == ION_CARVEOUT_ALLOCATE_FAIL) {
+ ret = -ENOMEM;
+ goto err_free_table;
+ }
+
+ sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(paddr)), size, 0);
+ buffer->priv_virt = table;
+
+ return 0;
+
+err_free_table:
+ sg_free_table(table);
+err_free:
+ kfree(table);
+ return ret;
+}
+
+static void ion_carveout_heap_free(struct ion_buffer *buffer)
+{
+ struct ion_heap *heap = buffer->heap;
+ struct sg_table *table = buffer->priv_virt;
+ struct page *page = sg_page(table->sgl);
+ ion_phys_addr_t paddr = PFN_PHYS(page_to_pfn(page));
+
+ ion_heap_buffer_zero(buffer);
+
+ if (ion_buffer_cached(buffer))
+ dma_sync_sg_for_device(NULL, table->sgl, table->nents,
+ DMA_BIDIRECTIONAL);
+
+ ion_carveout_free(heap, paddr, buffer->size);
+ sg_free_table(table);
+ kfree(table);
+}
+
+static struct sg_table *ion_carveout_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return buffer->priv_virt;
+}
+
+static void ion_carveout_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+static struct ion_heap_ops carveout_heap_ops = {
+ .allocate = ion_carveout_heap_allocate,
+ .free = ion_carveout_heap_free,
+ .phys = ion_carveout_heap_phys,
+ .map_dma = ion_carveout_heap_map_dma,
+ .unmap_dma = ion_carveout_heap_unmap_dma,
+ .map_user = ion_heap_map_user,
+ .map_kernel = ion_heap_map_kernel,
+ .unmap_kernel = ion_heap_unmap_kernel,
+};
+
+struct ion_heap *ion_carveout_heap_create(struct ion_platform_heap *heap_data)
+{
+ struct ion_carveout_heap *carveout_heap;
+ int ret;
+
+ struct page *page;
+ size_t size;
+
+ page = pfn_to_page(PFN_DOWN(heap_data->base));
+ size = heap_data->size;
+
+ ion_pages_sync_for_device(NULL, page, size, DMA_BIDIRECTIONAL);
+
+ ret = ion_heap_pages_zero(page, size, pgprot_writecombine(PAGE_KERNEL));
+ if (ret)
+ return ERR_PTR(ret);
+
+ carveout_heap = kzalloc(sizeof(struct ion_carveout_heap), GFP_KERNEL);
+ if (!carveout_heap)
+ return ERR_PTR(-ENOMEM);
+
+ carveout_heap->pool = gen_pool_create(12, -1);
+ if (!carveout_heap->pool) {
+ kfree(carveout_heap);
+ return ERR_PTR(-ENOMEM);
+ }
+ carveout_heap->base = heap_data->base;
+ gen_pool_add(carveout_heap->pool, carveout_heap->base, heap_data->size,
+ -1);
+ carveout_heap->heap.ops = &carveout_heap_ops;
+ carveout_heap->heap.type = ION_HEAP_TYPE_CARVEOUT;
+ carveout_heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE;
+
+ return &carveout_heap->heap;
+}
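+
+/*
+ * Example (illustrative only): a platform would normally describe a carveout
+ * heap with an ion_platform_heap entry whose base/size point at reserved
+ * memory. The address and size below are made-up values.
+ *
+ * struct ion_platform_heap carveout_data = {
+ * .id = ION_HEAP_TYPE_CARVEOUT,
+ * .type = ION_HEAP_TYPE_CARVEOUT,
+ * .name = "carveout",
+ * .base = 0x80000000,
+ * .size = SZ_4M,
+ * };
+ * heap = ion_carveout_heap_create(&carveout_data);
+ */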
+
+void ion_carveout_heap_destroy(struct ion_heap *heap)
+{
+ struct ion_carveout_heap *carveout_heap =
+ container_of(heap, struct ion_carveout_heap, heap);
+
+ gen_pool_destroy(carveout_heap->pool);
+ kfree(carveout_heap);
+ carveout_heap = NULL;
+}
diff --git a/drivers/staging/android/ion/ion_chunk_heap.c b/drivers/staging/android/ion/ion_chunk_heap.c
new file mode 100644
index 000000000000..ca20d6279602
--- /dev/null
+++ b/drivers/staging/android/ion/ion_chunk_heap.c
@@ -0,0 +1,195 @@
+/*
+ * drivers/gpu/ion/ion_chunk_heap.c
+ *
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "ion.h"
+#include "ion_priv.h"
+
+struct ion_chunk_heap {
+ struct ion_heap heap;
+ struct gen_pool *pool;
+ ion_phys_addr_t base;
+ unsigned long chunk_size;
+ unsigned long size;
+ unsigned long allocated;
+};
+
+static int ion_chunk_heap_allocate(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long size, unsigned long align,
+ unsigned long flags)
+{
+ struct ion_chunk_heap *chunk_heap =
+ container_of(heap, struct ion_chunk_heap, heap);
+ struct sg_table *table;
+ struct scatterlist *sg;
+ int ret, i;
+ unsigned long num_chunks;
+ unsigned long allocated_size;
+
+ if (align > chunk_heap->chunk_size)
+ return -EINVAL;
+
+ allocated_size = ALIGN(size, chunk_heap->chunk_size);
+ num_chunks = allocated_size / chunk_heap->chunk_size;
+
+ if (allocated_size > chunk_heap->size - chunk_heap->allocated)
+ return -ENOMEM;
+
+ table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+ ret = sg_alloc_table(table, num_chunks, GFP_KERNEL);
+ if (ret) {
+ kfree(table);
+ return ret;
+ }
+
+ sg = table->sgl;
+ for (i = 0; i < num_chunks; i++) {
+ unsigned long paddr = gen_pool_alloc(chunk_heap->pool,
+ chunk_heap->chunk_size);
+ if (!paddr)
+ goto err;
+ sg_set_page(sg, pfn_to_page(PFN_DOWN(paddr)),
+ chunk_heap->chunk_size, 0);
+ sg = sg_next(sg);
+ }
+
+ buffer->priv_virt = table;
+ chunk_heap->allocated += allocated_size;
+ return 0;
+err:
+ sg = table->sgl;
+ for (i -= 1; i >= 0; i--) {
+ gen_pool_free(chunk_heap->pool, page_to_phys(sg_page(sg)),
+ sg->length);
+ sg = sg_next(sg);
+ }
+ sg_free_table(table);
+ kfree(table);
+ return -ENOMEM;
+}
+
+static void ion_chunk_heap_free(struct ion_buffer *buffer)
+{
+ struct ion_heap *heap = buffer->heap;
+ struct ion_chunk_heap *chunk_heap =
+ container_of(heap, struct ion_chunk_heap, heap);
+ struct sg_table *table = buffer->priv_virt;
+ struct scatterlist *sg;
+ int i;
+ unsigned long allocated_size;
+
+ allocated_size = ALIGN(buffer->size, chunk_heap->chunk_size);
+
+ ion_heap_buffer_zero(buffer);
+
+ if (ion_buffer_cached(buffer))
+ dma_sync_sg_for_device(NULL, table->sgl, table->nents,
+ DMA_BIDIRECTIONAL);
+
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ gen_pool_free(chunk_heap->pool, page_to_phys(sg_page(sg)),
+ sg->length);
+ }
+ chunk_heap->allocated -= allocated_size;
+ sg_free_table(table);
+ kfree(table);
+}
+
+static struct sg_table *ion_chunk_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return buffer->priv_virt;
+}
+
+static void ion_chunk_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+static struct ion_heap_ops chunk_heap_ops = {
+ .allocate = ion_chunk_heap_allocate,
+ .free = ion_chunk_heap_free,
+ .map_dma = ion_chunk_heap_map_dma,
+ .unmap_dma = ion_chunk_heap_unmap_dma,
+ .map_user = ion_heap_map_user,
+ .map_kernel = ion_heap_map_kernel,
+ .unmap_kernel = ion_heap_unmap_kernel,
+};
+
+struct ion_heap *ion_chunk_heap_create(struct ion_platform_heap *heap_data)
+{
+ struct ion_chunk_heap *chunk_heap;
+ int ret;
+ struct page *page;
+ size_t size;
+
+ page = pfn_to_page(PFN_DOWN(heap_data->base));
+ size = heap_data->size;
+
+ ion_pages_sync_for_device(NULL, page, size, DMA_BIDIRECTIONAL);
+
+ ret = ion_heap_pages_zero(page, size, pgprot_writecombine(PAGE_KERNEL));
+ if (ret)
+ return ERR_PTR(ret);
+
+ chunk_heap = kzalloc(sizeof(struct ion_chunk_heap), GFP_KERNEL);
+ if (!chunk_heap)
+ return ERR_PTR(-ENOMEM);
+
+ chunk_heap->chunk_size = (unsigned long)heap_data->priv;
+ chunk_heap->pool = gen_pool_create(get_order(chunk_heap->chunk_size) +
+ PAGE_SHIFT, -1);
+ if (!chunk_heap->pool) {
+ ret = -ENOMEM;
+ goto error_gen_pool_create;
+ }
+ chunk_heap->base = heap_data->base;
+ chunk_heap->size = heap_data->size;
+ chunk_heap->allocated = 0;
+
+ gen_pool_add(chunk_heap->pool, chunk_heap->base, heap_data->size, -1);
+ chunk_heap->heap.ops = &chunk_heap_ops;
+ chunk_heap->heap.type = ION_HEAP_TYPE_CHUNK;
+ chunk_heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE;
+ pr_info("%s: base %lu size %zu align %ld\n", __func__, chunk_heap->base,
+ heap_data->size, heap_data->align);
+
+ return &chunk_heap->heap;
+
+error_gen_pool_create:
+ kfree(chunk_heap);
+ return ERR_PTR(ret);
+}
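+
+/*
+ * Example (illustrative only): the chunk size is carried in the
+ * ion_platform_heap .priv field, as the dummy driver later in this series
+ * also does. The base/size below are made-up values.
+ *
+ * struct ion_platform_heap chunk_data = {
+ * .id = ION_HEAP_TYPE_CHUNK,
+ * .type = ION_HEAP_TYPE_CHUNK,
+ * .name = "chunk",
+ * .base = 0x90000000,
+ * .size = SZ_4M,
+ * .priv = (void *)SZ_16K,
+ * };
+ * heap = ion_chunk_heap_create(&chunk_data);
+ */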
+
+void ion_chunk_heap_destroy(struct ion_heap *heap)
+{
+ struct ion_chunk_heap *chunk_heap =
+ container_of(heap, struct ion_chunk_heap, heap);
+
+ gen_pool_destroy(chunk_heap->pool);
+ kfree(chunk_heap);
+ chunk_heap = NULL;
+}
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
new file mode 100644
index 000000000000..978bc9ef99a3
--- /dev/null
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -0,0 +1,199 @@
+/*
+ * drivers/gpu/ion/ion_cma_heap.c
+ *
+ * Copyright (C) Linaro 2012
+ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+
+#include "ion.h"
+#include "ion_priv.h"
+
+#define ION_CMA_ALLOCATE_FAILED -1
+
+struct ion_cma_heap {
+ struct ion_heap heap;
+ struct device *dev;
+};
+
+#define to_cma_heap(x) container_of(x, struct ion_cma_heap, heap)
+
+struct ion_cma_buffer_info {
+ void *cpu_addr;
+ dma_addr_t handle;
+ struct sg_table *table;
+};
+
+
+/* ION CMA heap operations functions */
+static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
+ unsigned long len, unsigned long align,
+ unsigned long flags)
+{
+ struct ion_cma_heap *cma_heap = to_cma_heap(heap);
+ struct device *dev = cma_heap->dev;
+ struct ion_cma_buffer_info *info;
+
+ dev_dbg(dev, "Request buffer allocation len %ld\n", len);
+
+ if (buffer->flags & ION_FLAG_CACHED)
+ return -EINVAL;
+
+ if (align > PAGE_SIZE)
+ return -EINVAL;
+
+ info = kzalloc(sizeof(struct ion_cma_buffer_info), GFP_KERNEL);
+ if (!info) {
+ dev_err(dev, "Can't allocate buffer info\n");
+ return ION_CMA_ALLOCATE_FAILED;
+ }
+
+ info->cpu_addr = dma_alloc_coherent(dev, len, &(info->handle),
+ GFP_HIGHUSER | __GFP_ZERO);
+
+ if (!info->cpu_addr) {
+ dev_err(dev, "Fail to allocate buffer\n");
+ goto err;
+ }
+
+ info->table = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!info->table) {
+ dev_err(dev, "Fail to allocate sg table\n");
+ goto free_mem;
+ }
+
+ if (dma_get_sgtable(dev, info->table, info->cpu_addr, info->handle, len))
+ goto free_table;
+ /* keep this for memory release */
+ buffer->priv_virt = info;
+ dev_dbg(dev, "Allocate buffer %p\n", buffer);
+ return 0;
+
+free_table:
+ kfree(info->table);
+free_mem:
+ dma_free_coherent(dev, len, info->cpu_addr, info->handle);
+err:
+ kfree(info);
+ return ION_CMA_ALLOCATE_FAILED;
+}
+
+static void ion_cma_free(struct ion_buffer *buffer)
+{
+ struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
+ struct device *dev = cma_heap->dev;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ dev_dbg(dev, "Release buffer %p\n", buffer);
+ /* release memory */
+ dma_free_coherent(dev, buffer->size, info->cpu_addr, info->handle);
+ /* release sg table */
+ sg_free_table(info->table);
+ kfree(info->table);
+ kfree(info);
+}
+
+/* return physical address in addr */
+static int ion_cma_phys(struct ion_heap *heap, struct ion_buffer *buffer,
+ ion_phys_addr_t *addr, size_t *len)
+{
+ struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
+ struct device *dev = cma_heap->dev;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ dev_dbg(dev, "Return buffer %p physical address 0x%pa\n", buffer,
+ &info->handle);
+
+ *addr = info->handle;
+ *len = buffer->size;
+
+ return 0;
+}
+
+static struct sg_table *ion_cma_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ return info->table;
+}
+
+static void ion_cma_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+static int ion_cma_mmap(struct ion_heap *mapper, struct ion_buffer *buffer,
+ struct vm_area_struct *vma)
+{
+ struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
+ struct device *dev = cma_heap->dev;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ return dma_mmap_coherent(dev, vma, info->cpu_addr, info->handle,
+ buffer->size);
+}
+
+static void *ion_cma_map_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+ /* kernel memory mapping has been done at allocation time */
+ return info->cpu_addr;
+}
+
+static void ion_cma_unmap_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+}
+
+static struct ion_heap_ops ion_cma_ops = {
+ .allocate = ion_cma_allocate,
+ .free = ion_cma_free,
+ .map_dma = ion_cma_heap_map_dma,
+ .unmap_dma = ion_cma_heap_unmap_dma,
+ .phys = ion_cma_phys,
+ .map_user = ion_cma_mmap,
+ .map_kernel = ion_cma_map_kernel,
+ .unmap_kernel = ion_cma_unmap_kernel,
+};
+
+struct ion_heap *ion_cma_heap_create(struct ion_platform_heap *data)
+{
+ struct ion_cma_heap *cma_heap;
+
+ cma_heap = kzalloc(sizeof(struct ion_cma_heap), GFP_KERNEL);
+
+ if (!cma_heap)
+ return ERR_PTR(-ENOMEM);
+
+ cma_heap->heap.ops = &ion_cma_ops;
+ /* get the device from the private heap data; it will later be
+ * used to link with the reserved CMA memory */
+ cma_heap->dev = data->priv;
+ cma_heap->heap.type = ION_HEAP_TYPE_DMA;
+ return &cma_heap->heap;
+}
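+
+/*
+ * Example (illustrative only): the owning device is handed to the heap via
+ * the .priv field of the platform heap data; "my_dev" is a hypothetical
+ * struct device associated with a reserved CMA region.
+ *
+ * struct ion_platform_heap cma_data = {
+ * .id = ION_HEAP_TYPE_DMA,
+ * .type = ION_HEAP_TYPE_DMA,
+ * .name = "cma",
+ * .priv = my_dev,
+ * };
+ * heap = ion_cma_heap_create(&cma_data);
+ */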
+
+void ion_cma_heap_destroy(struct ion_heap *heap)
+{
+ struct ion_cma_heap *cma_heap = to_cma_heap(heap);
+
+ kfree(cma_heap);
+}
diff --git a/drivers/staging/android/ion/ion_dummy_driver.c b/drivers/staging/android/ion/ion_dummy_driver.c
new file mode 100644
index 000000000000..55b2002753f2
--- /dev/null
+++ b/drivers/staging/android/ion/ion_dummy_driver.c
@@ -0,0 +1,158 @@
+/*
+ * drivers/gpu/ion/ion_dummy_driver.c
+ *
+ * Copyright (C) 2013 Linaro, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
+#include "ion.h"
+#include "ion_priv.h"
+
+struct ion_device *idev;
+struct ion_heap **heaps;
+
+void *carveout_ptr;
+void *chunk_ptr;
+
+struct ion_platform_heap dummy_heaps[] = {
+ {
+ .id = ION_HEAP_TYPE_SYSTEM,
+ .type = ION_HEAP_TYPE_SYSTEM,
+ .name = "system",
+ },
+ {
+ .id = ION_HEAP_TYPE_SYSTEM_CONTIG,
+ .type = ION_HEAP_TYPE_SYSTEM_CONTIG,
+ .name = "system contig",
+ },
+ {
+ .id = ION_HEAP_TYPE_CARVEOUT,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = "carveout",
+ .size = SZ_4M,
+ },
+ {
+ .id = ION_HEAP_TYPE_CHUNK,
+ .type = ION_HEAP_TYPE_CHUNK,
+ .name = "chunk",
+ .size = SZ_4M,
+ .align = SZ_16K,
+ .priv = (void *)(SZ_16K),
+ },
+};
+
+struct ion_platform_data dummy_ion_pdata = {
+ .nr = 4,
+ .heaps = dummy_heaps,
+};
+
+static int __init ion_dummy_init(void)
+{
+ int i, err;
+
+ idev = ion_device_create(NULL);
+ heaps = kzalloc(sizeof(struct ion_heap *) * dummy_ion_pdata.nr,
+ GFP_KERNEL);
+ if (!heaps)
+ return -ENOMEM;
+
+
+ /* Allocate a dummy carveout heap */
+ carveout_ptr = alloc_pages_exact(
+ dummy_heaps[ION_HEAP_TYPE_CARVEOUT].size,
+ GFP_KERNEL);
+ if (carveout_ptr)
+ dummy_heaps[ION_HEAP_TYPE_CARVEOUT].base =
+ virt_to_phys(carveout_ptr);
+ else
+ pr_err("ion_dummy: Could not allocate carveout\n");
+
+ /* Allocate a dummy chunk heap */
+ chunk_ptr = alloc_pages_exact(
+ dummy_heaps[ION_HEAP_TYPE_CHUNK].size,
+ GFP_KERNEL);
+ if (chunk_ptr)
+ dummy_heaps[ION_HEAP_TYPE_CHUNK].base = virt_to_phys(chunk_ptr);
+ else
+ pr_err("ion_dummy: Could not allocate chunk\n");
+
+ for (i = 0; i < dummy_ion_pdata.nr; i++) {
+ struct ion_platform_heap *heap_data = &dummy_ion_pdata.heaps[i];
+
+ if (heap_data->type == ION_HEAP_TYPE_CARVEOUT &&
+ !heap_data->base)
+ continue;
+
+ if (heap_data->type == ION_HEAP_TYPE_CHUNK && !heap_data->base)
+ continue;
+
+ heaps[i] = ion_heap_create(heap_data);
+ if (IS_ERR_OR_NULL(heaps[i])) {
+ err = PTR_ERR(heaps[i]);
+ goto err;
+ }
+ ion_device_add_heap(idev, heaps[i]);
+ }
+ return 0;
+err:
+ for (i = 0; i < dummy_ion_pdata.nr; i++) {
+ if (heaps[i])
+ ion_heap_destroy(heaps[i]);
+ }
+ kfree(heaps);
+
+ if (carveout_ptr) {
+ free_pages_exact(carveout_ptr,
+ dummy_heaps[ION_HEAP_TYPE_CARVEOUT].size);
+ carveout_ptr = NULL;
+ }
+ if (chunk_ptr) {
+ free_pages_exact(chunk_ptr,
+ dummy_heaps[ION_HEAP_TYPE_CHUNK].size);
+ chunk_ptr = NULL;
+ }
+ return err;
+}
+
+static void __exit ion_dummy_exit(void)
+{
+ int i;
+
+ ion_device_destroy(idev);
+
+ for (i = 0; i < dummy_ion_pdata.nr; i++)
+ ion_heap_destroy(heaps[i]);
+ kfree(heaps);
+
+ if (carveout_ptr) {
+ free_pages_exact(carveout_ptr,
+ dummy_heaps[ION_HEAP_TYPE_CARVEOUT].size);
+ carveout_ptr = NULL;
+ }
+ if (chunk_ptr) {
+ free_pages_exact(chunk_ptr,
+ dummy_heaps[ION_HEAP_TYPE_CHUNK].size);
+ chunk_ptr = NULL;
+ }
+
+ return;
+}
+
+module_init(ion_dummy_init);
+module_exit(ion_dummy_exit);
+
diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c
new file mode 100644
index 000000000000..e1bbd8b1b4f4
--- /dev/null
+++ b/drivers/staging/android/ion/ion_heap.c
@@ -0,0 +1,371 @@
+/*
+ * drivers/gpu/ion/ion_heap.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/rtmutex.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/vmalloc.h>
+#include "ion.h"
+#include "ion_priv.h"
+
+void *ion_heap_map_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ struct scatterlist *sg;
+ int i, j;
+ void *vaddr;
+ pgprot_t pgprot;
+ struct sg_table *table = buffer->sg_table;
+ int npages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+ struct page **pages = vmalloc(sizeof(struct page *) * npages);
+ struct page **tmp = pages;
+
+ if (!pages)
+ return NULL;
+
+ if (buffer->flags & ION_FLAG_CACHED)
+ pgprot = PAGE_KERNEL;
+ else
+ pgprot = pgprot_writecombine(PAGE_KERNEL);
+
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ int npages_this_entry = PAGE_ALIGN(sg->length) / PAGE_SIZE;
+ struct page *page = sg_page(sg);
+
+ BUG_ON(i >= npages);
+ for (j = 0; j < npages_this_entry; j++)
+ *(tmp++) = page++;
+ }
+ vaddr = vmap(pages, npages, VM_MAP, pgprot);
+ vfree(pages);
+
+ if (vaddr == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ return vaddr;
+}
+
+void ion_heap_unmap_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ vunmap(buffer->vaddr);
+}
+
+int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
+ struct vm_area_struct *vma)
+{
+ struct sg_table *table = buffer->sg_table;
+ unsigned long addr = vma->vm_start;
+ unsigned long offset = vma->vm_pgoff * PAGE_SIZE;
+ struct scatterlist *sg;
+ int i;
+ int ret;
+
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ struct page *page = sg_page(sg);
+ unsigned long remainder = vma->vm_end - addr;
+ unsigned long len = sg->length;
+
+ if (offset >= sg->length) {
+ offset -= sg->length;
+ continue;
+ } else if (offset) {
+ page += offset / PAGE_SIZE;
+ len = sg->length - offset;
+ offset = 0;
+ }
+ len = min(len, remainder);
+ ret = remap_pfn_range(vma, addr, page_to_pfn(page), len,
+ vma->vm_page_prot);
+ if (ret)
+ return ret;
+ addr += len;
+ if (addr >= vma->vm_end)
+ return 0;
+ }
+ return 0;
+}
+
+static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
+{
+ void *addr = vm_map_ram(pages, num, -1, pgprot);
+
+ if (!addr)
+ return -ENOMEM;
+ memset(addr, 0, PAGE_SIZE * num);
+ vm_unmap_ram(addr, num);
+
+ return 0;
+}
+
+static int ion_heap_sglist_zero(struct scatterlist *sgl, unsigned int nents,
+ pgprot_t pgprot)
+{
+ int p = 0;
+ int ret = 0;
+ struct sg_page_iter piter;
+ struct page *pages[32];
+
+ for_each_sg_page(sgl, &piter, nents, 0) {
+ pages[p++] = sg_page_iter_page(&piter);
+ if (p == ARRAY_SIZE(pages)) {
+ ret = ion_heap_clear_pages(pages, p, pgprot);
+ if (ret)
+ return ret;
+ p = 0;
+ }
+ }
+ if (p)
+ ret = ion_heap_clear_pages(pages, p, pgprot);
+
+ return ret;
+}
+
+int ion_heap_buffer_zero(struct ion_buffer *buffer)
+{
+ struct sg_table *table = buffer->sg_table;
+ pgprot_t pgprot;
+
+ if (buffer->flags & ION_FLAG_CACHED)
+ pgprot = PAGE_KERNEL;
+ else
+ pgprot = pgprot_writecombine(PAGE_KERNEL);
+
+ return ion_heap_sglist_zero(table->sgl, table->nents, pgprot);
+}
+
+int ion_heap_pages_zero(struct page *page, size_t size, pgprot_t pgprot)
+{
+ struct scatterlist sg;
+
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, page, size, 0);
+ return ion_heap_sglist_zero(&sg, 1, pgprot);
+}
+
+void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer)
+{
+ spin_lock(&heap->free_lock);
+ list_add(&buffer->list, &heap->free_list);
+ heap->free_list_size += buffer->size;
+ spin_unlock(&heap->free_lock);
+ wake_up(&heap->waitqueue);
+}
+
+size_t ion_heap_freelist_size(struct ion_heap *heap)
+{
+ size_t size;
+
+ spin_lock(&heap->free_lock);
+ size = heap->free_list_size;
+ spin_unlock(&heap->free_lock);
+
+ return size;
+}
+
+static size_t _ion_heap_freelist_drain(struct ion_heap *heap, size_t size,
+ bool skip_pools)
+{
+ struct ion_buffer *buffer;
+ size_t total_drained = 0;
+
+ if (ion_heap_freelist_size(heap) == 0)
+ return 0;
+
+ spin_lock(&heap->free_lock);
+ if (size == 0)
+ size = heap->free_list_size;
+
+ while (!list_empty(&heap->free_list)) {
+ if (total_drained >= size)
+ break;
+ buffer = list_first_entry(&heap->free_list, struct ion_buffer,
+ list);
+ list_del(&buffer->list);
+ heap->free_list_size -= buffer->size;
+ if (skip_pools)
+ buffer->private_flags |= ION_PRIV_FLAG_SHRINKER_FREE;
+ total_drained += buffer->size;
+ spin_unlock(&heap->free_lock);
+ ion_buffer_destroy(buffer);
+ spin_lock(&heap->free_lock);
+ }
+ spin_unlock(&heap->free_lock);
+
+ return total_drained;
+}
+
+size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size)
+{
+ return _ion_heap_freelist_drain(heap, size, false);
+}
+
+size_t ion_heap_freelist_shrink(struct ion_heap *heap, size_t size)
+{
+ return _ion_heap_freelist_drain(heap, size, true);
+}
+
+static int ion_heap_deferred_free(void *data)
+{
+ struct ion_heap *heap = data;
+
+ while (true) {
+ struct ion_buffer *buffer;
+
+ wait_event_freezable(heap->waitqueue,
+ ion_heap_freelist_size(heap) > 0);
+
+ spin_lock(&heap->free_lock);
+ if (list_empty(&heap->free_list)) {
+ spin_unlock(&heap->free_lock);
+ continue;
+ }
+ buffer = list_first_entry(&heap->free_list, struct ion_buffer,
+ list);
+ list_del(&buffer->list);
+ heap->free_list_size -= buffer->size;
+ spin_unlock(&heap->free_lock);
+ ion_buffer_destroy(buffer);
+ }
+
+ return 0;
+}
+
+int ion_heap_init_deferred_free(struct ion_heap *heap)
+{
+ struct sched_param param = { .sched_priority = 0 };
+
+ INIT_LIST_HEAD(&heap->free_list);
+ heap->free_list_size = 0;
+ spin_lock_init(&heap->free_lock);
+ init_waitqueue_head(&heap->waitqueue);
+ heap->task = kthread_run(ion_heap_deferred_free, heap,
+ "%s", heap->name);
+ if (IS_ERR(heap->task)) {
+ pr_err("%s: creating thread for deferred free failed\n",
+ __func__);
+ return PTR_RET(heap->task);
+ }
+ sched_setscheduler(heap->task, SCHED_IDLE, &param);
+ return 0;
+}
+
+static int ion_heap_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ struct ion_heap *heap = container_of(shrinker, struct ion_heap,
+ shrinker);
+ int total = 0;
+ int freed = 0;
+ int to_scan = sc->nr_to_scan;
+
+ if (to_scan == 0)
+ goto out;
+
+ /*
+ * shrink the free list first, no point in zeroing the memory if we're
+ * just going to reclaim it. Also, skip any possible page pooling.
+ */
+ if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
+ freed = ion_heap_freelist_shrink(heap, to_scan * PAGE_SIZE) /
+ PAGE_SIZE;
+
+ to_scan -= freed;
+ if (to_scan < 0)
+ to_scan = 0;
+
+out:
+ total = ion_heap_freelist_size(heap) / PAGE_SIZE;
+ if (heap->ops->shrink)
+ total += heap->ops->shrink(heap, sc->gfp_mask, to_scan);
+ return total;
+}
+
+void ion_heap_init_shrinker(struct ion_heap *heap)
+{
+ heap->shrinker.shrink = ion_heap_shrink;
+ heap->shrinker.seeks = DEFAULT_SEEKS;
+ heap->shrinker.batch = 0;
+ register_shrinker(&heap->shrinker);
+}
+
+struct ion_heap *ion_heap_create(struct ion_platform_heap *heap_data)
+{
+ struct ion_heap *heap = NULL;
+
+ switch (heap_data->type) {
+ case ION_HEAP_TYPE_SYSTEM_CONTIG:
+ heap = ion_system_contig_heap_create(heap_data);
+ break;
+ case ION_HEAP_TYPE_SYSTEM:
+ heap = ion_system_heap_create(heap_data);
+ break;
+ case ION_HEAP_TYPE_CARVEOUT:
+ heap = ion_carveout_heap_create(heap_data);
+ break;
+ case ION_HEAP_TYPE_CHUNK:
+ heap = ion_chunk_heap_create(heap_data);
+ break;
+ case ION_HEAP_TYPE_DMA:
+ heap = ion_cma_heap_create(heap_data);
+ break;
+ default:
+ pr_err("%s: Invalid heap type %d\n", __func__,
+ heap_data->type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (IS_ERR_OR_NULL(heap)) {
+ pr_err("%s: error creating heap %s type %d base %lu size %zu\n",
+ __func__, heap_data->name, heap_data->type,
+ heap_data->base, heap_data->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ heap->name = heap_data->name;
+ heap->id = heap_data->id;
+ return heap;
+}
+
+void ion_heap_destroy(struct ion_heap *heap)
+{
+ if (!heap)
+ return;
+
+ switch (heap->type) {
+ case ION_HEAP_TYPE_SYSTEM_CONTIG:
+ ion_system_contig_heap_destroy(heap);
+ break;
+ case ION_HEAP_TYPE_SYSTEM:
+ ion_system_heap_destroy(heap);
+ break;
+ case ION_HEAP_TYPE_CARVEOUT:
+ ion_carveout_heap_destroy(heap);
+ break;
+ case ION_HEAP_TYPE_CHUNK:
+ ion_chunk_heap_destroy(heap);
+ break;
+ case ION_HEAP_TYPE_DMA:
+ ion_cma_heap_destroy(heap);
+ break;
+ default:
+ pr_err("%s: Invalid heap type %d\n", __func__,
+ heap->type);
+ }
+}
diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c
new file mode 100644
index 000000000000..0e20e62ec4bc
--- /dev/null
+++ b/drivers/staging/android/ion/ion_page_pool.c
@@ -0,0 +1,190 @@
+/*
+ * drivers/gpu/ion/ion_mem_pool.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "ion_priv.h"
+
+struct ion_page_pool_item {
+ struct page *page;
+ struct list_head list;
+};
+
+static void *ion_page_pool_alloc_pages(struct ion_page_pool *pool)
+{
+ struct page *page = alloc_pages(pool->gfp_mask, pool->order);
+
+ if (!page)
+ return NULL;
+ ion_pages_sync_for_device(NULL, page, PAGE_SIZE << pool->order,
+ DMA_BIDIRECTIONAL);
+ return page;
+}
+
+static void ion_page_pool_free_pages(struct ion_page_pool *pool,
+ struct page *page)
+{
+ __free_pages(page, pool->order);
+}
+
+static int ion_page_pool_add(struct ion_page_pool *pool, struct page *page)
+{
+ struct ion_page_pool_item *item;
+
+ item = kmalloc(sizeof(struct ion_page_pool_item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ mutex_lock(&pool->mutex);
+ item->page = page;
+ if (PageHighMem(page)) {
+ list_add_tail(&item->list, &pool->high_items);
+ pool->high_count++;
+ } else {
+ list_add_tail(&item->list, &pool->low_items);
+ pool->low_count++;
+ }
+ mutex_unlock(&pool->mutex);
+ return 0;
+}
+
+static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high)
+{
+ struct ion_page_pool_item *item;
+ struct page *page;
+
+ if (high) {
+ BUG_ON(!pool->high_count);
+ item = list_first_entry(&pool->high_items,
+ struct ion_page_pool_item, list);
+ pool->high_count--;
+ } else {
+ BUG_ON(!pool->low_count);
+ item = list_first_entry(&pool->low_items,
+ struct ion_page_pool_item, list);
+ pool->low_count--;
+ }
+
+ list_del(&item->list);
+ page = item->page;
+ kfree(item);
+ return page;
+}
+
+void *ion_page_pool_alloc(struct ion_page_pool *pool)
+{
+ struct page *page = NULL;
+
+ BUG_ON(!pool);
+
+ mutex_lock(&pool->mutex);
+ if (pool->high_count)
+ page = ion_page_pool_remove(pool, true);
+ else if (pool->low_count)
+ page = ion_page_pool_remove(pool, false);
+ mutex_unlock(&pool->mutex);
+
+ if (!page)
+ page = ion_page_pool_alloc_pages(pool);
+
+ return page;
+}
+
+void ion_page_pool_free(struct ion_page_pool *pool, struct page *page)
+{
+ int ret;
+
+ ret = ion_page_pool_add(pool, page);
+ if (ret)
+ ion_page_pool_free_pages(pool, page);
+}
+
+static int ion_page_pool_total(struct ion_page_pool *pool, bool high)
+{
+ int total = 0;
+
+ total += high ? (pool->high_count + pool->low_count) *
+ (1 << pool->order) :
+ pool->low_count * (1 << pool->order);
+ return total;
+}
+
+int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
+ int nr_to_scan)
+{
+ int i;
+ bool high;
+
+ high = !!(gfp_mask & __GFP_HIGHMEM);
+
+ for (i = 0; i < nr_to_scan; i++) {
+ struct page *page;
+
+ mutex_lock(&pool->mutex);
+ if (pool->low_count) {
+ page = ion_page_pool_remove(pool, false);
+ } else if (high && pool->high_count) {
+ page = ion_page_pool_remove(pool, true);
+ } else {
+ mutex_unlock(&pool->mutex);
+ break;
+ }
+ mutex_unlock(&pool->mutex);
+ ion_page_pool_free_pages(pool, page);
+ }
+
+ return ion_page_pool_total(pool, high);
+}
+
+struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order)
+{
+ struct ion_page_pool *pool = kmalloc(sizeof(struct ion_page_pool),
+ GFP_KERNEL);
+ if (!pool)
+ return NULL;
+ pool->high_count = 0;
+ pool->low_count = 0;
+ INIT_LIST_HEAD(&pool->low_items);
+ INIT_LIST_HEAD(&pool->high_items);
+ pool->gfp_mask = gfp_mask;
+ pool->order = order;
+ mutex_init(&pool->mutex);
+ plist_node_init(&pool->list, order);
+
+ return pool;
+}
+
+void ion_page_pool_destroy(struct ion_page_pool *pool)
+{
+ kfree(pool);
+}
+
+static int __init ion_page_pool_init(void)
+{
+ return 0;
+}
+
+static void __exit ion_page_pool_exit(void)
+{
+}
+
+module_init(ion_page_pool_init);
+module_exit(ion_page_pool_exit);
diff --git a/drivers/staging/android/ion/ion_priv.h b/drivers/staging/android/ion/ion_priv.h
new file mode 100644
index 000000000000..8871f35f384b
--- /dev/null
+++ b/drivers/staging/android/ion/ion_priv.h
@@ -0,0 +1,406 @@
+/*
+ * drivers/gpu/ion/ion_priv.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _ION_PRIV_H
+#define _ION_PRIV_H
+
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/kref.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/shrinker.h>
+#include <linux/types.h>
+#include <linux/device.h>
+
+#include "ion.h"
+
+struct ion_buffer *ion_handle_buffer(struct ion_handle *handle);
+
+/**
+ * struct ion_buffer - metadata for a particular buffer
+ * @ref: reference count
+ * @node: node in the ion_device buffers tree
+ * @dev: back pointer to the ion_device
+ * @heap: back pointer to the heap the buffer came from
+ * @flags: buffer specific flags
+ * @private_flags: internal buffer specific flags
+ * @size: size of the buffer
+ * @priv_virt: private data to the buffer representable as
+ * a void *
+ * @priv_phys: private data to the buffer representable as
+ * an ion_phys_addr_t (and someday a phys_addr_t)
+ * @lock: protects the buffers cnt fields
+ * @kmap_cnt: number of times the buffer is mapped to the kernel
+ * @vaddr: the kernel mapping if kmap_cnt is not zero
+ * @dmap_cnt: number of times the buffer is mapped for dma
+ * @sg_table: the sg table for the buffer if dmap_cnt is not zero
+ * @pages: flat array of pages in the buffer -- used by fault
+ * handler and only valid for buffers that are faulted in
+ * @vmas: list of vma's mapping this buffer
+ * @handle_count: count of handles referencing this buffer
+ * @task_comm: taskcomm of last client to reference this buffer in a
+ * handle, used for debugging
+ * @pid: pid of last client to reference this buffer in a
+ * handle, used for debugging
+*/
+struct ion_buffer {
+ struct kref ref;
+ union {
+ struct rb_node node;
+ struct list_head list;
+ };
+ struct ion_device *dev;
+ struct ion_heap *heap;
+ unsigned long flags;
+ unsigned long private_flags;
+ size_t size;
+ union {
+ void *priv_virt;
+ ion_phys_addr_t priv_phys;
+ };
+ struct mutex lock;
+ int kmap_cnt;
+ void *vaddr;
+ int dmap_cnt;
+ struct sg_table *sg_table;
+ struct page **pages;
+ struct list_head vmas;
+ /* used to track orphaned buffers */
+ int handle_count;
+ char task_comm[TASK_COMM_LEN];
+ pid_t pid;
+};
+void ion_buffer_destroy(struct ion_buffer *buffer);
+
+/**
+ * struct ion_heap_ops - ops to operate on a given heap
+ * @allocate: allocate memory
+ * @free: free memory
+ * @phys: get physical address of a buffer (only defined on
+ * physically contiguous heaps)
+ * @map_dma: map the memory for dma to a scatterlist
+ * @unmap_dma: unmap the memory for dma
+ * @map_kernel: map memory to the kernel
+ * @unmap_kernel: unmap memory from the kernel
+ * @map_user: map memory to userspace
+ *
+ * allocate, phys, and map_user return 0 on success, -errno on error.
+ * map_dma and map_kernel return a pointer on success, ERR_PTR on
+ * error. @free will be called with ION_PRIV_FLAG_SHRINKER_FREE set in
+ * the buffer's private_flags when called from a shrinker. In that
+ * case, the pages being freed must be truly freed back to the
+ * system, not put in a page pool or otherwise cached.
+ */
+struct ion_heap_ops {
+ int (*allocate) (struct ion_heap *heap,
+ struct ion_buffer *buffer, unsigned long len,
+ unsigned long align, unsigned long flags);
+ void (*free) (struct ion_buffer *buffer);
+ int (*phys) (struct ion_heap *heap, struct ion_buffer *buffer,
+ ion_phys_addr_t *addr, size_t *len);
+ struct sg_table *(*map_dma) (struct ion_heap *heap,
+ struct ion_buffer *buffer);
+ void (*unmap_dma) (struct ion_heap *heap, struct ion_buffer *buffer);
+ void * (*map_kernel) (struct ion_heap *heap, struct ion_buffer *buffer);
+ void (*unmap_kernel) (struct ion_heap *heap, struct ion_buffer *buffer);
+ int (*map_user) (struct ion_heap *mapper, struct ion_buffer *buffer,
+ struct vm_area_struct *vma);
+ int (*shrink)(struct ion_heap *heap, gfp_t gfp_mask, int nr_to_scan);
+};
+
+/**
+ * heap flags - flags between the heaps and core ion code
+ */
+#define ION_HEAP_FLAG_DEFER_FREE (1 << 0)
+
+/**
+ * private flags - flags internal to ion
+ */
+/*
+ * Buffer is being freed from a shrinker function. Skip any possible
+ * heap-specific caching mechanism (e.g. page pools). Guarantees that
+ * any buffer storage that came from the system allocator will be
+ * returned to the system allocator.
+ */
+#define ION_PRIV_FLAG_SHRINKER_FREE (1 << 0)
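+
+/*
+ * Example (illustrative sketch): a heap's free path honoring the flag by
+ * bypassing its page pool when a buffer is freed from a shrinker; "pool",
+ * "page" and "order" stand for the heap's own bookkeeping. The system
+ * heap's free_buffer_page() in this series does essentially this.
+ *
+ * if (buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE)
+ * __free_pages(page, order);
+ * else
+ * ion_page_pool_free(pool, page);
+ */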
+
+/**
+ * struct ion_heap - represents a heap in the system
+ * @node: rb node to put the heap on the device's tree of heaps
+ * @dev: back pointer to the ion_device
+ * @type: type of heap
+ * @ops: ops struct as above
+ * @flags: flags
+ * @id: id of heap, also indicates priority of this heap when
+ * allocating. These are specified by platform data and
+ * MUST be unique
+ * @name: used for debugging
+ * @shrinker: a shrinker for the heap
+ * @free_list: free list head if deferred free is used
+ * @free_list_size: size of the deferred free list in bytes
+ * @lock: protects the free list
+ * @waitqueue: queue to wait on from deferred free thread
+ * @task: task struct of deferred free thread
+ * @debug_show: called when heap debug file is read to add any
+ * heap specific debug info to output
+ *
+ * Represents a pool of memory from which buffers can be made. In some
+ * systems the only heap is regular system memory allocated via vmalloc.
+ * On others, some blocks might require large physically contiguous buffers
+ * that are allocated from a specially reserved heap.
+ */
+struct ion_heap {
+ struct plist_node node;
+ struct ion_device *dev;
+ enum ion_heap_type type;
+ struct ion_heap_ops *ops;
+ unsigned long flags;
+ unsigned int id;
+ const char *name;
+ struct shrinker shrinker;
+ struct list_head free_list;
+ size_t free_list_size;
+ spinlock_t free_lock;
+ wait_queue_head_t waitqueue;
+ struct task_struct *task;
+
+ int (*debug_show)(struct ion_heap *heap, struct seq_file *, void *);
+};
+
+/**
+ * ion_buffer_cached - this ion buffer is cached
+ * @buffer: buffer
+ *
+ * indicates whether this ion buffer is cached
+ */
+bool ion_buffer_cached(struct ion_buffer *buffer);
+
+/**
+ * ion_buffer_fault_user_mappings - fault in user mappings of this buffer
+ * @buffer: buffer
+ *
+ * indicates whether userspace mappings of this buffer will be faulted
+ * in; this can affect how buffers are allocated from the heap.
+ */
+bool ion_buffer_fault_user_mappings(struct ion_buffer *buffer);
+
+/**
+ * ion_device_create - allocates and returns an ion device
+ * @custom_ioctl: arch specific ioctl function if applicable
+ *
+ * returns a valid device or an ERR_PTR-encoded error
+ */
+struct ion_device *ion_device_create(long (*custom_ioctl)
+ (struct ion_client *client,
+ unsigned int cmd,
+ unsigned long arg));
+
+/**
+ * ion_device_destroy - free a device and its resources
+ * @dev: the device
+ */
+void ion_device_destroy(struct ion_device *dev);
+
+/**
+ * ion_device_add_heap - adds a heap to the ion device
+ * @dev: the device
+ * @heap: the heap to add
+ */
+void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap);
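+
+/*
+ * Example (illustrative sketch): minimal bring-up of an ion device with one
+ * heap; "heap_data" is a hypothetical ion_platform_heap and error handling
+ * is omitted.
+ *
+ * struct ion_device *idev = ion_device_create(NULL);
+ * struct ion_heap *heap = ion_heap_create(&heap_data);
+ *
+ * ion_device_add_heap(idev, heap);
+ */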
+
+/**
+ * some helpers for common operations on buffers using the sg_table
+ * and vaddr fields
+ */
+void *ion_heap_map_kernel(struct ion_heap *, struct ion_buffer *);
+void ion_heap_unmap_kernel(struct ion_heap *, struct ion_buffer *);
+int ion_heap_map_user(struct ion_heap *, struct ion_buffer *,
+ struct vm_area_struct *);
+int ion_heap_buffer_zero(struct ion_buffer *buffer);
+int ion_heap_pages_zero(struct page *page, size_t size, pgprot_t pgprot);
+
+/**
+ * ion_heap_init_shrinker
+ * @heap: the heap
+ *
+ * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag or defines the shrink op
+ * this function will be called to setup a shrinker to shrink the freelists
+ * and call the heap's shrink op.
+ */
+void ion_heap_init_shrinker(struct ion_heap *heap);
+
+/**
+ * ion_heap_init_deferred_free -- initialize deferred free functionality
+ * @heap: the heap
+ *
+ * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag this function will
+ * be called to setup deferred frees. Calls to free the buffer will
+ * return immediately and the actual free will occur some time later
+ */
+int ion_heap_init_deferred_free(struct ion_heap *heap);
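+
+/*
+ * Example (illustrative sketch): a heap that wants deferred freeing sets
+ * ION_HEAP_FLAG_DEFER_FREE at creation time and has this helper called to
+ * start the free thread; error handling is abbreviated.
+ *
+ * heap->flags |= ION_HEAP_FLAG_DEFER_FREE;
+ * ret = ion_heap_init_deferred_free(heap);
+ * if (ret)
+ * return ERR_PTR(ret);
+ */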
+
+/**
+ * ion_heap_freelist_add - add a buffer to the deferred free list
+ * @heap: the heap
+ * @buffer: the buffer
+ *
+ * Adds an item to the deferred freelist.
+ */
+void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer);
+
+/**
+ * ion_heap_freelist_drain - drain the deferred free list
+ * @heap: the heap
+ * @size: amount of memory to drain in bytes
+ *
+ * Drains the indicated amount of memory from the deferred freelist immediately.
+ * Returns the total amount freed. The total freed may be higher depending
+ * on the size of the items in the list, or lower if there is insufficient
+ * total memory on the freelist.
+ */
+size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size);
+
+/**
+ * ion_heap_freelist_shrink - drain the deferred free
+ * list, skipping any heap-specific
+ * pooling or caching mechanisms
+ *
+ * @heap: the heap
+ * @size: amount of memory to drain in bytes
+ *
+ * Drains the indicated amount of memory from the deferred freelist immediately.
+ * Returns the total amount freed. The total freed may be higher depending
+ * on the size of the items in the list, or lower if there is insufficient
+ * total memory on the freelist.
+ *
+ * Unlike with @ion_heap_freelist_drain, don't put any pages back into
+ * page pools or otherwise cache the pages. Everything must be
+ * genuinely freed back to the system. If you're freeing from a
+ * shrinker, you probably want to use this. Note that this relies on
+ * the heap.ops.free callback honoring the ION_PRIV_FLAG_SHRINKER_FREE
+ * flag.
+ */
+size_t ion_heap_freelist_shrink(struct ion_heap *heap,
+ size_t size);
+
+/**
+ * ion_heap_freelist_size - returns the size of the freelist in bytes
+ * @heap: the heap
+ */
+size_t ion_heap_freelist_size(struct ion_heap *heap);
+
+
+/**
+ * functions for creating and destroying the built in ion heaps.
+ * architectures can add their own custom architecture specific
+ * heaps as appropriate.
+ */
+
+struct ion_heap *ion_heap_create(struct ion_platform_heap *);
+void ion_heap_destroy(struct ion_heap *);
+struct ion_heap *ion_system_heap_create(struct ion_platform_heap *);
+void ion_system_heap_destroy(struct ion_heap *);
+
+struct ion_heap *ion_system_contig_heap_create(struct ion_platform_heap *);
+void ion_system_contig_heap_destroy(struct ion_heap *);
+
+struct ion_heap *ion_carveout_heap_create(struct ion_platform_heap *);
+void ion_carveout_heap_destroy(struct ion_heap *);
+
+struct ion_heap *ion_chunk_heap_create(struct ion_platform_heap *);
+void ion_chunk_heap_destroy(struct ion_heap *);
+struct ion_heap *ion_cma_heap_create(struct ion_platform_heap *);
+void ion_cma_heap_destroy(struct ion_heap *);
+
+/**
+ * kernel api to allocate/free from carveout -- used when carveout is
+ * used to back an architecture specific custom heap
+ */
+ion_phys_addr_t ion_carveout_allocate(struct ion_heap *heap, unsigned long size,
+ unsigned long align);
+void ion_carveout_free(struct ion_heap *heap, ion_phys_addr_t addr,
+ unsigned long size);
+/**
+ * The carveout heap returns physical addresses, since 0 may be a valid
+ * physical address, this is used to indicate allocation failed
+ */
+#define ION_CARVEOUT_ALLOCATE_FAIL -1
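+
+/*
+ * Example (illustrative sketch): an architecture specific heap backed by a
+ * carveout might allocate and release a region like this; error handling is
+ * abbreviated and "size" is a caller-supplied length in bytes.
+ *
+ * ion_phys_addr_t paddr = ion_carveout_allocate(heap, size, PAGE_SIZE);
+ *
+ * if (paddr == ION_CARVEOUT_ALLOCATE_FAIL)
+ * return -ENOMEM;
+ * ...
+ * ion_carveout_free(heap, paddr, size);
+ */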
+
+/**
+ * functions for creating and destroying a heap pool -- allows you
+ * to keep a pool of pre allocated memory to use from your heap. Keeping
+ * a pool of memory that is ready for dma, ie any cached mapping have been
+ * invalidated from the cache, provides a significant peformance benefit on
+ * many systems */
+
+/**
+ * struct ion_page_pool - pagepool struct
+ * @high_count: number of highmem items in the pool
+ * @low_count: number of lowmem items in the pool
+ * @high_items: list of highmem items
+ * @low_items: list of lowmem items
+ * @mutex: lock protecting this struct and especially the count
+ * item list
+ * @gfp_mask: gfp_mask to use from alloc
+ * @order: order of pages in the pool
+ * @list: plist node for list of pools
+ *
+ * Allows you to keep a pool of pre-allocated pages to use from your heap.
+ * Keeping a pool of pages that is ready for dma, i.e. any cached mappings
+ * have been invalidated from the cache, provides a significant performance
+ * benefit on many systems
+ */
+struct ion_page_pool {
+ int high_count;
+ int low_count;
+ struct list_head high_items;
+ struct list_head low_items;
+ struct mutex mutex;
+ gfp_t gfp_mask;
+ unsigned int order;
+ struct plist_node list;
+};
+
+struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order);
+void ion_page_pool_destroy(struct ion_page_pool *);
+void *ion_page_pool_alloc(struct ion_page_pool *);
+void ion_page_pool_free(struct ion_page_pool *, struct page *);
+
+/** ion_page_pool_shrink - shrinks the size of the memory cached in the pool
+ * @pool: the pool
+ * @gfp_mask: the memory type to reclaim
+ * @nr_to_scan: number of items to shrink in pages
+ *
+ * returns the number of items freed in pages
+ */
+int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
+ int nr_to_scan);
+
+/**
+ * ion_pages_sync_for_device - cache flush pages for use with the specified
+ * device
+ * @dev: the device the pages will be used with
+ * @page: the first page to be flushed
+ * @size: size in bytes of region to be flushed
+ * @dir: direction of dma transfer
+ */
+void ion_pages_sync_for_device(struct device *dev, struct page *page,
+ size_t size, enum dma_data_direction dir);
+
+#endif /* _ION_PRIV_H */
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
new file mode 100644
index 000000000000..5945445ee5f3
--- /dev/null
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -0,0 +1,451 @@
+/*
+ * drivers/gpu/ion/ion_system_heap.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <asm/page.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "ion.h"
+#include "ion_priv.h"
+
+static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
+ __GFP_NORETRY) & ~__GFP_WAIT;
+static gfp_t low_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN);
+static const unsigned int orders[] = {8, 4, 0};
+static const int num_orders = ARRAY_SIZE(orders);
+static int order_to_index(unsigned int order)
+{
+ int i;
+
+ for (i = 0; i < num_orders; i++)
+ if (order == orders[i])
+ return i;
+ BUG();
+ return -1;
+}
+
+static unsigned int order_to_size(int order)
+{
+ return PAGE_SIZE << order;
+}
+
+struct ion_system_heap {
+ struct ion_heap heap;
+ struct ion_page_pool **pools;
+};
+
+struct page_info {
+ struct page *page;
+ unsigned int order;
+ struct list_head list;
+};
+
+static struct page *alloc_buffer_page(struct ion_system_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long order)
+{
+ bool cached = ion_buffer_cached(buffer);
+ struct ion_page_pool *pool = heap->pools[order_to_index(order)];
+ struct page *page;
+
+ if (!cached) {
+ page = ion_page_pool_alloc(pool);
+ } else {
+ gfp_t gfp_flags = low_order_gfp_flags;
+
+ if (order > 4)
+ gfp_flags = high_order_gfp_flags;
+ page = alloc_pages(gfp_flags, order);
+ if (!page)
+ return NULL;
+ ion_pages_sync_for_device(NULL, page, PAGE_SIZE << order,
+ DMA_BIDIRECTIONAL);
+ }
+ if (!page)
+ return NULL;
+
+ return page;
+}
+
+static void free_buffer_page(struct ion_system_heap *heap,
+ struct ion_buffer *buffer, struct page *page,
+ unsigned int order)
+{
+ bool cached = ion_buffer_cached(buffer);
+
+ if (!cached && !(buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE)) {
+ struct ion_page_pool *pool = heap->pools[order_to_index(order)];
+
+ ion_page_pool_free(pool, page);
+ } else {
+ __free_pages(page, order);
+ }
+}
+
+
+static struct page_info *alloc_largest_available(struct ion_system_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long size,
+ unsigned int max_order)
+{
+ struct page *page;
+ struct page_info *info;
+ int i;
+
+ info = kmalloc(sizeof(struct page_info), GFP_KERNEL);
+ if (!info)
+ return NULL;
+
+ for (i = 0; i < num_orders; i++) {
+ if (size < order_to_size(orders[i]))
+ continue;
+ if (max_order < orders[i])
+ continue;
+
+ page = alloc_buffer_page(heap, buffer, orders[i]);
+ if (!page)
+ continue;
+
+ info->page = page;
+ info->order = orders[i];
+ INIT_LIST_HEAD(&info->list);
+ return info;
+ }
+ kfree(info);
+
+ return NULL;
+}
+
+static int ion_system_heap_allocate(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long size, unsigned long align,
+ unsigned long flags)
+{
+ struct ion_system_heap *sys_heap = container_of(heap,
+ struct ion_system_heap,
+ heap);
+ struct sg_table *table;
+ struct scatterlist *sg;
+ int ret;
+ struct list_head pages;
+ struct page_info *info, *tmp_info;
+ int i = 0;
+ unsigned long size_remaining = PAGE_ALIGN(size);
+ unsigned int max_order = orders[0];
+
+ if (align > PAGE_SIZE)
+ return -EINVAL;
+
+ if (size / PAGE_SIZE > totalram_pages / 2)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&pages);
+ while (size_remaining > 0) {
+ info = alloc_largest_available(sys_heap, buffer, size_remaining,
+ max_order);
+ if (!info)
+ goto err;
+ list_add_tail(&info->list, &pages);
+ size_remaining -= (1 << info->order) * PAGE_SIZE;
+ max_order = info->order;
+ i++;
+ }
+ table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!table)
+ goto err;
+
+ ret = sg_alloc_table(table, i, GFP_KERNEL);
+ if (ret)
+ goto err1;
+
+ sg = table->sgl;
+ list_for_each_entry_safe(info, tmp_info, &pages, list) {
+ struct page *page = info->page;
+ sg_set_page(sg, page, (1 << info->order) * PAGE_SIZE, 0);
+ sg = sg_next(sg);
+ list_del(&info->list);
+ kfree(info);
+ }
+
+ buffer->priv_virt = table;
+ return 0;
+err1:
+ kfree(table);
+err:
+ list_for_each_entry_safe(info, tmp_info, &pages, list) {
+ free_buffer_page(sys_heap, buffer, info->page, info->order);
+ kfree(info);
+ }
+ return -ENOMEM;
+}
+
+static void ion_system_heap_free(struct ion_buffer *buffer)
+{
+ struct ion_heap *heap = buffer->heap;
+ struct ion_system_heap *sys_heap = container_of(heap,
+ struct ion_system_heap,
+ heap);
+ struct sg_table *table = buffer->sg_table;
+ bool cached = ion_buffer_cached(buffer);
+ struct scatterlist *sg;
+ LIST_HEAD(pages);
+ int i;
+
+ /* uncached pages come from the page pools, zero them before returning
+ for security purposes (other allocations are zeroed at alloc time) */
+ if (!cached && !(buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE))
+ ion_heap_buffer_zero(buffer);
+
+ for_each_sg(table->sgl, sg, table->nents, i)
+ free_buffer_page(sys_heap, buffer, sg_page(sg),
+ get_order(sg->length));
+ sg_free_table(table);
+ kfree(table);
+}
+
+static struct sg_table *ion_system_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return buffer->priv_virt;
+}
+
+static void ion_system_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+static int ion_system_heap_shrink(struct ion_heap *heap, gfp_t gfp_mask,
+ int nr_to_scan)
+{
+ struct ion_system_heap *sys_heap;
+ int nr_total = 0;
+ int i;
+
+ sys_heap = container_of(heap, struct ion_system_heap, heap);
+
+ for (i = 0; i < num_orders; i++) {
+ struct ion_page_pool *pool = sys_heap->pools[i];
+
+ nr_total += ion_page_pool_shrink(pool, gfp_mask, nr_to_scan);
+ }
+
+ return nr_total;
+}
+
+static struct ion_heap_ops system_heap_ops = {
+ .allocate = ion_system_heap_allocate,
+ .free = ion_system_heap_free,
+ .map_dma = ion_system_heap_map_dma,
+ .unmap_dma = ion_system_heap_unmap_dma,
+ .map_kernel = ion_heap_map_kernel,
+ .unmap_kernel = ion_heap_unmap_kernel,
+ .map_user = ion_heap_map_user,
+ .shrink = ion_system_heap_shrink,
+};
+
+static int ion_system_heap_debug_show(struct ion_heap *heap, struct seq_file *s,
+ void *unused)
+{
+
+ struct ion_system_heap *sys_heap = container_of(heap,
+ struct ion_system_heap,
+ heap);
+ int i;
+
+ for (i = 0; i < num_orders; i++) {
+ struct ion_page_pool *pool = sys_heap->pools[i];
+
+ seq_printf(s, "%d order %u highmem pages in pool = %lu total\n",
+ pool->high_count, pool->order,
+ (1 << pool->order) * PAGE_SIZE * pool->high_count);
+ seq_printf(s, "%d order %u lowmem pages in pool = %lu total\n",
+ pool->low_count, pool->order,
+ (1 << pool->order) * PAGE_SIZE * pool->low_count);
+ }
+ return 0;
+}
+
+struct ion_heap *ion_system_heap_create(struct ion_platform_heap *unused)
+{
+ struct ion_system_heap *heap;
+ int i;
+
+ heap = kzalloc(sizeof(struct ion_system_heap), GFP_KERNEL);
+ if (!heap)
+ return ERR_PTR(-ENOMEM);
+ heap->heap.ops = &system_heap_ops;
+ heap->heap.type = ION_HEAP_TYPE_SYSTEM;
+ heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE;
+ heap->pools = kzalloc(sizeof(struct ion_page_pool *) * num_orders,
+ GFP_KERNEL);
+ if (!heap->pools)
+ goto err_alloc_pools;
+ for (i = 0; i < num_orders; i++) {
+ struct ion_page_pool *pool;
+ gfp_t gfp_flags = low_order_gfp_flags;
+
+ if (orders[i] > 4)
+ gfp_flags = high_order_gfp_flags;
+ pool = ion_page_pool_create(gfp_flags, orders[i]);
+ if (!pool)
+ goto err_create_pool;
+ heap->pools[i] = pool;
+ }
+
+ heap->heap.debug_show = ion_system_heap_debug_show;
+ return &heap->heap;
+err_create_pool:
+ for (i = 0; i < num_orders; i++)
+ if (heap->pools[i])
+ ion_page_pool_destroy(heap->pools[i]);
+ kfree(heap->pools);
+err_alloc_pools:
+ kfree(heap);
+ return ERR_PTR(-ENOMEM);
+}
+
+void ion_system_heap_destroy(struct ion_heap *heap)
+{
+ struct ion_system_heap *sys_heap = container_of(heap,
+ struct ion_system_heap,
+ heap);
+ int i;
+
+ for (i = 0; i < num_orders; i++)
+ ion_page_pool_destroy(sys_heap->pools[i]);
+ kfree(sys_heap->pools);
+ kfree(sys_heap);
+}
+
+static int ion_system_contig_heap_allocate(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long len,
+ unsigned long align,
+ unsigned long flags)
+{
+ int order = get_order(len);
+ struct page *page;
+ struct sg_table *table;
+ unsigned long i;
+ int ret;
+
+ if (align > (PAGE_SIZE << order))
+ return -EINVAL;
+
+ page = alloc_pages(low_order_gfp_flags, order);
+ if (!page)
+ return -ENOMEM;
+
+ split_page(page, order);
+
+ len = PAGE_ALIGN(len);
+ for (i = len >> PAGE_SHIFT; i < (1 << order); i++)
+ __free_page(page + i);
+
+ table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!table) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = sg_alloc_table(table, 1, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ sg_set_page(table->sgl, page, len, 0);
+
+ buffer->priv_virt = table;
+
+ ion_pages_sync_for_device(NULL, page, len, DMA_BIDIRECTIONAL);
+
+ return 0;
+
+out:
+ for (i = 0; i < len >> PAGE_SHIFT; i++)
+ __free_page(page + i);
+ kfree(table);
+ return ret;
+}
+
+static void ion_system_contig_heap_free(struct ion_buffer *buffer)
+{
+ struct sg_table *table = buffer->priv_virt;
+ struct page *page = sg_page(table->sgl);
+ unsigned long pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
+ unsigned long i;
+
+ for (i = 0; i < pages; i++)
+ __free_page(page + i);
+ sg_free_table(table);
+ kfree(table);
+}
+
+static int ion_system_contig_heap_phys(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ ion_phys_addr_t *addr, size_t *len)
+{
+ struct sg_table *table = buffer->priv_virt;
+ struct page *page = sg_page(table->sgl);
+ *addr = page_to_phys(page);
+ *len = buffer->size;
+ return 0;
+}
+
+static struct sg_table *ion_system_contig_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return buffer->priv_virt;
+}
+
+static void ion_system_contig_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+}
+
+static struct ion_heap_ops kmalloc_ops = {
+ .allocate = ion_system_contig_heap_allocate,
+ .free = ion_system_contig_heap_free,
+ .phys = ion_system_contig_heap_phys,
+ .map_dma = ion_system_contig_heap_map_dma,
+ .unmap_dma = ion_system_contig_heap_unmap_dma,
+ .map_kernel = ion_heap_map_kernel,
+ .unmap_kernel = ion_heap_unmap_kernel,
+ .map_user = ion_heap_map_user,
+};
+
+struct ion_heap *ion_system_contig_heap_create(struct ion_platform_heap *unused)
+{
+ struct ion_heap *heap;
+
+ heap = kzalloc(sizeof(struct ion_heap), GFP_KERNEL);
+ if (!heap)
+ return ERR_PTR(-ENOMEM);
+ heap->ops = &kmalloc_ops;
+ heap->type = ION_HEAP_TYPE_SYSTEM_CONTIG;
+ return heap;
+}
+
+void ion_system_contig_heap_destroy(struct ion_heap *heap)
+{
+ kfree(heap);
+}
+
diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c
new file mode 100644
index 000000000000..654acb5c8eba
--- /dev/null
+++ b/drivers/staging/android/ion/ion_test.c
@@ -0,0 +1,282 @@
+/*
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) "ion-test: " fmt
+
+#include <linux/dma-buf.h>
+#include <linux/dma-direction.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include "ion.h"
+#include "../uapi/ion_test.h"
+
+#define u64_to_uptr(x) ((void __user *)(unsigned long)(x))
+
+struct ion_test_device {
+ struct miscdevice misc;
+};
+
+struct ion_test_data {
+ struct dma_buf *dma_buf;
+ struct device *dev;
+};
+
+static int ion_handle_test_dma(struct device *dev, struct dma_buf *dma_buf,
+ void __user *ptr, size_t offset, size_t size, bool write)
+{
+ int ret = 0;
+ struct dma_buf_attachment *attach;
+ struct sg_table *table;
+ pgprot_t pgprot = pgprot_writecombine(PAGE_KERNEL);
+ enum dma_data_direction dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ struct sg_page_iter sg_iter;
+ unsigned long offset_page;
+
+ attach = dma_buf_attach(dma_buf, dev);
+ if (IS_ERR(attach))
+ return PTR_ERR(attach);
+
+ table = dma_buf_map_attachment(attach, dir);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ offset_page = offset >> PAGE_SHIFT;
+ offset %= PAGE_SIZE;
+
+ for_each_sg_page(table->sgl, &sg_iter, table->nents, offset_page) {
+ struct page *page = sg_page_iter_page(&sg_iter);
+ void *vaddr = vmap(&page, 1, VM_MAP, pgprot);
+ size_t to_copy = PAGE_SIZE - offset;
+
+ to_copy = min(to_copy, size);
+ if (!vaddr) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (write)
+ ret = copy_from_user(vaddr + offset, ptr, to_copy);
+ else
+ ret = copy_to_user(ptr, vaddr + offset, to_copy);
+
+ vunmap(vaddr);
+ if (ret) {
+ ret = -EFAULT;
+ goto err;
+ }
+ size -= to_copy;
+ if (!size)
+ break;
+ ptr += to_copy;
+ offset = 0;
+ }
+
+err:
+ dma_buf_unmap_attachment(attach, table, dir);
+ dma_buf_detach(dma_buf, attach);
+ return ret;
+}
+
+static int ion_handle_test_kernel(struct dma_buf *dma_buf, void __user *ptr,
+ size_t offset, size_t size, bool write)
+{
+ int ret;
+ unsigned long page_offset = offset >> PAGE_SHIFT;
+ size_t copy_offset = offset % PAGE_SIZE;
+ size_t copy_size = size;
+ enum dma_data_direction dir = write ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+ if (offset > dma_buf->size || size > dma_buf->size - offset)
+ return -EINVAL;
+
+ ret = dma_buf_begin_cpu_access(dma_buf, offset, size, dir);
+ if (ret)
+ return ret;
+
+ while (copy_size > 0) {
+ size_t to_copy;
+ void *vaddr = dma_buf_kmap(dma_buf, page_offset);
+
+ if (!vaddr)
+ goto err;
+
+ to_copy = min_t(size_t, PAGE_SIZE - copy_offset, copy_size);
+
+ if (write)
+ ret = copy_from_user(vaddr + copy_offset, ptr, to_copy);
+ else
+ ret = copy_to_user(ptr, vaddr + copy_offset, to_copy);
+
+ dma_buf_kunmap(dma_buf, page_offset, vaddr);
+ if (ret) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ copy_size -= to_copy;
+ ptr += to_copy;
+ page_offset++;
+ copy_offset = 0;
+ }
+err:
+ dma_buf_end_cpu_access(dma_buf, offset, size, dir);
+ return ret;
+}
+
+static long ion_test_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct ion_test_data *test_data = filp->private_data;
+ int ret = 0;
+
+ union {
+ struct ion_test_rw_data test_rw;
+ } data;
+
+ if (_IOC_SIZE(cmd) > sizeof(data))
+ return -EINVAL;
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE)
+ if (copy_from_user(&data, (void __user *)arg, _IOC_SIZE(cmd)))
+ return -EFAULT;
+
+ switch (cmd) {
+ case ION_IOC_TEST_SET_FD:
+ {
+ struct dma_buf *dma_buf = NULL;
+ int fd = arg;
+
+ if (fd >= 0) {
+ dma_buf = dma_buf_get((int)arg);
+ if (IS_ERR(dma_buf))
+ return PTR_ERR(dma_buf);
+ }
+ if (test_data->dma_buf)
+ dma_buf_put(test_data->dma_buf);
+ test_data->dma_buf = dma_buf;
+ break;
+ }
+ case ION_IOC_TEST_DMA_MAPPING:
+ {
+ ret = ion_handle_test_dma(test_data->dev, test_data->dma_buf,
+ u64_to_uptr(data.test_rw.ptr),
+ data.test_rw.offset, data.test_rw.size,
+ data.test_rw.write);
+ break;
+ }
+ case ION_IOC_TEST_KERNEL_MAPPING:
+ {
+ ret = ion_handle_test_kernel(test_data->dma_buf,
+ u64_to_uptr(data.test_rw.ptr),
+ data.test_rw.offset, data.test_rw.size,
+ data.test_rw.write);
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+
+ if (_IOC_DIR(cmd) & _IOC_READ) {
+ if (copy_to_user((void __user *)arg, &data, sizeof(data)))
+ return -EFAULT;
+ }
+ return ret;
+}
+
+static int ion_test_open(struct inode *inode, struct file *file)
+{
+ struct ion_test_data *data;
+ struct miscdevice *miscdev = file->private_data;
+
+ data = kzalloc(sizeof(struct ion_test_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->dev = miscdev->parent;
+
+ file->private_data = data;
+
+ return 0;
+}
+
+static int ion_test_release(struct inode *inode, struct file *file)
+{
+ struct ion_test_data *data = file->private_data;
+
+ kfree(data);
+
+ return 0;
+}
+
+static const struct file_operations ion_test_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = ion_test_ioctl,
+ .compat_ioctl = ion_test_ioctl,
+ .open = ion_test_open,
+ .release = ion_test_release,
+};
+
+static int __init ion_test_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct ion_test_device *testdev;
+
+ testdev = devm_kzalloc(&pdev->dev, sizeof(struct ion_test_device),
+ GFP_KERNEL);
+ if (!testdev)
+ return -ENOMEM;
+
+ testdev->misc.minor = MISC_DYNAMIC_MINOR;
+ testdev->misc.name = "ion-test";
+ testdev->misc.fops = &ion_test_fops;
+ testdev->misc.parent = &pdev->dev;
+ ret = misc_register(&testdev->misc);
+ if (ret) {
+ pr_err("failed to register misc device.\n");
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, testdev);
+
+ return 0;
+}
+
+static struct platform_driver ion_test_platform_driver = {
+ .driver = {
+ .name = "ion-test",
+ },
+};
+
+static int __init ion_test_init(void)
+{
+ platform_device_register_simple("ion-test", -1, NULL, 0);
+ return platform_driver_probe(&ion_test_platform_driver, ion_test_probe);
+}
+
+static void __exit ion_test_exit(void)
+{
+ platform_driver_unregister(&ion_test_platform_driver);
+}
+
+module_init(ion_test_init);
+module_exit(ion_test_exit);
diff --git a/drivers/staging/android/ion/tegra/Makefile b/drivers/staging/android/ion/tegra/Makefile
new file mode 100644
index 000000000000..11cd003fb08f
--- /dev/null
+++ b/drivers/staging/android/ion/tegra/Makefile
@@ -0,0 +1 @@
+obj-y += tegra_ion.o
diff --git a/drivers/staging/android/ion/tegra/tegra_ion.c b/drivers/staging/android/ion/tegra/tegra_ion.c
new file mode 100644
index 000000000000..3474c65f87fa
--- /dev/null
+++ b/drivers/staging/android/ion/tegra/tegra_ion.c
@@ -0,0 +1,84 @@
+/*
+ * drivers/staging/android/ion/tegra/tegra_ion.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include "../ion.h"
+#include "../ion_priv.h"
+
+static struct ion_device *idev;
+static int num_heaps;
+static struct ion_heap **heaps;
+
+static int tegra_ion_probe(struct platform_device *pdev)
+{
+ struct ion_platform_data *pdata = pdev->dev.platform_data;
+ int err;
+ int i;
+
+ num_heaps = pdata->nr;
+
+ heaps = kzalloc(sizeof(struct ion_heap *) * pdata->nr, GFP_KERNEL);
+
+ idev = ion_device_create(NULL);
+ if (IS_ERR_OR_NULL(idev)) {
+ kfree(heaps);
+ return PTR_ERR(idev);
+ }
+
+ /* create the heaps as specified in the board file */
+ for (i = 0; i < num_heaps; i++) {
+ struct ion_platform_heap *heap_data = &pdata->heaps[i];
+
+ heaps[i] = ion_heap_create(heap_data);
+ if (IS_ERR_OR_NULL(heaps[i])) {
+ err = PTR_ERR(heaps[i]);
+ goto err;
+ }
+ ion_device_add_heap(idev, heaps[i]);
+ }
+ platform_set_drvdata(pdev, idev);
+ return 0;
+err:
+ for (i = 0; i < num_heaps; i++) {
+ if (heaps[i])
+ ion_heap_destroy(heaps[i]);
+ }
+ kfree(heaps);
+ return err;
+}
+
+static int tegra_ion_remove(struct platform_device *pdev)
+{
+ struct ion_device *idev = platform_get_drvdata(pdev);
+ int i;
+
+ ion_device_destroy(idev);
+ for (i = 0; i < num_heaps; i++)
+ ion_heap_destroy(heaps[i]);
+ kfree(heaps);
+ return 0;
+}
+
+static struct platform_driver ion_driver = {
+ .probe = tegra_ion_probe,
+ .remove = tegra_ion_remove,
+ .driver = { .name = "ion-tegra" }
+};
+
+module_platform_driver(ion_driver);
+
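tegra_ion_probe() above only consumes platform data; the heaps themselves are described by the board file. A hedged sketch of what such data might look like (the heap names, carveout base address and size are hypothetical; struct ion_platform_heap and struct ion_platform_data are taken from ion.h in this tree):

        static struct ion_platform_heap example_heaps[] = {
                {
                        .id   = ION_HEAP_TYPE_SYSTEM,
                        .type = ION_HEAP_TYPE_SYSTEM,
                        .name = "system",
                },
                {
                        .id   = ION_HEAP_TYPE_CARVEOUT,
                        .type = ION_HEAP_TYPE_CARVEOUT,
                        .name = "carveout",
                        .base = 0x80000000,            /* hypothetical reserved region */
                        .size = 16 * 1024 * 1024,
                },
        };

        static struct ion_platform_data example_ion_pdata = {
                .nr    = ARRAY_SIZE(example_heaps),
                .heaps = example_heaps,
        };

        /* Registered from board code so the "ion-tegra" driver above binds to it. */
        static struct platform_device example_ion_device = {
                .name = "ion-tegra",
                .id   = -1,
                .dev  = { .platform_data = &example_ion_pdata },
        };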
diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c
deleted file mode 100644
index 34519ea14b54..000000000000
--- a/drivers/staging/android/logger.c
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * drivers/misc/logger.c
- *
- * A Logging Subsystem
- *
- * Copyright (C) 2007-2008 Google, Inc.
- *
- * Robert Love <rlove@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#define pr_fmt(fmt) "logger: " fmt
-
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-#include <linux/uaccess.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/vmalloc.h>
-#include <linux/aio.h>
-#include "logger.h"
-
-#include <asm/ioctls.h>
-
-/**
- * struct logger_log - represents a specific log, such as 'main' or 'radio'
- * @buffer: The actual ring buffer
- * @misc: The "misc" device representing the log
- * @wq: The wait queue for @readers
- * @readers: This log's readers
- * @mutex: The mutex that protects the @buffer
- * @w_off: The current write head offset
- * @head: The head, or location that readers start reading at.
- * @size: The size of the log
- * @logs: The list of log channels
- *
- * This structure lives from module insertion until module removal, so it does
- * not need additional reference counting. The structure is protected by the
- * mutex 'mutex'.
- */
-struct logger_log {
- unsigned char *buffer;
- struct miscdevice misc;
- wait_queue_head_t wq;
- struct list_head readers;
- struct mutex mutex;
- size_t w_off;
- size_t head;
- size_t size;
- struct list_head logs;
-};
-
-static LIST_HEAD(log_list);
-
-
-/**
- * struct logger_reader - a logging device open for reading
- * @log: The associated log
- * @list: The associated entry in @logger_log's list
- * @r_off: The current read head offset.
- * @r_all: Reader can read all entries
- * @r_ver: Reader ABI version
- *
- * This object lives from open to release, so we don't need additional
- * reference counting. The structure is protected by log->mutex.
- */
-struct logger_reader {
- struct logger_log *log;
- struct list_head list;
- size_t r_off;
- bool r_all;
- int r_ver;
-};
-
-/* logger_offset - returns index 'n' into the log via (optimized) modulus */
-static size_t logger_offset(struct logger_log *log, size_t n)
-{
- return n & (log->size - 1);
-}
-
-
-/*
- * file_get_log - Given a file structure, return the associated log
- *
- * This isn't aesthetic. We have several goals:
- *
- * 1) Need to quickly obtain the associated log during an I/O operation
- * 2) Readers need to maintain state (logger_reader)
- * 3) Writers need to be very fast (open() should be a near no-op)
- *
- * In the reader case, we can trivially go file->logger_reader->logger_log.
- * For a writer, we don't want to maintain a logger_reader, so we just go
- * file->logger_log. Thus what file->private_data points at depends on whether
- * or not the file was opened for reading. This function hides that dirtiness.
- */
-static inline struct logger_log *file_get_log(struct file *file)
-{
- if (file->f_mode & FMODE_READ) {
- struct logger_reader *reader = file->private_data;
- return reader->log;
- } else
- return file->private_data;
-}
-
-/*
- * get_entry_header - returns a pointer to the logger_entry header within
- * 'log' starting at offset 'off'. A temporary logger_entry 'scratch' must
- * be provided. Typically the return value will be a pointer within
- * 'logger->buf'. However, a pointer to 'scratch' may be returned if
- * the log entry spans the end and beginning of the circular buffer.
- */
-static struct logger_entry *get_entry_header(struct logger_log *log,
- size_t off, struct logger_entry *scratch)
-{
- size_t len = min(sizeof(struct logger_entry), log->size - off);
- if (len != sizeof(struct logger_entry)) {
- memcpy(((void *) scratch), log->buffer + off, len);
- memcpy(((void *) scratch) + len, log->buffer,
- sizeof(struct logger_entry) - len);
- return scratch;
- }
-
- return (struct logger_entry *) (log->buffer + off);
-}
-
-/*
- * get_entry_msg_len - Grabs the length of the message of the entry
- * starting from from 'off'.
- *
- * An entry length is 2 bytes (16 bits) in host endian order.
- * In the log, the length does not include the size of the log entry structure.
- * This function returns the size including the log entry structure.
- *
- * Caller needs to hold log->mutex.
- */
-static __u32 get_entry_msg_len(struct logger_log *log, size_t off)
-{
- struct logger_entry scratch;
- struct logger_entry *entry;
-
- entry = get_entry_header(log, off, &scratch);
- return entry->len;
-}
-
-static size_t get_user_hdr_len(int ver)
-{
- if (ver < 2)
- return sizeof(struct user_logger_entry_compat);
- else
- return sizeof(struct logger_entry);
-}
-
-static ssize_t copy_header_to_user(int ver, struct logger_entry *entry,
- char __user *buf)
-{
- void *hdr;
- size_t hdr_len;
- struct user_logger_entry_compat v1;
-
- if (ver < 2) {
- v1.len = entry->len;
- v1.__pad = 0;
- v1.pid = entry->pid;
- v1.tid = entry->tid;
- v1.sec = entry->sec;
- v1.nsec = entry->nsec;
- hdr = &v1;
- hdr_len = sizeof(struct user_logger_entry_compat);
- } else {
- hdr = entry;
- hdr_len = sizeof(struct logger_entry);
- }
-
- return copy_to_user(buf, hdr, hdr_len);
-}
-
-/*
- * do_read_log_to_user - reads exactly 'count' bytes from 'log' into the
- * user-space buffer 'buf'. Returns 'count' on success.
- *
- * Caller must hold log->mutex.
- */
-static ssize_t do_read_log_to_user(struct logger_log *log,
- struct logger_reader *reader,
- char __user *buf,
- size_t count)
-{
- struct logger_entry scratch;
- struct logger_entry *entry;
- size_t len;
- size_t msg_start;
-
- /*
- * First, copy the header to userspace, using the version of
- * the header requested
- */
- entry = get_entry_header(log, reader->r_off, &scratch);
- if (copy_header_to_user(reader->r_ver, entry, buf))
- return -EFAULT;
-
- count -= get_user_hdr_len(reader->r_ver);
- buf += get_user_hdr_len(reader->r_ver);
- msg_start = logger_offset(log,
- reader->r_off + sizeof(struct logger_entry));
-
- /*
- * We read from the msg in two disjoint operations. First, we read from
- * the current msg head offset up to 'count' bytes or to the end of
- * the log, whichever comes first.
- */
- len = min(count, log->size - msg_start);
- if (copy_to_user(buf, log->buffer + msg_start, len))
- return -EFAULT;
-
- /*
- * Second, we read any remaining bytes, starting back at the head of
- * the log.
- */
- if (count != len)
- if (copy_to_user(buf + len, log->buffer, count - len))
- return -EFAULT;
-
- reader->r_off = logger_offset(log, reader->r_off +
- sizeof(struct logger_entry) + count);
-
- return count + get_user_hdr_len(reader->r_ver);
-}
-
-/*
- * get_next_entry_by_uid - Starting at 'off', returns an offset into
- * 'log->buffer' which contains the first entry readable by 'euid'
- */
-static size_t get_next_entry_by_uid(struct logger_log *log,
- size_t off, kuid_t euid)
-{
- while (off != log->w_off) {
- struct logger_entry *entry;
- struct logger_entry scratch;
- size_t next_len;
-
- entry = get_entry_header(log, off, &scratch);
-
- if (uid_eq(entry->euid, euid))
- return off;
-
- next_len = sizeof(struct logger_entry) + entry->len;
- off = logger_offset(log, off + next_len);
- }
-
- return off;
-}
-
-/*
- * logger_read - our log's read() method
- *
- * Behavior:
- *
- * - O_NONBLOCK works
- * - If there are no log entries to read, blocks until log is written to
- * - Atomically reads exactly one log entry
- *
- * Will set errno to EINVAL if read
- * buffer is insufficient to hold next entry.
- */
-static ssize_t logger_read(struct file *file, char __user *buf,
- size_t count, loff_t *pos)
-{
- struct logger_reader *reader = file->private_data;
- struct logger_log *log = reader->log;
- ssize_t ret;
- DEFINE_WAIT(wait);
-
-start:
- while (1) {
- mutex_lock(&log->mutex);
-
- prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE);
-
- ret = (log->w_off == reader->r_off);
- mutex_unlock(&log->mutex);
- if (!ret)
- break;
-
- if (file->f_flags & O_NONBLOCK) {
- ret = -EAGAIN;
- break;
- }
-
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- schedule();
- }
-
- finish_wait(&log->wq, &wait);
- if (ret)
- return ret;
-
- mutex_lock(&log->mutex);
-
- if (!reader->r_all)
- reader->r_off = get_next_entry_by_uid(log,
- reader->r_off, current_euid());
-
- /* is there still something to read or did we race? */
- if (unlikely(log->w_off == reader->r_off)) {
- mutex_unlock(&log->mutex);
- goto start;
- }
-
- /* get the size of the next entry */
- ret = get_user_hdr_len(reader->r_ver) +
- get_entry_msg_len(log, reader->r_off);
- if (count < ret) {
- ret = -EINVAL;
- goto out;
- }
-
- /* get exactly one entry from the log */
- ret = do_read_log_to_user(log, reader, buf, ret);
-
-out:
- mutex_unlock(&log->mutex);
-
- return ret;
-}
-
-/*
- * get_next_entry - return the offset of the first valid entry at least 'len'
- * bytes after 'off'.
- *
- * Caller must hold log->mutex.
- */
-static size_t get_next_entry(struct logger_log *log, size_t off, size_t len)
-{
- size_t count = 0;
-
- do {
- size_t nr = sizeof(struct logger_entry) +
- get_entry_msg_len(log, off);
- off = logger_offset(log, off + nr);
- count += nr;
- } while (count < len);
-
- return off;
-}
-
-/*
- * is_between - is a < c < b, accounting for wrapping of a, b, and c
- * positions in the buffer
- *
- * That is, if a<b, check for c between a and b
- * and if a>b, check for c outside (not between) a and b
- *
- * |------- a xxxxxxxx b --------|
- * c^
- *
- * |xxxxx b --------- a xxxxxxxxx|
- * c^
- * or c^
- */
-static inline int is_between(size_t a, size_t b, size_t c)
-{
- if (a < b) {
- /* is c between a and b? */
- if (a < c && c <= b)
- return 1;
- } else {
- /* is c outside of b through a? */
- if (c <= b || a < c)
- return 1;
- }
-
- return 0;
-}
-
-/*
- * fix_up_readers - walk the list of all readers and "fix up" any who were
- * lapped by the writer; also do the same for the default "start head".
- * We do this by "pulling forward" the readers and start head to the first
- * entry after the new write head.
- *
- * The caller needs to hold log->mutex.
- */
-static void fix_up_readers(struct logger_log *log, size_t len)
-{
- size_t old = log->w_off;
- size_t new = logger_offset(log, old + len);
- struct logger_reader *reader;
-
- if (is_between(old, new, log->head))
- log->head = get_next_entry(log, log->head, len);
-
- list_for_each_entry(reader, &log->readers, list)
- if (is_between(old, new, reader->r_off))
- reader->r_off = get_next_entry(log, reader->r_off, len);
-}
-
-/*
- * do_write_log - writes 'len' bytes from 'buf' to 'log'
- *
- * The caller needs to hold log->mutex.
- */
-static void do_write_log(struct logger_log *log, const void *buf, size_t count)
-{
- size_t len;
-
- len = min(count, log->size - log->w_off);
- memcpy(log->buffer + log->w_off, buf, len);
-
- if (count != len)
- memcpy(log->buffer, buf + len, count - len);
-
- log->w_off = logger_offset(log, log->w_off + count);
-
-}
-
-/*
- * do_write_log_user - writes 'len' bytes from the user-space buffer 'buf' to
- * the log 'log'
- *
- * The caller needs to hold log->mutex.
- *
- * Returns 'count' on success, negative error code on failure.
- */
-static ssize_t do_write_log_from_user(struct logger_log *log,
- const void __user *buf, size_t count)
-{
- size_t len;
-
- len = min(count, log->size - log->w_off);
- if (len && copy_from_user(log->buffer + log->w_off, buf, len))
- return -EFAULT;
-
- if (count != len)
- if (copy_from_user(log->buffer, buf + len, count - len))
- /*
- * Note that by not updating w_off, this abandons the
- * portion of the new entry that *was* successfully
- * copied, just above. This is intentional to avoid
- * message corruption from missing fragments.
- */
- return -EFAULT;
-
- log->w_off = logger_offset(log, log->w_off + count);
-
- return count;
-}
-
-/*
- * logger_aio_write - our write method, implementing support for write(),
- * writev(), and aio_write(). Writes are our fast path, and we try to optimize
- * them above all else.
- */
-static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t ppos)
-{
- struct logger_log *log = file_get_log(iocb->ki_filp);
- size_t orig;
- struct logger_entry header;
- struct timespec now;
- ssize_t ret = 0;
-
- now = current_kernel_time();
-
- header.pid = current->tgid;
- header.tid = current->pid;
- header.sec = now.tv_sec;
- header.nsec = now.tv_nsec;
- header.euid = current_euid();
- header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD);
- header.hdr_size = sizeof(struct logger_entry);
-
- /* null writes succeed, return zero */
- if (unlikely(!header.len))
- return 0;
-
- mutex_lock(&log->mutex);
-
- orig = log->w_off;
-
- /*
- * Fix up any readers, pulling them forward to the first readable
- * entry after (what will be) the new write offset. We do this now
- * because if we partially fail, we can end up with clobbered log
- * entries that encroach on readable buffer.
- */
- fix_up_readers(log, sizeof(struct logger_entry) + header.len);
-
- do_write_log(log, &header, sizeof(struct logger_entry));
-
- while (nr_segs-- > 0) {
- size_t len;
- ssize_t nr;
-
- /* figure out how much of this vector we can keep */
- len = min_t(size_t, iov->iov_len, header.len - ret);
-
- /* write out this segment's payload */
- nr = do_write_log_from_user(log, iov->iov_base, len);
- if (unlikely(nr < 0)) {
- log->w_off = orig;
- mutex_unlock(&log->mutex);
- return nr;
- }
-
- iov++;
- ret += nr;
- }
-
- mutex_unlock(&log->mutex);
-
- /* wake up any blocked readers */
- wake_up_interruptible(&log->wq);
-
- return ret;
-}
-
-static struct logger_log *get_log_from_minor(int minor)
-{
- struct logger_log *log;
-
- list_for_each_entry(log, &log_list, logs)
- if (log->misc.minor == minor)
- return log;
- return NULL;
-}
-
-/*
- * logger_open - the log's open() file operation
- *
- * Note how near a no-op this is in the write-only case. Keep it that way!
- */
-static int logger_open(struct inode *inode, struct file *file)
-{
- struct logger_log *log;
- int ret;
-
- ret = nonseekable_open(inode, file);
- if (ret)
- return ret;
-
- log = get_log_from_minor(MINOR(inode->i_rdev));
- if (!log)
- return -ENODEV;
-
- if (file->f_mode & FMODE_READ) {
- struct logger_reader *reader;
-
- reader = kmalloc(sizeof(struct logger_reader), GFP_KERNEL);
- if (!reader)
- return -ENOMEM;
-
- reader->log = log;
- reader->r_ver = 1;
- reader->r_all = in_egroup_p(inode->i_gid) ||
- capable(CAP_SYSLOG);
-
- INIT_LIST_HEAD(&reader->list);
-
- mutex_lock(&log->mutex);
- reader->r_off = log->head;
- list_add_tail(&reader->list, &log->readers);
- mutex_unlock(&log->mutex);
-
- file->private_data = reader;
- } else
- file->private_data = log;
-
- return 0;
-}
-
-/*
- * logger_release - the log's release file operation
- *
- * Note this is a total no-op in the write-only case. Keep it that way!
- */
-static int logger_release(struct inode *ignored, struct file *file)
-{
- if (file->f_mode & FMODE_READ) {
- struct logger_reader *reader = file->private_data;
- struct logger_log *log = reader->log;
-
- mutex_lock(&log->mutex);
- list_del(&reader->list);
- mutex_unlock(&log->mutex);
-
- kfree(reader);
- }
-
- return 0;
-}
-
-/*
- * logger_poll - the log's poll file operation, for poll/select/epoll
- *
- * Note we always return POLLOUT, because you can always write() to the log.
- * Note also that, strictly speaking, a return value of POLLIN does not
- * guarantee that the log is readable without blocking, as there is a small
- * chance that the writer can lap the reader in the interim between poll()
- * returning and the read() request.
- */
-static unsigned int logger_poll(struct file *file, poll_table *wait)
-{
- struct logger_reader *reader;
- struct logger_log *log;
- unsigned int ret = POLLOUT | POLLWRNORM;
-
- if (!(file->f_mode & FMODE_READ))
- return ret;
-
- reader = file->private_data;
- log = reader->log;
-
- poll_wait(file, &log->wq, wait);
-
- mutex_lock(&log->mutex);
- if (!reader->r_all)
- reader->r_off = get_next_entry_by_uid(log,
- reader->r_off, current_euid());
-
- if (log->w_off != reader->r_off)
- ret |= POLLIN | POLLRDNORM;
- mutex_unlock(&log->mutex);
-
- return ret;
-}
-
-static long logger_set_version(struct logger_reader *reader, void __user *arg)
-{
- int version;
- if (copy_from_user(&version, arg, sizeof(int)))
- return -EFAULT;
-
- if ((version < 1) || (version > 2))
- return -EINVAL;
-
- reader->r_ver = version;
- return 0;
-}
-
-static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct logger_log *log = file_get_log(file);
- struct logger_reader *reader;
- long ret = -EINVAL;
- void __user *argp = (void __user *) arg;
-
- mutex_lock(&log->mutex);
-
- switch (cmd) {
- case LOGGER_GET_LOG_BUF_SIZE:
- ret = log->size;
- break;
- case LOGGER_GET_LOG_LEN:
- if (!(file->f_mode & FMODE_READ)) {
- ret = -EBADF;
- break;
- }
- reader = file->private_data;
- if (log->w_off >= reader->r_off)
- ret = log->w_off - reader->r_off;
- else
- ret = (log->size - reader->r_off) + log->w_off;
- break;
- case LOGGER_GET_NEXT_ENTRY_LEN:
- if (!(file->f_mode & FMODE_READ)) {
- ret = -EBADF;
- break;
- }
- reader = file->private_data;
-
- if (!reader->r_all)
- reader->r_off = get_next_entry_by_uid(log,
- reader->r_off, current_euid());
-
- if (log->w_off != reader->r_off)
- ret = get_user_hdr_len(reader->r_ver) +
- get_entry_msg_len(log, reader->r_off);
- else
- ret = 0;
- break;
- case LOGGER_FLUSH_LOG:
- if (!(file->f_mode & FMODE_WRITE)) {
- ret = -EBADF;
- break;
- }
- if (!(in_egroup_p(file->f_dentry->d_inode->i_gid) ||
- capable(CAP_SYSLOG))) {
- ret = -EPERM;
- break;
- }
- list_for_each_entry(reader, &log->readers, list)
- reader->r_off = log->w_off;
- log->head = log->w_off;
- ret = 0;
- break;
- case LOGGER_GET_VERSION:
- if (!(file->f_mode & FMODE_READ)) {
- ret = -EBADF;
- break;
- }
- reader = file->private_data;
- ret = reader->r_ver;
- break;
- case LOGGER_SET_VERSION:
- if (!(file->f_mode & FMODE_READ)) {
- ret = -EBADF;
- break;
- }
- reader = file->private_data;
- ret = logger_set_version(reader, argp);
- break;
- }
-
- mutex_unlock(&log->mutex);
-
- return ret;
-}
-
-static const struct file_operations logger_fops = {
- .owner = THIS_MODULE,
- .read = logger_read,
- .aio_write = logger_aio_write,
- .poll = logger_poll,
- .unlocked_ioctl = logger_ioctl,
- .compat_ioctl = logger_ioctl,
- .open = logger_open,
- .release = logger_release,
-};
-
-/*
- * Log size must must be a power of two, and greater than
- * (LOGGER_ENTRY_MAX_PAYLOAD + sizeof(struct logger_entry)).
- */
-static int __init create_log(char *log_name, int size)
-{
- int ret = 0;
- struct logger_log *log;
- unsigned char *buffer;
-
- buffer = vmalloc(size);
- if (buffer == NULL)
- return -ENOMEM;
-
- log = kzalloc(sizeof(struct logger_log), GFP_KERNEL);
- if (log == NULL) {
- ret = -ENOMEM;
- goto out_free_buffer;
- }
- log->buffer = buffer;
-
- log->misc.minor = MISC_DYNAMIC_MINOR;
- log->misc.name = kstrdup(log_name, GFP_KERNEL);
- if (log->misc.name == NULL) {
- ret = -ENOMEM;
- goto out_free_log;
- }
-
- log->misc.fops = &logger_fops;
- log->misc.parent = NULL;
-
- init_waitqueue_head(&log->wq);
- INIT_LIST_HEAD(&log->readers);
- mutex_init(&log->mutex);
- log->w_off = 0;
- log->head = 0;
- log->size = size;
-
- INIT_LIST_HEAD(&log->logs);
- list_add_tail(&log->logs, &log_list);
-
- /* finally, initialize the misc device for this log */
- ret = misc_register(&log->misc);
- if (unlikely(ret)) {
- pr_err("failed to register misc device for log '%s'!\n",
- log->misc.name);
- goto out_free_log;
- }
-
- pr_info("created %luK log '%s'\n",
- (unsigned long) log->size >> 10, log->misc.name);
-
- return 0;
-
-out_free_log:
- kfree(log);
-
-out_free_buffer:
- vfree(buffer);
- return ret;
-}
-
-static int __init logger_init(void)
-{
- int ret;
-
- ret = create_log(LOGGER_LOG_MAIN, 256*1024);
- if (unlikely(ret))
- goto out;
-
- ret = create_log(LOGGER_LOG_EVENTS, 256*1024);
- if (unlikely(ret))
- goto out;
-
- ret = create_log(LOGGER_LOG_RADIO, 256*1024);
- if (unlikely(ret))
- goto out;
-
- ret = create_log(LOGGER_LOG_SYSTEM, 256*1024);
- if (unlikely(ret))
- goto out;
-
-out:
- return ret;
-}
-
-static void __exit logger_exit(void)
-{
- struct logger_log *current_log, *next_log;
-
- list_for_each_entry_safe(current_log, next_log, &log_list, logs) {
- /* we have to delete all the entry inside log_list */
- misc_deregister(&current_log->misc);
- vfree(current_log->buffer);
- kfree(current_log->misc.name);
- list_del(&current_log->logs);
- kfree(current_log);
- }
-}
-
-
-device_initcall(logger_init);
-module_exit(logger_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Love, <rlove@google.com>");
-MODULE_DESCRIPTION("Android Logger");
diff --git a/drivers/staging/android/logger.h b/drivers/staging/android/logger.h
deleted file mode 100644
index 70af7d805dff..000000000000
--- a/drivers/staging/android/logger.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/* include/linux/logger.h
- *
- * Copyright (C) 2007-2008 Google, Inc.
- * Author: Robert Love <rlove@android.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef _LINUX_LOGGER_H
-#define _LINUX_LOGGER_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-/**
- * struct user_logger_entry_compat - defines a single entry that is given to a logger
- * @len: The length of the payload
- * @__pad: Two bytes of padding that appear to be required
- * @pid: The generating process' process ID
- * @tid: The generating process' thread ID
- * @sec: The number of seconds that have elapsed since the Epoch
- * @nsec: The number of nanoseconds that have elapsed since @sec
- * @msg: The message that is to be logged
- *
- * The userspace structure for version 1 of the logger_entry ABI.
- * This structure is returned to userspace unless the caller requests
- * an upgrade to a newer ABI version.
- */
-struct user_logger_entry_compat {
- __u16 len;
- __u16 __pad;
- __s32 pid;
- __s32 tid;
- __s32 sec;
- __s32 nsec;
- char msg[0];
-};
-
-/**
- * struct logger_entry - defines a single entry that is given to a logger
- * @len: The length of the payload
- * @hdr_size: sizeof(struct logger_entry_v2)
- * @pid: The generating process' process ID
- * @tid: The generating process' thread ID
- * @sec: The number of seconds that have elapsed since the Epoch
- * @nsec: The number of nanoseconds that have elapsed since @sec
- * @euid: Effective UID of logger
- * @msg: The message that is to be logged
- *
- * The structure for version 2 of the logger_entry ABI.
- * This structure is returned to userspace if ioctl(LOGGER_SET_VERSION)
- * is called with version >= 2
- */
-struct logger_entry {
- __u16 len;
- __u16 hdr_size;
- __s32 pid;
- __s32 tid;
- __s32 sec;
- __s32 nsec;
- kuid_t euid;
- char msg[0];
-};
-
-#define LOGGER_LOG_RADIO "log_radio" /* radio-related messages */
-#define LOGGER_LOG_EVENTS "log_events" /* system/hardware events */
-#define LOGGER_LOG_SYSTEM "log_system" /* system/framework messages */
-#define LOGGER_LOG_MAIN "log_main" /* everything else */
-
-#define LOGGER_ENTRY_MAX_PAYLOAD 4076
-
-#define __LOGGERIO 0xAE
-
-#define LOGGER_GET_LOG_BUF_SIZE _IO(__LOGGERIO, 1) /* size of log */
-#define LOGGER_GET_LOG_LEN _IO(__LOGGERIO, 2) /* used log len */
-#define LOGGER_GET_NEXT_ENTRY_LEN _IO(__LOGGERIO, 3) /* next entry len */
-#define LOGGER_FLUSH_LOG _IO(__LOGGERIO, 4) /* flush log */
-#define LOGGER_GET_VERSION _IO(__LOGGERIO, 5) /* abi version */
-#define LOGGER_SET_VERSION _IO(__LOGGERIO, 6) /* abi version */
-
-#endif /* _LINUX_LOGGER_H */
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index fe74494868ef..a56e0894f668 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -39,7 +39,6 @@
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/rcupdate.h>
-#include <linux/profile.h>
#include <linux/notifier.h>
static uint32_t lowmem_debug_level = 1;
@@ -74,6 +73,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
int tasksize;
int i;
short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
+ int minfree = 0;
int selected_tasksize = 0;
short selected_oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
@@ -86,8 +86,8 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
if (lowmem_minfree_size < array_size)
array_size = lowmem_minfree_size;
for (i = 0; i < array_size; i++) {
- if (other_free < lowmem_minfree[i] &&
- other_file < lowmem_minfree[i]) {
+ minfree = lowmem_minfree[i];
+ if (other_free < minfree && other_file < minfree) {
min_score_adj = lowmem_adj[i];
break;
}
@@ -144,13 +144,22 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
selected = p;
selected_tasksize = tasksize;
selected_oom_score_adj = oom_score_adj;
- lowmem_print(2, "select %d (%s), adj %hd, size %d, to kill\n",
- p->pid, p->comm, oom_score_adj, tasksize);
+ lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
+ p->comm, p->pid, oom_score_adj, tasksize);
}
if (selected) {
- lowmem_print(1, "send sigkill to %d (%s), adj %hd, size %d\n",
- selected->pid, selected->comm,
- selected_oom_score_adj, selected_tasksize);
+ lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
+ " to free %ldkB on behalf of '%s' (%d) because\n" \
+ " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
+ " Free memory is %ldkB above reserved\n",
+ selected->comm, selected->pid,
+ selected_oom_score_adj,
+ selected_tasksize * (long)(PAGE_SIZE / 1024),
+ current->comm, current->pid,
+ other_file * (long)(PAGE_SIZE / 1024),
+ minfree * (long)(PAGE_SIZE / 1024),
+ min_score_adj,
+ other_free * (long)(PAGE_SIZE / 1024));
lowmem_deathpending_timeout = jiffies + HZ;
send_sig(SIGKILL, selected, 0);
set_tsk_thread_flag(selected, TIF_MEMDIE);
@@ -178,9 +187,94 @@ static void __exit lowmem_exit(void)
unregister_shrinker(&lowmem_shrinker);
}
+#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
+static short lowmem_oom_adj_to_oom_score_adj(short oom_adj)
+{
+ if (oom_adj == OOM_ADJUST_MAX)
+ return OOM_SCORE_ADJ_MAX;
+ else
+ return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
+}
+
+static void lowmem_autodetect_oom_adj_values(void)
+{
+ int i;
+ short oom_adj;
+ short oom_score_adj;
+ int array_size = ARRAY_SIZE(lowmem_adj);
+
+ if (lowmem_adj_size < array_size)
+ array_size = lowmem_adj_size;
+
+ if (array_size <= 0)
+ return;
+
+ oom_adj = lowmem_adj[array_size - 1];
+ if (oom_adj > OOM_ADJUST_MAX)
+ return;
+
+ oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
+ if (oom_score_adj <= OOM_ADJUST_MAX)
+ return;
+
+ lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n");
+ for (i = 0; i < array_size; i++) {
+ oom_adj = lowmem_adj[i];
+ oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
+ lowmem_adj[i] = oom_score_adj;
+ lowmem_print(1, "oom_adj %d => oom_score_adj %d\n",
+ oom_adj, oom_score_adj);
+ }
+}
+
+static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp)
+{
+ int ret;
+
+ ret = param_array_ops.set(val, kp);
+
+ /* HACK: Autodetect oom_adj values in lowmem_adj array */
+ lowmem_autodetect_oom_adj_values();
+
+ return ret;
+}
+
+static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp)
+{
+ return param_array_ops.get(buffer, kp);
+}
+
+static void lowmem_adj_array_free(void *arg)
+{
+ param_array_ops.free(arg);
+}
+
+static struct kernel_param_ops lowmem_adj_array_ops = {
+ .set = lowmem_adj_array_set,
+ .get = lowmem_adj_array_get,
+ .free = lowmem_adj_array_free,
+};
+
+static const struct kparam_array __param_arr_adj = {
+ .max = ARRAY_SIZE(lowmem_adj),
+ .num = &lowmem_adj_size,
+ .ops = &param_ops_short,
+ .elemsize = sizeof(lowmem_adj[0]),
+ .elem = lowmem_adj,
+};
+#endif
+
module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
+#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
+__module_param_call(MODULE_PARAM_PREFIX, adj,
+ &lowmem_adj_array_ops,
+ .arr = &__param_arr_adj,
+ S_IRUGO | S_IWUSR, -1);
+__MODULE_PARM_TYPE(adj, "array of short");
+#else
module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
S_IRUGO | S_IWUSR);
+#endif
module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
S_IRUGO | S_IWUSR);
module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
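The autodetect path above treats the adj array as legacy oom_adj values only when the largest entry still fits the oom_adj range but its converted value does not; the conversion itself is oom_adj * OOM_SCORE_ADJ_MAX / -OOM_DISABLE, i.e. oom_adj * 1000 / 17, with OOM_ADJUST_MAX (15) mapped straight to OOM_SCORE_ADJ_MAX. A few worked values:

        /* oom_adj -> oom_score_adj, per lowmem_oom_adj_to_oom_score_adj() above */
        /*  0 ->  0 * 1000 / 17 =   0                                            */
        /*  1 ->  1 * 1000 / 17 =  58                                            */
        /*  6 ->  6 * 1000 / 17 = 352                                            */
        /* 12 -> 12 * 1000 / 17 = 705                                            */
        /* 15 -> OOM_ADJUST_MAX, special-cased to OOM_SCORE_ADJ_MAX (1000)       */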
diff --git a/drivers/staging/android/sw_sync.c b/drivers/staging/android/sw_sync.c
index 4928f93bdf3d..820af5cfb830 100644
--- a/drivers/staging/android/sw_sync.c
+++ b/drivers/staging/android/sw_sync.c
@@ -97,6 +97,7 @@ static void sw_sync_pt_value_str(struct sync_pt *sync_pt,
char *str, int size)
{
struct sw_sync_pt *pt = (struct sw_sync_pt *)sync_pt;
+
snprintf(str, size, "%d", pt->value);
}
@@ -156,6 +157,7 @@ static int sw_sync_open(struct inode *inode, struct file *file)
static int sw_sync_release(struct inode *inode, struct file *file)
{
struct sw_sync_timeline *obj = file->private_data;
+
sync_timeline_destroy(&obj->obj);
return 0;
}
diff --git a/drivers/staging/android/sw_sync.h b/drivers/staging/android/sw_sync.h
index 585040be5f18..1a50669ec8a9 100644
--- a/drivers/staging/android/sw_sync.h
+++ b/drivers/staging/android/sw_sync.h
@@ -18,10 +18,9 @@
#define _LINUX_SW_SYNC_H
#include <linux/types.h>
-
-#ifdef __KERNEL__
-
+#include <linux/kconfig.h>
#include "sync.h"
+#include "uapi/sw_sync.h"
struct sw_sync_timeline {
struct sync_timeline obj;
@@ -35,24 +34,26 @@ struct sw_sync_pt {
u32 value;
};
+#if IS_ENABLED(CONFIG_SW_SYNC)
struct sw_sync_timeline *sw_sync_timeline_create(const char *name);
void sw_sync_timeline_inc(struct sw_sync_timeline *obj, u32 inc);
struct sync_pt *sw_sync_pt_create(struct sw_sync_timeline *obj, u32 value);
-
-#endif /* __KERNEL __ */
-
-struct sw_sync_create_fence_data {
- __u32 value;
- char name[32];
- __s32 fence; /* fd of new fence */
-};
-
-#define SW_SYNC_IOC_MAGIC 'W'
-
-#define SW_SYNC_IOC_CREATE_FENCE _IOWR(SW_SYNC_IOC_MAGIC, 0,\
- struct sw_sync_create_fence_data)
-#define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
-
+#else
+static inline struct sw_sync_timeline *sw_sync_timeline_create(const char *name)
+{
+ return NULL;
+}
+
+static inline void sw_sync_timeline_inc(struct sw_sync_timeline *obj, u32 inc)
+{
+}
+
+static inline struct sync_pt *sw_sync_pt_create(struct sw_sync_timeline *obj,
+ u32 value)
+{
+ return NULL;
+}
+#endif /* IS_ENABLED(CONFIG_SW_SYNC) */
#endif /* _LINUX_SW_SYNC_H */
diff --git a/drivers/staging/android/sync.c b/drivers/staging/android/sync.c
index 3893a3574769..61e624905916 100644
--- a/drivers/staging/android/sync.c
+++ b/drivers/staging/android/sync.c
@@ -79,27 +79,27 @@ static void sync_timeline_free(struct kref *kref)
container_of(kref, struct sync_timeline, kref);
unsigned long flags;
- if (obj->ops->release_obj)
- obj->ops->release_obj(obj);
-
spin_lock_irqsave(&sync_timeline_list_lock, flags);
list_del(&obj->sync_timeline_list);
spin_unlock_irqrestore(&sync_timeline_list_lock, flags);
+ if (obj->ops->release_obj)
+ obj->ops->release_obj(obj);
+
kfree(obj);
}
void sync_timeline_destroy(struct sync_timeline *obj)
{
obj->destroyed = true;
+ smp_wmb();
/*
- * If this is not the last reference, signal any children
- * that their parent is going away.
+ * signal any children that their parent is going away.
*/
+ sync_timeline_signal(obj);
- if (!kref_put(&obj->kref, sync_timeline_free))
- sync_timeline_signal(obj);
+ kref_put(&obj->kref, sync_timeline_free);
}
EXPORT_SYMBOL(sync_timeline_destroy);
@@ -384,6 +384,7 @@ static void sync_fence_detach_pts(struct sync_fence *fence)
list_for_each_safe(pos, n, &fence->pt_list_head) {
struct sync_pt *pt = container_of(pos, struct sync_pt, pt_list);
+
sync_timeline_remove_pt(pt);
}
}
@@ -394,6 +395,7 @@ static void sync_fence_free_pts(struct sync_fence *fence)
list_for_each_safe(pos, n, &fence->pt_list_head) {
struct sync_pt *pt = container_of(pos, struct sync_pt, pt_list);
+
sync_pt_free(pt);
}
}
@@ -827,6 +829,7 @@ static long sync_fence_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct sync_fence *fence = file->private_data;
+
switch (cmd) {
case SYNC_IOC_WAIT:
return sync_fence_ioctl_wait(fence, arg);
@@ -856,18 +859,21 @@ static const char *sync_status_str(int status)
static void sync_print_pt(struct seq_file *s, struct sync_pt *pt, bool fence)
{
int status = pt->status;
+
seq_printf(s, " %s%spt %s",
fence ? pt->parent->name : "",
fence ? "_" : "",
sync_status_str(status));
if (pt->status) {
struct timeval tv = ktime_to_timeval(pt->timestamp);
+
seq_printf(s, "@%ld.%06ld", tv.tv_sec, tv.tv_usec);
}
if (pt->parent->ops->timeline_value_str &&
pt->parent->ops->pt_value_str) {
char value[64];
+
pt->parent->ops->pt_value_str(pt, value, sizeof(value));
seq_printf(s, ": %s", value);
if (fence) {
@@ -892,6 +898,7 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj)
if (obj->ops->timeline_value_str) {
char value[64];
+
obj->ops->timeline_value_str(obj, value, sizeof(value));
seq_printf(s, ": %s", value);
} else if (obj->ops->print_obj) {
@@ -1001,6 +1008,7 @@ void sync_dump(void)
for (i = 0; i < s.count; i += DUMP_CHUNK) {
if ((s.count - i) > DUMP_CHUNK) {
char c = s.buf[i + DUMP_CHUNK];
+
s.buf[i + DUMP_CHUNK] = 0;
pr_cont("%s", s.buf + i);
s.buf[i + DUMP_CHUNK] = c;
diff --git a/drivers/staging/android/sync.h b/drivers/staging/android/sync.h
index 38ea986dc70f..75da9e85ac69 100644
--- a/drivers/staging/android/sync.h
+++ b/drivers/staging/android/sync.h
@@ -14,14 +14,14 @@
#define _LINUX_SYNC_H
#include <linux/types.h>
-#ifdef __KERNEL__
-
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
+#include "uapi/sync.h"
+
struct sync_timeline;
struct sync_pt;
struct sync_fence;
@@ -341,86 +341,4 @@ int sync_fence_cancel_async(struct sync_fence *fence,
*/
int sync_fence_wait(struct sync_fence *fence, long timeout);
-#endif /* __KERNEL__ */
-
-/**
- * struct sync_merge_data - data passed to merge ioctl
- * @fd2: file descriptor of second fence
- * @name: name of new fence
- * @fence: returns the fd of the new fence to userspace
- */
-struct sync_merge_data {
- __s32 fd2; /* fd of second fence */
- char name[32]; /* name of new fence */
- __s32 fence; /* fd on newly created fence */
-};
-
-/**
- * struct sync_pt_info - detailed sync_pt information
- * @len: length of sync_pt_info including any driver_data
- * @obj_name: name of parent sync_timeline
- * @driver_name: name of driver implmenting the parent
- * @status: status of the sync_pt 0:active 1:signaled <0:error
- * @timestamp_ns: timestamp of status change in nanoseconds
- * @driver_data: any driver dependant data
- */
-struct sync_pt_info {
- __u32 len;
- char obj_name[32];
- char driver_name[32];
- __s32 status;
- __u64 timestamp_ns;
-
- __u8 driver_data[0];
-};
-
-/**
- * struct sync_fence_info_data - data returned from fence info ioctl
- * @len: ioctl caller writes the size of the buffer its passing in.
- * ioctl returns length of sync_fence_data reutnred to userspace
- * including pt_info.
- * @name: name of fence
- * @status: status of fence. 1: signaled 0:active <0:error
- * @pt_info: a sync_pt_info struct for every sync_pt in the fence
- */
-struct sync_fence_info_data {
- __u32 len;
- char name[32];
- __s32 status;
-
- __u8 pt_info[0];
-};
-
-#define SYNC_IOC_MAGIC '>'
-
-/**
- * DOC: SYNC_IOC_WAIT - wait for a fence to signal
- *
- * pass timeout in milliseconds. Waits indefinitely timeout < 0.
- */
-#define SYNC_IOC_WAIT _IOW(SYNC_IOC_MAGIC, 0, __s32)
-
-/**
- * DOC: SYNC_IOC_MERGE - merge two fences
- *
- * Takes a struct sync_merge_data. Creates a new fence containing copies of
- * the sync_pts in both the calling fd and sync_merge_data.fd2. Returns the
- * new fence's fd in sync_merge_data.fence
- */
-#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 1, struct sync_merge_data)
-
-/**
- * DOC: SYNC_IOC_FENCE_INFO - get detailed information on a fence
- *
- * Takes a struct sync_fence_info_data with extra space allocated for pt_info.
- * Caller should write the size of the buffer into len. On return, len is
- * updated to reflect the total size of the sync_fence_info_data including
- * pt_info.
- *
- * pt_info is a buffer containing sync_pt_infos for every sync_pt in the fence.
- * To itterate over the sync_pt_infos, use the sync_pt_info.len field.
- */
-#define SYNC_IOC_FENCE_INFO _IOWR(SYNC_IOC_MAGIC, 2,\
- struct sync_fence_info_data)
-
#endif /* _LINUX_SYNC_H */
diff --git a/drivers/staging/android/timed_gpio.c b/drivers/staging/android/timed_gpio.c
index e81451425c01..ae9966d1f7cc 100644
--- a/drivers/staging/android/timed_gpio.c
+++ b/drivers/staging/android/timed_gpio.c
@@ -51,6 +51,7 @@ static int gpio_get_time(struct timed_output_dev *dev)
if (hrtimer_active(&data->timer)) {
ktime_t r = hrtimer_get_remaining(&data->timer);
struct timeval t = ktime_to_timeval(r);
+
return t.tv_sec * 1000 + t.tv_usec / 1000;
} else
return 0;
diff --git a/drivers/staging/android/uapi/ashmem.h b/drivers/staging/android/uapi/ashmem.h
new file mode 100644
index 000000000000..13df42d200b7
--- /dev/null
+++ b/drivers/staging/android/uapi/ashmem.h
@@ -0,0 +1,48 @@
+/*
+ * drivers/staging/android/uapi/ashmem.h
+ *
+ * Copyright 2008 Google Inc.
+ * Author: Robert Love
+ *
+ * This file is dual licensed. It may be redistributed and/or modified
+ * under the terms of the Apache 2.0 License OR version 2 of the GNU
+ * General Public License.
+ */
+
+#ifndef _UAPI_LINUX_ASHMEM_H
+#define _UAPI_LINUX_ASHMEM_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define ASHMEM_NAME_LEN 256
+
+#define ASHMEM_NAME_DEF "dev/ashmem"
+
+/* Return values from ASHMEM_PIN: Was the mapping purged while unpinned? */
+#define ASHMEM_NOT_PURGED 0
+#define ASHMEM_WAS_PURGED 1
+
+/* Return values from ASHMEM_GET_PIN_STATUS: Is the mapping pinned? */
+#define ASHMEM_IS_UNPINNED 0
+#define ASHMEM_IS_PINNED 1
+
+struct ashmem_pin {
+ __u32 offset; /* offset into region, in bytes, page-aligned */
+ __u32 len; /* length forward from offset, in bytes, page-aligned */
+};
+
+#define __ASHMEMIOC 0x77
+
+#define ASHMEM_SET_NAME _IOW(__ASHMEMIOC, 1, char[ASHMEM_NAME_LEN])
+#define ASHMEM_GET_NAME _IOR(__ASHMEMIOC, 2, char[ASHMEM_NAME_LEN])
+#define ASHMEM_SET_SIZE _IOW(__ASHMEMIOC, 3, size_t)
+#define ASHMEM_GET_SIZE _IO(__ASHMEMIOC, 4)
+#define ASHMEM_SET_PROT_MASK _IOW(__ASHMEMIOC, 5, unsigned long)
+#define ASHMEM_GET_PROT_MASK _IO(__ASHMEMIOC, 6)
+#define ASHMEM_PIN _IOW(__ASHMEMIOC, 7, struct ashmem_pin)
+#define ASHMEM_UNPIN _IOW(__ASHMEMIOC, 8, struct ashmem_pin)
+#define ASHMEM_GET_PIN_STATUS _IO(__ASHMEMIOC, 9)
+#define ASHMEM_PURGE_ALL_CACHES _IO(__ASHMEMIOC, 10)
+
+#endif /* _UAPI_LINUX_ASHMEM_H */
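From userspace the flow is: open /dev/ashmem, set the name and size before the first mmap, map the region, and use the pin ioctls to tell the kernel which page-aligned ranges may be reclaimed. A minimal sketch, assuming the /dev/ashmem node and the uapi header above are available (error handling trimmed):

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <sys/mman.h>
        #include <unistd.h>
        #include "ashmem.h"     /* the uapi header above */

        int main(void)
        {
                size_t size = 4096;
                int fd = open("/dev/ashmem", O_RDWR);
                struct ashmem_pin pin = { .offset = 0, .len = size };
                void *p;

                /* Name and size must be set before the region is mapped. */
                ioctl(fd, ASHMEM_SET_NAME, "example-region");
                ioctl(fd, ASHMEM_SET_SIZE, size);

                p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
                memcpy(p, "hello", 6);

                /* Unpinned pages may be reclaimed; ASHMEM_PIN reports whether
                 * the range was purged while it was unpinned. */
                ioctl(fd, ASHMEM_UNPIN, &pin);
                if (ioctl(fd, ASHMEM_PIN, &pin) == ASHMEM_WAS_PURGED)
                        printf("range was purged while unpinned\n");

                munmap(p, size);
                close(fd);
                return 0;
        }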
diff --git a/drivers/staging/android/uapi/ion.h b/drivers/staging/android/uapi/ion.h
new file mode 100644
index 000000000000..f09e7c154d69
--- /dev/null
+++ b/drivers/staging/android/uapi/ion.h
@@ -0,0 +1,196 @@
+/*
+ * drivers/staging/android/uapi/ion.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_ION_H
+#define _UAPI_LINUX_ION_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+typedef int ion_user_handle_t;
+
+/**
+ * enum ion_heap_types - list of all possible types of heaps
+ * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc
+ * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
+ * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved
+ * carveout heap, allocations are physically
+ * contiguous
+ * @ION_HEAP_TYPE_DMA: memory allocated via DMA API
+ * @ION_NUM_HEAPS: helper for iterating over heaps, a bit mask
+ * is used to identify the heaps, so only 32
+ * total heap types are supported
+ */
+enum ion_heap_type {
+ ION_HEAP_TYPE_SYSTEM,
+ ION_HEAP_TYPE_SYSTEM_CONTIG,
+ ION_HEAP_TYPE_CARVEOUT,
+ ION_HEAP_TYPE_CHUNK,
+ ION_HEAP_TYPE_DMA,
+	ION_HEAP_TYPE_CUSTOM, /* must be last so device-specific heaps are
+				 always at the end of this enum */
+ ION_NUM_HEAPS = 16,
+};
+
+#define ION_HEAP_SYSTEM_MASK (1 << ION_HEAP_TYPE_SYSTEM)
+#define ION_HEAP_SYSTEM_CONTIG_MASK (1 << ION_HEAP_TYPE_SYSTEM_CONTIG)
+#define ION_HEAP_CARVEOUT_MASK (1 << ION_HEAP_TYPE_CARVEOUT)
+#define ION_HEAP_TYPE_DMA_MASK (1 << ION_HEAP_TYPE_DMA)
+
+#define ION_NUM_HEAP_IDS sizeof(unsigned int) * 8
+
+/**
+ * allocation flags - the lower 16 bits are used by core ion, the upper 16
+ * bits are reserved for use by the heaps themselves.
+ */
+#define ION_FLAG_CACHED 1 /* mappings of this buffer should be
+ cached, ion will do cache
+ maintenance when the buffer is
+ mapped for dma */
+#define ION_FLAG_CACHED_NEEDS_SYNC 2	/* mappings of this buffer will be created
+ at mmap time, if this is set
+ caches must be managed manually */
+
+/**
+ * DOC: Ion Userspace API
+ *
+ * create a client by opening /dev/ion
+ * most operations are handled via the following ioctls
+ *
+ */
+
+/**
+ * struct ion_allocation_data - metadata passed from userspace for allocations
+ * @len: size of the allocation
+ * @align: required alignment of the allocation
+ * @heap_id_mask: mask of heap ids to allocate from
+ * @flags: flags passed to heap
+ * @handle: pointer that will be populated with a cookie to use to
+ * refer to this allocation
+ *
+ * Provided by userspace as an argument to the ioctl
+ */
+struct ion_allocation_data {
+ size_t len;
+ size_t align;
+ unsigned int heap_id_mask;
+ unsigned int flags;
+ ion_user_handle_t handle;
+};
+
+/**
+ * struct ion_fd_data - metadata passed to/from userspace for a handle/fd pair
+ * @handle: a handle
+ * @fd: a file descriptor representing that handle
+ *
+ * For ION_IOC_SHARE or ION_IOC_MAP userspace populates the handle field with
+ * the handle returned from ion alloc, and the kernel returns the file
+ * descriptor to share or map in the fd field. For ION_IOC_IMPORT, userspace
+ * provides the file descriptor and the kernel returns the handle.
+ */
+struct ion_fd_data {
+ ion_user_handle_t handle;
+ int fd;
+};
+
+/**
+ * struct ion_handle_data - a handle passed to/from the kernel
+ * @handle: a handle
+ */
+struct ion_handle_data {
+ ion_user_handle_t handle;
+};
+
+/**
+ * struct ion_custom_data - metadata passed to/from userspace for a custom ioctl
+ * @cmd: the custom ioctl function to call
+ * @arg: additional data to pass to the custom ioctl, typically a user
+ * pointer to a predefined structure
+ *
+ * This works just like the regular cmd and arg fields of an ioctl.
+ */
+struct ion_custom_data {
+ unsigned int cmd;
+ unsigned long arg;
+};
+
+#define ION_IOC_MAGIC 'I'
+
+/**
+ * DOC: ION_IOC_ALLOC - allocate memory
+ *
+ * Takes an ion_allocation_data struct and returns it with the handle field
+ * populated with the opaque handle for the allocation.
+ */
+#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \
+ struct ion_allocation_data)
+
+/**
+ * DOC: ION_IOC_FREE - free memory
+ *
+ * Takes an ion_handle_data struct and frees the handle.
+ */
+#define ION_IOC_FREE _IOWR(ION_IOC_MAGIC, 1, struct ion_handle_data)
+
+/**
+ * DOC: ION_IOC_MAP - get a file descriptor to mmap
+ *
+ * Takes an ion_fd_data struct with the handle field populated with a valid
+ * opaque handle. Returns the struct with the fd field set to a file
+ * descriptor open in the current address space. This file descriptor
+ * can then be used as an argument to mmap.
+ */
+#define ION_IOC_MAP _IOWR(ION_IOC_MAGIC, 2, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_SHARE - creates a file descriptor to use to share an allocation
+ *
+ * Takes an ion_fd_data struct with the handle field populated with a valid
+ * opaque handle. Returns the struct with the fd field set to a file
+ * descriptor open in the current address space. This file descriptor
+ * can then be passed to another process. The corresponding opaque handle can
+ * be retrieved via ION_IOC_IMPORT.
+ */
+#define ION_IOC_SHARE _IOWR(ION_IOC_MAGIC, 4, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_IMPORT - imports a shared file descriptor
+ *
+ * Takes an ion_fd_data struct with the fd field populated with a valid file
+ * descriptor obtained from ION_IOC_SHARE and returns the struct with the handle
+ * field set to the corresponding opaque handle.
+ */
+#define ION_IOC_IMPORT _IOWR(ION_IOC_MAGIC, 5, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_SYNC - syncs a shared file descriptor to memory
+ *
+ * Deprecated in favor of using the dma_buf API correctly (syncing
+ * will happen automatically when the buffer is mapped to a device).
+ * If necessary, it should be used after touching a cached buffer from the
+ * CPU; this will make the buffer in memory coherent.
+ */
+#define ION_IOC_SYNC _IOWR(ION_IOC_MAGIC, 7, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl
+ *
+ * Takes the argument of the architecture specific ioctl to call and
+ * passes appropriate userdata for that ioctl
+ */
+#define ION_IOC_CUSTOM _IOWR(ION_IOC_MAGIC, 6, struct ion_custom_data)
+
+#endif /* _UAPI_LINUX_ION_H */
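Putting the DOC blocks above together, a userspace client opens /dev/ion, allocates with ION_IOC_ALLOC, turns the opaque handle into a shareable dma-buf fd with ION_IOC_SHARE (or ION_IOC_MAP for mmap), and frees the handle when done. A minimal sketch, assuming the /dev/ion node and the structures above (error handling trimmed):

        #include <fcntl.h>
        #include <sys/ioctl.h>
        #include <sys/mman.h>
        #include <unistd.h>
        #include "ion.h"        /* the uapi header above */

        int main(void)
        {
                int ion_fd = open("/dev/ion", O_RDWR);
                struct ion_allocation_data alloc = {
                        .len          = 4096,
                        .align        = 4096,
                        .heap_id_mask = ION_HEAP_SYSTEM_MASK,
                        .flags        = ION_FLAG_CACHED,
                };
                struct ion_fd_data share;
                struct ion_handle_data free_data;
                void *p;

                ioctl(ion_fd, ION_IOC_ALLOC, &alloc);

                /* Get a dma-buf fd that can be mmapped here or passed to another
                 * process (which would use ION_IOC_IMPORT on its own client). */
                share.handle = alloc.handle;
                ioctl(ion_fd, ION_IOC_SHARE, &share);

                p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                         share.fd, 0);
                /* ... use the buffer ... */
                munmap(p, 4096);
                close(share.fd);

                free_data.handle = alloc.handle;
                ioctl(ion_fd, ION_IOC_FREE, &free_data);
                close(ion_fd);
                return 0;
        }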
diff --git a/drivers/staging/android/uapi/ion_test.h b/drivers/staging/android/uapi/ion_test.h
new file mode 100644
index 000000000000..ffef06f63133
--- /dev/null
+++ b/drivers/staging/android/uapi/ion_test.h
@@ -0,0 +1,70 @@
+/*
+ * drivers/staging/android/uapi/ion_test.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_ION_TEST_H
+#define _UAPI_LINUX_ION_TEST_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * struct ion_test_rw_data - metadata passed to the kernel to read or write a handle
+ * @ptr: a pointer to an area at least as large as size
+ * @offset: offset into the ion buffer to start reading
+ * @size: size to read or write
+ * @write: 1 to write, 0 to read
+ */
+struct ion_test_rw_data {
+ __u64 ptr;
+ __u64 offset;
+ __u64 size;
+ int write;
+ int __padding;
+};
+
+#define ION_IOC_MAGIC 'I'
+
+/**
+ * DOC: ION_IOC_TEST_SET_FD - attach a dma-buf fd to the test driver
+ *
+ * Attaches a dma-buf fd to the test driver. Passing a second fd or -1 will
+ * release the first fd.
+ */
+#define ION_IOC_TEST_SET_FD \
+ _IO(ION_IOC_MAGIC, 0xf0)
+
+/**
+ * DOC: ION_IOC_TEST_DMA_MAPPING - read or write memory from a handle as DMA
+ *
+ * Reads or writes the memory from a handle using an uncached mapping. Can be
+ * used by unit tests to emulate a DMA engine as closely as possible. Only
+ * expected to be used for debugging and testing, may not always be available.
+ */
+#define ION_IOC_TEST_DMA_MAPPING \
+ _IOW(ION_IOC_MAGIC, 0xf1, struct ion_test_rw_data)
+
+/**
+ * DOC: ION_IOC_TEST_KERNEL_MAPPING - read or write memory from a handle
+ *
+ * Reads or writes the memory from a handle using a kernel mapping. Can be
+ * used by unit tests to test heap map_kernel functions. Only expected to be
+ * used for debugging and testing, may not always be available.
+ */
+#define ION_IOC_TEST_KERNEL_MAPPING \
+ _IOW(ION_IOC_MAGIC, 0xf2, struct ion_test_rw_data)
+
+
+#endif /* _UAPI_LINUX_ION_TEST_H */
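The test driver above is exercised from userspace by creating an ion buffer, sharing it as a dma-buf fd, attaching that fd to /dev/ion-test with ION_IOC_TEST_SET_FD, and then reading or writing through either mapping path. A hedged sketch, assuming the /dev/ion-test node created by the misc device above (error handling trimmed):

        #include <fcntl.h>
        #include <stdlib.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <unistd.h>
        #include "ion_test.h"   /* the uapi header above */

        /* Write a pattern through the kernel-mapping path and read it back
         * through the DMA-mapping path. */
        static int exercise_ion_test(int dmabuf_fd, size_t size)
        {
                int test_fd = open("/dev/ion-test", O_RDWR);
                char *out = malloc(size), *in = malloc(size);
                struct ion_test_rw_data rw = {0};
                int ret;

                memset(out, 0xa5, size);
                ioctl(test_fd, ION_IOC_TEST_SET_FD, dmabuf_fd);

                rw.ptr = (__u64)(unsigned long)out;
                rw.offset = 0;
                rw.size = size;
                rw.write = 1;
                ret = ioctl(test_fd, ION_IOC_TEST_KERNEL_MAPPING, &rw);

                rw.ptr = (__u64)(unsigned long)in;
                rw.write = 0;
                ret |= ioctl(test_fd, ION_IOC_TEST_DMA_MAPPING, &rw);

                ret |= memcmp(out, in, size);

                ioctl(test_fd, ION_IOC_TEST_SET_FD, -1);        /* release */
                close(test_fd);
                free(out);
                free(in);
                return ret;
        }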
diff --git a/drivers/staging/android/uapi/sw_sync.h b/drivers/staging/android/uapi/sw_sync.h
new file mode 100644
index 000000000000..9b5d4869505c
--- /dev/null
+++ b/drivers/staging/android/uapi/sw_sync.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_SW_SYNC_H
+#define _UAPI_LINUX_SW_SYNC_H
+
+#include <linux/types.h>
+
+struct sw_sync_create_fence_data {
+ __u32 value;
+ char name[32];
+ __s32 fence; /* fd of new fence */
+};
+
+#define SW_SYNC_IOC_MAGIC 'W'
+
+#define SW_SYNC_IOC_CREATE_FENCE _IOWR(SW_SYNC_IOC_MAGIC, 0,\
+ struct sw_sync_create_fence_data)
+#define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
+
+#endif /* _UAPI_LINUX_SW_SYNC_H */
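
A hedged sketch of the sw_sync interface above, assuming each open of a "/dev/sw_sync" node (the node name is an assumption) creates a software timeline:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "sw_sync.h"

int main(void)
{
	struct sw_sync_create_fence_data data;
	__u32 step = 1;
	int timeline = open("/dev/sw_sync", O_RDWR);

	if (timeline < 0)
		return 1;

	memset(&data, 0, sizeof(data));
	data.value = 1;	/* fence signals once the timeline reaches 1 */
	strncpy(data.name, "example_fence", sizeof(data.name) - 1);
	if (ioctl(timeline, SW_SYNC_IOC_CREATE_FENCE, &data) < 0)
		return 1;

	/* Advance the timeline by one, which signals the fence created above. */
	ioctl(timeline, SW_SYNC_IOC_INC, &step);

	close(data.fence);
	close(timeline);
	return 0;
}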
diff --git a/drivers/staging/android/uapi/sync.h b/drivers/staging/android/uapi/sync.h
new file mode 100644
index 000000000000..57fdaadc4b04
--- /dev/null
+++ b/drivers/staging/android/uapi/sync.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_SYNC_H
+#define _UAPI_LINUX_SYNC_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * struct sync_merge_data - data passed to merge ioctl
+ * @fd2: file descriptor of second fence
+ * @name: name of new fence
+ * @fence: returns the fd of the new fence to userspace
+ */
+struct sync_merge_data {
+ __s32 fd2; /* fd of second fence */
+ char name[32]; /* name of new fence */
+ __s32 fence; /* fd of newly created fence */
+};
+
+/**
+ * struct sync_pt_info - detailed sync_pt information
+ * @len: length of sync_pt_info including any driver_data
+ * @obj_name: name of parent sync_timeline
+ * @driver_name: name of driver implementing the parent
+ * @status: status of the sync_pt 0:active 1:signaled <0:error
+ * @timestamp_ns: timestamp of status change in nanoseconds
+ * @driver_data: any driver dependent data
+ */
+struct sync_pt_info {
+ __u32 len;
+ char obj_name[32];
+ char driver_name[32];
+ __s32 status;
+ __u64 timestamp_ns;
+
+ __u8 driver_data[0];
+};
+
+/**
+ * struct sync_fence_info_data - data returned from fence info ioctl
+ * @len: ioctl caller writes the size of the buffer it is passing in.
+ * ioctl returns the length of the sync_fence_info_data returned to
+ * userspace, including pt_info.
+ * @name: name of fence
+ * @status: status of fence. 1: signaled 0:active <0:error
+ * @pt_info: a sync_pt_info struct for every sync_pt in the fence
+ */
+struct sync_fence_info_data {
+ __u32 len;
+ char name[32];
+ __s32 status;
+
+ __u8 pt_info[0];
+};
+
+#define SYNC_IOC_MAGIC '>'
+
+/**
+ * DOC: SYNC_IOC_WAIT - wait for a fence to signal
+ *
+ * Pass timeout in milliseconds. Waits indefinitely if timeout < 0.
+ */
+#define SYNC_IOC_WAIT _IOW(SYNC_IOC_MAGIC, 0, __s32)
+
+/**
+ * DOC: SYNC_IOC_MERGE - merge two fences
+ *
+ * Takes a struct sync_merge_data. Creates a new fence containing copies of
+ * the sync_pts in both the calling fd and sync_merge_data.fd2. Returns the
+ * new fence's fd in sync_merge_data.fence
+ */
+#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 1, struct sync_merge_data)
+
+/**
+ * DOC: SYNC_IOC_FENCE_INFO - get detailed information on a fence
+ *
+ * Takes a struct sync_fence_info_data with extra space allocated for pt_info.
+ * Caller should write the size of the buffer into len. On return, len is
+ * updated to reflect the total size of the sync_fence_info_data including
+ * pt_info.
+ *
+ * pt_info is a buffer containing sync_pt_infos for every sync_pt in the fence.
+ * To iterate over the sync_pt_infos, use the sync_pt_info.len field.
+ */
+#define SYNC_IOC_FENCE_INFO _IOWR(SYNC_IOC_MAGIC, 2,\
+ struct sync_fence_info_data)
+
+#endif /* _UAPI_LINUX_SYNC_H */
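
A short userspace sketch of the SYNC_IOC_FENCE_INFO iteration described above; the 4096-byte buffer size is an arbitrary choice, not something mandated by the interface:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "sync.h"

static void dump_fence(int fence_fd)
{
	const uint32_t buf_len = 4096;	/* must be large enough for all pt_info records */
	struct sync_fence_info_data *info = calloc(1, buf_len);
	uint8_t *pt, *end;

	if (!info)
		return;

	info->len = buf_len;	/* caller writes the buffer size into len */
	if (ioctl(fence_fd, SYNC_IOC_FENCE_INFO, info) < 0)
		goto out;

	printf("fence %s, status %d\n", info->name, info->status);

	/* Walk the variable-length sync_pt_info records using their len fields. */
	pt = info->pt_info;
	end = (uint8_t *)info + info->len;
	while (pt < end) {
		struct sync_pt_info *p = (struct sync_pt_info *)pt;

		printf("  pt %s/%s, status %d\n",
		       p->obj_name, p->driver_name, p->status);
		pt += p->len;
	}
out:
	free(info);
}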
diff --git a/drivers/switch/Kconfig b/drivers/switch/Kconfig
new file mode 100644
index 000000000000..19404b6f7778
--- /dev/null
+++ b/drivers/switch/Kconfig
@@ -0,0 +1,15 @@
+menuconfig SWITCH
+ tristate "Switch class support"
+ help
+ Say Y here to enable switch class support. This allows
+ monitoring switches by userspace via sysfs and uevent.
+
+if SWITCH
+
+config SWITCH_GPIO
+ tristate "GPIO Swith support"
+ depends on GPIOLIB
+ help
+ Say Y here to enable GPIO based switch support.
+
+endif # SWITCH
diff --git a/drivers/switch/Makefile b/drivers/switch/Makefile
new file mode 100644
index 000000000000..f7606ed4a719
--- /dev/null
+++ b/drivers/switch/Makefile
@@ -0,0 +1,4 @@
+# Switch Class Driver
+obj-$(CONFIG_SWITCH) += switch_class.o
+obj-$(CONFIG_SWITCH_GPIO) += switch_gpio.o
+
diff --git a/drivers/switch/switch_class.c b/drivers/switch/switch_class.c
new file mode 100644
index 000000000000..e373b625806e
--- /dev/null
+++ b/drivers/switch/switch_class.c
@@ -0,0 +1,174 @@
+/*
+ * drivers/switch/switch_class.c
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/switch.h>
+
+struct class *switch_class;
+static atomic_t device_count;
+
+static ssize_t state_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct switch_dev *sdev = (struct switch_dev *)
+ dev_get_drvdata(dev);
+
+ if (sdev->print_state) {
+ int ret = sdev->print_state(sdev, buf);
+ if (ret >= 0)
+ return ret;
+ }
+ return sprintf(buf, "%d\n", sdev->state);
+}
+
+static ssize_t name_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct switch_dev *sdev = (struct switch_dev *)
+ dev_get_drvdata(dev);
+
+ if (sdev->print_name) {
+ int ret = sdev->print_name(sdev, buf);
+ if (ret >= 0)
+ return ret;
+ }
+ return sprintf(buf, "%s\n", sdev->name);
+}
+
+static DEVICE_ATTR(state, S_IRUGO, state_show, NULL);
+static DEVICE_ATTR(name, S_IRUGO, name_show, NULL);
+
+void switch_set_state(struct switch_dev *sdev, int state)
+{
+ char name_buf[120];
+ char state_buf[120];
+ char *prop_buf;
+ char *envp[3];
+ int env_offset = 0;
+ int length;
+
+ if (sdev->state != state) {
+ sdev->state = state;
+
+ prop_buf = (char *)get_zeroed_page(GFP_KERNEL);
+ if (prop_buf) {
+ length = name_show(sdev->dev, NULL, prop_buf);
+ if (length > 0) {
+ if (prop_buf[length - 1] == '\n')
+ prop_buf[length - 1] = 0;
+ snprintf(name_buf, sizeof(name_buf),
+ "SWITCH_NAME=%s", prop_buf);
+ envp[env_offset++] = name_buf;
+ }
+ length = state_show(sdev->dev, NULL, prop_buf);
+ if (length > 0) {
+ if (prop_buf[length - 1] == '\n')
+ prop_buf[length - 1] = 0;
+ snprintf(state_buf, sizeof(state_buf),
+ "SWITCH_STATE=%s", prop_buf);
+ envp[env_offset++] = state_buf;
+ }
+ envp[env_offset] = NULL;
+ kobject_uevent_env(&sdev->dev->kobj, KOBJ_CHANGE, envp);
+ free_page((unsigned long)prop_buf);
+ } else {
+ printk(KERN_ERR "out of memory in switch_set_state\n");
+ kobject_uevent(&sdev->dev->kobj, KOBJ_CHANGE);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(switch_set_state);
+
+static int create_switch_class(void)
+{
+ if (!switch_class) {
+ switch_class = class_create(THIS_MODULE, "switch");
+ if (IS_ERR(switch_class))
+ return PTR_ERR(switch_class);
+ atomic_set(&device_count, 0);
+ }
+
+ return 0;
+}
+
+int switch_dev_register(struct switch_dev *sdev)
+{
+ int ret;
+
+ if (!switch_class) {
+ ret = create_switch_class();
+ if (ret < 0)
+ return ret;
+ }
+
+ sdev->index = atomic_inc_return(&device_count);
+ sdev->dev = device_create(switch_class, NULL,
+ MKDEV(0, sdev->index), NULL, sdev->name);
+ if (IS_ERR(sdev->dev))
+ return PTR_ERR(sdev->dev);
+
+ ret = device_create_file(sdev->dev, &dev_attr_state);
+ if (ret < 0)
+ goto err_create_file_1;
+ ret = device_create_file(sdev->dev, &dev_attr_name);
+ if (ret < 0)
+ goto err_create_file_2;
+
+ dev_set_drvdata(sdev->dev, sdev);
+ sdev->state = 0;
+ return 0;
+
+err_create_file_2:
+ device_remove_file(sdev->dev, &dev_attr_state);
+err_create_file_1:
+ device_destroy(switch_class, MKDEV(0, sdev->index));
+ printk(KERN_ERR "switch: Failed to register driver %s\n", sdev->name);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(switch_dev_register);
+
+void switch_dev_unregister(struct switch_dev *sdev)
+{
+ device_remove_file(sdev->dev, &dev_attr_name);
+ device_remove_file(sdev->dev, &dev_attr_state);
+ device_destroy(switch_class, MKDEV(0, sdev->index));
+ dev_set_drvdata(sdev->dev, NULL);
+}
+EXPORT_SYMBOL_GPL(switch_dev_unregister);
+
+static int __init switch_class_init(void)
+{
+ return create_switch_class();
+}
+
+static void __exit switch_class_exit(void)
+{
+ class_destroy(switch_class);
+}
+
+module_init(switch_class_init);
+module_exit(switch_class_exit);
+
+MODULE_AUTHOR("Mike Lockwood <lockwood@android.com>");
+MODULE_DESCRIPTION("Switch class driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/switch/switch_gpio.c b/drivers/switch/switch_gpio.c
new file mode 100644
index 000000000000..621d62d20c99
--- /dev/null
+++ b/drivers/switch/switch_gpio.c
@@ -0,0 +1,172 @@
+/*
+ * drivers/switch/switch_gpio.c
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/switch.h>
+#include <linux/workqueue.h>
+#include <linux/gpio.h>
+
+struct gpio_switch_data {
+ struct switch_dev sdev;
+ unsigned gpio;
+ const char *name_on;
+ const char *name_off;
+ const char *state_on;
+ const char *state_off;
+ int irq;
+ struct work_struct work;
+};
+
+static void gpio_switch_work(struct work_struct *work)
+{
+ int state;
+ struct gpio_switch_data *data =
+ container_of(work, struct gpio_switch_data, work);
+
+ state = gpio_get_value(data->gpio);
+ switch_set_state(&data->sdev, state);
+}
+
+static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
+{
+ struct gpio_switch_data *switch_data =
+ (struct gpio_switch_data *)dev_id;
+
+ schedule_work(&switch_data->work);
+ return IRQ_HANDLED;
+}
+
+static ssize_t switch_gpio_print_state(struct switch_dev *sdev, char *buf)
+{
+ struct gpio_switch_data *switch_data =
+ container_of(sdev, struct gpio_switch_data, sdev);
+ const char *state;
+ if (switch_get_state(sdev))
+ state = switch_data->state_on;
+ else
+ state = switch_data->state_off;
+
+ if (state)
+ return sprintf(buf, "%s\n", state);
+ return -1;
+}
+
+static int gpio_switch_probe(struct platform_device *pdev)
+{
+ struct gpio_switch_platform_data *pdata = pdev->dev.platform_data;
+ struct gpio_switch_data *switch_data;
+ int ret = 0;
+
+ if (!pdata)
+ return -EBUSY;
+
+ switch_data = kzalloc(sizeof(struct gpio_switch_data), GFP_KERNEL);
+ if (!switch_data)
+ return -ENOMEM;
+
+ switch_data->sdev.name = pdata->name;
+ switch_data->gpio = pdata->gpio;
+ switch_data->name_on = pdata->name_on;
+ switch_data->name_off = pdata->name_off;
+ switch_data->state_on = pdata->state_on;
+ switch_data->state_off = pdata->state_off;
+ switch_data->sdev.print_state = switch_gpio_print_state;
+
+ ret = switch_dev_register(&switch_data->sdev);
+ if (ret < 0)
+ goto err_switch_dev_register;
+
+ ret = gpio_request(switch_data->gpio, pdev->name);
+ if (ret < 0)
+ goto err_request_gpio;
+
+ ret = gpio_direction_input(switch_data->gpio);
+ if (ret < 0)
+ goto err_set_gpio_input;
+
+ INIT_WORK(&switch_data->work, gpio_switch_work);
+
+ switch_data->irq = gpio_to_irq(switch_data->gpio);
+ if (switch_data->irq < 0) {
+ ret = switch_data->irq;
+ goto err_detect_irq_num_failed;
+ }
+
+ ret = request_irq(switch_data->irq, gpio_irq_handler,
+ IRQF_TRIGGER_LOW, pdev->name, switch_data);
+ if (ret < 0)
+ goto err_request_irq;
+
+ /* Perform initial detection */
+ gpio_switch_work(&switch_data->work);
+
+ return 0;
+
+err_request_irq:
+err_detect_irq_num_failed:
+err_set_gpio_input:
+ gpio_free(switch_data->gpio);
+err_request_gpio:
+ switch_dev_unregister(&switch_data->sdev);
+err_switch_dev_register:
+ kfree(switch_data);
+
+ return ret;
+}
+
+static int gpio_switch_remove(struct platform_device *pdev)
+{
+ struct gpio_switch_data *switch_data = platform_get_drvdata(pdev);
+
+ cancel_work_sync(&switch_data->work);
+ gpio_free(switch_data->gpio);
+ switch_dev_unregister(&switch_data->sdev);
+ kfree(switch_data);
+
+ return 0;
+}
+
+static struct platform_driver gpio_switch_driver = {
+ .probe = gpio_switch_probe,
+ .remove = gpio_switch_remove,
+ .driver = {
+ .name = "switch-gpio",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init gpio_switch_init(void)
+{
+ return platform_driver_register(&gpio_switch_driver);
+}
+
+static void __exit gpio_switch_exit(void)
+{
+ platform_driver_unregister(&gpio_switch_driver);
+}
+
+module_init(gpio_switch_init);
+module_exit(gpio_switch_exit);
+
+MODULE_AUTHOR("Mike Lockwood <lockwood@android.com>");
+MODULE_DESCRIPTION("GPIO Switch driver");
+MODULE_LICENSE("GPL");
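
A board-file sketch of the platform data consumed by gpio_switch_probe(); the gpio_switch_platform_data field names are inferred from the probe function above, and the GPIO number is a placeholder:

#include <linux/platform_device.h>
#include <linux/switch.h>

static struct gpio_switch_platform_data dock_switch_pdata = {
	.name = "dock",
	.gpio = 17,	/* placeholder GPIO number */
	.state_on = "1",
	.state_off = "0",
};

static struct platform_device dock_switch_device = {
	.name = "switch-gpio",	/* matches gpio_switch_driver.driver.name */
	.id = -1,
	.dev = {
		.platform_data = &dock_switch_pdata,
	},
};

/* Registered from board init code with platform_device_register(&dock_switch_device). */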
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index e92e1dcb6e3f..a91bc4527402 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -42,6 +42,17 @@ config THERMAL_OF
Say 'Y' here if you need to build thermal infrastructure
based on device tree.
+config THERMAL_WRITABLE_TRIPS
+ bool "Enable writable trip points"
+ help
+ This option allows the system integrator to choose whether
+ trip temperatures can be changed from userspace. The
+ writable trips need to be specified when setting up the
+ thermal zone but the choice here takes precedence.
+
+ Say 'Y' here if you would like to allow userspace tools to
+ change trip temperatures.
+
choice
prompt "Default Thermal governor"
default THERMAL_DEFAULT_GOV_STEP_WISE
@@ -71,6 +82,14 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
Select this if you want to let the user space manage the
platform thermals.
+config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
+ bool "power_allocator"
+ select THERMAL_GOV_POWER_ALLOCATOR
+ help
+ Select this if you want to control temperature based on
+ system and device power allocation. This governor can only
+ operate on cooling devices that implement the power API.
+
endchoice
config THERMAL_GOV_FAIR_SHARE
@@ -88,6 +107,13 @@ config THERMAL_GOV_USER_SPACE
help
Enable this to let the user space manage the platform thermals.
+config THERMAL_GOV_POWER_ALLOCATOR
+ bool "Power allocator thermal governor"
+ select THERMAL_POWER_ACTOR
+ help
+ Enable this to manage platform thermals by dynamically
+ allocating and limiting power to devices.
+
config CPU_THERMAL
bool "generic cpu cooling support"
depends on CPU_FREQ
@@ -102,6 +128,19 @@ config CPU_THERMAL
If you want this support, you should say Y here.
+config DEVFREQ_THERMAL
+ bool "generic device cooling support"
+ depends on PM_DEVFREQ
+ depends on PM_OPP
+ help
+ This implements the generic devfreq cooling mechanism through
+ frequency reduction for devices using devfreq.
+
+ This will throttle the device by limiting the maximum allowed DVFS
+ frequency corresponding to the cooling level.
+
+ If you want this support, you should say Y here.
+
config THERMAL_EMULATION
bool "Thermal emulation mode support"
help
@@ -182,6 +221,14 @@ config DB8500_CPUFREQ_COOLING
bound cpufreq cooling device turns active to set CPU frequency low to
cool down the CPU.
+config SCPI_THERMAL
+ tristate "Temperature sensor on ARM SoC based on SCPI Firmware interface"
+ depends on ARM_SCPI_PROTOCOL
+ depends on OF
+ help
+ Support for the SCPI thermal sensor driver in the Linux thermal
+ framework.
+
config INTEL_POWERCLAMP
tristate "Intel PowerClamp idle injection driver"
depends on THERMAL
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 71f8eef49c1c..19c364e29cb7 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -13,10 +13,14 @@ thermal_sys-$(CONFIG_THERMAL_OF) += of-thermal.o
thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o
thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o
+thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR) += power_allocator.o
# cpufreq cooling
thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o
+# devfreq cooling
+thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
+
# platform thermal drivers
obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o
obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o
@@ -26,5 +30,6 @@ obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o
obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o
obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o
obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o
+obj-$(CONFIG_SCPI_THERMAL) += scpi-thermal.o
obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 4246262c4bd2..e0258780692d 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -24,10 +24,26 @@
#include <linux/thermal.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
+#include <linux/opp.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
+#include <trace/events/thermal.h>
+
+/**
+ * struct power_table - frequency to power conversion
+ * @frequency: frequency in KHz
+ * @power: power in mW
+ *
+ * This structure is built when the cooling device registers and helps
+ * in translating frequency to power and vice versa.
+ */
+struct power_table {
+ u32 frequency;
+ u32 power;
+};
+
/**
* struct cpufreq_cooling_device - data for cooling device with cpufreq
* @id: unique integer value corresponding to each cpufreq_cooling_device
@@ -39,6 +55,15 @@
* @cpufreq_val: integer value representing the absolute value of the clipped
* frequency.
* @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
+ * @last_load: load measured by the latest call to cpufreq_get_requested_power()
+ * @time_in_idle: previous reading of the absolute time that this cpu was idle
+ * @time_in_idle_timestamp: wall time of the last invocation of
+ * get_cpu_idle_time()
+ * @dyn_power_table: array of struct power_table for frequency to power
+ * conversion, sorted in ascending order.
+ * @dyn_power_table_entries: number of entries in the @dyn_power_table array
+ * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
+ * @plat_get_static_power: callback to calculate the static power
*
* This structure is required for keeping information of each
* cpufreq_cooling_device registered. In order to prevent corruption of this a
@@ -50,6 +75,13 @@ struct cpufreq_cooling_device {
unsigned int cpufreq_state;
unsigned int cpufreq_val;
struct cpumask allowed_cpus;
+ u32 last_load;
+ u64 *time_in_idle;
+ u64 *time_in_idle_timestamp;
+ struct power_table *dyn_power_table;
+ int dyn_power_table_entries;
+ struct device *cpu_dev;
+ get_static_t plat_get_static_power;
};
static DEFINE_IDR(cpufreq_idr);
static DEFINE_MUTEX(cooling_cpufreq_lock);
@@ -324,22 +356,236 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
struct cpufreq_policy *policy = data;
unsigned long max_freq = 0;
- if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID)
- return 0;
+ switch (event) {
+
+ case CPUFREQ_ADJUST:
+ if (notify_device == NOTIFY_INVALID)
+ return NOTIFY_DONE;
+
+ if (cpumask_test_cpu(policy->cpu, &notify_device->allowed_cpus))
+ max_freq = notify_device->cpufreq_val;
+ else
+ return NOTIFY_DONE;
+
+ /* Never exceed user_policy.max */
+ if (max_freq > policy->user_policy.max)
+ max_freq = policy->user_policy.max;
+
+ if (policy->max != max_freq)
+ cpufreq_verify_within_limits(policy, 0, max_freq);
+
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+/**
+ * build_dyn_power_table() - create a dynamic power to frequency table
+ * @cpufreq_device: the cpufreq cooling device in which to store the table
+ * @capacitance: dynamic power coefficient for these cpus
+ *
+ * Build a dynamic power to frequency table for this cpu and store it
+ * in @cpufreq_device. This table will be used in cpu_power_to_freq() and
+ * cpu_freq_to_power() to convert between power and frequency
+ * efficiently. Power is stored in mW, frequency in KHz. The
+ * resulting table is in ascending order.
+ *
+ * Return: 0 on success, -E* on error.
+ */
+static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
+ u32 capacitance)
+{
+ struct power_table *power_table;
+ struct opp *opp;
+ struct device *dev = NULL;
+ int num_opps = 0, cpu, i, ret = 0;
+ unsigned long freq;
+
+ rcu_read_lock();
+
+ for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+ dev = get_cpu_device(cpu);
+ if (!dev) {
+ dev_warn(&cpufreq_device->cool_dev->device,
+ "No cpu device for cpu %d\n", cpu);
+ continue;
+ }
+
+ num_opps = opp_get_opp_count(dev);
+ if (num_opps > 0) {
+ break;
+ } else if (num_opps < 0) {
+ ret = num_opps;
+ goto unlock;
+ }
+ }
+
+ if (num_opps == 0) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
+
+ for (freq = 0, i = 0;
+ opp = opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
+ freq++, i++) {
+ u32 freq_mhz, voltage_mv;
+ u64 power;
+
+ freq_mhz = freq / 1000000;
+ voltage_mv = opp_get_voltage(opp) / 1000;
+
+
+ /*
+ * Do the multiplication with MHz and millivolt so as
+ * to not overflow.
+ */
+ power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
+ do_div(power, 1000000000);
+
+ /* frequency is stored in power_table in KHz */
+ power_table[i].frequency = freq / 1000;
+
+ /* power is stored in mW */
+ power_table[i].power = power;
+ }
- if (cpumask_test_cpu(policy->cpu, &notify_device->allowed_cpus))
- max_freq = notify_device->cpufreq_val;
+ if (i == 0) {
+ ret = PTR_ERR(opp);
+ goto unlock;
+ }
+
+ cpufreq_device->cpu_dev = dev;
+ cpufreq_device->dyn_power_table = power_table;
+ cpufreq_device->dyn_power_table_entries = i;
+
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+
+static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
+ u32 freq)
+{
+ int i;
+ struct power_table *pt = cpufreq_device->dyn_power_table;
+
+ for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+ if (freq < pt[i].frequency)
+ break;
+
+ return pt[i - 1].power;
+}
+
+static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
+ u32 power)
+{
+ int i;
+ struct power_table *pt = cpufreq_device->dyn_power_table;
+
+ for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
+ if (power < pt[i].power)
+ break;
+
+ return pt[i - 1].frequency;
+}
+
+/**
+ * get_load() - get load for a cpu since last updated
+ * @cpufreq_device: &struct cpufreq_cooling_device for this cpu
+ * @cpu: cpu number
+ *
+ * Return: The average load of cpu @cpu in percentage since this
+ * function was last called.
+ */
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu)
+{
+ u32 load;
+ u64 now, now_idle, delta_time, delta_idle;
+
+ now_idle = get_cpu_idle_time(cpu, &now, 0);
+ delta_idle = now_idle - cpufreq_device->time_in_idle[cpu];
+ delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu];
+
+ if (delta_time <= delta_idle)
+ load = 0;
else
+ load = div64_u64(100 * (delta_time - delta_idle), delta_time);
+
+ cpufreq_device->time_in_idle[cpu] = now_idle;
+ cpufreq_device->time_in_idle_timestamp[cpu] = now;
+
+ return load;
+}
+
+/**
+ * get_static_power() - calculate the static power consumed by the cpus
+ * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev
+ * @tz: thermal zone device in which we're operating
+ * @freq: frequency in KHz
+ * @power: pointer in which to store the calculated static power
+ *
+ * Calculate the static power consumed by the cpus described by
+ * @cpufreq_device running at frequency @freq. This function relies on a
+ * platform specific function that should have been provided when the
+ * cooling device was registered. If it wasn't, the static power is assumed to
+ * be negligible. The calculated static power is stored in @power.
+ *
+ * Return: 0 on success, -E* on failure.
+ */
+static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
+ struct thermal_zone_device *tz, unsigned long freq,
+ u32 *power)
+{
+ struct opp *opp;
+ unsigned long voltage;
+ struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
+ unsigned long freq_hz = freq * 1000;
+
+ if (!cpufreq_device->plat_get_static_power ||
+ !cpufreq_device->cpu_dev) {
+ *power = 0;
return 0;
+ }
- /* Never exceed user_policy.max */
- if (max_freq > policy->user_policy.max)
- max_freq = policy->user_policy.max;
+ rcu_read_lock();
- if (policy->max != max_freq)
- cpufreq_verify_within_limits(policy, 0, max_freq);
+ opp = opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
+ true);
+ voltage = opp_get_voltage(opp);
- return 0;
+ rcu_read_unlock();
+
+ if (voltage == 0) {
+ dev_warn_ratelimited(cpufreq_device->cpu_dev,
+ "Failed to get voltage for frequency %lu: %ld\n",
+ freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+ return -EINVAL;
+ }
+
+ return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
+ voltage, power);
+}
+
+/**
+ * get_dynamic_power() - calculate the dynamic power
+ * @cpufreq_device: &cpufreq_cooling_device for this cdev
+ * @freq: current frequency
+ *
+ * Return: the dynamic power consumed by the cpus described by
+ * @cpufreq_device.
+ */
+static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
+ unsigned long freq)
+{
+ u32 raw_cpu_power;
+
+ raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
+ return (raw_cpu_power * cpufreq_device->last_load) / 100;
}
/* cpufreq cooling device callback functions are defined below */
@@ -411,8 +657,205 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
return cpufreq_apply_cooling(cpufreq_device, state);
}
+/**
+ * cpufreq_get_requested_power() - get the current power
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @power: pointer in which to store the resulting power
+ *
+ * Calculate the current power consumption of the cpus in milliwatts
+ * and store it in @power. This function should actually calculate
+ * the requested power, but it's hard to get the frequency that
+ * cpufreq would have assigned if there were no thermal limits.
+ * Instead, we calculate the current power on the assumption that the
+ * immediate future will look like the immediate past.
+ *
+ * We use the current frequency and the average load since this
+ * function was last called. In reality, there could have been
+ * multiple opps since this function was last called and that affects
+ * the load calculation. While it's not perfectly accurate, this
+ * simplification is good enough and works. REVISIT this, as more
+ * complex code may be needed if experiments show that it's not
+ * accurate enough.
+ *
+ * Return: 0 on success, -E* if getting the static power failed.
+ */
+static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ u32 *power)
+{
+ unsigned long freq;
+ int i = 0, cpu, ret;
+ u32 static_power, dynamic_power, total_load = 0;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+ u32 *load_cpu = NULL;
+
+ cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+ /*
+ * All the CPUs are offline, thus the requested power by
+ * the cdev is 0
+ */
+ if (cpu >= nr_cpu_ids) {
+ *power = 0;
+ return 0;
+ }
+
+ freq = cpufreq_quick_get(cpu);
+
+ if (trace_thermal_power_cpu_get_power_enabled()) {
+ u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
+
+ load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu),
+ GFP_KERNEL);
+ }
+
+ for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+ u32 load;
+
+ if (cpu_online(cpu))
+ load = get_load(cpufreq_device, cpu);
+ else
+ load = 0;
+
+ total_load += load;
+ if (trace_thermal_power_cpu_limit_enabled() && load_cpu)
+ load_cpu[i] = load;
+
+ i++;
+ }
+
+ cpufreq_device->last_load = total_load;
+
+ dynamic_power = get_dynamic_power(cpufreq_device, freq);
+ ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+ if (ret) {
+ if (load_cpu)
+ devm_kfree(&cdev->device, load_cpu);
+ return ret;
+ }
+
+ if (load_cpu) {
+ trace_thermal_power_cpu_get_power(
+ &cpufreq_device->allowed_cpus,
+ freq, load_cpu, i, dynamic_power, static_power);
+
+ devm_kfree(&cdev->device, load_cpu);
+ }
+
+ *power = static_power + dynamic_power;
+ return 0;
+}
+
+/**
+ * cpufreq_state2power() - convert a cpu cdev state to power consumed
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @state: cooling device state to be converted
+ * @power: pointer in which to store the resulting power
+ *
+ * Convert cooling device state @state into power consumption in
+ * milliwatts assuming 100% load. Store the calculated power in
+ * @power.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device state could not
+ * be converted into a frequency or other -E* if there was an error
+ * when calculating the static power.
+ */
+static int cpufreq_state2power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ unsigned long state, u32 *power)
+{
+ unsigned int freq, num_cpus;
+ cpumask_t cpumask;
+ u32 static_power, dynamic_power;
+ int ret;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+ cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
+ num_cpus = cpumask_weight(&cpumask);
+
+ /* None of our cpus are online, so no power */
+ if (num_cpus == 0) {
+ *power = 0;
+ return 0;
+ }
+
+ freq = get_cpu_frequency(cpumask_any(&cpumask), state);
+ if (!freq)
+ return -EINVAL;
+
+ dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
+ ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+ if (ret)
+ return ret;
+
+ *power = static_power + dynamic_power;
+ return 0;
+}
+
+/**
+ * cpufreq_power2state() - convert power to a cooling device state
+ * @cdev: &thermal_cooling_device pointer
+ * @tz: a valid thermal zone device pointer
+ * @power: power in milliwatts to be converted
+ * @state: pointer in which to store the resulting state
+ *
+ * Calculate a cooling device state for the cpus described by @cdev
+ * that would allow them to consume at most @power mW and store it in
+ * @state. Note that this calculation depends on external factors
+ * such as the cpu load or the current static power. Calling this
+ * function with the same power as input can yield different cooling
+ * device states depending on those external factors.
+ *
+ * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
+ * the calculated frequency could not be converted to a valid state.
+ * The latter should not happen unless the frequencies available to
+ * cpufreq have changed since the initialization of the cpu cooling
+ * device.
+ */
+static int cpufreq_power2state(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 power,
+ unsigned long *state)
+{
+ unsigned int cpu, cur_freq, target_freq;
+ int ret;
+ s32 dyn_power;
+ u32 last_load, normalised_power, static_power;
+ struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+
+ cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
+
+ /* None of our cpus are online */
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
+
+ cur_freq = cpufreq_quick_get(cpu);
+ ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
+ if (ret)
+ return ret;
+
+ dyn_power = power - static_power;
+ dyn_power = dyn_power > 0 ? dyn_power : 0;
+ last_load = cpufreq_device->last_load ?: 1;
+ normalised_power = (dyn_power * 100) / last_load;
+ target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
+
+ *state = cpufreq_cooling_get_level(cpu, target_freq);
+ if (*state == THERMAL_CSTATE_INVALID) {
+ dev_warn_ratelimited(&cdev->device,
+ "Failed to convert %dKHz for cpu %d into a cdev state\n",
+ target_freq, cpu);
+ return -EINVAL;
+ }
+
+ trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
+ target_freq, *state, power);
+ return 0;
+}
+
/* Bind cpufreq callbacks to thermal cooling device ops */
-static struct thermal_cooling_device_ops const cpufreq_cooling_ops = {
+static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
.get_max_state = cpufreq_get_max_state,
.get_cur_state = cpufreq_get_cur_state,
.set_cur_state = cpufreq_set_cur_state,
@@ -427,6 +870,9 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
* __cpufreq_cooling_register - helper function to create cpufreq cooling device
* @np: a valid struct device_node to the cooling device device tree node
* @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -438,13 +884,15 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
*/
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
- const struct cpumask *clip_cpus)
+ const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
{
struct thermal_cooling_device *cool_dev;
struct cpufreq_cooling_device *cpufreq_dev = NULL;
unsigned int min = 0, max = 0;
char dev_name[THERMAL_NAME_LENGTH];
int ret = 0, i;
+ unsigned int num_cpus;
struct cpufreq_policy policy;
/* Verify that all the clip cpus have same freq_min, freq_max limit */
@@ -466,12 +914,43 @@ __cpufreq_cooling_register(struct device_node *np,
if (!cpufreq_dev)
return ERR_PTR(-ENOMEM);
+ num_cpus = cpumask_weight(clip_cpus);
+ cpufreq_dev->time_in_idle = kcalloc(num_cpus,
+ sizeof(*cpufreq_dev->time_in_idle),
+ GFP_KERNEL);
+ if (!cpufreq_dev->time_in_idle) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_cdev;
+ }
+
+ cpufreq_dev->time_in_idle_timestamp =
+ kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
+ GFP_KERNEL);
+ if (!cpufreq_dev->time_in_idle_timestamp) {
+ cool_dev = ERR_PTR(-ENOMEM);
+ goto free_time_in_idle;
+ }
+
cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
+ if (capacitance) {
+ cpufreq_cooling_ops.get_requested_power =
+ cpufreq_get_requested_power;
+ cpufreq_cooling_ops.state2power = cpufreq_state2power;
+ cpufreq_cooling_ops.power2state = cpufreq_power2state;
+ cpufreq_dev->plat_get_static_power = plat_static_func;
+
+ ret = build_dyn_power_table(cpufreq_dev, capacitance);
+ if (ret) {
+ cool_dev = ERR_PTR(ret);
+ goto free_time_in_idle_timestamp;
+ }
+ }
+
ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
if (ret) {
- kfree(cpufreq_dev);
- return ERR_PTR(-EINVAL);
+ cool_dev = ERR_PTR(-EINVAL);
+ goto free_time_in_idle_timestamp;
}
snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
@@ -479,11 +958,8 @@ __cpufreq_cooling_register(struct device_node *np,
cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
&cpufreq_cooling_ops);
- if (IS_ERR(cool_dev)) {
- release_idr(&cpufreq_idr, cpufreq_dev->id);
- kfree(cpufreq_dev);
- return cool_dev;
- }
+ if (IS_ERR(cool_dev))
+ goto remove_idr;
cpufreq_dev->cool_dev = cool_dev;
cpufreq_dev->cpufreq_state = 0;
mutex_lock(&cooling_cpufreq_lock);
@@ -497,6 +973,17 @@ __cpufreq_cooling_register(struct device_node *np,
mutex_unlock(&cooling_cpufreq_lock);
return cool_dev;
+
+remove_idr:
+ release_idr(&cpufreq_idr, cpufreq_dev->id);
+free_time_in_idle_timestamp:
+ kfree(cpufreq_dev->time_in_idle_timestamp);
+free_time_in_idle:
+ kfree(cpufreq_dev->time_in_idle);
+free_cdev:
+ kfree(cpufreq_dev);
+
+ return cool_dev;
}
/**
@@ -513,7 +1000,7 @@ __cpufreq_cooling_register(struct device_node *np,
struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
- return __cpufreq_cooling_register(NULL, clip_cpus);
+ return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
@@ -537,11 +1024,78 @@ of_cpufreq_cooling_register(struct device_node *np,
if (!np)
return ERR_PTR(-EINVAL);
- return __cpufreq_cooling_register(np, clip_cpus);
+ return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
/**
+ * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%x". This api can support multiple
+ * instances of cpufreq cooling devices. Using this function, the
+ * cooling device will implement the power extensions by using a
+ * simple cpu power model. The cpus must have registered their OPPs
+ * using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus. If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+ get_static_t plat_static_func)
+{
+ return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+ plat_static_func);
+}
+EXPORT_SYMBOL(cpufreq_power_cooling_register);
+
+/**
+ * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
+ * @np: a valid struct device_node to the cooling device device tree node
+ * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @capacitance: dynamic power coefficient for these cpus
+ * @plat_static_func: function to calculate the static power consumed by these
+ * cpus (optional)
+ *
+ * This interface function registers the cpufreq cooling device with
+ * the name "thermal-cpufreq-%x". This api can support multiple
+ * instances of cpufreq cooling devices. Using this API, the cpufreq
+ * cooling device will be linked to the device tree node provided.
+ * Using this function, the cooling device will implement the power
+ * extensions by using a simple cpu power model. The cpus must have
+ * registered their OPPs using the OPP library.
+ *
+ * An optional @plat_static_func may be provided to calculate the
+ * static power consumed by these cpus. If the platform's static
+ * power consumption is unknown or negligible, make it NULL.
+ *
+ * Return: a valid struct thermal_cooling_device pointer on success,
+ * on failure, it returns a corresponding ERR_PTR().
+ */
+struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus,
+ u32 capacitance,
+ get_static_t plat_static_func)
+{
+ if (!np)
+ return ERR_PTR(-EINVAL);
+
+ return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+ plat_static_func);
+}
+EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
+
+/**
* cpufreq_cooling_unregister - function to remove cpufreq cooling device.
* @cdev: thermal cooling device pointer.
*
@@ -566,6 +1120,8 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
release_idr(&cpufreq_idr, cpufreq_dev->id);
+ kfree(cpufreq_dev->time_in_idle_timestamp);
+ kfree(cpufreq_dev->time_in_idle);
kfree(cpufreq_dev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
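
A hedged sketch of how a platform might register the power-aware cpu cooling device added above; the capacitance value is a placeholder, and the get_static_t prototype is inferred from the call in get_static_power(), its authoritative definition living in the cpu_cooling header (not part of this hunk):

#include <linux/cpu_cooling.h>
#include <linux/cpumask.h>
#include <linux/thermal.h>

static int example_static_power(cpumask_t *cpumask, int interval,
				unsigned long voltage, u32 *power)
{
	/* Pretend static leakage is negligible on this platform. */
	*power = 0;
	return 0;
}

static struct thermal_cooling_device *example_cpu_cooling_register(void)
{
	/* 100 is a made-up dynamic-power coefficient (capacitance); real
	 * platforms derive it from characterisation data. */
	return cpufreq_power_cooling_register(cpu_possible_mask, 100,
					      example_static_power);
}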
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index 1e3b3bf9f993..e3ccc2218eb3 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -76,7 +76,7 @@ static int db8500_cdev_bind(struct thermal_zone_device *thermal,
upper = lower = i > max_state ? max_state : i;
ret = thermal_zone_bind_cooling_device(thermal, i, cdev,
- upper, lower);
+ upper, lower, THERMAL_WEIGHT_DEFAULT);
dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type,
i, ret, ret ? "fail" : "succeed");
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
new file mode 100644
index 000000000000..1efb7b672680
--- /dev/null
+++ b/drivers/thermal/devfreq_cooling.c
@@ -0,0 +1,372 @@
+/*
+ * devfreq_cooling: Thermal cooling device implementation for devices using
+ * devfreq
+ *
+ * Copyright (C) 2014 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/opp.h>
+#include <linux/thermal.h>
+
+#include <trace/events/thermal.h>
+
+static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ struct devfreq_cooling_device *dfc = cdev->devdata;
+ struct devfreq *df = dfc->devfreq;
+
+ *state = df->profile->max_state - 1;
+
+ return 0;
+}
+
+static int devfreq_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ struct devfreq_cooling_device *dfc = cdev->devdata;
+
+ *state = dfc->cooling_state;
+
+ return 0;
+}
+
+static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long state)
+{
+ struct devfreq_cooling_device *dfc = cdev->devdata;
+ struct devfreq *df = dfc->devfreq;
+ struct device *dev = df->dev.parent;
+ unsigned long freq;
+
+ if (state == dfc->cooling_state)
+ return 0;
+
+ dev_dbg(dev, "Setting cooling state %lu\n", state);
+
+ if (state == THERMAL_NO_LIMIT) {
+ freq = 0;
+ } else {
+ unsigned long index;
+ unsigned long max_state = df->profile->max_state;
+
+ if (state >= max_state)
+ return -EINVAL;
+
+ index = (max_state - 1) - state;
+ freq = df->profile->freq_table[index];
+ }
+
+ if (df->max_freq != freq)
+ devfreq_qos_set_max(df, freq);
+
+ dfc->cooling_state = state;
+
+ return 0;
+}
+
+static unsigned long
+freq_get_state(struct devfreq *df, unsigned long freq)
+{
+ unsigned long state = THERMAL_CSTATE_INVALID;
+ int i;
+
+ for (i = 0; i < df->profile->max_state; i++) {
+ if (df->profile->freq_table[i] == freq)
+ state = (df->profile->max_state - 1) - i;
+ }
+
+ return state;
+}
+
+static unsigned long
+state_get_freq(struct devfreq *df, unsigned long state)
+{
+ if (state >= df->profile->max_state)
+ return 0;
+
+ return df->profile->freq_table[state];
+}
+
+static unsigned long
+get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq)
+{
+ struct devfreq *df = dfc->devfreq;
+ struct device *dev = df->dev.parent;
+ unsigned long voltage;
+ struct opp *opp;
+
+ rcu_read_lock();
+ opp = opp_find_freq_exact(dev, freq, true);
+ voltage = opp_get_voltage(opp) / 1000; /* mV */
+ rcu_read_unlock();
+
+ if (voltage == 0) {
+ dev_warn_ratelimited(dev,
+ "Failed to get voltage for frequency %lu: %ld\n",
+ freq, IS_ERR(opp) ? PTR_ERR(opp) : 0);
+ return 0;
+ }
+
+ return dfc->power_ops->get_static_power(voltage);
+}
+
+static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ u32 *power)
+{
+ struct devfreq_cooling_device *dfc = cdev->devdata;
+ struct devfreq_dev_status *status = &dfc->last_status;
+ struct devfreq *df = dfc->devfreq;
+ unsigned long state;
+ unsigned long freq = status->current_frequency;
+ u32 load, dyn_power, static_power;
+
+ /* Get dynamic power for state */
+ state = freq_get_state(df, freq);
+ dyn_power = dfc->power_table[state];
+
+ /* Scale dynamic power for utilization */
+ dyn_power = (dyn_power * status->busy_time) / status->total_time;
+
+ /* Get static power */
+ static_power = get_static_power(dfc, freq);
+
+ load = (100 * status->busy_time) / status->total_time;
+ trace_thermal_power_devfreq_get_power(cdev, freq, load, dyn_power,
+ static_power);
+
+ *power = dyn_power + static_power;
+
+ return 0;
+}
+
+static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz,
+ unsigned long state,
+ u32 *power)
+{
+ struct devfreq_cooling_device *dfc = cdev->devdata;
+ struct devfreq *df = dfc->devfreq;
+ unsigned long freq;
+ u32 static_power;
+
+ freq = state_get_freq(df, state);
+ static_power = get_static_power(dfc, freq);
+
+ *power = dfc->power_table[state] + static_power;
+ return 0;
+}
+
+static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 power,
+ unsigned long *state)
+{
+ struct devfreq_cooling_device *dfc = cdev->devdata;
+ struct devfreq_dev_status *status = &dfc->last_status;
+ struct devfreq *df = dfc->devfreq;
+ unsigned long freq = status->current_frequency;
+ unsigned long busy_time;
+ s32 dyn_power;
+ u32 static_power;
+ int i;
+
+ static_power = get_static_power(dfc, freq);
+
+ dyn_power = power - static_power;
+ dyn_power = dyn_power > 0 ? dyn_power : 0;
+
+ /* Scale dynamic power for utilization */
+ busy_time = status->busy_time ?: 1;
+ dyn_power = (dyn_power * status->total_time) / busy_time;
+
+ /*
+ * Find the first cooling state that is within the power
+ * budget for dynamic power.
+ */
+ for (i = 0; i < df->profile->max_state - 1; i++)
+ if (dyn_power >= dfc->power_table[i])
+ break;
+
+ *state = i;
+ trace_thermal_power_devfreq_limit(cdev, freq, *state, power);
+ return 0;
+}
+
+static struct thermal_cooling_device_ops const devfreq_cooling_ops = {
+ .get_max_state = devfreq_cooling_get_max_state,
+ .get_cur_state = devfreq_cooling_get_cur_state,
+ .set_cur_state = devfreq_cooling_set_cur_state,
+ .get_requested_power = devfreq_cooling_get_requested_power,
+ .state2power = devfreq_cooling_state2power,
+ .power2state = devfreq_cooling_power2state,
+};
+
+/**
+ * devfreq_cooling_gen_power_table(): - Generate power table.
+ * @dfc: Pointer to devfreq cooling device.
+ *
+ * Generate a table with the device's maximum power usage at each cooling
+ * state (OPP). The static and dynamic power, using the appropriate voltage and
+ * frequency for the state, is acquired from the struct devfreq_cooling_ops, and
+ * summed to make the maximum power draw.
+ *
+ * The table is malloced, and a pointer put in dfc->power_table. This must be
+ * freed when unregistering the devfreq cooling device.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int devfreq_cooling_gen_power_table(struct devfreq_cooling_device *dfc)
+{
+ struct devfreq *df = dfc->devfreq;
+ struct device *dev = df->dev.parent;
+ int num_opps;
+ struct devfreq_cooling_ops *callbacks = dfc->power_ops;
+ unsigned long freq;
+ u32 *table;
+ int i;
+
+ rcu_read_lock();
+ num_opps = opp_get_opp_count(dev);
+ rcu_read_unlock();
+
+ if (num_opps != dfc->devfreq->profile->max_state)
+ return -ERANGE;
+
+ table = kcalloc(num_opps, sizeof(table[0]), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) {
+ unsigned long power_dyn, voltage;
+ struct opp *opp;
+
+ opp = opp_find_freq_floor(dev, &freq);
+ if (IS_ERR(opp))
+ break;
+
+ voltage = opp_get_voltage(opp) / 1000; /* mV */
+
+ power_dyn = callbacks->get_dynamic_power(freq, voltage);
+
+ dev_info(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n",
+ freq / 1000000, voltage, power_dyn, power_dyn);
+
+ table[i] = power_dyn;
+ }
+ rcu_read_unlock();
+
+ if (i != num_opps) {
+ kfree(table);
+ return -EFAULT;
+ }
+
+ dfc->power_table = table;
+
+ return 0;
+}
+
+/**
+ * of_devfreq_cooling_register_power(): - Register devfreq cooling device,
+ * with OF and power information.
+ * @np: Pointer to OF device_node.
+ * @df: Pointer to devfreq device.
+ * @ops: Pointer to power ops.
+ */
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+ struct devfreq_cooling_ops *ops)
+{
+ struct thermal_cooling_device *cdev;
+ struct devfreq_cooling_device *dfc;
+
+ /* freq_table is required to map state index to frequency. */
+ if (!df->profile->max_state && !df->profile->freq_table)
+ return ERR_PTR(-EINVAL);
+
+
+ dfc = kzalloc(sizeof(struct devfreq_cooling_device), GFP_KERNEL);
+ if (!dfc)
+ return ERR_PTR(-ENOMEM);
+
+ dfc->devfreq = df;
+
+ if (ops) {
+ if (!ops->get_static_power || !ops->get_dynamic_power) {
+ kfree(dfc);
+ return ERR_PTR(-EINVAL);
+ }
+ dfc->power_ops = ops;
+
+ devfreq_cooling_gen_power_table(dfc);
+ }
+
+ cdev = thermal_of_cooling_device_register(np, "devfreq", dfc,
+ &devfreq_cooling_ops);
+ if (IS_ERR(cdev)) {
+ dev_err(df->dev.parent,
+ "Failed to register devfreq cooling device (%ld)\n",
+ PTR_ERR(cdev));
+ kfree(dfc->power_table);
+ kfree(dfc);
+ return ERR_CAST(cdev);
+ }
+
+ dfc->cdev = cdev;
+
+ return dfc;
+}
+EXPORT_SYMBOL(of_devfreq_cooling_register_power);
+
+/**
+ * of_devfreq_cooling_register(): - Register devfreq cooling device,
+ * with OF information.
+ * @np: Pointer to OF device_node.
+ * @df: Pointer to devfreq device.
+ */
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+ return of_devfreq_cooling_register_power(np, df, NULL);
+}
+EXPORT_SYMBOL(of_devfreq_cooling_register);
+
+/**
+ * devfreq_cooling_register(): - Register devfreq cooling device.
+ * @df: Pointer to devfreq device.
+ */
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df)
+{
+ return of_devfreq_cooling_register(NULL, df);
+}
+EXPORT_SYMBOL(devfreq_cooling_register);
+
+/**
+ * devfreq_cooling_unregister(): - Unregister devfreq cooling device.
+ * @dfc: Pointer to devfreq cooling device to unregister.
+ */
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+ if (!dfc)
+ return;
+
+ thermal_cooling_device_unregister(dfc->cdev);
+
+ kfree(dfc->power_table);
+ kfree(dfc);
+}
+EXPORT_SYMBOL(devfreq_cooling_unregister);
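
A hedged sketch of registering a devfreq cooling device with the power extensions; the power-model numbers are placeholders, and the devfreq_cooling_ops callback names follow the checks in of_devfreq_cooling_register_power() above:

#include <linux/devfreq.h>
#include <linux/devfreq_cooling.h>

static unsigned long gpu_static_power(unsigned long voltage_mv)
{
	/* Placeholder: a flat 10 mW of leakage regardless of voltage. */
	return 10;
}

static unsigned long gpu_dynamic_power(unsigned long freq_hz,
					unsigned long voltage_mv)
{
	/* Placeholder model: P = C * f * V^2 with a made-up coefficient,
	 * returning mW as expected by the power table above. */
	unsigned long freq_mhz = freq_hz / 1000000;

	return (2 * freq_mhz * voltage_mv * voltage_mv) / 1000000;
}

static struct devfreq_cooling_ops gpu_cooling_ops = {
	.get_static_power = gpu_static_power,
	.get_dynamic_power = gpu_dynamic_power,
};

/* Given a struct devfreq *df and its device_node *np:
 *	dfc = of_devfreq_cooling_register_power(np, df, &gpu_cooling_ops);
 */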
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 944ba2f340c8..8c50b8d6afb7 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -23,6 +23,7 @@
*/
#include <linux/thermal.h>
+#include <trace/events/thermal.h>
#include "thermal_core.h"
@@ -34,6 +35,7 @@ static int get_trip_level(struct thermal_zone_device *tz)
{
int count = 0;
unsigned long trip_temp;
+ enum thermal_trip_type trip_type;
if (tz->trips == 0 || !tz->ops->get_trip_temp)
return 0;
@@ -43,17 +45,27 @@ static int get_trip_level(struct thermal_zone_device *tz)
if (tz->temperature < trip_temp)
break;
}
+
+ /*
+ * count > 0 only if temperature is greater than first trip
+ * point, in which case, trip_point = count - 1
+ */
+ if (count > 0) {
+ tz->ops->get_trip_type(tz, count - 1, &trip_type);
+ trace_thermal_zone_trip(tz, count - 1, trip_type);
+ }
+
return count;
}
static long get_target_state(struct thermal_zone_device *tz,
- struct thermal_cooling_device *cdev, int weight, int level)
+ struct thermal_cooling_device *cdev, int percentage, int level)
{
unsigned long max_state;
cdev->ops->get_max_state(cdev, &max_state);
- return (long)(weight * level * max_state) / (100 * tz->trips);
+ return (long)(percentage * level * max_state) / (100 * tz->trips);
}
/**
@@ -65,7 +77,7 @@ static long get_target_state(struct thermal_zone_device *tz,
*
* Parameters used for Throttling:
* P1. max_state: Maximum throttle state exposed by the cooling device.
- * P2. weight[i]/100:
+ * P2. percentage[i]/100:
* How 'effective' the 'i'th device is, in cooling the given zone.
* P3. cur_trip_level/max_no_of_trips:
* This describes the extent to which the devices should be throttled.
@@ -76,28 +88,33 @@ static long get_target_state(struct thermal_zone_device *tz,
*/
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
- const struct thermal_zone_params *tzp;
- struct thermal_cooling_device *cdev;
struct thermal_instance *instance;
- int i;
+ int total_weight = 0;
+ int total_instance = 0;
int cur_trip_level = get_trip_level(tz);
- if (!tz->tzp || !tz->tzp->tbp)
- return -EINVAL;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if (instance->trip != trip)
+ continue;
- tzp = tz->tzp;
+ total_weight += instance->weight;
+ total_instance++;
+ }
- for (i = 0; i < tzp->num_tbps; i++) {
- if (!tzp->tbp[i].cdev)
- continue;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ int percentage;
+ struct thermal_cooling_device *cdev = instance->cdev;
- cdev = tzp->tbp[i].cdev;
- instance = get_thermal_instance(tz, cdev, trip);
- if (!instance)
+ if (instance->trip != trip)
continue;
- instance->target = get_target_state(tz, cdev,
- tzp->tbp[i].weight, cur_trip_level);
+ if (!total_weight)
+ percentage = 100 / total_instance;
+ else
+ percentage = (instance->weight * 100) / total_weight;
+
+ instance->target = get_target_state(tz, cdev, percentage,
+ cur_trip_level);
instance->cdev->updated = false;
thermal_cdev_update(cdev);
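
As a worked example of the rewritten weighting above: two instances bound to the same trip with weights 1 and 3 give total_weight = 4, so they receive percentages 25 and 75. With cur_trip_level = 2, tz->trips = 4 and a cooling device whose max_state is 10, the weight-3 instance gets target = (75 * 2 * 10) / (100 * 4) = 3 (integer division), while the weight-1 instance gets (25 * 2 * 10) / 400 = 1. If every instance has weight 0, each simply receives 100 / total_instance percent.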
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
new file mode 100644
index 000000000000..fde4c2876d14
--- /dev/null
+++ b/drivers/thermal/imx_thermal.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/cpu_cooling.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/types.h>
+
+#define REG_SET 0x4
+#define REG_CLR 0x8
+#define REG_TOG 0xc
+
+#define MISC0 0x0150
+#define MISC0_REFTOP_SELBIASOFF (1 << 3)
+#define MISC1 0x0160
+#define MISC1_IRQ_TEMPHIGH (1 << 29)
+/* Below LOW and PANIC bits are only for TEMPMON_IMX6SX */
+#define MISC1_IRQ_TEMPLOW (1 << 28)
+#define MISC1_IRQ_TEMPPANIC (1 << 27)
+
+#define TEMPSENSE0 0x0180
+#define TEMPSENSE0_ALARM_VALUE_SHIFT 20
+#define TEMPSENSE0_ALARM_VALUE_MASK (0xfff << TEMPSENSE0_ALARM_VALUE_SHIFT)
+#define TEMPSENSE0_TEMP_CNT_SHIFT 8
+#define TEMPSENSE0_TEMP_CNT_MASK (0xfff << TEMPSENSE0_TEMP_CNT_SHIFT)
+#define TEMPSENSE0_FINISHED (1 << 2)
+#define TEMPSENSE0_MEASURE_TEMP (1 << 1)
+#define TEMPSENSE0_POWER_DOWN (1 << 0)
+
+#define TEMPSENSE1 0x0190
+#define TEMPSENSE1_MEASURE_FREQ 0xffff
+/* Below TEMPSENSE2 is only for TEMPMON_IMX6SX */
+#define TEMPSENSE2 0x0290
+#define TEMPSENSE2_LOW_VALUE_SHIFT 0
+#define TEMPSENSE2_LOW_VALUE_MASK 0xfff
+#define TEMPSENSE2_PANIC_VALUE_SHIFT 16
+#define TEMPSENSE2_PANIC_VALUE_MASK 0xfff0000
+
+#define OCOTP_ANA1 0x04e0
+
+/* The driver supports 1 passive trip point and 1 critical trip point */
+enum imx_thermal_trip {
+ IMX_TRIP_PASSIVE,
+ IMX_TRIP_CRITICAL,
+ IMX_TRIP_NUM,
+};
+
+/*
+ * It defines the temperature in millicelsius for passive trip point
+ * that will trigger cooling action when crossed.
+ */
+#define IMX_TEMP_PASSIVE 85000
+
+#define IMX_POLLING_DELAY 2000 /* millisecond */
+#define IMX_PASSIVE_DELAY 1000
+
+#define FACTOR0 10000000
+#define FACTOR1 15976
+#define FACTOR2 4297157
+
+#define TEMPMON_IMX6Q 1
+#define TEMPMON_IMX6SX 2
+
+struct thermal_soc_data {
+ u32 version;
+};
+
+static struct thermal_soc_data thermal_imx6q_data = {
+ .version = TEMPMON_IMX6Q,
+};
+
+static struct thermal_soc_data thermal_imx6sx_data = {
+ .version = TEMPMON_IMX6SX,
+};
+
+struct imx_thermal_data {
+ struct thermal_zone_device *tz;
+ struct thermal_cooling_device *cdev;
+ enum thermal_device_mode mode;
+ struct regmap *tempmon;
+ u32 c1, c2; /* See formula in imx_get_sensor_data() */
+ unsigned long temp_passive;
+ unsigned long temp_critical;
+ unsigned long alarm_temp;
+ unsigned long last_temp;
+ bool irq_enabled;
+ int irq;
+ struct clk *thermal_clk;
+ const struct thermal_soc_data *socdata;
+};
+
+static void imx_set_panic_temp(struct imx_thermal_data *data,
+ signed long panic_temp)
+{
+ struct regmap *map = data->tempmon;
+ int critical_value;
+
+ critical_value = (data->c2 - panic_temp) / data->c1;
+ regmap_write(map, TEMPSENSE2 + REG_CLR, TEMPSENSE2_PANIC_VALUE_MASK);
+ regmap_write(map, TEMPSENSE2 + REG_SET, critical_value <<
+ TEMPSENSE2_PANIC_VALUE_SHIFT);
+}
+
+static void imx_set_alarm_temp(struct imx_thermal_data *data,
+ signed long alarm_temp)
+{
+ struct regmap *map = data->tempmon;
+ int alarm_value;
+
+ data->alarm_temp = alarm_temp;
+ alarm_value = (data->c2 - alarm_temp) / data->c1;
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_ALARM_VALUE_MASK);
+ regmap_write(map, TEMPSENSE0 + REG_SET, alarm_value <<
+ TEMPSENSE0_ALARM_VALUE_SHIFT);
+}
+
+static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
+{
+ struct imx_thermal_data *data = tz->devdata;
+ struct regmap *map = data->tempmon;
+ unsigned int n_meas;
+ bool wait;
+ u32 val;
+
+ if (data->mode == THERMAL_DEVICE_ENABLED) {
+ /* Check if a measurement is currently in progress */
+ regmap_read(map, TEMPSENSE0, &val);
+ wait = !(val & TEMPSENSE0_FINISHED);
+ } else {
+ /*
+ * Every time we measure the temperature, we will power on the
+ * temperature sensor, enable measurements, take a reading,
+ * disable measurements, and power off the temperature sensor.
+ */
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+ wait = true;
+ }
+
+ /*
+ * According to the temp sensor designers, it may require up to ~17us
+ * to complete a measurement.
+ */
+ if (wait)
+ usleep_range(20, 50);
+
+ regmap_read(map, TEMPSENSE0, &val);
+
+ if (data->mode != THERMAL_DEVICE_ENABLED) {
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+ }
+
+ if ((val & TEMPSENSE0_FINISHED) == 0) {
+ dev_dbg(&tz->device, "temp measurement never finished\n");
+ return -EAGAIN;
+ }
+
+ n_meas = (val & TEMPSENSE0_TEMP_CNT_MASK) >> TEMPSENSE0_TEMP_CNT_SHIFT;
+
+ /* See imx_get_sensor_data() for formula derivation */
+ *temp = data->c2 - n_meas * data->c1;
+
+ /* Update alarm value to next higher trip point for TEMPMON_IMX6Q */
+ if (data->socdata->version == TEMPMON_IMX6Q) {
+ if (data->alarm_temp == data->temp_passive &&
+ *temp >= data->temp_passive)
+ imx_set_alarm_temp(data, data->temp_critical);
+ if (data->alarm_temp == data->temp_critical &&
+ *temp < data->temp_passive) {
+ imx_set_alarm_temp(data, data->temp_passive);
+ dev_dbg(&tz->device, "thermal alarm off: T < %lu\n",
+ data->alarm_temp / 1000);
+ }
+ }
+
+ if (*temp != data->last_temp) {
+ dev_dbg(&tz->device, "millicelsius: %ld\n", *temp);
+ data->last_temp = *temp;
+ }
+
+ /* Reenable alarm IRQ if temperature below alarm temperature */
+ if (!data->irq_enabled && *temp < data->alarm_temp) {
+ data->irq_enabled = true;
+ enable_irq(data->irq);
+ }
+
+ return 0;
+}
+
+static int imx_get_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode *mode)
+{
+ struct imx_thermal_data *data = tz->devdata;
+
+ *mode = data->mode;
+
+ return 0;
+}
+
+static int imx_set_mode(struct thermal_zone_device *tz,
+ enum thermal_device_mode mode)
+{
+ struct imx_thermal_data *data = tz->devdata;
+ struct regmap *map = data->tempmon;
+
+ if (mode == THERMAL_DEVICE_ENABLED) {
+ tz->polling_delay = IMX_POLLING_DELAY;
+ tz->passive_delay = IMX_PASSIVE_DELAY;
+
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+ if (!data->irq_enabled) {
+ data->irq_enabled = true;
+ enable_irq(data->irq);
+ }
+ } else {
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+
+ tz->polling_delay = 0;
+ tz->passive_delay = 0;
+
+ if (data->irq_enabled) {
+ disable_irq(data->irq);
+ data->irq_enabled = false;
+ }
+ }
+
+ data->mode = mode;
+ thermal_zone_device_update(tz);
+
+ return 0;
+}
+
+static int imx_get_trip_type(struct thermal_zone_device *tz, int trip,
+ enum thermal_trip_type *type)
+{
+ *type = (trip == IMX_TRIP_PASSIVE) ? THERMAL_TRIP_PASSIVE :
+ THERMAL_TRIP_CRITICAL;
+ return 0;
+}
+
+static int imx_get_crit_temp(struct thermal_zone_device *tz,
+ unsigned long *temp)
+{
+ struct imx_thermal_data *data = tz->devdata;
+
+ *temp = data->temp_critical;
+ return 0;
+}
+
+static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip,
+ unsigned long *temp)
+{
+ struct imx_thermal_data *data = tz->devdata;
+
+ *temp = (trip == IMX_TRIP_PASSIVE) ? data->temp_passive :
+ data->temp_critical;
+ return 0;
+}
+
+static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip,
+ unsigned long temp)
+{
+ struct imx_thermal_data *data = tz->devdata;
+
+ if (trip == IMX_TRIP_CRITICAL)
+ return -EPERM;
+
+ if (temp > IMX_TEMP_PASSIVE)
+ return -EINVAL;
+
+ data->temp_passive = temp;
+
+ imx_set_alarm_temp(data, temp);
+
+ return 0;
+}
+
+static int imx_bind(struct thermal_zone_device *tz,
+ struct thermal_cooling_device *cdev)
+{
+ int ret;
+
+ ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev,
+ THERMAL_NO_LIMIT,
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
+ if (ret) {
+ dev_err(&tz->device,
+ "binding zone %s with cdev %s failed:%d\n",
+ tz->type, cdev->type, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int imx_unbind(struct thermal_zone_device *tz,
+ struct thermal_cooling_device *cdev)
+{
+ int ret;
+
+ ret = thermal_zone_unbind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev);
+ if (ret) {
+ dev_err(&tz->device,
+ "unbinding zone %s with cdev %s failed:%d\n",
+ tz->type, cdev->type, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static struct thermal_zone_device_ops imx_tz_ops = {
+ .bind = imx_bind,
+ .unbind = imx_unbind,
+ .get_temp = imx_get_temp,
+ .get_mode = imx_get_mode,
+ .set_mode = imx_set_mode,
+ .get_trip_type = imx_get_trip_type,
+ .get_trip_temp = imx_get_trip_temp,
+ .get_crit_temp = imx_get_crit_temp,
+ .set_trip_temp = imx_set_trip_temp,
+};
+
+static int imx_get_sensor_data(struct platform_device *pdev)
+{
+ struct imx_thermal_data *data = platform_get_drvdata(pdev);
+ struct regmap *map;
+ int t1, n1;
+ int ret;
+ u32 val;
+ u64 temp64;
+
+ map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "fsl,tempmon-data");
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ dev_err(&pdev->dev, "failed to get sensor regmap: %d\n", ret);
+ return ret;
+ }
+
+ ret = regmap_read(map, OCOTP_ANA1, &val);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to read sensor data: %d\n", ret);
+ return ret;
+ }
+
+ if (val == 0 || val == ~0) {
+ dev_err(&pdev->dev, "invalid sensor calibration data\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Sensor data layout:
+ * [31:20] - sensor value @ 25C
+ * Use the universal formula now; only the sensor value @ 25C is needed
+ * slope = 0.4297157 - (0.0015976 * 25C fuse)
+ */
+ n1 = val >> 20;
+ t1 = 25; /* t1 always 25C */
+
+ /*
+ * Derived from linear interpolation:
+ * slope = 0.4297157 - (0.0015976 * 25C fuse)
+ * slope = (FACTOR2 - FACTOR1 * n1) / FACTOR0
+ * (Nmeas - n1) / (Tmeas - t1) = slope
+ * We want to reduce this down to the minimum computation necessary
+ * for each temperature read. Also, we want Tmeas in millicelsius
+ * and we don't want to lose precision from integer division. So...
+ * Tmeas = (Nmeas - n1) / slope + t1
+ * milli_Tmeas = 1000 * (Nmeas - n1) / slope + 1000 * t1
+ * milli_Tmeas = -1000 * (n1 - Nmeas) / slope + 1000 * t1
+ * Let constant c1 = (-1000 / slope)
+ * milli_Tmeas = (n1 - Nmeas) * c1 + 1000 * t1
+ * Let constant c2 = n1 * c1 + 1000 * t1
+ * milli_Tmeas = c2 - Nmeas * c1
+ */
+ temp64 = FACTOR0;
+ temp64 *= 1000;
+ do_div(temp64, FACTOR1 * n1 - FACTOR2);
+ data->c1 = temp64;
+ data->c2 = n1 * data->c1 + 1000 * t1;
+
+ /*
+ * Set the default passive cooling trip point;
+ * it can be changed from userspace.
+ */
+ data->temp_passive = IMX_TEMP_PASSIVE;
+
+ /*
+ * The maximum die temperature is set 20 C higher than
+ * IMX_TEMP_PASSIVE.
+ */
+ data->temp_critical = 1000 * 20 + data->temp_passive;
+
+ return 0;
+}
+
+static irqreturn_t imx_thermal_alarm_irq(int irq, void *dev)
+{
+ struct imx_thermal_data *data = dev;
+
+ disable_irq_nosync(irq);
+ data->irq_enabled = false;
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev)
+{
+ struct imx_thermal_data *data = dev;
+
+ dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n",
+ data->alarm_temp / 1000);
+
+ thermal_zone_device_update(data->tz);
+
+ return IRQ_HANDLED;
+}
+
+static const struct of_device_id of_imx_thermal_match[] = {
+ { .compatible = "fsl,imx6q-tempmon", .data = &thermal_imx6q_data, },
+ { .compatible = "fsl,imx6sx-tempmon", .data = &thermal_imx6sx_data, },
+ { /* end */ }
+};
+MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
+
+static int imx_thermal_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *of_id =
+ of_match_device(of_imx_thermal_match, &pdev->dev);
+ struct imx_thermal_data *data;
+ struct regmap *map;
+ int measure_freq;
+ int ret;
+
+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "fsl,tempmon");
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ dev_err(&pdev->dev, "failed to get tempmon regmap: %d\n", ret);
+ return ret;
+ }
+ data->tempmon = map;
+
+ data->socdata = of_id->data;
+
+ /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */
+ if (data->socdata->version == TEMPMON_IMX6SX) {
+ regmap_write(map, MISC1 + REG_CLR, MISC1_IRQ_TEMPHIGH |
+ MISC1_IRQ_TEMPLOW | MISC1_IRQ_TEMPPANIC);
+ /*
+ * The reset value of LOW ALARM is incorrect; set it to the lowest
+ * value to avoid a false low-alarm trigger.
+ */
+ regmap_write(map, TEMPSENSE2 + REG_SET,
+ TEMPSENSE2_LOW_VALUE_MASK);
+ }
+
+ data->irq = platform_get_irq(pdev, 0);
+ if (data->irq < 0)
+ return data->irq;
+
+ ret = devm_request_threaded_irq(&pdev->dev, data->irq,
+ imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread,
+ 0, "imx_thermal", data);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, data);
+
+ ret = imx_get_sensor_data(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to get sensor data\n");
+ return ret;
+ }
+
+ /* Make sure sensor is in known good state for measurements */
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+ regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
+ regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+
+ data->cdev = cpufreq_cooling_register(cpu_present_mask);
+ if (IS_ERR(data->cdev)) {
+ ret = PTR_ERR(data->cdev);
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "failed to register cpufreq cooling device: %d\n",
+ ret);
+ return ret;
+ }
+
+ data->thermal_clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(data->thermal_clk)) {
+ ret = PTR_ERR(data->thermal_clk);
+ if (ret != -EPROBE_DEFER)
+ dev_err(&pdev->dev,
+ "failed to get thermal clk: %d\n", ret);
+ cpufreq_cooling_unregister(data->cdev);
+ return ret;
+ }
+
+ /*
+ * The thermal sensor needs its clk on to return correct values. Normally
+ * we would enable the clk before taking a measurement and disable it
+ * after the measurement is done, but when the alarm function is enabled
+ * the hardware measures the temperature periodically by itself, so the
+ * clk must be kept on at all times.
+ */
+ ret = clk_prepare_enable(data->thermal_clk);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
+ cpufreq_cooling_unregister(data->cdev);
+ return ret;
+ }
+
+ data->tz = thermal_zone_device_register("imx_thermal_zone",
+ IMX_TRIP_NUM,
+ BIT(IMX_TRIP_PASSIVE), data,
+ &imx_tz_ops, NULL,
+ IMX_PASSIVE_DELAY,
+ IMX_POLLING_DELAY);
+ if (IS_ERR(data->tz)) {
+ ret = PTR_ERR(data->tz);
+ dev_err(&pdev->dev,
+ "failed to register thermal zone device %d\n", ret);
+ clk_disable_unprepare(data->thermal_clk);
+ cpufreq_cooling_unregister(data->cdev);
+ return ret;
+ }
+
+ /* Enable measurements at ~ 10 Hz */
+ regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
+ measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
+ regmap_write(map, TEMPSENSE1 + REG_SET, measure_freq);
+ imx_set_alarm_temp(data, data->temp_passive);
+
+ if (data->socdata->version == TEMPMON_IMX6SX)
+ imx_set_panic_temp(data, data->temp_critical);
+
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+ data->irq_enabled = true;
+ data->mode = THERMAL_DEVICE_ENABLED;
+
+ return 0;
+}
+
+static int imx_thermal_remove(struct platform_device *pdev)
+{
+ struct imx_thermal_data *data = platform_get_drvdata(pdev);
+ struct regmap *map = data->tempmon;
+
+ /* Disable measurements */
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+ if (!IS_ERR(data->thermal_clk))
+ clk_disable_unprepare(data->thermal_clk);
+
+ thermal_zone_device_unregister(data->tz);
+ cpufreq_cooling_unregister(data->cdev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int imx_thermal_suspend(struct device *dev)
+{
+ struct imx_thermal_data *data = dev_get_drvdata(dev);
+ struct regmap *map = data->tempmon;
+
+ /*
+ * The thermal sensor must be disabled here; otherwise, if the thermal
+ * core tries to read the temperature before the sensor has resumed, a
+ * wrong value will be returned because the sensor is still powered
+ * down.
+ */
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+ data->mode = THERMAL_DEVICE_DISABLED;
+ clk_disable_unprepare(data->thermal_clk);
+
+ return 0;
+}
+
+static int imx_thermal_resume(struct device *dev)
+{
+ struct imx_thermal_data *data = dev_get_drvdata(dev);
+ struct regmap *map = data->tempmon;
+
+ clk_prepare_enable(data->thermal_clk);
+ /* Enable the thermal sensor after resume */
+ regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+ regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+ data->mode = THERMAL_DEVICE_ENABLED;
+
+ return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops,
+ imx_thermal_suspend, imx_thermal_resume);
+
+static struct platform_driver imx_thermal = {
+ .driver = {
+ .name = "imx_thermal",
+ .pm = &imx_thermal_pm_ops,
+ .of_match_table = of_imx_thermal_match,
+ },
+ .probe = imx_thermal_probe,
+ .remove = imx_thermal_remove,
+};
+module_platform_driver(imx_thermal);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("Thermal driver for Freescale i.MX SoCs");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:imx-thermal");
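A worked sketch of the calibration math implemented by imx_get_sensor_data() and imx_get_temp() above. The fuse value (n1 = 1700) is hypothetical, chosen only to make the arithmetic concrete; real parts carry their own OCOTP_ANA1 value:

/*
 * Illustration only, not part of the driver. With n1 = 1700:
 *   c1 = 1000 * FACTOR0 / (FACTOR1 * n1 - FACTOR2)
 *      = 10^10 / 22862043 ~= 437 millicelsius per sensor count
 *   c2 = n1 * c1 + 1000 * 25 ~= 767900
 * so a measured count of 1643 maps to 767900 - 1643 * 437 ~= 49900,
 * i.e. roughly 50 C; lower counts mean higher temperatures.
 */
static int example_count_to_millicelsius(u32 n_meas)
{
	u32 n1 = 1700;			/* hypothetical fuse value */
	u64 c1 = 1000ULL * 10000000;	/* 1000 * FACTOR0 */
	u32 c2;

	do_div(c1, 15976 * n1 - 4297157);	/* FACTOR1 * n1 - FACTOR2 */
	c2 = n1 * c1 + 1000 * 25;		/* sensor reads n1 counts at 25 C */

	return c2 - n_meas * c1;		/* lower count -> higher temperature */
}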
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 4b2b999b7611..573a98c0366e 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -157,7 +157,8 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
ret = thermal_zone_bind_cooling_device(thermal,
tbp->trip_id, cdev,
tbp->max,
- tbp->min);
+ tbp->min,
+ tbp->usage);
if (ret)
return ret;
}
@@ -418,11 +419,17 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
}
if (sensor_specs.np == sensor_np && id == sensor_id) {
+ struct thermal_zone_device *tzd;
+
of_node_put(np);
- return thermal_zone_of_add_sensor(child, sensor_np,
- data,
- get_temp,
- get_trend);
+ tzd = thermal_zone_of_add_sensor(child, sensor_np,
+ data,
+ get_temp,
+ get_trend);
+ if (!IS_ERR(tzd))
+ tzd->ops->set_mode(tzd, THERMAL_DEVICE_ENABLED);
+
+ return tzd;
}
}
of_node_put(np);
@@ -498,7 +505,7 @@ static int thermal_of_populate_bind_params(struct device_node *np,
u32 prop;
/* Default weight. Usage is optional */
- __tbp->usage = 0;
+ __tbp->usage = THERMAL_WEIGHT_DEFAULT;
ret = of_property_read_u32(np, "contribution", &prop);
if (ret == 0)
__tbp->usage = prop;
@@ -770,6 +777,8 @@ int __init of_parse_thermal_zones(void)
for_each_child_of_node(np, child) {
struct thermal_zone_device *zone;
struct thermal_zone_params *tzp;
+ int i, mask = 0;
+ u32 prop;
tz = thermal_of_build_thermal_zone(child);
if (IS_ERR(tz)) {
@@ -792,8 +801,14 @@ int __init of_parse_thermal_zones(void)
/* No hwmon because there might be hwmon drivers registering */
tzp->no_hwmon = true;
+ if (!of_property_read_u32(child, "sustainable-power", &prop))
+ tzp->sustainable_power = prop;
+
+ for (i = 0; i < tz->ntrips; i++)
+ mask |= 1 << i;
+
zone = thermal_zone_device_register(child->name, tz->ntrips,
- 0, tz,
+ mask, tz,
ops, tzp,
tz->passive_delay,
tz->polling_delay);
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
new file mode 100644
index 000000000000..16eef40ac1c0
--- /dev/null
+++ b/drivers/thermal/power_allocator.c
@@ -0,0 +1,538 @@
+/*
+ * A power allocator to manage temperature
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "Power allocator: " fmt
+
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal_power_allocator.h>
+
+#include "thermal_core.h"
+
+#define FRAC_BITS 10
+#define int_to_frac(x) ((x) << FRAC_BITS)
+#define frac_to_int(x) ((x) >> FRAC_BITS)
+
+/**
+ * mul_frac() - multiply two fixed-point numbers
+ * @x: first multiplicand
+ * @y: second multiplicand
+ *
+ * Return: the result of multiplying two fixed-point numbers. The
+ * result is also a fixed-point number.
+ */
+static inline s64 mul_frac(s64 x, s64 y)
+{
+ return (x * y) >> FRAC_BITS;
+}
+
+/**
+ * div_frac() - divide two fixed-point numbers
+ * @x: the dividend
+ * @y: the divisor
+ *
+ * Return: the result of dividing two fixed-point numbers. The
+ * result is also a fixed-point number.
+ */
+static inline s64 div_frac(s64 x, s64 y)
+{
+ return div_s64(x << FRAC_BITS, y);
+}
+
+/**
+ * struct power_allocator_params - parameters for the power allocator governor
+ * @err_integral: accumulated error in the PID controller.
+ * @prev_err: error in the previous iteration of the PID controller.
+ * Used to calculate the derivative term.
+ * @trip_switch_on: first passive trip point of the thermal zone. The
+ * governor switches on when this trip point is crossed.
+ * @trip_max_desired_temperature: last passive trip point of the thermal
+ * zone. The temperature we are
+ * controlling for.
+ */
+struct power_allocator_params {
+ s64 err_integral;
+ s32 prev_err;
+ int trip_switch_on;
+ int trip_max_desired_temperature;
+};
+
+/**
+ * pid_controller() - PID controller
+ * @tz: thermal zone we are operating in
+ * @current_temp: the current temperature in millicelsius
+ * @control_temp: the target temperature in millicelsius
+ * @max_allocatable_power: maximum allocatable power for this thermal zone
+ *
+ * This PID controller increases the available power budget so that the
+ * temperature of the thermal zone gets as close as possible to
+ * @control_temp and limits the power if it exceeds it. k_po is the
+ * proportional term when we are overshooting, k_pu is the
+ * proportional term when we are undershooting. integral_cutoff is a
+ * threshold below which we stop accumulating the error. The
+ * accumulated error is only valid if the requested power will make
+ * the system warmer. If the system is mostly idle, there's no point
+ * in accumulating positive error.
+ *
+ * Return: The power budget for the next period.
+ */
+static u32 pid_controller(struct thermal_zone_device *tz,
+ unsigned long current_temp,
+ unsigned long control_temp,
+ u32 max_allocatable_power)
+{
+ s64 p, i, d, power_range;
+ s32 err, max_power_frac;
+ struct power_allocator_params *params = tz->governor_data;
+
+ max_power_frac = int_to_frac(max_allocatable_power);
+
+ err = ((s32)control_temp - (s32)current_temp);
+ err = int_to_frac(err);
+
+ /* Calculate the proportional term */
+ p = mul_frac(err < 0 ? tz->tzp->k_po : tz->tzp->k_pu, err);
+
+ /*
+ * Calculate the integral term
+ *
+ * if the error is less than cut off allow integration (but
+ * the integral is limited to max power)
+ */
+ i = mul_frac(tz->tzp->k_i, params->err_integral);
+
+ if (err < int_to_frac(tz->tzp->integral_cutoff)) {
+ s64 i_next = i + mul_frac(tz->tzp->k_i, err);
+
+ if (abs64(i_next) < max_power_frac) {
+ i = i_next;
+ params->err_integral += err;
+ }
+ }
+
+ /*
+ * Calculate the derivative term
+ *
+ * We do err - prev_err, so with a positive k_d, a decreasing
+ * error (i.e. driving closer to the line) results in less
+ * power being applied, slowing down the controller.
+ */
+ d = mul_frac(tz->tzp->k_d, err - params->prev_err);
+ d = div_frac(d, tz->passive_delay);
+ params->prev_err = err;
+
+ power_range = p + i + d;
+
+ /* feed-forward the known sustainable dissipatable power */
+ power_range = tz->tzp->sustainable_power + frac_to_int(power_range);
+
+ power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power);
+
+ trace_thermal_power_allocator_pid(tz, frac_to_int(err),
+ frac_to_int(params->err_integral),
+ frac_to_int(p), frac_to_int(i),
+ frac_to_int(d), power_range);
+
+ return power_range;
+}
+
+/**
+ * divvy_up_power() - divvy the allocated power between the actors
+ * @req_power: each actor's requested power
+ * @max_power: each actor's maximum available power
+ * @num_actors: size of the @req_power, @max_power and @granted_power arrays
+ * @total_req_power: sum of @req_power
+ * @power_range: total allocated power
+ * @granted_power: output array: each actor's granted power
+ * @extra_actor_power: an appropriately sized array to be used in the
+ * function as temporary storage of the extra power given
+ * to the actors
+ *
+ * This function divides the total allocated power (@power_range)
+ * fairly between the actors. It first tries to give each actor a
+ * share of the @power_range according to how much power it requested
+ * compared to the rest of the actors. For example, if only one actor
+ * requests power, then it receives all the @power_range. If
+ * three actors each request 1mW, each receives a third of the
+ * @power_range.
+ *
+ * If any actor receives more than its maximum power, then that
+ * surplus is re-divvied among the actors based on how far they are
+ * from their respective maximums.
+ *
+ * Granted power for each actor is written to @granted_power, which
+ * should've been allocated by the calling function.
+ */
+static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
+ u32 total_req_power, u32 power_range,
+ u32 *granted_power, u32 *extra_actor_power)
+{
+ u32 extra_power, capped_extra_power;
+ int i;
+
+ /*
+ * Prevent division by 0 if none of the actors request power.
+ */
+ if (!total_req_power)
+ total_req_power = 1;
+
+ capped_extra_power = 0;
+ extra_power = 0;
+ for (i = 0; i < num_actors; i++) {
+ u64 req_range = req_power[i] * power_range;
+
+ granted_power[i] = DIV_ROUND_UP_ULL(req_range, total_req_power);
+
+ if (granted_power[i] > max_power[i]) {
+ extra_power += granted_power[i] - max_power[i];
+ granted_power[i] = max_power[i];
+ }
+
+ extra_actor_power[i] = max_power[i] - granted_power[i];
+ capped_extra_power += extra_actor_power[i];
+ }
+
+ if (!extra_power)
+ return;
+
+ /*
+ * Re-divvy the reclaimed extra among actors based on
+ * how far they are from the max
+ */
+ extra_power = min(extra_power, capped_extra_power);
+ if (capped_extra_power > 0)
+ for (i = 0; i < num_actors; i++)
+ granted_power[i] += (extra_actor_power[i] *
+ extra_power) / capped_extra_power;
+}
+
+static int allocate_power(struct thermal_zone_device *tz,
+ unsigned long current_temp,
+ unsigned long control_temp)
+{
+ struct thermal_instance *instance;
+ struct power_allocator_params *params = tz->governor_data;
+ u32 *req_power, *max_power, *granted_power, *extra_actor_power;
+ u32 total_req_power, max_allocatable_power;
+ u32 total_granted_power, power_range;
+ int i, num_actors, total_weight, ret = 0;
+ int trip_max_desired_temperature = params->trip_max_desired_temperature;
+
+ mutex_lock(&tz->lock);
+
+ num_actors = 0;
+ total_weight = 0;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if ((instance->trip == trip_max_desired_temperature) &&
+ cdev_is_power_actor(instance->cdev)) {
+ num_actors++;
+ total_weight += instance->weight;
+ }
+ }
+
+ /*
+ * We need to allocate four arrays of the same size: req_power,
+ * max_power, granted_power and extra_actor_power. They are going to
+ * be needed until this function returns. Allocate them all
+ * in one go to simplify the allocation and deallocation
+ * logic.
+ */
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power));
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power));
+ BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power));
+ req_power = devm_kcalloc(&tz->device, num_actors * 4,
+ sizeof(*req_power), GFP_KERNEL);
+ if (!req_power) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ max_power = &req_power[num_actors];
+ granted_power = &req_power[2 * num_actors];
+ extra_actor_power = &req_power[3 * num_actors];
+
+ i = 0;
+ total_req_power = 0;
+ max_allocatable_power = 0;
+
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ int weight;
+ struct thermal_cooling_device *cdev = instance->cdev;
+
+ if (instance->trip != trip_max_desired_temperature)
+ continue;
+
+ if (!cdev_is_power_actor(cdev))
+ continue;
+
+ if (cdev->ops->get_requested_power(cdev, tz, &req_power[i]))
+ continue;
+
+ if (!total_weight)
+ weight = 1 << FRAC_BITS;
+ else
+ weight = instance->weight;
+
+ req_power[i] = frac_to_int(weight * req_power[i]);
+
+ if (power_actor_get_max_power(cdev, tz, &max_power[i]))
+ continue;
+
+ total_req_power += req_power[i];
+ max_allocatable_power += max_power[i];
+
+ i++;
+ }
+
+ power_range = pid_controller(tz, current_temp, control_temp,
+ max_allocatable_power);
+
+ divvy_up_power(req_power, max_power, num_actors, total_req_power,
+ power_range, granted_power, extra_actor_power);
+
+ total_granted_power = 0;
+ i = 0;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if (instance->trip != trip_max_desired_temperature)
+ continue;
+
+ if (!cdev_is_power_actor(instance->cdev))
+ continue;
+
+ power_actor_set_power(instance->cdev, instance,
+ granted_power[i]);
+ total_granted_power += granted_power[i];
+
+ i++;
+ }
+
+ trace_thermal_power_allocator(tz, req_power, total_req_power,
+ granted_power, total_granted_power,
+ num_actors, power_range,
+ max_allocatable_power, current_temp,
+ (s32)control_temp - (s32)current_temp);
+
+ devm_kfree(&tz->device, req_power);
+unlock:
+ mutex_unlock(&tz->lock);
+
+ return ret;
+}
+
+static int get_governor_trips(struct thermal_zone_device *tz,
+ struct power_allocator_params *params)
+{
+ int i, ret, last_passive;
+ bool found_first_passive;
+
+ found_first_passive = false;
+ last_passive = -1;
+ ret = -EINVAL;
+
+ for (i = 0; i < tz->trips; i++) {
+ enum thermal_trip_type type;
+
+ ret = tz->ops->get_trip_type(tz, i, &type);
+ if (ret)
+ return ret;
+
+ if (!found_first_passive) {
+ if (type == THERMAL_TRIP_PASSIVE) {
+ params->trip_switch_on = i;
+ found_first_passive = true;
+ }
+ } else if (type == THERMAL_TRIP_PASSIVE) {
+ last_passive = i;
+ } else {
+ break;
+ }
+ }
+
+ if (last_passive != -1) {
+ params->trip_max_desired_temperature = last_passive;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void reset_pid_controller(struct power_allocator_params *params)
+{
+ params->err_integral = 0;
+ params->prev_err = 0;
+}
+
+static void allow_maximum_power(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *instance;
+ struct power_allocator_params *params = tz->governor_data;
+
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ if ((instance->trip != params->trip_max_desired_temperature) ||
+ (!cdev_is_power_actor(instance->cdev)))
+ continue;
+
+ instance->target = 0;
+ instance->cdev->updated = false;
+ thermal_cdev_update(instance->cdev);
+ }
+}
+
+/**
+ * power_allocator_bind() - bind the power_allocator governor to a thermal zone
+ * @tz: thermal zone to bind it to
+ *
+ * Check that the thermal zone is valid for this governor, that is, it
+ * has two thermal trips. If so, initialize the PID controller
+ * parameters and bind it to the thermal zone.
+ *
+ * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM
+ * if we ran out of memory.
+ */
+static int power_allocator_bind(struct thermal_zone_device *tz)
+{
+ int ret;
+ struct power_allocator_params *params;
+ unsigned long switch_on_temp, control_temp;
+ u32 temperature_threshold;
+
+ if (!tz->tzp || !tz->tzp->sustainable_power) {
+ dev_err(&tz->device,
+ "power_allocator: missing sustainable_power\n");
+ return -EINVAL;
+ }
+
+ params = devm_kzalloc(&tz->device, sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ ret = get_governor_trips(tz, params);
+ if (ret) {
+ dev_err(&tz->device,
+ "thermal zone %s has wrong trip setup for power allocator\n",
+ tz->type);
+ goto free;
+ }
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+ &switch_on_temp);
+ if (ret)
+ goto free;
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+ &control_temp);
+ if (ret)
+ goto free;
+
+ temperature_threshold = control_temp - switch_on_temp;
+
+ tz->tzp->k_po = tz->tzp->k_po ?:
+ int_to_frac(tz->tzp->sustainable_power) / temperature_threshold;
+ tz->tzp->k_pu = tz->tzp->k_pu ?:
+ int_to_frac(2 * tz->tzp->sustainable_power) /
+ temperature_threshold;
+ tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000;
+ /*
+ * The default for k_d and integral_cutoff is 0, so we can
+ * leave them as they are.
+ */
+
+ reset_pid_controller(params);
+
+ tz->governor_data = params;
+
+ return 0;
+
+free:
+ devm_kfree(&tz->device, params);
+ return ret;
+}
+
+static void power_allocator_unbind(struct thermal_zone_device *tz)
+{
+ dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
+ devm_kfree(&tz->device, tz->governor_data);
+ tz->governor_data = NULL;
+}
+
+static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
+{
+ int ret;
+ unsigned long switch_on_temp, control_temp, current_temp;
+ struct power_allocator_params *params = tz->governor_data;
+
+ /*
+ * We get called for every trip point but we only need to do
+ * our calculations once
+ */
+ if (trip != params->trip_max_desired_temperature)
+ return 0;
+
+ ret = thermal_zone_get_temp(tz, &current_temp);
+ if (ret) {
+ dev_warn(&tz->device, "Failed to get temperature: %d\n", ret);
+ return ret;
+ }
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
+ &switch_on_temp);
+ if (ret) {
+ dev_warn(&tz->device,
+ "Failed to get switch on temperature: %d\n", ret);
+ return ret;
+ }
+
+ if (current_temp < switch_on_temp) {
+ tz->passive = 0;
+ reset_pid_controller(params);
+ allow_maximum_power(tz);
+ return 0;
+ }
+
+ tz->passive = 1;
+
+ ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature,
+ &control_temp);
+ if (ret) {
+ dev_warn(&tz->device,
+ "Failed to get the maximum desired temperature: %d\n",
+ ret);
+ return ret;
+ }
+
+ return allocate_power(tz, current_temp, control_temp);
+}
+
+static struct thermal_governor thermal_gov_power_allocator = {
+ .name = "power_allocator",
+ .bind_to_tz = power_allocator_bind,
+ .unbind_from_tz = power_allocator_unbind,
+ .throttle = power_allocator_throttle,
+};
+
+int thermal_gov_power_allocator_register(void)
+{
+ return thermal_register_governor(&thermal_gov_power_allocator);
+}
+
+void thermal_gov_power_allocator_unregister(void)
+{
+ thermal_unregister_governor(&thermal_gov_power_allocator);
+}
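To make the fixed-point PID arithmetic above concrete, here is a small sketch built on the helpers defined in this file. The tuning numbers (sustainable_power = 2500 mW, a 15 C gap between the switch-on and control trips, an undershoot of 3 C) are hypothetical, and the integral and derivative terms are omitted:

/*
 * Illustration only. power_allocator_bind() derives
 *   k_pu = int_to_frac(2 * sustainable_power) / (control - switch_on)
 *        = (2 * 2500 << 10) / 15000 = 341
 * and for err = 3000 (millicelsius of undershoot):
 *   p = mul_frac(k_pu, int_to_frac(3000)) = (341 * 3072000) >> 10 = 1023000
 *   frac_to_int(p) = 999 mW
 * so pid_controller() hands divvy_up_power() a budget of roughly
 * 2500 + 999 = 3499 mW, before clamping it to the actors' total
 * maximum power.
 */
static u32 example_power_budget(s32 err_mc)
{
	s64 k_pu = int_to_frac(2 * 2500) / 15000;	/* hypothetical tuning */
	s64 p = mul_frac(k_pu, int_to_frac((s64)err_mc));

	return 2500 + frac_to_int(p);			/* I and D terms omitted */
}

divvy_up_power() then splits that budget proportionally to the requested power: two actors asking for 2000 mW and 1000 mW would be granted roughly two thirds and one third of it, with anything above an actor's maximum redistributed to the others.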
diff --git a/drivers/thermal/scpi-thermal.c b/drivers/thermal/scpi-thermal.c
new file mode 100644
index 000000000000..41eb908e4b70
--- /dev/null
+++ b/drivers/thermal/scpi-thermal.c
@@ -0,0 +1,308 @@
+#include <linux/cpu_cooling.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/thermal.h>
+#include <linux/topology.h>
+
+#define SOC_SENSOR "SENSOR_TEMP_SOC"
+
+enum cluster_type {
+ CLUSTER_BIG = 0,
+ CLUSTER_LITTLE,
+ NUM_CLUSTERS
+};
+
+struct cluster_power_coefficients {
+ int dyn_coeff;
+ int static_coeff;
+ int cache_static_coeff;
+} cluster_data[] = {
+ [CLUSTER_BIG] = {
+ .dyn_coeff = 530,
+ .static_coeff = 103, /* 75 mW @ 85C/0.9V */
+ .cache_static_coeff = 88, /* 64 mW @ 85C/0.9V */
+ },
+ [CLUSTER_LITTLE] = {
+ .dyn_coeff = 140,
+ .static_coeff = 36, /* 26 mW @ 85C/0.9V */
+ .cache_static_coeff = 73, /* 53 mW @ 85C/0.9V */
+ },
+};
+
+struct scpi_sensor {
+ u16 sensor_id;
+ unsigned long prev_temp;
+ u32 alpha;
+ struct thermal_zone_device *tzd;
+ struct thermal_zone_device *gpu_tzd;
+ struct cpumask cluster[NUM_CLUSTERS];
+ struct thermal_cooling_device *cdevs[NUM_CLUSTERS];
+};
+
+static struct scpi_ops *scpi_ops;
+struct scpi_sensor scpi_temp_sensor;
+
+#define FRAC_BITS 10
+#define int_to_frac(x) ((x) << FRAC_BITS)
+#define frac_to_int(x) ((x) >> FRAC_BITS)
+
+/**
+ * mul_frac() - multiply two fixed-point numbers
+ * @x: first multiplicand
+ * @y: second multiplicand
+ *
+ * Return: the result of multiplying two fixed-point numbers. The
+ * result is also a fixed-point number.
+ */
+static inline s64 mul_frac(s64 x, s64 y)
+{
+ return (x * y) >> FRAC_BITS;
+}
+
+static unsigned long get_temperature_scale(unsigned long temp)
+{
+ int i, t_exp = 1, t_scale = 0;
+ int coeff[] = { 32000, 4700, -80, 2 }; /* * 1E6 */
+
+ for (i = 0; i < 4; i++) {
+ t_scale += coeff[i] * t_exp;
+ t_exp *= temp;
+ }
+
+ return t_scale / 1000; /* the value returned needs to be /1E3 */
+}
+
+static unsigned long get_voltage_scale(unsigned long u_volt)
+{
+ unsigned long m_volt = u_volt / 1000;
+ unsigned long v_scale;
+
+ v_scale = m_volt * m_volt * m_volt; /* = (m_V^3) / (900 ^ 3) = */
+
+ return v_scale / 1000000; /* the value returned needs to be /(1E3) */
+}
+
+/* voltage in uV */
+static int get_static_power(cpumask_t *cpumask, int interval,
+ unsigned long u_volt, u32 *static_power)
+{
+ unsigned long temperature, t_scale, v_scale;
+ u32 cpu_coeff;
+ int nr_cpus = cpumask_weight(cpumask);
+ enum cluster_type cluster =
+ topology_physical_package_id(cpumask_any(cpumask));
+
+ if (!scpi_temp_sensor.tzd)
+ return -ENODEV;
+
+ cpu_coeff = cluster_data[cluster].static_coeff;
+
+ /* temperature in mC */
+ temperature = scpi_temp_sensor.tzd->temperature / 1000;
+
+ t_scale = get_temperature_scale(temperature);
+ v_scale = get_voltage_scale(u_volt);
+
+ *static_power = nr_cpus * (cpu_coeff * t_scale * v_scale) / 1000000;
+
+ if (nr_cpus) {
+ u32 cache_coeff = cluster_data[cluster].cache_static_coeff;
+
+ /* cache leakage */
+ *static_power += (cache_coeff * v_scale * t_scale) / 1000000;
+ }
+
+ return 0;
+}
+
+static int get_temp_value(void *data, long *temp)
+{
+ struct scpi_sensor *sensor = (struct scpi_sensor *)data;
+ u32 val;
+ int ret;
+ unsigned long est_temp;
+
+ ret = scpi_ops->sensor_get_value(sensor->sensor_id, &val);
+ if (ret)
+ return ret;
+
+ if (!sensor->prev_temp)
+ sensor->prev_temp = val;
+
+ est_temp = mul_frac(sensor->alpha, val) +
+ mul_frac((int_to_frac(1) - sensor->alpha), sensor->prev_temp);
+
+ sensor->prev_temp = est_temp;
+ *temp = est_temp;
+
+ return 0;
+}
+
+static int get_temp_for_gpu(struct thermal_zone_device *tz, unsigned long *temp)
+{
+ struct scpi_sensor *sensor = (struct scpi_sensor *)tz->devdata;
+
+ *temp = sensor->tzd->temperature;
+
+ return 0;
+}
+
+static void update_debugfs(struct scpi_sensor *sensor_data)
+{
+ struct dentry *dentry_f, *filter_d;
+
+ filter_d = debugfs_create_dir("thermal_lpf_filter", NULL);
+ if (IS_ERR_OR_NULL(filter_d)) {
+ pr_warning("unable to create debugfs directory for the LPF filter\n");
+ return;
+ }
+
+ dentry_f = debugfs_create_u32("alpha", S_IWUSR | S_IRUGO, filter_d,
+ &sensor_data->alpha);
+ if (IS_ERR_OR_NULL(dentry_f)) {
+ pr_warn("Unable to create debugfs file: alpha\n");
+ return;
+ }
+}
+
+static struct thermal_zone_device_ops gpu_dummy_tz_ops = {
+ .get_temp = get_temp_for_gpu,
+};
+
+static struct thermal_zone_params gpu_dummy_tzp = {
+ .governor_name = "user_space",
+};
+
+static int scpi_thermal_probe(struct platform_device *pdev)
+{
+ struct scpi_sensor *sensor_data = &scpi_temp_sensor;
+ struct device_node *np;
+ int sensor, cpu;
+ int i;
+
+ if (!cpufreq_frequency_get_table(0)) {
+ dev_info(&pdev->dev,
+ "Frequency table not initialized. Deferring probe...\n");
+ return -EPROBE_DEFER;
+ }
+
+ scpi_ops = get_scpi_ops();
+ if (!scpi_ops)
+ return -EIO;
+
+ platform_set_drvdata(pdev, sensor_data);
+
+ for_each_possible_cpu(cpu) {
+ int cluster_id = topology_physical_package_id(cpu);
+ if (cluster_id >= NUM_CLUSTERS) {
+ pr_warn("Invalid cluster id: %d >= %d\n", cluster_id, NUM_CLUSTERS);
+ goto error;
+ }
+
+ cpumask_set_cpu(cpu, &sensor_data->cluster[cluster_id]);
+ }
+
+ for (i = 0; i < NUM_CLUSTERS; i++) {
+ char node[16];
+ enum cluster_type cluster =
+ topology_physical_package_id(cpumask_any(&sensor_data->cluster[i]));
+
+ snprintf(node, 16, "cluster%d", i);
+ np = of_find_node_by_name(NULL, node);
+
+ if (!np)
+ dev_info(&pdev->dev, "Node not found: %s\n", node);
+
+ sensor_data->cdevs[i] =
+ of_cpufreq_power_cooling_register(np,
+ &sensor_data->cluster[i],
+ cluster_data[cluster].dyn_coeff,
+ get_static_power);
+
+ if (IS_ERR(sensor_data->cdevs[i]))
+ dev_warn(&pdev->dev,
+ "Error registering cooling device: %d\n", i);
+ }
+
+ if ((sensor = scpi_ops->sensor_get_id(SOC_SENSOR)) < 0) {
+ dev_warn(&pdev->dev, "%s not found. ret=%d\n", SOC_SENSOR, sensor);
+ goto error;
+ }
+
+ sensor_data->sensor_id = (u16)sensor;
+ dev_info(&pdev->dev, "Probed %s sensor. Id=%hu\n", SOC_SENSOR, sensor_data->sensor_id);
+
+ /*
+ * alpha ~= 2 / (N + 1), with N the window of a rolling mean. We
+ * use 8-bit fixed point arithmetic. For a rolling average of
+ * window 20, alpha = 2 / (20 + 1) ~= 0.09523809523809523.
+ * In 8-bit fixed point arithmetic, 0.09523809523809523 * 256
+ * ~= 24
+ */
+ sensor_data->alpha = 24;
+
+ sensor_data->tzd = thermal_zone_of_sensor_register(&pdev->dev,
+ sensor_data->sensor_id,
+ sensor_data,
+ get_temp_value, NULL);
+
+ if (IS_ERR(sensor_data->tzd)) {
+ dev_warn(&pdev->dev, "Error registering sensor: %p\n", sensor_data->tzd);
+ return PTR_ERR(sensor_data->tzd);
+ }
+
+ sensor_data->gpu_tzd = thermal_zone_device_register("gpu", 0, 0,
+ sensor_data,
+ &gpu_dummy_tz_ops,
+ &gpu_dummy_tzp,
+ 0, 0);
+ if (IS_ERR(sensor_data->gpu_tzd)) {
+ int ret = PTR_ERR(sensor_data->gpu_tzd);
+
+ dev_warn(&pdev->dev, "Error registering gpu thermal zone: %d\n",
+ ret);
+ return ret;
+ }
+
+ update_debugfs(sensor_data);
+
+ thermal_zone_device_update(sensor_data->tzd);
+
+ return 0;
+
+error:
+ return -ENODEV;
+}
+
+static int scpi_thermal_remove(struct platform_device *pdev)
+{
+ struct scpi_sensor *sensor = platform_get_drvdata(pdev);
+
+ thermal_zone_device_unregister(sensor->tzd);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+static struct of_device_id scpi_thermal_of_match[] = {
+ { .compatible = "arm,scpi-thermal" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, scpi_thermal_of_match);
+
+static struct platform_driver scpi_thermal_platdrv = {
+ .driver = {
+ .name = "scpi-thermal",
+ .owner = THIS_MODULE,
+ .of_match_table = scpi_thermal_of_match,
+ },
+ .probe = scpi_thermal_probe,
+ .remove = scpi_thermal_remove,
+};
+module_platform_driver(scpi_thermal_platdrv);
+
+MODULE_LICENSE("GPL");
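A quick worked pass through the low-pass filter applied in get_temp_value() above, using the constants as defined in this file (FRAC_BITS = 10, alpha = 24); the temperature samples are made up:

/*
 * Illustration only. With prev_temp = 50000 and a new raw reading of 54000:
 *   est = mul_frac(24, 54000) + mul_frac(1024 - 24, 50000)
 *       = (1296000 >> 10) + (50000000 >> 10)
 *       = 1265 + 48828 = 50093
 * i.e. each sample moves the reported value about alpha/1024 ~= 2.3% of
 * the way toward the raw reading, smoothing out sensor noise.
 */
static unsigned long example_filter_step(u32 alpha, unsigned long prev, u32 raw)
{
	return mul_frac(alpha, raw) + mul_frac(int_to_frac(1) - alpha, prev);
}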
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index c83a3097ef90..4be38dee0a54 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -23,6 +23,7 @@
*/
#include <linux/thermal.h>
+#include <trace/events/thermal.h>
#include "thermal_core.h"
@@ -122,8 +123,10 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
trend = get_tz_trend(tz, trip);
- if (tz->temperature >= trip_temp)
+ if (tz->temperature >= trip_temp) {
throttle = true;
+ trace_thermal_zone_trip(tz, trip, trip_type);
+ }
dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n",
trip, trip_type, trip_temp, trend, throttle);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index edc0cb88f1d0..0aa2902a44fe 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -38,6 +38,9 @@
#include <net/netlink.h>
#include <net/genetlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal.h>
+
#include "thermal_core.h"
#include "thermal_hwmon.h"
@@ -72,6 +75,58 @@ static struct thermal_governor *__find_governor(const char *name)
return NULL;
}
+/**
+ * bind_previous_governor() - bind the previous governor of the thermal zone
+ * @tz: a valid pointer to a struct thermal_zone_device
+ * @failed_gov_name: the name of the governor that failed to register
+ *
+ * Register the previous governor of the thermal zone after a new
+ * governor has failed to be bound.
+ */
+static void bind_previous_governor(struct thermal_zone_device *tz,
+ const char *failed_gov_name)
+{
+ if (tz->governor && tz->governor->bind_to_tz) {
+ if (tz->governor->bind_to_tz(tz)) {
+ dev_err(&tz->device,
+ "governor %s failed to bind and the previous one (%s) failed to bind again, thermal zone %s has no governor\n",
+ failed_gov_name, tz->governor->name, tz->type);
+ tz->governor = NULL;
+ }
+ }
+}
+
+/**
+ * thermal_set_governor() - Switch to another governor
+ * @tz: a valid pointer to a struct thermal_zone_device
+ * @new_gov: pointer to the new governor
+ *
+ * Change the governor of thermal zone @tz.
+ *
+ * Return: 0 on success, an error if the new governor's bind_to_tz() failed.
+ */
+static int thermal_set_governor(struct thermal_zone_device *tz,
+ struct thermal_governor *new_gov)
+{
+ int ret = 0;
+
+ if (tz->governor && tz->governor->unbind_from_tz)
+ tz->governor->unbind_from_tz(tz);
+
+ if (new_gov && new_gov->bind_to_tz) {
+ ret = new_gov->bind_to_tz(tz);
+ if (ret) {
+ bind_previous_governor(tz, new_gov->name);
+
+ return ret;
+ }
+ }
+
+ tz->governor = new_gov;
+
+ return ret;
+}
+
int thermal_register_governor(struct thermal_governor *governor)
{
int err;
@@ -104,8 +159,15 @@ int thermal_register_governor(struct thermal_governor *governor)
name = pos->tzp->governor_name;
- if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH))
- pos->governor = governor;
+ if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH)) {
+ int ret;
+
+ ret = thermal_set_governor(pos, governor);
+ if (ret)
+ dev_err(&pos->device,
+ "Failed to set governor %s for thermal zone %s: %d\n",
+ governor->name, pos->type, ret);
+ }
}
mutex_unlock(&thermal_list_lock);
@@ -131,7 +193,7 @@ void thermal_unregister_governor(struct thermal_governor *governor)
list_for_each_entry(pos, &thermal_tz_list, node) {
if (!strnicmp(pos->governor->name, governor->name,
THERMAL_NAME_LENGTH))
- pos->governor = NULL;
+ thermal_set_governor(pos, NULL);
}
mutex_unlock(&thermal_list_lock);
@@ -213,14 +275,16 @@ static void print_bind_err_msg(struct thermal_zone_device *tz,
}
static void __bind(struct thermal_zone_device *tz, int mask,
- struct thermal_cooling_device *cdev)
+ struct thermal_cooling_device *cdev,
+ unsigned int weight)
{
int i, ret;
for (i = 0; i < tz->trips; i++) {
if (mask & (1 << i)) {
ret = thermal_zone_bind_cooling_device(tz, i, cdev,
- THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT, THERMAL_NO_LIMIT,
+ weight);
if (ret)
print_bind_err_msg(tz, cdev, ret);
}
@@ -266,7 +330,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
if (tzp->tbp[i].match(pos, cdev))
continue;
tzp->tbp[i].cdev = cdev;
- __bind(pos, tzp->tbp[i].trip_mask, cdev);
+ __bind(pos, tzp->tbp[i].trip_mask, cdev,
+ tzp->tbp[i].weight);
}
}
@@ -304,7 +369,8 @@ static void bind_tz(struct thermal_zone_device *tz)
if (tzp->tbp[i].match(tz, pos))
continue;
tzp->tbp[i].cdev = pos;
- __bind(tz, tzp->tbp[i].trip_mask, pos);
+ __bind(tz, tzp->tbp[i].trip_mask, pos,
+ tzp->tbp[i].weight);
}
}
exit:
@@ -356,6 +422,8 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
if (tz->temperature < trip_temp)
return;
+ trace_thermal_zone_trip(tz, trip, trip_type);
+
if (tz->ops->notify)
tz->ops->notify(tz, trip, trip_type);
@@ -451,6 +519,7 @@ static void update_temperature(struct thermal_zone_device *tz)
tz->temperature = temp;
mutex_unlock(&tz->lock);
+ trace_thermal_temperature(tz);
dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
tz->last_temperature, tz->temperature);
}
@@ -693,7 +762,8 @@ passive_store(struct device *dev, struct device_attribute *attr,
thermal_zone_bind_cooling_device(tz,
THERMAL_TRIPS_NONE, cdev,
THERMAL_NO_LIMIT,
- THERMAL_NO_LIMIT);
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
}
mutex_unlock(&thermal_list_lock);
if (!tz->passive_delay)
@@ -741,8 +811,9 @@ policy_store(struct device *dev, struct device_attribute *attr,
if (!gov)
goto exit;
- tz->governor = gov;
- ret = count;
+ ret = thermal_set_governor(tz, gov);
+ if (!ret)
+ ret = count;
exit:
mutex_unlock(&thermal_governor_lock);
@@ -785,6 +856,154 @@ emul_temp_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
#endif/*CONFIG_THERMAL_EMULATION*/
+static ssize_t
+sustainable_power_show(struct device *dev, struct device_attribute *devattr,
+ char *buf)
+{
+ struct thermal_zone_device *tz = to_thermal_zone(dev);
+
+ if (tz->tzp)
+ return sprintf(buf, "%u\n", tz->tzp->sustainable_power);
+ else
+ return -EIO;
+}
+
+static ssize_t
+sustainable_power_store(struct device *dev, struct device_attribute *devattr,
+ const char *buf, size_t count)
+{
+ struct thermal_zone_device *tz = to_thermal_zone(dev);
+ u32 sustainable_power;
+
+ if (!tz->tzp)
+ return -EIO;
+
+ if (kstrtou32(buf, 10, &sustainable_power))
+ return -EINVAL;
+
+ tz->tzp->sustainable_power = sustainable_power;
+
+ return count;
+}
+static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show,
+ sustainable_power_store);
+
+#define create_s32_tzp_attr(name) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *devattr, \
+ char *buf) \
+ { \
+ struct thermal_zone_device *tz = to_thermal_zone(dev); \
+ \
+ if (tz->tzp) \
+ return sprintf(buf, "%u\n", tz->tzp->name); \
+ else \
+ return -EIO; \
+ } \
+ \
+ static ssize_t \
+ name##_store(struct device *dev, struct device_attribute *devattr, \
+ const char *buf, size_t count) \
+ { \
+ struct thermal_zone_device *tz = to_thermal_zone(dev); \
+ s32 value; \
+ \
+ if (!tz->tzp) \
+ return -EIO; \
+ \
+ if (kstrtos32(buf, 10, &value)) \
+ return -EINVAL; \
+ \
+ tz->tzp->name = value; \
+ \
+ return count; \
+ } \
+ static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store)
+
+create_s32_tzp_attr(k_po);
+create_s32_tzp_attr(k_pu);
+create_s32_tzp_attr(k_i);
+create_s32_tzp_attr(k_d);
+create_s32_tzp_attr(integral_cutoff);
+#undef create_s32_tzp_attr
+
+static struct device_attribute *dev_tzp_attrs[] = {
+ &dev_attr_sustainable_power,
+ &dev_attr_k_po,
+ &dev_attr_k_pu,
+ &dev_attr_k_i,
+ &dev_attr_k_d,
+ &dev_attr_integral_cutoff,
+};
+
+static int create_tzp_attrs(struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev_tzp_attrs); i++) {
+ int ret;
+ struct device_attribute *dev_attr = dev_tzp_attrs[i];
+
+ ret = device_create_file(dev, dev_attr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * power_actor_get_max_power() - get the maximum power that a cdev can consume
+ * @cdev: pointer to &thermal_cooling_device
+ * @tz: a valid thermal zone device pointer
+ * @max_power: pointer in which to store the maximum power
+ *
+ * Calculate the maximum power consumption in milliwatts that the
+ * cooling device can currently consume and store it in @max_power.
+ *
+ * Return: 0 on success, -EINVAL if @cdev doesn't support the
+ * power_actor API or -E* on other error.
+ */
+int power_actor_get_max_power(struct thermal_cooling_device *cdev,
+ struct thermal_zone_device *tz, u32 *max_power)
+{
+ if (!cdev_is_power_actor(cdev))
+ return -EINVAL;
+
+ return cdev->ops->state2power(cdev, tz, 0, max_power);
+}
+
+/**
+ * power_actor_set_power() - limit the maximum power that a cooling device can consume
+ * @cdev: pointer to &thermal_cooling_device
+ * @instance: thermal instance to update
+ * @power: the power in milliwatts
+ *
+ * Set the cooling device to consume at most @power milliwatts.
+ *
+ * Return: 0 on success, -EINVAL if the cooling device does not
+ * implement the power actor API or -E* for other failures.
+ */
+int power_actor_set_power(struct thermal_cooling_device *cdev,
+ struct thermal_instance *instance, u32 power)
+{
+ unsigned long state;
+ int ret;
+
+ if (!cdev_is_power_actor(cdev))
+ return -EINVAL;
+
+ ret = cdev->ops->power2state(cdev, instance->tz, power, &state);
+ if (ret)
+ return ret;
+
+ instance->target = state;
+ cdev->updated = false;
+ thermal_cdev_update(cdev);
+
+ return 0;
+}
+
static DEVICE_ATTR(type, 0444, type_show, NULL);
static DEVICE_ATTR(temp, 0444, temp_show, NULL);
static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -876,6 +1095,34 @@ thermal_cooling_device_trip_point_show(struct device *dev,
return sprintf(buf, "%d\n", instance->trip);
}
+static ssize_t
+thermal_cooling_device_weight_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct thermal_instance *instance;
+
+ instance = container_of(attr, struct thermal_instance, weight_attr);
+
+ return sprintf(buf, "%d\n", instance->weight);
+}
+
+static ssize_t
+thermal_cooling_device_weight_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct thermal_instance *instance;
+ int ret, weight;
+
+ ret = kstrtoint(buf, 0, &weight);
+ if (ret)
+ return ret;
+
+ instance = container_of(attr, struct thermal_instance, weight_attr);
+ instance->weight = weight;
+
+ return count;
+}
/* Device management */
/**
@@ -890,6 +1137,9 @@ thermal_cooling_device_trip_point_show(struct device *dev,
* @lower: the Minimum cooling state can be used for this trip point.
* THERMAL_NO_LIMIT means no lower limit,
* and the cooling device can be in cooling state 0.
+ * @weight: The weight of the cooling device to be bound to the
+ * thermal zone. Use THERMAL_WEIGHT_DEFAULT for the
+ * default value
*
 * This interface function binds a thermal cooling device to a given trip
* point of a thermal zone device.
@@ -900,7 +1150,8 @@ thermal_cooling_device_trip_point_show(struct device *dev,
int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
int trip,
struct thermal_cooling_device *cdev,
- unsigned long upper, unsigned long lower)
+ unsigned long upper, unsigned long lower,
+ unsigned int weight)
{
struct thermal_instance *dev;
struct thermal_instance *pos;
@@ -943,6 +1194,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
dev->upper = upper;
dev->lower = lower;
dev->target = THERMAL_NO_TARGET;
+ dev->weight = weight;
result = get_idr(&tz->idr, &tz->lock, &dev->id);
if (result)
@@ -963,6 +1215,16 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (result)
goto remove_symbol_link;
+ sprintf(dev->weight_attr_name, "cdev%d_weight", dev->id);
+ sysfs_attr_init(&dev->weight_attr.attr);
+ dev->weight_attr.attr.name = dev->weight_attr_name;
+ dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO;
+ dev->weight_attr.show = thermal_cooling_device_weight_show;
+ dev->weight_attr.store = thermal_cooling_device_weight_store;
+ result = device_create_file(&tz->device, &dev->weight_attr);
+ if (result)
+ goto remove_trip_file;
+
mutex_lock(&tz->lock);
mutex_lock(&cdev->lock);
list_for_each_entry(pos, &tz->thermal_instances, tz_node)
@@ -980,6 +1242,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (!result)
return 0;
+ device_remove_file(&tz->device, &dev->weight_attr);
+remove_trip_file:
device_remove_file(&tz->device, &dev->attr);
remove_symbol_link:
sysfs_remove_link(&tz->device.kobj, dev->name);
@@ -1271,6 +1535,7 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
mutex_unlock(&cdev->lock);
cdev->ops->set_cur_state(cdev, target);
cdev->updated = true;
+ trace_cdev_update(cdev, target);
dev_dbg(&cdev->device, "set to state %lu\n", target);
}
EXPORT_SYMBOL(thermal_cdev_update);
@@ -1351,7 +1616,8 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask)
tz->trip_temp_attrs[indx].name;
tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO;
tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show;
- if (mask & (1 << indx)) {
+ if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) &&
+ mask & (1 << indx)) {
tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR;
tz->trip_temp_attrs[indx].attr.store =
trip_point_temp_store;
@@ -1428,7 +1694,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)
struct thermal_zone_device *thermal_zone_device_register(const char *type,
int trips, int mask, void *devdata,
struct thermal_zone_device_ops *ops,
- const struct thermal_zone_params *tzp,
+ struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{
struct thermal_zone_device *tz;
@@ -1436,6 +1702,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
int result;
int count;
int passive = 0;
+ struct thermal_governor *governor;
if (type && strlen(type) >= THERMAL_NAME_LENGTH)
return ERR_PTR(-EINVAL);
@@ -1522,13 +1789,24 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
if (result)
goto unregister;
+ /* Add thermal zone params */
+ result = create_tzp_attrs(&tz->device);
+ if (result)
+ goto unregister;
+
/* Update 'this' zone's governor information */
mutex_lock(&thermal_governor_lock);
if (tz->tzp)
- tz->governor = __find_governor(tz->tzp->governor_name);
+ governor = __find_governor(tz->tzp->governor_name);
else
- tz->governor = def_governor;
+ governor = def_governor;
+
+ result = thermal_set_governor(tz, governor);
+ if (result) {
+ mutex_unlock(&thermal_governor_lock);
+ goto unregister;
+ }
mutex_unlock(&thermal_governor_lock);
@@ -1618,7 +1896,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
device_remove_file(&tz->device, &dev_attr_mode);
device_remove_file(&tz->device, &dev_attr_policy);
remove_trip_attrs(tz);
- tz->governor = NULL;
+ thermal_set_governor(tz, NULL);
thermal_remove_hwmon_sysfs(tz);
release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
@@ -1781,7 +2059,11 @@ static int __init thermal_register_governors(void)
if (result)
return result;
- return thermal_gov_user_space_register();
+ result = thermal_gov_user_space_register();
+ if (result)
+ return result;
+
+ return thermal_gov_power_allocator_register();
}
static void thermal_unregister_governors(void)
@@ -1789,6 +2071,7 @@ static void thermal_unregister_governors(void)
thermal_gov_step_wise_unregister();
thermal_gov_fair_share_unregister();
thermal_gov_user_space_unregister();
+ thermal_gov_power_allocator_unregister();
}
static int __init thermal_init(void)
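The power_actor_get_max_power()/power_actor_set_power() helpers added above assume the cooling device implements power-aware callbacks. A minimal sketch of such a cooling device follows; the linear state-to-power mapping and its constants are entirely hypothetical (a real implementation, such as cpufreq cooling, derives these figures from OPP data):

/* Illustration only: a cooling device whose states map linearly to power. */
static int toy_get_requested_power(struct thermal_cooling_device *cdev,
				   struct thermal_zone_device *tz, u32 *power)
{
	*power = 1000;			/* hypothetical: currently drawing 1 W */
	return 0;
}

static int toy_state2power(struct thermal_cooling_device *cdev,
			   struct thermal_zone_device *tz,
			   unsigned long state, u32 *power)
{
	*power = 2000 - state * 250;	/* state 0 -> 2000 mW, each step -250 mW */
	return 0;
}

static int toy_power2state(struct thermal_cooling_device *cdev,
			   struct thermal_zone_device *tz,
			   u32 power, unsigned long *state)
{
	*state = power >= 2000 ? 0 : (2000 - power) / 250;
	return 0;
}

static struct thermal_cooling_device_ops toy_power_actor_ops = {
	/* get_max_state/get_cur_state/set_cur_state omitted for brevity */
	.get_requested_power	= toy_get_requested_power,
	.state2power		= toy_state2power,
	.power2state		= toy_power2state,
};

With these in place, power_actor_set_power() only has to call ->power2state(), store the resulting state in the thermal instance and push it out through thermal_cdev_update(), which is what the power_allocator governor relies on when it hands each actor its granted power.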
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 3db339fb636f..d4cb0bf3845a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -46,8 +46,11 @@ struct thermal_instance {
unsigned long target; /* expected cooling state */
char attr_name[THERMAL_NAME_LENGTH];
struct device_attribute attr;
+ char weight_attr_name[THERMAL_NAME_LENGTH];
+ struct device_attribute weight_attr;
struct list_head tz_node; /* node in tz->thermal_instances */
struct list_head cdev_node; /* node in cdev->thermal_instances */
+ unsigned int weight; /* The weight of the cooling device */
};
int thermal_register_governor(struct thermal_governor *);
@@ -77,6 +80,14 @@ static inline int thermal_gov_user_space_register(void) { return 0; }
static inline void thermal_gov_user_space_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_USER_SPACE */
+#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
+int thermal_gov_power_allocator_register(void);
+void thermal_gov_power_allocator_unregister(void);
+#else
+static inline int thermal_gov_power_allocator_register(void) { return 0; }
+static inline void thermal_gov_power_allocator_unregister(void) {}
+#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
+
/* device tree support */
#ifdef CONFIG_THERMAL_OF
int of_parse_thermal_zones(void);
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
new file mode 100644
index 000000000000..cb45e729adb5
--- /dev/null
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -0,0 +1,437 @@
+/*
+ * OMAP thermal driver interface
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ * Contact:
+ * Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/thermal.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_cooling.h>
+#include <linux/of.h>
+
+#include "ti-thermal.h"
+#include "ti-bandgap.h"
+
+/* common data structures */
+struct ti_thermal_data {
+ struct thermal_zone_device *ti_thermal;
+ struct thermal_zone_device *pcb_tz;
+ struct thermal_cooling_device *cool_dev;
+ struct ti_bandgap *bgp;
+ enum thermal_device_mode mode;
+ struct work_struct thermal_wq;
+ int sensor_id;
+ bool our_zone;
+};
+
+static void ti_thermal_work(struct work_struct *work)
+{
+ struct ti_thermal_data *data = container_of(work,
+ struct ti_thermal_data, thermal_wq);
+
+ thermal_zone_device_update(data->ti_thermal);
+
+ dev_dbg(&data->ti_thermal->device, "updated thermal zone %s\n",
+ data->ti_thermal->type);
+}
+
+/**
+ * ti_thermal_hotspot_temperature - returns sensor extrapolated temperature
+ * @t: omap sensor temperature
+ * @s: omap sensor slope value
+ * @c: omap sensor const value
+ */
+static inline int ti_thermal_hotspot_temperature(int t, int s, int c)
+{
+ int delta = t * s / 1000 + c;
+
+ if (delta < 0)
+ delta = 0;
+
+ return t + delta;
+}
+
+/* thermal zone ops */
+/* Get temperature callback function for thermal zone */
+static inline int __ti_thermal_get_temp(void *devdata, long *temp)
+{
+ struct thermal_zone_device *pcb_tz = NULL;
+ struct ti_thermal_data *data = devdata;
+ struct ti_bandgap *bgp;
+ const struct ti_temp_sensor *s;
+ int ret, tmp, slope, constant;
+ unsigned long pcb_temp;
+
+ if (!data)
+ return 0;
+
+ bgp = data->bgp;
+ s = &bgp->conf->sensors[data->sensor_id];
+
+ ret = ti_bandgap_read_temperature(bgp, data->sensor_id, &tmp);
+ if (ret)
+ return ret;
+
+ /* Default constants */
+ slope = s->slope;
+ constant = s->constant;
+
+ pcb_tz = data->pcb_tz;
+ /* In case pcb zone is available, use the extrapolation rule with it */
+ if (!IS_ERR(pcb_tz)) {
+ ret = thermal_zone_get_temp(pcb_tz, &pcb_temp);
+ if (!ret) {
+ tmp -= pcb_temp; /* got a valid PCB temp */
+ slope = s->slope_pcb;
+ constant = s->constant_pcb;
+ } else {
+ dev_err(bgp->dev,
+ "Failed to read PCB state. Using defaults\n");
+ ret = 0;
+ }
+ }
+ *temp = ti_thermal_hotspot_temperature(tmp, slope, constant);
+
+ return ret;
+}
+
+static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
+ unsigned long *temp)
+{
+ struct ti_thermal_data *data = thermal->devdata;
+
+ return __ti_thermal_get_temp(data, temp);
+}
+
+/* Bind callback functions for thermal zone */
+static int ti_thermal_bind(struct thermal_zone_device *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ struct ti_thermal_data *data = thermal->devdata;
+ int id;
+
+ if (!data || IS_ERR(data))
+ return -ENODEV;
+
+ /* check if this is the cooling device we registered */
+ if (data->cool_dev != cdev)
+ return 0;
+
+ id = data->sensor_id;
+
+ /* Simple case: two trips, one passive and one critical */
+ return thermal_zone_bind_cooling_device(thermal, 0, cdev,
+ /* bind with min and max states defined by cpu_cooling */
+ THERMAL_NO_LIMIT,
+ THERMAL_NO_LIMIT,
+ THERMAL_WEIGHT_DEFAULT);
+}
+
+/* Unbind callback functions for thermal zone */
+static int ti_thermal_unbind(struct thermal_zone_device *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ struct ti_thermal_data *data = thermal->devdata;
+
+ if (!data || IS_ERR(data))
+ return -ENODEV;
+
+ /* check if this is the cooling device we registered */
+ if (data->cool_dev != cdev)
+ return 0;
+
+ /* Simple case: two trips, one passive and one critical */
+ return thermal_zone_unbind_cooling_device(thermal, 0, cdev);
+}
+
+/* Get mode callback functions for thermal zone */
+static int ti_thermal_get_mode(struct thermal_zone_device *thermal,
+ enum thermal_device_mode *mode)
+{
+ struct ti_thermal_data *data = thermal->devdata;
+
+ if (data)
+ *mode = data->mode;
+
+ return 0;
+}
+
+/* Set mode callback functions for thermal zone */
+static int ti_thermal_set_mode(struct thermal_zone_device *thermal,
+ enum thermal_device_mode mode)
+{
+ struct ti_thermal_data *data = thermal->devdata;
+ struct ti_bandgap *bgp;
+
+ bgp = data->bgp;
+
+ if (!data->ti_thermal) {
+ dev_notice(&thermal->device, "thermal zone not registered\n");
+ return 0;
+ }
+
+ mutex_lock(&data->ti_thermal->lock);
+
+ if (mode == THERMAL_DEVICE_ENABLED)
+ data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
+ else
+ data->ti_thermal->polling_delay = 0;
+
+ mutex_unlock(&data->ti_thermal->lock);
+
+ data->mode = mode;
+ ti_bandgap_write_update_interval(bgp, data->sensor_id,
+ data->ti_thermal->polling_delay);
+ thermal_zone_device_update(data->ti_thermal);
+ dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n",
+ data->ti_thermal->polling_delay);
+
+ return 0;
+}
+
+/* Get trip type callback functions for thermal zone */
+static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal,
+ int trip, enum thermal_trip_type *type)
+{
+ if (!ti_thermal_is_valid_trip(trip))
+ return -EINVAL;
+
+ if (trip + 1 == OMAP_TRIP_NUMBER)
+ *type = THERMAL_TRIP_CRITICAL;
+ else
+ *type = THERMAL_TRIP_PASSIVE;
+
+ return 0;
+}
+
+/* Get trip temperature callback functions for thermal zone */
+static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal,
+ int trip, unsigned long *temp)
+{
+ if (!ti_thermal_is_valid_trip(trip))
+ return -EINVAL;
+
+ *temp = ti_thermal_get_trip_value(trip);
+
+ return 0;
+}
+
+static int __ti_thermal_get_trend(void *p, long *trend)
+{
+ struct ti_thermal_data *data = p;
+ struct ti_bandgap *bgp;
+ int id, tr, ret = 0;
+
+ bgp = data->bgp;
+ id = data->sensor_id;
+
+ ret = ti_bandgap_get_trend(bgp, id, &tr);
+ if (ret)
+ return ret;
+
+ *trend = tr;
+
+ return 0;
+}
+
+/* Get the temperature trend callback functions for thermal zone */
+static int ti_thermal_get_trend(struct thermal_zone_device *thermal,
+ int trip, enum thermal_trend *trend)
+{
+ int ret;
+ long tr;
+
+ ret = __ti_thermal_get_trend(thermal->devdata, &tr);
+ if (ret)
+ return ret;
+
+ if (tr > 0)
+ *trend = THERMAL_TREND_RAISING;
+ else if (tr < 0)
+ *trend = THERMAL_TREND_DROPPING;
+ else
+ *trend = THERMAL_TREND_STABLE;
+
+ return 0;
+}
+
+/* Get critical temperature callback functions for thermal zone */
+static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
+ unsigned long *temp)
+{
+ /* shutdown zone */
+ return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
+}
+
+static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
+ .get_temp = __ti_thermal_get_temp,
+ .get_trend = __ti_thermal_get_trend,
+};
+
+static struct thermal_zone_device_ops ti_thermal_ops = {
+ .get_temp = ti_thermal_get_temp,
+ .get_trend = ti_thermal_get_trend,
+ .bind = ti_thermal_bind,
+ .unbind = ti_thermal_unbind,
+ .get_mode = ti_thermal_get_mode,
+ .set_mode = ti_thermal_set_mode,
+ .get_trip_type = ti_thermal_get_trip_type,
+ .get_trip_temp = ti_thermal_get_trip_temp,
+ .get_crit_temp = ti_thermal_get_crit_temp,
+};
+
+static struct ti_thermal_data
+*ti_thermal_build_data(struct ti_bandgap *bgp, int id)
+{
+ struct ti_thermal_data *data;
+
+ data = devm_kzalloc(bgp->dev, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ dev_err(bgp->dev, "kzalloc fail\n");
+ return NULL;
+ }
+ data->sensor_id = id;
+ data->bgp = bgp;
+ data->mode = THERMAL_DEVICE_ENABLED;
+ /* pcb_tz will be either valid or PTR_ERR() */
+ data->pcb_tz = thermal_zone_get_zone_by_name("pcb");
+ INIT_WORK(&data->thermal_wq, ti_thermal_work);
+
+ return data;
+}
+
+int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
+ char *domain)
+{
+ struct ti_thermal_data *data;
+
+ data = ti_bandgap_get_sensor_data(bgp, id);
+
+ if (!data || IS_ERR(data))
+ data = ti_thermal_build_data(bgp, id);
+
+ if (!data)
+ return -EINVAL;
+
+ /* in case this is specified by DT */
+ data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id,
+ data, &ti_of_thermal_ops);
+ if (IS_ERR(data->ti_thermal)) {
+ /* Create thermal zone */
+ data->ti_thermal = thermal_zone_device_register(domain,
+ OMAP_TRIP_NUMBER, 0, data, &ti_thermal_ops,
+ NULL, FAST_TEMP_MONITORING_RATE,
+ FAST_TEMP_MONITORING_RATE);
+ if (IS_ERR(data->ti_thermal)) {
+ dev_err(bgp->dev, "thermal zone device is NULL\n");
+ return PTR_ERR(data->ti_thermal);
+ }
+ data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
+ data->our_zone = true;
+ }
+ ti_bandgap_set_sensor_data(bgp, id, data);
+ ti_bandgap_write_update_interval(bgp, data->sensor_id,
+ data->ti_thermal->polling_delay);
+
+ return 0;
+}
+
+int ti_thermal_remove_sensor(struct ti_bandgap *bgp, int id)
+{
+ struct ti_thermal_data *data;
+
+ data = ti_bandgap_get_sensor_data(bgp, id);
+
+ if (data && data->ti_thermal) {
+ if (data->our_zone)
+ thermal_zone_device_unregister(data->ti_thermal);
+ else
+ thermal_zone_of_sensor_unregister(bgp->dev,
+ data->ti_thermal);
+ }
+
+ return 0;
+}
+
+int ti_thermal_report_sensor_temperature(struct ti_bandgap *bgp, int id)
+{
+ struct ti_thermal_data *data;
+
+ data = ti_bandgap_get_sensor_data(bgp, id);
+
+ schedule_work(&data->thermal_wq);
+
+ return 0;
+}
+
+int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
+{
+ struct ti_thermal_data *data;
+ struct device_node *np = bgp->dev->of_node;
+
+ /*
+ * We assume here that if the zone is deployed via DT, the user
+ * is aware that the cooling device has to be loaded through the
+ * cpufreq driver.
+ */
+ if (of_find_property(np, "#thermal-sensor-cells", NULL))
+ return 0;
+
+ data = ti_bandgap_get_sensor_data(bgp, id);
+ if (!data || IS_ERR(data))
+ data = ti_thermal_build_data(bgp, id);
+
+ if (!data)
+ return -EINVAL;
+
+ /* Register cooling device */
+ data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
+ if (IS_ERR(data->cool_dev)) {
+ int ret = PTR_ERR(data->cool_dev);
+
+ if (ret != -EPROBE_DEFER)
+ dev_err(bgp->dev,
+ "Failed to register cpu cooling device %d\n",
+ ret);
+
+ return ret;
+ }
+ ti_bandgap_set_sensor_data(bgp, id, data);
+
+ return 0;
+}
+
+int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id)
+{
+ struct ti_thermal_data *data;
+
+ data = ti_bandgap_get_sensor_data(bgp, id);
+
+ if (data)
+ cpufreq_cooling_unregister(data->cool_dev);
+
+ return 0;
+}
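
As a rough illustration of the hotspot extrapolation implemented by ti_thermal_hotspot_temperature() above, the sketch below plugs in hypothetical slope and constant values; they are chosen for arithmetic clarity only and are not taken from any real OMAP sensor configuration:

	/* t = 45000 (45.000 C from the bandgap sensor), s = 348, c = 9301:
	 * delta = 45000 * 348 / 1000 + 9301 = 24961, so the extrapolated
	 * hotspot temperature is 45000 + 24961 = 69961, i.e. roughly 69.96 C.
	 */
	int hotspot = ti_thermal_hotspot_temperature(45000, 348, 9301);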
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 1fabb22ae615..57977ec53a7a 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -95,6 +95,9 @@ static void __uart_start(struct tty_struct *tty)
struct uart_state *state = tty->driver_data;
struct uart_port *port = state->uart_port;
+ if (port->ops->wake_peer)
+ port->ops->wake_peer(port);
+
if (!uart_circ_empty(&state->xmit) && state->xmit.buf &&
!tty->stopped && !tty->hw_stopped)
port->ops->start_tx(port);
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index f41aa0d0c414..9de2eb28085e 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -818,6 +818,27 @@ config USB_G_PRINTER
For more information, see Documentation/usb/gadget_printer.txt
which includes sample code for accessing the device file.
+config USB_G_ANDROID
+ boolean "Android Composite Gadget"
+ depends on SND
+ select SND_PCM
+ select SND_RAWMIDI
+ select USB_F_ACM
+ select USB_LIBCOMPOSITE
+ select USB_U_SERIAL
+ help
+ The Android Composite Gadget supports multiple USB
+ functions: adb, acm, mass storage, mtp, accessory
+ and rndis.
+ Each function can be configured and enabled/disabled
+ dynamically from userspace through a sysfs interface.
+
+config USB_ANDROID_RNDIS_DWORD_ALIGNED
+ boolean "Use double word aligned"
+ depends on USB_G_ANDROID
+ help
+ Provide dword-aligned RNDIS packets for DMA controllers that require it.
+
if TTY
config USB_CDC_COMPOSITE
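
A minimal userspace sketch of the sysfs flow described in the USB_G_ANDROID help text above: the android0 device name and the functions/enable attributes are the ones created by android.c further down in this patch, while the helper below is purely illustrative and assumes those paths exist:

	#include <stdio.h>

	/* Illustrative helper: write a string to a sysfs attribute. */
	static int write_attr(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		/* The gadget must be disabled while the function list changes. */
		write_attr("/sys/class/android_usb/android0/enable", "0");
		write_attr("/sys/class/android_usb/android0/functions", "mtp,acm");
		write_attr("/sys/class/android_usb/android0/enable", "1");
		return 0;
	}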
diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile
index 6afd16659e78..0ec50ae5b639 100644
--- a/drivers/usb/gadget/Makefile
+++ b/drivers/usb/gadget/Makefile
@@ -65,6 +65,7 @@ g_nokia-y := nokia.o
g_webcam-y := webcam.o
g_ncm-y := ncm.o
g_acm_ms-y := acm_ms.o
+g_android-y := android.o
g_tcm_usb_gadget-y := tcm_usb_gadget.o
obj-$(CONFIG_USB_ZERO) += g_zero.o
@@ -84,4 +85,5 @@ obj-$(CONFIG_USB_G_NOKIA) += g_nokia.o
obj-$(CONFIG_USB_G_WEBCAM) += g_webcam.o
obj-$(CONFIG_USB_G_NCM) += g_ncm.o
obj-$(CONFIG_USB_G_ACM_MS) += g_acm_ms.o
+obj-$(CONFIG_USB_G_ANDROID) += g_android.o
obj-$(CONFIG_USB_GADGET_TARGET) += tcm_usb_gadget.o
diff --git a/drivers/usb/gadget/android.c b/drivers/usb/gadget/android.c
new file mode 100644
index 000000000000..61c5e582bf47
--- /dev/null
+++ b/drivers/usb/gadget/android.c
@@ -0,0 +1,1587 @@
+/*
+ * Gadget Driver for Android
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ * Benoit Goby <benoit@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/utsname.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb/ch9.h>
+#include <linux/usb/composite.h>
+#include <linux/usb/gadget.h>
+
+#include "gadget_chips.h"
+
+#include "f_fs.c"
+#include "f_audio_source.c"
+#include "f_midi.c"
+#include "f_mass_storage.c"
+#include "f_mtp.c"
+#include "f_accessory.c"
+#define USB_ETH_RNDIS y
+#include "f_rndis.c"
+#include "rndis.c"
+#include "u_ether.c"
+
+MODULE_AUTHOR("Mike Lockwood");
+MODULE_DESCRIPTION("Android Composite USB Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+
+static const char longname[] = "Gadget Android";
+
+/* Default vendor and product IDs, overridden by userspace */
+#define VENDOR_ID 0x18D1
+#define PRODUCT_ID 0x0001
+
+/* f_midi configuration */
+#define MIDI_INPUT_PORTS 1
+#define MIDI_OUTPUT_PORTS 1
+#define MIDI_BUFFER_SIZE 256
+#define MIDI_QUEUE_LENGTH 32
+
+struct android_usb_function {
+ char *name;
+ void *config;
+
+ struct device *dev;
+ char *dev_name;
+ struct device_attribute **attributes;
+
+ /* for android_dev.enabled_functions */
+ struct list_head enabled_list;
+
+ /* Optional: initialization during gadget bind */
+ int (*init)(struct android_usb_function *, struct usb_composite_dev *);
+ /* Optional: cleanup during gadget unbind */
+ void (*cleanup)(struct android_usb_function *);
+ /* Optional: called when the function is added to the list of
+ * enabled functions */
+ void (*enable)(struct android_usb_function *);
+ /* Optional: called when it is removed */
+ void (*disable)(struct android_usb_function *);
+
+ int (*bind_config)(struct android_usb_function *,
+ struct usb_configuration *);
+
+ /* Optional: called when the configuration is removed */
+ void (*unbind_config)(struct android_usb_function *,
+ struct usb_configuration *);
+ /* Optional: handle ctrl requests before the device is configured */
+ int (*ctrlrequest)(struct android_usb_function *,
+ struct usb_composite_dev *,
+ const struct usb_ctrlrequest *);
+};
+
+struct android_dev {
+ struct android_usb_function **functions;
+ struct list_head enabled_functions;
+ struct usb_composite_dev *cdev;
+ struct device *dev;
+
+ void (*setup_complete)(struct usb_ep *ep,
+ struct usb_request *req);
+
+ bool enabled;
+ int disable_depth;
+ struct mutex mutex;
+ bool connected;
+ bool sw_connected;
+ struct work_struct work;
+ char ffs_aliases[256];
+};
+
+static struct class *android_class;
+static struct android_dev *_android_dev;
+static int android_bind_config(struct usb_configuration *c);
+static void android_unbind_config(struct usb_configuration *c);
+
+/* string IDs are assigned dynamically */
+#define STRING_MANUFACTURER_IDX 0
+#define STRING_PRODUCT_IDX 1
+#define STRING_SERIAL_IDX 2
+
+static char manufacturer_string[256];
+static char product_string[256];
+static char serial_string[256];
+
+/* String Table */
+static struct usb_string strings_dev[] = {
+ [STRING_MANUFACTURER_IDX].s = manufacturer_string,
+ [STRING_PRODUCT_IDX].s = product_string,
+ [STRING_SERIAL_IDX].s = serial_string,
+ { } /* end of list */
+};
+
+static struct usb_gadget_strings stringtab_dev = {
+ .language = 0x0409, /* en-us */
+ .strings = strings_dev,
+};
+
+static struct usb_gadget_strings *dev_strings[] = {
+ &stringtab_dev,
+ NULL,
+};
+
+static struct usb_device_descriptor device_desc = {
+ .bLength = sizeof(device_desc),
+ .bDescriptorType = USB_DT_DEVICE,
+ .bcdUSB = __constant_cpu_to_le16(0x0200),
+ .bDeviceClass = USB_CLASS_PER_INTERFACE,
+ .idVendor = __constant_cpu_to_le16(VENDOR_ID),
+ .idProduct = __constant_cpu_to_le16(PRODUCT_ID),
+ .bcdDevice = __constant_cpu_to_le16(0xffff),
+ .bNumConfigurations = 1,
+};
+
+static struct usb_configuration android_config_driver = {
+ .label = "android",
+ .unbind = android_unbind_config,
+ .bConfigurationValue = 1,
+ .bmAttributes = USB_CONFIG_ATT_ONE | USB_CONFIG_ATT_SELFPOWER,
+ .MaxPower = 500, /* 500 mA */
+};
+
+static void android_work(struct work_struct *data)
+{
+ struct android_dev *dev = container_of(data, struct android_dev, work);
+ struct usb_composite_dev *cdev = dev->cdev;
+ char *disconnected[2] = { "USB_STATE=DISCONNECTED", NULL };
+ char *connected[2] = { "USB_STATE=CONNECTED", NULL };
+ char *configured[2] = { "USB_STATE=CONFIGURED", NULL };
+ char **uevent_envp = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (cdev->config)
+ uevent_envp = configured;
+ else if (dev->connected != dev->sw_connected)
+ uevent_envp = dev->connected ? connected : disconnected;
+ dev->sw_connected = dev->connected;
+ spin_unlock_irqrestore(&cdev->lock, flags);
+
+ if (uevent_envp) {
+ kobject_uevent_env(&dev->dev->kobj, KOBJ_CHANGE, uevent_envp);
+ pr_info("%s: sent uevent %s\n", __func__, uevent_envp[0]);
+ } else {
+ pr_info("%s: did not send uevent (%d %d %p)\n", __func__,
+ dev->connected, dev->sw_connected, cdev->config);
+ }
+}
+
+static void android_enable(struct android_dev *dev)
+{
+ struct usb_composite_dev *cdev = dev->cdev;
+
+ if (WARN_ON(!dev->disable_depth))
+ return;
+
+ if (--dev->disable_depth == 0) {
+ usb_add_config(cdev, &android_config_driver,
+ android_bind_config);
+ usb_gadget_connect(cdev->gadget);
+ }
+}
+
+static void android_disable(struct android_dev *dev)
+{
+ struct usb_composite_dev *cdev = dev->cdev;
+
+ if (dev->disable_depth++ == 0) {
+ usb_gadget_disconnect(cdev->gadget);
+ /* Cancel pending control requests */
+ usb_ep_dequeue(cdev->gadget->ep0, cdev->req);
+ usb_remove_config(cdev, &android_config_driver);
+ }
+}
+
+/*-------------------------------------------------------------------------*/
+/* Supported functions initialization */
+
+struct functionfs_config {
+ bool opened;
+ bool enabled;
+ struct ffs_data *data;
+};
+
+static int ffs_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ f->config = kzalloc(sizeof(struct functionfs_config), GFP_KERNEL);
+ if (!f->config)
+ return -ENOMEM;
+
+ return functionfs_init();
+}
+
+static void ffs_function_cleanup(struct android_usb_function *f)
+{
+ functionfs_cleanup();
+ kfree(f->config);
+}
+
+static void ffs_function_enable(struct android_usb_function *f)
+{
+ struct android_dev *dev = _android_dev;
+ struct functionfs_config *config = f->config;
+
+ config->enabled = true;
+
+ /* Disable the gadget until the function is ready */
+ if (!config->opened)
+ android_disable(dev);
+}
+
+static void ffs_function_disable(struct android_usb_function *f)
+{
+ struct android_dev *dev = _android_dev;
+ struct functionfs_config *config = f->config;
+
+ config->enabled = false;
+
+ /* Balance the disable that was called in closed_callback */
+ if (!config->opened)
+ android_enable(dev);
+}
+
+static int ffs_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ struct functionfs_config *config = f->config;
+ return functionfs_bind_config(c->cdev, c, config->data);
+}
+
+static ssize_t
+ffs_aliases_show(struct device *pdev, struct device_attribute *attr, char *buf)
+{
+ struct android_dev *dev = _android_dev;
+ int ret;
+
+ mutex_lock(&dev->mutex);
+ ret = sprintf(buf, "%s\n", dev->ffs_aliases);
+ mutex_unlock(&dev->mutex);
+
+ return ret;
+}
+
+static ssize_t
+ffs_aliases_store(struct device *pdev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct android_dev *dev = _android_dev;
+ char buff[256];
+
+ mutex_lock(&dev->mutex);
+
+ if (dev->enabled) {
+ mutex_unlock(&dev->mutex);
+ return -EBUSY;
+ }
+
+ strlcpy(buff, buf, sizeof(buff));
+ strlcpy(dev->ffs_aliases, strim(buff), sizeof(dev->ffs_aliases));
+
+ mutex_unlock(&dev->mutex);
+
+ return size;
+}
+
+static DEVICE_ATTR(aliases, S_IRUGO | S_IWUSR, ffs_aliases_show,
+ ffs_aliases_store);
+static struct device_attribute *ffs_function_attributes[] = {
+ &dev_attr_aliases,
+ NULL
+};
+
+static struct android_usb_function ffs_function = {
+ .name = "ffs",
+ .init = ffs_function_init,
+ .enable = ffs_function_enable,
+ .disable = ffs_function_disable,
+ .cleanup = ffs_function_cleanup,
+ .bind_config = ffs_function_bind_config,
+ .attributes = ffs_function_attributes,
+};
+
+static int functionfs_ready_callback(struct ffs_data *ffs)
+{
+ struct android_dev *dev = _android_dev;
+ struct functionfs_config *config = ffs_function.config;
+ int ret = 0;
+
+ mutex_lock(&dev->mutex);
+
+ ret = functionfs_bind(ffs, dev->cdev);
+ if (ret)
+ goto err;
+
+ config->data = ffs;
+ config->opened = true;
+
+ if (config->enabled)
+ android_enable(dev);
+
+err:
+ mutex_unlock(&dev->mutex);
+ return ret;
+}
+
+static void functionfs_closed_callback(struct ffs_data *ffs)
+{
+ struct android_dev *dev = _android_dev;
+ struct functionfs_config *config = ffs_function.config;
+
+ mutex_lock(&dev->mutex);
+
+ if (config->enabled)
+ android_disable(dev);
+
+ config->opened = false;
+ config->data = NULL;
+
+ functionfs_unbind(ffs);
+
+ mutex_unlock(&dev->mutex);
+}
+
+static void *functionfs_acquire_dev_callback(const char *dev_name)
+{
+ return NULL;
+}
+
+static void functionfs_release_dev_callback(struct ffs_data *ffs_data)
+{
+}
+
+#define MAX_ACM_INSTANCES 4
+struct acm_function_config {
+ int instances;
+ int instances_on;
+ struct usb_function *f_acm[MAX_ACM_INSTANCES];
+ struct usb_function_instance *f_acm_inst[MAX_ACM_INSTANCES];
+};
+
+static int
+acm_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ int i;
+ int ret;
+ struct acm_function_config *config;
+
+ config = kzalloc(sizeof(struct acm_function_config), GFP_KERNEL);
+ if (!config)
+ return -ENOMEM;
+ f->config = config;
+
+ for (i = 0; i < MAX_ACM_INSTANCES; i++) {
+ config->f_acm_inst[i] = usb_get_function_instance("acm");
+ if (IS_ERR(config->f_acm_inst[i])) {
+ ret = PTR_ERR(config->f_acm_inst[i]);
+ goto err_usb_get_function_instance;
+ }
+ config->f_acm[i] = usb_get_function(config->f_acm_inst[i]);
+ if (IS_ERR(config->f_acm[i])) {
+ ret = PTR_ERR(config->f_acm[i]);
+ goto err_usb_get_function;
+ }
+ }
+ return 0;
+err_usb_get_function_instance:
+ while (i-- > 0) {
+ usb_put_function(config->f_acm[i]);
+err_usb_get_function:
+ usb_put_function_instance(config->f_acm_inst[i]);
+ }
+ return ret;
+}
+
+static void acm_function_cleanup(struct android_usb_function *f)
+{
+ int i;
+ struct acm_function_config *config = f->config;
+
+ for (i = 0; i < MAX_ACM_INSTANCES; i++) {
+ usb_put_function(config->f_acm[i]);
+ usb_put_function_instance(config->f_acm_inst[i]);
+ }
+ kfree(f->config);
+ f->config = NULL;
+}
+
+static int
+acm_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ int i;
+ int ret = 0;
+ struct acm_function_config *config = f->config;
+
+ config->instances_on = config->instances;
+ for (i = 0; i < config->instances_on; i++) {
+ ret = usb_add_function(c, config->f_acm[i]);
+ if (ret) {
+ pr_err("Could not bind acm%u config\n", i);
+ goto err_usb_add_function;
+ }
+ }
+
+ return 0;
+
+err_usb_add_function:
+ while (i-- > 0)
+ usb_remove_function(c, config->f_acm[i]);
+ return ret;
+}
+
+static void acm_function_unbind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ int i;
+ struct acm_function_config *config = f->config;
+
+ for (i = 0; i < config->instances_on; i++)
+ usb_remove_function(c, config->f_acm[i]);
+}
+
+static ssize_t acm_instances_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct acm_function_config *config = f->config;
+ return sprintf(buf, "%d\n", config->instances);
+}
+
+static ssize_t acm_instances_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct acm_function_config *config = f->config;
+ int value;
+
+ sscanf(buf, "%d", &value);
+ if (value > MAX_ACM_INSTANCES)
+ value = MAX_ACM_INSTANCES;
+ config->instances = value;
+ return size;
+}
+
+static DEVICE_ATTR(instances, S_IRUGO | S_IWUSR, acm_instances_show,
+ acm_instances_store);
+static struct device_attribute *acm_function_attributes[] = {
+ &dev_attr_instances,
+ NULL
+};
+
+static struct android_usb_function acm_function = {
+ .name = "acm",
+ .init = acm_function_init,
+ .cleanup = acm_function_cleanup,
+ .bind_config = acm_function_bind_config,
+ .unbind_config = acm_function_unbind_config,
+ .attributes = acm_function_attributes,
+};
+
+
+static int
+mtp_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ return mtp_setup();
+}
+
+static void mtp_function_cleanup(struct android_usb_function *f)
+{
+ mtp_cleanup();
+}
+
+static int
+mtp_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ return mtp_bind_config(c, false);
+}
+
+static int
+ptp_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ /* nothing to do - initialization is handled by mtp_function_init */
+ return 0;
+}
+
+static void ptp_function_cleanup(struct android_usb_function *f)
+{
+ /* nothing to do - cleanup is handled by mtp_function_cleanup */
+}
+
+static int
+ptp_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ return mtp_bind_config(c, true);
+}
+
+static int mtp_function_ctrlrequest(struct android_usb_function *f,
+ struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *c)
+{
+ return mtp_ctrlrequest(cdev, c);
+}
+
+static struct android_usb_function mtp_function = {
+ .name = "mtp",
+ .init = mtp_function_init,
+ .cleanup = mtp_function_cleanup,
+ .bind_config = mtp_function_bind_config,
+ .ctrlrequest = mtp_function_ctrlrequest,
+};
+
+/* The PTP function is the same as MTP, with a slightly different interface descriptor */
+static struct android_usb_function ptp_function = {
+ .name = "ptp",
+ .init = ptp_function_init,
+ .cleanup = ptp_function_cleanup,
+ .bind_config = ptp_function_bind_config,
+};
+
+
+struct rndis_function_config {
+ u8 ethaddr[ETH_ALEN];
+ u32 vendorID;
+ char manufacturer[256];
+ /* "Wireless" RNDIS; auto-detected by Windows */
+ bool wceis;
+ struct eth_dev *dev;
+};
+
+static int
+rndis_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ f->config = kzalloc(sizeof(struct rndis_function_config), GFP_KERNEL);
+ if (!f->config)
+ return -ENOMEM;
+ return 0;
+}
+
+static void rndis_function_cleanup(struct android_usb_function *f)
+{
+ kfree(f->config);
+ f->config = NULL;
+}
+
+static int
+rndis_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ int ret;
+ struct eth_dev *dev;
+ struct rndis_function_config *rndis = f->config;
+
+ if (!rndis) {
+ pr_err("%s: rndis_pdata\n", __func__);
+ return -1;
+ }
+
+ pr_info("%s MAC: %02X:%02X:%02X:%02X:%02X:%02X\n", __func__,
+ rndis->ethaddr[0], rndis->ethaddr[1], rndis->ethaddr[2],
+ rndis->ethaddr[3], rndis->ethaddr[4], rndis->ethaddr[5]);
+
+ dev = gether_setup_name(c->cdev->gadget, rndis->ethaddr, "rndis");
+ if (IS_ERR(dev)) {
+ ret = PTR_ERR(dev);
+ pr_err("%s: gether_setup failed\n", __func__);
+ return ret;
+ }
+ rndis->dev = dev;
+
+ if (rndis->wceis) {
+ /* "Wireless" RNDIS; auto-detected by Windows */
+ rndis_iad_descriptor.bFunctionClass =
+ USB_CLASS_WIRELESS_CONTROLLER;
+ rndis_iad_descriptor.bFunctionSubClass = 0x01;
+ rndis_iad_descriptor.bFunctionProtocol = 0x03;
+ rndis_control_intf.bInterfaceClass =
+ USB_CLASS_WIRELESS_CONTROLLER;
+ rndis_control_intf.bInterfaceSubClass = 0x01;
+ rndis_control_intf.bInterfaceProtocol = 0x03;
+ }
+
+ return rndis_bind_config_vendor(c, rndis->ethaddr, rndis->vendorID,
+ rndis->manufacturer, rndis->dev);
+}
+
+static void rndis_function_unbind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ struct rndis_function_config *rndis = f->config;
+ gether_cleanup(rndis->dev);
+}
+
+static ssize_t rndis_manufacturer_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *config = f->config;
+ return sprintf(buf, "%s\n", config->manufacturer);
+}
+
+static ssize_t rndis_manufacturer_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *config = f->config;
+
+ if (size >= sizeof(config->manufacturer))
+ return -EINVAL;
+ if (sscanf(buf, "%s", config->manufacturer) == 1)
+ return size;
+ return -EINVAL;
+}
+
+static DEVICE_ATTR(manufacturer, S_IRUGO | S_IWUSR, rndis_manufacturer_show,
+ rndis_manufacturer_store);
+
+static ssize_t rndis_wceis_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *config = f->config;
+ return sprintf(buf, "%d\n", config->wceis);
+}
+
+static ssize_t rndis_wceis_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *config = f->config;
+ int value;
+
+ if (sscanf(buf, "%d", &value) == 1) {
+ config->wceis = value;
+ return size;
+ }
+ return -EINVAL;
+}
+
+static DEVICE_ATTR(wceis, S_IRUGO | S_IWUSR, rndis_wceis_show,
+ rndis_wceis_store);
+
+static ssize_t rndis_ethaddr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *rndis = f->config;
+ return sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x\n",
+ rndis->ethaddr[0], rndis->ethaddr[1], rndis->ethaddr[2],
+ rndis->ethaddr[3], rndis->ethaddr[4], rndis->ethaddr[5]);
+}
+
+static ssize_t rndis_ethaddr_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *rndis = f->config;
+ int addr[ETH_ALEN];
+ int i;
+
+ /*
+  * Parse into ints first: sscanf("%02x") stores a full int, so
+  * scanning through (int *) casts into the u8 ethaddr[] array
+  * would write past its end.
+  */
+ if (sscanf(buf, "%02x:%02x:%02x:%02x:%02x:%02x\n",
+   &addr[0], &addr[1], &addr[2],
+   &addr[3], &addr[4], &addr[5]) == 6) {
+  for (i = 0; i < ETH_ALEN; i++)
+   rndis->ethaddr[i] = addr[i];
+  return size;
+ }
+ return -EINVAL;
+}
+
+static DEVICE_ATTR(ethaddr, S_IRUGO | S_IWUSR, rndis_ethaddr_show,
+ rndis_ethaddr_store);
+
+static ssize_t rndis_vendorID_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *config = f->config;
+ return sprintf(buf, "%04x\n", config->vendorID);
+}
+
+static ssize_t rndis_vendorID_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct rndis_function_config *config = f->config;
+ int value;
+
+ if (sscanf(buf, "%04x", &value) == 1) {
+ config->vendorID = value;
+ return size;
+ }
+ return -EINVAL;
+}
+
+static DEVICE_ATTR(vendorID, S_IRUGO | S_IWUSR, rndis_vendorID_show,
+ rndis_vendorID_store);
+
+static struct device_attribute *rndis_function_attributes[] = {
+ &dev_attr_manufacturer,
+ &dev_attr_wceis,
+ &dev_attr_ethaddr,
+ &dev_attr_vendorID,
+ NULL
+};
+
+static struct android_usb_function rndis_function = {
+ .name = "rndis",
+ .init = rndis_function_init,
+ .cleanup = rndis_function_cleanup,
+ .bind_config = rndis_function_bind_config,
+ .unbind_config = rndis_function_unbind_config,
+ .attributes = rndis_function_attributes,
+};
+
+
+struct mass_storage_function_config {
+ struct fsg_config fsg;
+ struct fsg_common *common;
+};
+
+static int mass_storage_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ struct mass_storage_function_config *config;
+ struct fsg_common *common;
+ int err;
+
+ config = kzalloc(sizeof(struct mass_storage_function_config),
+ GFP_KERNEL);
+ if (!config)
+ return -ENOMEM;
+
+ config->fsg.nluns = 1;
+ config->fsg.luns[0].removable = 1;
+
+ common = fsg_common_init(NULL, cdev, &config->fsg);
+ if (IS_ERR(common)) {
+ kfree(config);
+ return PTR_ERR(common);
+ }
+
+ err = sysfs_create_link(&f->dev->kobj,
+ &common->luns[0].dev.kobj,
+ "lun");
+ if (err) {
+ kfree(config);
+ return err;
+ }
+
+ config->common = common;
+ f->config = config;
+ return 0;
+}
+
+static void mass_storage_function_cleanup(struct android_usb_function *f)
+{
+ kfree(f->config);
+ f->config = NULL;
+}
+
+static int mass_storage_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ struct mass_storage_function_config *config = f->config;
+ return fsg_bind_config(c->cdev, c, config->common);
+}
+
+static ssize_t mass_storage_inquiry_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct mass_storage_function_config *config = f->config;
+ return sprintf(buf, "%s\n", config->common->inquiry_string);
+}
+
+static ssize_t mass_storage_inquiry_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct mass_storage_function_config *config = f->config;
+ if (size >= sizeof(config->common->inquiry_string))
+ return -EINVAL;
+ if (sscanf(buf, "%s", config->common->inquiry_string) != 1)
+ return -EINVAL;
+ return size;
+}
+
+static DEVICE_ATTR(inquiry_string, S_IRUGO | S_IWUSR,
+ mass_storage_inquiry_show,
+ mass_storage_inquiry_store);
+
+static struct device_attribute *mass_storage_function_attributes[] = {
+ &dev_attr_inquiry_string,
+ NULL
+};
+
+static struct android_usb_function mass_storage_function = {
+ .name = "mass_storage",
+ .init = mass_storage_function_init,
+ .cleanup = mass_storage_function_cleanup,
+ .bind_config = mass_storage_function_bind_config,
+ .attributes = mass_storage_function_attributes,
+};
+
+
+static int accessory_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ return acc_setup();
+}
+
+static void accessory_function_cleanup(struct android_usb_function *f)
+{
+ acc_cleanup();
+}
+
+static int accessory_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ return acc_bind_config(c);
+}
+
+static int accessory_function_ctrlrequest(struct android_usb_function *f,
+ struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *c)
+{
+ return acc_ctrlrequest(cdev, c);
+}
+
+static struct android_usb_function accessory_function = {
+ .name = "accessory",
+ .init = accessory_function_init,
+ .cleanup = accessory_function_cleanup,
+ .bind_config = accessory_function_bind_config,
+ .ctrlrequest = accessory_function_ctrlrequest,
+};
+
+static int audio_source_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ struct audio_source_config *config;
+
+ config = kzalloc(sizeof(struct audio_source_config), GFP_KERNEL);
+ if (!config)
+ return -ENOMEM;
+ config->card = -1;
+ config->device = -1;
+ f->config = config;
+ return 0;
+}
+
+static void audio_source_function_cleanup(struct android_usb_function *f)
+{
+ kfree(f->config);
+}
+
+static int audio_source_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ struct audio_source_config *config = f->config;
+
+ return audio_source_bind_config(c, config);
+}
+
+static void audio_source_function_unbind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ struct audio_source_config *config = f->config;
+
+ config->card = -1;
+ config->device = -1;
+}
+
+static ssize_t audio_source_pcm_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct audio_source_config *config = f->config;
+
+ /* print PCM card and device numbers */
+ return sprintf(buf, "%d %d\n", config->card, config->device);
+}
+
+static DEVICE_ATTR(pcm, S_IRUGO, audio_source_pcm_show, NULL);
+
+static struct device_attribute *audio_source_function_attributes[] = {
+ &dev_attr_pcm,
+ NULL
+};
+
+static struct android_usb_function audio_source_function = {
+ .name = "audio_source",
+ .init = audio_source_function_init,
+ .cleanup = audio_source_function_cleanup,
+ .bind_config = audio_source_function_bind_config,
+ .unbind_config = audio_source_function_unbind_config,
+ .attributes = audio_source_function_attributes,
+};
+
+static int midi_function_init(struct android_usb_function *f,
+ struct usb_composite_dev *cdev)
+{
+ struct midi_alsa_config *config;
+
+ config = kzalloc(sizeof(struct midi_alsa_config), GFP_KERNEL);
+ f->config = config;
+ if (!config)
+ return -ENOMEM;
+ config->card = -1;
+ config->device = -1;
+ return 0;
+}
+
+static void midi_function_cleanup(struct android_usb_function *f)
+{
+ kfree(f->config);
+}
+
+static int midi_function_bind_config(struct android_usb_function *f,
+ struct usb_configuration *c)
+{
+ struct midi_alsa_config *config = f->config;
+
+ return f_midi_bind_config(c, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+ MIDI_INPUT_PORTS, MIDI_OUTPUT_PORTS, MIDI_BUFFER_SIZE,
+ MIDI_QUEUE_LENGTH, config);
+}
+
+static ssize_t midi_alsa_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct android_usb_function *f = dev_get_drvdata(dev);
+ struct midi_alsa_config *config = f->config;
+
+ /* print ALSA card and device numbers */
+ return sprintf(buf, "%d %d\n", config->card, config->device);
+}
+
+static DEVICE_ATTR(alsa, S_IRUGO, midi_alsa_show, NULL);
+
+static struct device_attribute *midi_function_attributes[] = {
+ &dev_attr_alsa,
+ NULL
+};
+
+static struct android_usb_function midi_function = {
+ .name = "midi",
+ .init = midi_function_init,
+ .cleanup = midi_function_cleanup,
+ .bind_config = midi_function_bind_config,
+ .attributes = midi_function_attributes,
+};
+
+static struct android_usb_function *supported_functions[] = {
+ &ffs_function,
+ &acm_function,
+ &mtp_function,
+ &ptp_function,
+ &rndis_function,
+ &mass_storage_function,
+ &accessory_function,
+ &audio_source_function,
+ &midi_function,
+ NULL
+};
+
+static int android_init_functions(struct android_usb_function **functions,
+ struct usb_composite_dev *cdev)
+{
+ struct android_dev *dev = _android_dev;
+ struct android_usb_function *f;
+ struct device_attribute **attrs;
+ struct device_attribute *attr;
+ int err = 0;
+ int index = 0;
+
+ for (; (f = *functions++); index++) {
+ f->dev_name = kasprintf(GFP_KERNEL, "f_%s", f->name);
+ f->dev = device_create(android_class, dev->dev,
+ MKDEV(0, index), f, f->dev_name);
+ if (IS_ERR(f->dev)) {
+ pr_err("%s: Failed to create dev %s", __func__,
+ f->dev_name);
+ err = PTR_ERR(f->dev);
+ goto err_create;
+ }
+
+ if (f->init) {
+ err = f->init(f, cdev);
+ if (err) {
+ pr_err("%s: Failed to init %s", __func__,
+ f->name);
+ goto err_out;
+ }
+ }
+
+ attrs = f->attributes;
+ if (attrs) {
+ while ((attr = *attrs++) && !err)
+ err = device_create_file(f->dev, attr);
+ }
+ if (err) {
+ pr_err("%s: Failed to create function %s attributes",
+ __func__, f->name);
+ goto err_out;
+ }
+ }
+ return 0;
+
+err_out:
+ device_destroy(android_class, f->dev->devt);
+err_create:
+ kfree(f->dev_name);
+ return err;
+}
+
+static void android_cleanup_functions(struct android_usb_function **functions)
+{
+ struct android_usb_function *f;
+
+ while (*functions) {
+ f = *functions++;
+
+ if (f->dev) {
+ device_destroy(android_class, f->dev->devt);
+ kfree(f->dev_name);
+ }
+
+ if (f->cleanup)
+ f->cleanup(f);
+ }
+}
+
+static int
+android_bind_enabled_functions(struct android_dev *dev,
+ struct usb_configuration *c)
+{
+ struct android_usb_function *f;
+ int ret;
+
+ list_for_each_entry(f, &dev->enabled_functions, enabled_list) {
+ ret = f->bind_config(f, c);
+ if (ret) {
+ pr_err("%s: %s failed", __func__, f->name);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void
+android_unbind_enabled_functions(struct android_dev *dev,
+ struct usb_configuration *c)
+{
+ struct android_usb_function *f;
+
+ list_for_each_entry(f, &dev->enabled_functions, enabled_list) {
+ if (f->unbind_config)
+ f->unbind_config(f, c);
+ }
+}
+
+static int android_enable_function(struct android_dev *dev, char *name)
+{
+ struct android_usb_function **functions = dev->functions;
+ struct android_usb_function *f;
+ while ((f = *functions++)) {
+ if (!strcmp(name, f->name)) {
+ list_add_tail(&f->enabled_list,
+ &dev->enabled_functions);
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+/*-------------------------------------------------------------------------*/
+/* /sys/class/android_usb/android%d/ interface */
+
+static ssize_t
+functions_show(struct device *pdev, struct device_attribute *attr, char *buf)
+{
+ struct android_dev *dev = dev_get_drvdata(pdev);
+ struct android_usb_function *f;
+ char *buff = buf;
+
+ mutex_lock(&dev->mutex);
+
+ list_for_each_entry(f, &dev->enabled_functions, enabled_list)
+ buff += sprintf(buff, "%s,", f->name);
+
+ mutex_unlock(&dev->mutex);
+
+ if (buff != buf)
+ *(buff-1) = '\n';
+ return buff - buf;
+}
+
+static ssize_t
+functions_store(struct device *pdev, struct device_attribute *attr,
+ const char *buff, size_t size)
+{
+ struct android_dev *dev = dev_get_drvdata(pdev);
+ char *name;
+ char buf[256], *b;
+ char aliases[256], *a;
+ int err;
+ int is_ffs;
+ int ffs_enabled = 0;
+
+ mutex_lock(&dev->mutex);
+
+ if (dev->enabled) {
+ mutex_unlock(&dev->mutex);
+ return -EBUSY;
+ }
+
+ INIT_LIST_HEAD(&dev->enabled_functions);
+
+ strlcpy(buf, buff, sizeof(buf));
+ b = strim(buf);
+
+ while (b) {
+ name = strsep(&b, ",");
+ if (!name)
+ continue;
+
+ is_ffs = 0;
+ strlcpy(aliases, dev->ffs_aliases, sizeof(aliases));
+ a = aliases;
+
+ while (a) {
+ char *alias = strsep(&a, ",");
+ if (alias && !strcmp(name, alias)) {
+ is_ffs = 1;
+ break;
+ }
+ }
+
+ if (is_ffs) {
+ if (ffs_enabled)
+ continue;
+ err = android_enable_function(dev, "ffs");
+ if (err)
+ pr_err("android_usb: Cannot enable ffs (%d)",
+ err);
+ else
+ ffs_enabled = 1;
+ continue;
+ }
+
+ err = android_enable_function(dev, name);
+ if (err)
+ pr_err("android_usb: Cannot enable '%s' (%d)",
+ name, err);
+ }
+
+ mutex_unlock(&dev->mutex);
+
+ return size;
+}
+
+static ssize_t enable_show(struct device *pdev, struct device_attribute *attr,
+ char *buf)
+{
+ struct android_dev *dev = dev_get_drvdata(pdev);
+ return sprintf(buf, "%d\n", dev->enabled);
+}
+
+static ssize_t enable_store(struct device *pdev, struct device_attribute *attr,
+ const char *buff, size_t size)
+{
+ struct android_dev *dev = dev_get_drvdata(pdev);
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct android_usb_function *f;
+ int enabled = 0;
+
+
+ if (!cdev)
+ return -ENODEV;
+
+ mutex_lock(&dev->mutex);
+
+ sscanf(buff, "%d", &enabled);
+ if (enabled && !dev->enabled) {
+ /*
+ * Update values in composite driver's copy of
+ * device descriptor.
+ */
+ cdev->desc.idVendor = device_desc.idVendor;
+ cdev->desc.idProduct = device_desc.idProduct;
+ cdev->desc.bcdDevice = device_desc.bcdDevice;
+ cdev->desc.bDeviceClass = device_desc.bDeviceClass;
+ cdev->desc.bDeviceSubClass = device_desc.bDeviceSubClass;
+ cdev->desc.bDeviceProtocol = device_desc.bDeviceProtocol;
+ list_for_each_entry(f, &dev->enabled_functions, enabled_list) {
+ if (f->enable)
+ f->enable(f);
+ }
+ android_enable(dev);
+ dev->enabled = true;
+ } else if (!enabled && dev->enabled) {
+ android_disable(dev);
+ list_for_each_entry(f, &dev->enabled_functions, enabled_list) {
+ if (f->disable)
+ f->disable(f);
+ }
+ dev->enabled = false;
+ } else {
+ pr_err("android_usb: already %s\n",
+ dev->enabled ? "enabled" : "disabled");
+ }
+
+ mutex_unlock(&dev->mutex);
+ return size;
+}
+
+static ssize_t state_show(struct device *pdev, struct device_attribute *attr,
+ char *buf)
+{
+ struct android_dev *dev = dev_get_drvdata(pdev);
+ struct usb_composite_dev *cdev = dev->cdev;
+ char *state = "DISCONNECTED";
+ unsigned long flags;
+
+ if (!cdev)
+ goto out;
+
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (cdev->config)
+ state = "CONFIGURED";
+ else if (dev->connected)
+ state = "CONNECTED";
+ spin_unlock_irqrestore(&cdev->lock, flags);
+out:
+ return sprintf(buf, "%s\n", state);
+}
+
+#define DESCRIPTOR_ATTR(field, format_string) \
+static ssize_t \
+field ## _show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, format_string, device_desc.field); \
+} \
+static ssize_t \
+field ## _store(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t size) \
+{ \
+ int value; \
+ if (sscanf(buf, format_string, &value) == 1) { \
+ device_desc.field = value; \
+ return size; \
+ } \
+ return -EINVAL; \
+} \
+static DEVICE_ATTR(field, S_IRUGO | S_IWUSR, field ## _show, field ## _store);
+
+#define DESCRIPTOR_STRING_ATTR(field, buffer) \
+static ssize_t \
+field ## _show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%s", buffer); \
+} \
+static ssize_t \
+field ## _store(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t size) \
+{ \
+ if (size >= sizeof(buffer)) \
+ return -EINVAL; \
+ return strlcpy(buffer, buf, sizeof(buffer)); \
+} \
+static DEVICE_ATTR(field, S_IRUGO | S_IWUSR, field ## _show, field ## _store);
+
+
+DESCRIPTOR_ATTR(idVendor, "%04x\n")
+DESCRIPTOR_ATTR(idProduct, "%04x\n")
+DESCRIPTOR_ATTR(bcdDevice, "%04x\n")
+DESCRIPTOR_ATTR(bDeviceClass, "%d\n")
+DESCRIPTOR_ATTR(bDeviceSubClass, "%d\n")
+DESCRIPTOR_ATTR(bDeviceProtocol, "%d\n")
+DESCRIPTOR_STRING_ATTR(iManufacturer, manufacturer_string)
+DESCRIPTOR_STRING_ATTR(iProduct, product_string)
+DESCRIPTOR_STRING_ATTR(iSerial, serial_string)
+
+static DEVICE_ATTR(functions, S_IRUGO | S_IWUSR, functions_show,
+ functions_store);
+static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR, enable_show, enable_store);
+static DEVICE_ATTR(state, S_IRUGO, state_show, NULL);
+
+static struct device_attribute *android_usb_attributes[] = {
+ &dev_attr_idVendor,
+ &dev_attr_idProduct,
+ &dev_attr_bcdDevice,
+ &dev_attr_bDeviceClass,
+ &dev_attr_bDeviceSubClass,
+ &dev_attr_bDeviceProtocol,
+ &dev_attr_iManufacturer,
+ &dev_attr_iProduct,
+ &dev_attr_iSerial,
+ &dev_attr_functions,
+ &dev_attr_enable,
+ &dev_attr_state,
+ NULL
+};
+
+/*-------------------------------------------------------------------------*/
+/* Composite driver */
+
+static int android_bind_config(struct usb_configuration *c)
+{
+ struct android_dev *dev = _android_dev;
+ int ret = 0;
+
+ ret = android_bind_enabled_functions(dev, c);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void android_unbind_config(struct usb_configuration *c)
+{
+ struct android_dev *dev = _android_dev;
+
+ android_unbind_enabled_functions(dev, c);
+}
+
+static int android_bind(struct usb_composite_dev *cdev)
+{
+ struct android_dev *dev = _android_dev;
+ struct usb_gadget *gadget = cdev->gadget;
+ int id, ret;
+
+ /* Save the default handler */
+ dev->setup_complete = cdev->req->complete;
+
+ /*
+ * Start disconnected. Userspace will connect the gadget once
+ * it is done configuring the functions.
+ */
+ usb_gadget_disconnect(gadget);
+
+ ret = android_init_functions(dev->functions, cdev);
+ if (ret)
+ return ret;
+
+ /* Allocate string descriptor numbers ... note that string
+ * contents can be overridden by the composite_dev glue.
+ */
+ id = usb_string_id(cdev);
+ if (id < 0)
+ return id;
+ strings_dev[STRING_MANUFACTURER_IDX].id = id;
+ device_desc.iManufacturer = id;
+
+ id = usb_string_id(cdev);
+ if (id < 0)
+ return id;
+ strings_dev[STRING_PRODUCT_IDX].id = id;
+ device_desc.iProduct = id;
+
+ /* Default strings - should be updated by userspace */
+ strncpy(manufacturer_string, "Android", sizeof(manufacturer_string)-1);
+ strncpy(product_string, "Android", sizeof(product_string) - 1);
+ strncpy(serial_string, "0123456789ABCDEF", sizeof(serial_string) - 1);
+
+ id = usb_string_id(cdev);
+ if (id < 0)
+ return id;
+ strings_dev[STRING_SERIAL_IDX].id = id;
+ device_desc.iSerialNumber = id;
+
+ usb_gadget_set_selfpowered(gadget);
+ dev->cdev = cdev;
+
+ return 0;
+}
+
+static int android_usb_unbind(struct usb_composite_dev *cdev)
+{
+ struct android_dev *dev = _android_dev;
+
+ cancel_work_sync(&dev->work);
+ android_cleanup_functions(dev->functions);
+ return 0;
+}
+
+/* HACK: android needs to override setup for accessory to work */
+static int (*composite_setup_func)(struct usb_gadget *gadget, const struct usb_ctrlrequest *c);
+
+static int
+android_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *c)
+{
+ struct android_dev *dev = _android_dev;
+ struct usb_composite_dev *cdev = get_gadget_data(gadget);
+ struct usb_request *req = cdev->req;
+ struct android_usb_function *f;
+ int value = -EOPNOTSUPP;
+ unsigned long flags;
+
+ req->zero = 0;
+ req->length = 0;
+ req->complete = dev->setup_complete;
+ gadget->ep0->driver_data = cdev;
+
+ list_for_each_entry(f, &dev->enabled_functions, enabled_list) {
+ if (f->ctrlrequest) {
+ value = f->ctrlrequest(f, cdev, c);
+ if (value >= 0)
+ break;
+ }
+ }
+
+ /* Special case the accessory function.
+ * It needs to handle control requests before it is enabled.
+ */
+ if (value < 0)
+ value = acc_ctrlrequest(cdev, c);
+
+ if (value < 0)
+ value = composite_setup_func(gadget, c);
+
+ spin_lock_irqsave(&cdev->lock, flags);
+ if (!dev->connected) {
+ dev->connected = 1;
+ schedule_work(&dev->work);
+ } else if (c->bRequest == USB_REQ_SET_CONFIGURATION &&
+ cdev->config) {
+ schedule_work(&dev->work);
+ }
+ spin_unlock_irqrestore(&cdev->lock, flags);
+
+ return value;
+}
+
+static void android_disconnect(struct usb_composite_dev *cdev)
+{
+ struct android_dev *dev = _android_dev;
+
+ /*
+  * Accessory HID support can be active while the accessory
+  * function is not actually enabled, so we need to inform it
+  * when we are disconnected.
+  */
+ acc_disconnect();
+
+ dev->connected = 0;
+ schedule_work(&dev->work);
+}
+
+static struct usb_composite_driver android_usb_driver = {
+ .name = "android_usb",
+ .dev = &device_desc,
+ .strings = dev_strings,
+ .bind = android_bind,
+ .unbind = android_usb_unbind,
+ .disconnect = android_disconnect,
+ .max_speed = USB_SPEED_HIGH,
+};
+
+static int android_create_device(struct android_dev *dev)
+{
+ struct device_attribute **attrs = android_usb_attributes;
+ struct device_attribute *attr;
+ int err;
+
+ dev->dev = device_create(android_class, NULL,
+ MKDEV(0, 0), NULL, "android0");
+ if (IS_ERR(dev->dev))
+ return PTR_ERR(dev->dev);
+
+ dev_set_drvdata(dev->dev, dev);
+
+ while ((attr = *attrs++)) {
+ err = device_create_file(dev->dev, attr);
+ if (err) {
+ device_destroy(android_class, dev->dev->devt);
+ return err;
+ }
+ }
+ return 0;
+}
+
+
+static int __init init(void)
+{
+ struct android_dev *dev;
+ int err;
+
+ android_class = class_create(THIS_MODULE, "android_usb");
+ if (IS_ERR(android_class))
+ return PTR_ERR(android_class);
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ err = -ENOMEM;
+ goto err_dev;
+ }
+
+ dev->disable_depth = 1;
+ dev->functions = supported_functions;
+ INIT_LIST_HEAD(&dev->enabled_functions);
+ INIT_WORK(&dev->work, android_work);
+ mutex_init(&dev->mutex);
+
+ err = android_create_device(dev);
+ if (err) {
+ pr_err("%s: failed to create android device %d", __func__, err);
+ goto err_create;
+ }
+
+ _android_dev = dev;
+
+ err = usb_composite_probe(&android_usb_driver);
+ if (err) {
+ pr_err("%s: failed to probe driver %d", __func__, err);
+ goto err_probe;
+ }
+
+ /* HACK: exchange composite's setup with ours */
+ composite_setup_func = android_usb_driver.gadget_driver.setup;
+ android_usb_driver.gadget_driver.setup = android_setup;
+
+ return 0;
+
+err_probe:
+ device_destroy(android_class, dev->dev->devt);
+err_create:
+ kfree(dev);
+err_dev:
+ class_destroy(android_class);
+ return err;
+}
+late_initcall(init);
+
+static void __exit cleanup(void)
+{
+ usb_composite_unregister(&android_usb_driver);
+ class_destroy(android_class);
+ kfree(_android_dev);
+ _android_dev = NULL;
+}
+module_exit(cleanup);
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index a660716f9331..6a87860a32b9 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -812,7 +812,7 @@ done:
}
EXPORT_SYMBOL_GPL(usb_add_config);
-static void remove_config(struct usb_composite_dev *cdev,
+static void unbind_config(struct usb_composite_dev *cdev,
struct usb_configuration *config)
{
while (!list_empty(&config->functions)) {
@@ -827,7 +827,6 @@ static void remove_config(struct usb_composite_dev *cdev,
/* may free memory for "f" */
}
}
- list_del(&config->list);
if (config->unbind) {
DBG(cdev, "unbind config '%s'/%p\n", config->label, config);
config->unbind(config);
@@ -854,9 +853,11 @@ void usb_remove_config(struct usb_composite_dev *cdev,
if (cdev->config == config)
reset_config(cdev);
+ list_del(&config->list);
+
spin_unlock_irqrestore(&cdev->lock, flags);
- remove_config(cdev, config);
+ unbind_config(cdev, config);
}
/*-------------------------------------------------------------------------*/
@@ -1525,7 +1526,8 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver)
struct usb_configuration *c;
c = list_first_entry(&cdev->configs,
struct usb_configuration, list);
- remove_config(cdev, c);
+ list_del(&c->list);
+ unbind_config(cdev, c);
}
if (cdev->driver->unbind && unbind_driver)
cdev->driver->unbind(cdev);
diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c
new file mode 100644
index 000000000000..0237f1e059b4
--- /dev/null
+++ b/drivers/usb/gadget/f_accessory.c
@@ -0,0 +1,1215 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* #define DEBUG */
+/* #define VERBOSE_DEBUG */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+
+#include <linux/hid.h>
+#include <linux/hiddev.h>
+#include <linux/usb.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/f_accessory.h>
+
+#define BULK_BUFFER_SIZE 16384
+#define ACC_STRING_SIZE 256
+
+#define PROTOCOL_VERSION 2
+
+/* String IDs */
+#define INTERFACE_STRING_INDEX 0
+
+/* number of tx and rx requests to allocate */
+#define TX_REQ_MAX 4
+#define RX_REQ_MAX 2
+
+struct acc_hid_dev {
+ struct list_head list;
+ struct hid_device *hid;
+ struct acc_dev *dev;
+ /* accessory defined ID */
+ int id;
+ /* HID report descriptor */
+ u8 *report_desc;
+ /* length of HID report descriptor */
+ int report_desc_len;
+ /* number of bytes of report_desc we have received so far */
+ int report_desc_offset;
+};
+
+struct acc_dev {
+ struct usb_function function;
+ struct usb_composite_dev *cdev;
+ spinlock_t lock;
+
+ struct usb_ep *ep_in;
+ struct usb_ep *ep_out;
+
+ /* set to 1 when we connect */
+ int online:1;
+ /* Set to 1 when we disconnect.
+ * Not cleared until our file is closed.
+ */
+ int disconnected:1;
+
+ /* strings sent by the host */
+ char manufacturer[ACC_STRING_SIZE];
+ char model[ACC_STRING_SIZE];
+ char description[ACC_STRING_SIZE];
+ char version[ACC_STRING_SIZE];
+ char uri[ACC_STRING_SIZE];
+ char serial[ACC_STRING_SIZE];
+
+ /* for acc_complete_set_string */
+ int string_index;
+
+ /* set to 1 if we have a pending start request */
+ int start_requested;
+
+ int audio_mode;
+
+ /* synchronize access to our device file */
+ atomic_t open_excl;
+
+ struct list_head tx_idle;
+
+ wait_queue_head_t read_wq;
+ wait_queue_head_t write_wq;
+ struct usb_request *rx_req[RX_REQ_MAX];
+ int rx_done;
+
+ /* delayed work for handling ACCESSORY_START */
+ struct delayed_work start_work;
+
+ /* worker for registering and unregistering hid devices */
+ struct work_struct hid_work;
+
+ /* list of active HID devices */
+ struct list_head hid_list;
+
+ /* list of new HID devices to register */
+ struct list_head new_hid_list;
+
+ /* list of dead HID devices to unregister */
+ struct list_head dead_hid_list;
+};
+
+static struct usb_interface_descriptor acc_interface_desc = {
+ .bLength = USB_DT_INTERFACE_SIZE,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bInterfaceNumber = 0,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .bInterfaceSubClass = USB_SUBCLASS_VENDOR_SPEC,
+ .bInterfaceProtocol = 0,
+};
+
+static struct usb_endpoint_descriptor acc_highspeed_in_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor acc_highspeed_out_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor acc_fullspeed_in_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor acc_fullspeed_out_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_descriptor_header *fs_acc_descs[] = {
+ (struct usb_descriptor_header *) &acc_interface_desc,
+ (struct usb_descriptor_header *) &acc_fullspeed_in_desc,
+ (struct usb_descriptor_header *) &acc_fullspeed_out_desc,
+ NULL,
+};
+
+static struct usb_descriptor_header *hs_acc_descs[] = {
+ (struct usb_descriptor_header *) &acc_interface_desc,
+ (struct usb_descriptor_header *) &acc_highspeed_in_desc,
+ (struct usb_descriptor_header *) &acc_highspeed_out_desc,
+ NULL,
+};
+
+static struct usb_string acc_string_defs[] = {
+ [INTERFACE_STRING_INDEX].s = "Android Accessory Interface",
+ { }, /* end of list */
+};
+
+static struct usb_gadget_strings acc_string_table = {
+ .language = 0x0409, /* en-US */
+ .strings = acc_string_defs,
+};
+
+static struct usb_gadget_strings *acc_strings[] = {
+ &acc_string_table,
+ NULL,
+};
+
+/* temporary variable used between acc_open() and acc_bind_config() */
+static struct acc_dev *_acc_dev;
+
+static inline struct acc_dev *func_to_dev(struct usb_function *f)
+{
+ return container_of(f, struct acc_dev, function);
+}
+
+static struct usb_request *acc_request_new(struct usb_ep *ep, int buffer_size)
+{
+ struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+ if (!req)
+ return NULL;
+
+ /* now allocate buffers for the requests */
+ req->buf = kmalloc(buffer_size, GFP_KERNEL);
+ if (!req->buf) {
+ usb_ep_free_request(ep, req);
+ return NULL;
+ }
+
+ return req;
+}
+
+static void acc_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+ if (req) {
+ kfree(req->buf);
+ usb_ep_free_request(ep, req);
+ }
+}
+
+/* add a request to the tail of a list */
+static void req_put(struct acc_dev *dev, struct list_head *head,
+ struct usb_request *req)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_add_tail(&req->list, head);
+ spin_unlock_irqrestore(&dev->lock, flags);
+}
+
+/* remove a request from the head of a list */
+static struct usb_request *req_get(struct acc_dev *dev, struct list_head *head)
+{
+ unsigned long flags;
+ struct usb_request *req;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ if (list_empty(head)) {
+ req = 0;
+ } else {
+ req = list_first_entry(head, struct usb_request, list);
+ list_del(&req->list);
+ }
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return req;
+}
+
+static void acc_set_disconnected(struct acc_dev *dev)
+{
+ dev->online = 0;
+ dev->disconnected = 1;
+}
+
+static void acc_complete_in(struct usb_ep *ep, struct usb_request *req)
+{
+ struct acc_dev *dev = _acc_dev;
+
+ if (req->status == -ESHUTDOWN) {
+ pr_debug("acc_complete_in set disconnected");
+ acc_set_disconnected(dev);
+ }
+
+ req_put(dev, &dev->tx_idle, req);
+
+ wake_up(&dev->write_wq);
+}
+
+static void acc_complete_out(struct usb_ep *ep, struct usb_request *req)
+{
+ struct acc_dev *dev = _acc_dev;
+
+ dev->rx_done = 1;
+ if (req->status == -ESHUTDOWN) {
+ pr_debug("acc_complete_out set disconnected");
+ acc_set_disconnected(dev);
+ }
+
+ wake_up(&dev->read_wq);
+}
+
+static void acc_complete_set_string(struct usb_ep *ep, struct usb_request *req)
+{
+ struct acc_dev *dev = ep->driver_data;
+ char *string_dest = NULL;
+ int length = req->actual;
+
+ if (req->status != 0) {
+ pr_err("acc_complete_set_string, err %d\n", req->status);
+ return;
+ }
+
+ switch (dev->string_index) {
+ case ACCESSORY_STRING_MANUFACTURER:
+ string_dest = dev->manufacturer;
+ break;
+ case ACCESSORY_STRING_MODEL:
+ string_dest = dev->model;
+ break;
+ case ACCESSORY_STRING_DESCRIPTION:
+ string_dest = dev->description;
+ break;
+ case ACCESSORY_STRING_VERSION:
+ string_dest = dev->version;
+ break;
+ case ACCESSORY_STRING_URI:
+ string_dest = dev->uri;
+ break;
+ case ACCESSORY_STRING_SERIAL:
+ string_dest = dev->serial;
+ break;
+ }
+ if (string_dest) {
+ unsigned long flags;
+
+ if (length >= ACC_STRING_SIZE)
+ length = ACC_STRING_SIZE - 1;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ memcpy(string_dest, req->buf, length);
+ /* ensure zero termination */
+ string_dest[length] = 0;
+ spin_unlock_irqrestore(&dev->lock, flags);
+ } else {
+ pr_err("unknown accessory string index %d\n",
+ dev->string_index);
+ }
+}
+
+static void acc_complete_set_hid_report_desc(struct usb_ep *ep,
+ struct usb_request *req)
+{
+ struct acc_hid_dev *hid = req->context;
+ struct acc_dev *dev = hid->dev;
+ int length = req->actual;
+
+ if (req->status != 0) {
+ pr_err("acc_complete_set_hid_report_desc, err %d\n",
+ req->status);
+ return;
+ }
+
+ memcpy(hid->report_desc + hid->report_desc_offset, req->buf, length);
+ hid->report_desc_offset += length;
+ if (hid->report_desc_offset == hid->report_desc_len) {
+ /* After we have received the entire report descriptor
+ * we schedule work to initialize the HID device
+ */
+ schedule_work(&dev->hid_work);
+ }
+}
+
+static void acc_complete_send_hid_event(struct usb_ep *ep,
+ struct usb_request *req)
+{
+ struct acc_hid_dev *hid = req->context;
+ int length = req->actual;
+
+ if (req->status != 0) {
+ pr_err("acc_complete_send_hid_event, err %d\n", req->status);
+ return;
+ }
+
+ hid_report_raw_event(hid->hid, HID_INPUT_REPORT, req->buf, length, 1);
+}
+
+static int acc_hid_parse(struct hid_device *hid)
+{
+ struct acc_hid_dev *hdev = hid->driver_data;
+
+ hid_parse_report(hid, hdev->report_desc, hdev->report_desc_len);
+ return 0;
+}
+
+static int acc_hid_start(struct hid_device *hid)
+{
+ return 0;
+}
+
+static void acc_hid_stop(struct hid_device *hid)
+{
+}
+
+static int acc_hid_open(struct hid_device *hid)
+{
+ return 0;
+}
+
+static void acc_hid_close(struct hid_device *hid)
+{
+}
+
+static struct hid_ll_driver acc_hid_ll_driver = {
+ .parse = acc_hid_parse,
+ .start = acc_hid_start,
+ .stop = acc_hid_stop,
+ .open = acc_hid_open,
+ .close = acc_hid_close,
+};
+
+static struct acc_hid_dev *acc_hid_new(struct acc_dev *dev,
+ int id, int desc_len)
+{
+ struct acc_hid_dev *hdev;
+
+ hdev = kzalloc(sizeof(*hdev), GFP_ATOMIC);
+ if (!hdev)
+ return NULL;
+ hdev->report_desc = kzalloc(desc_len, GFP_ATOMIC);
+ if (!hdev->report_desc) {
+ kfree(hdev);
+ return NULL;
+ }
+ hdev->dev = dev;
+ hdev->id = id;
+ hdev->report_desc_len = desc_len;
+
+ return hdev;
+}
+
+static struct acc_hid_dev *acc_hid_get(struct list_head *list, int id)
+{
+ struct acc_hid_dev *hid;
+
+ list_for_each_entry(hid, list, list) {
+ if (hid->id == id)
+ return hid;
+ }
+ return NULL;
+}
+
+static int acc_register_hid(struct acc_dev *dev, int id, int desc_length)
+{
+ struct acc_hid_dev *hid;
+ unsigned long flags;
+
+ /* report descriptor length must be > 0 */
+ if (desc_length <= 0)
+ return -EINVAL;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ /* replace HID if one already exists with this ID */
+ hid = acc_hid_get(&dev->hid_list, id);
+ if (!hid)
+ hid = acc_hid_get(&dev->new_hid_list, id);
+ if (hid)
+ list_move(&hid->list, &dev->dead_hid_list);
+
+ hid = acc_hid_new(dev, id, desc_length);
+ if (!hid) {
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return -ENOMEM;
+ }
+
+ list_add(&hid->list, &dev->new_hid_list);
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ /* schedule work to register the HID device */
+ schedule_work(&dev->hid_work);
+ return 0;
+}
+
+static int acc_unregister_hid(struct acc_dev *dev, int id)
+{
+ struct acc_hid_dev *hid;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ hid = acc_hid_get(&dev->hid_list, id);
+ if (!hid)
+ hid = acc_hid_get(&dev->new_hid_list, id);
+ if (!hid) {
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return -EINVAL;
+ }
+
+ list_move(&hid->list, &dev->dead_hid_list);
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ schedule_work(&dev->hid_work);
+ return 0;
+}
+
+static int create_bulk_endpoints(struct acc_dev *dev,
+ struct usb_endpoint_descriptor *in_desc,
+ struct usb_endpoint_descriptor *out_desc)
+{
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct usb_request *req;
+ struct usb_ep *ep;
+ int i;
+
+ DBG(cdev, "create_bulk_endpoints dev: %p\n", dev);
+
+ ep = usb_ep_autoconfig(cdev->gadget, in_desc);
+ if (!ep) {
+ DBG(cdev, "usb_ep_autoconfig for ep_in failed\n");
+ return -ENODEV;
+ }
+ DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name);
+ ep->driver_data = dev; /* claim the endpoint */
+ dev->ep_in = ep;
+
+ ep = usb_ep_autoconfig(cdev->gadget, out_desc);
+ if (!ep) {
+ DBG(cdev, "usb_ep_autoconfig for ep_out failed\n");
+ return -ENODEV;
+ }
+ DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name);
+ ep->driver_data = dev; /* claim the endpoint */
+ dev->ep_out = ep;
+
+ ep = usb_ep_autoconfig(cdev->gadget, out_desc);
+ if (!ep) {
+ DBG(cdev, "usb_ep_autoconfig for ep_out failed\n");
+ return -ENODEV;
+ }
+ DBG(cdev, "usb_ep_autoconfig for ep_out got %s\n", ep->name);
+ ep->driver_data = dev; /* claim the endpoint */
+ dev->ep_out = ep;
+
+ /* now allocate requests for our endpoints */
+ for (i = 0; i < TX_REQ_MAX; i++) {
+ req = acc_request_new(dev->ep_in, BULK_BUFFER_SIZE);
+ if (!req)
+ goto fail;
+ req->complete = acc_complete_in;
+ req_put(dev, &dev->tx_idle, req);
+ }
+ for (i = 0; i < RX_REQ_MAX; i++) {
+ req = acc_request_new(dev->ep_out, BULK_BUFFER_SIZE);
+ if (!req)
+ goto fail;
+ req->complete = acc_complete_out;
+ dev->rx_req[i] = req;
+ }
+
+ return 0;
+
+fail:
+ pr_err("acc_bind() could not allocate requests\n");
+ while ((req = req_get(dev, &dev->tx_idle)))
+ acc_request_free(req, dev->ep_in);
+ for (i = 0; i < RX_REQ_MAX; i++)
+ acc_request_free(dev->rx_req[i], dev->ep_out);
+ return -1;
+}
+
+static ssize_t acc_read(struct file *fp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct acc_dev *dev = fp->private_data;
+ struct usb_request *req;
+ ssize_t r = count;
+ unsigned xfer;
+ int ret = 0;
+
+ pr_debug("acc_read(%zu)\n", count);
+
+ if (dev->disconnected) {
+ pr_debug("acc_read disconnected");
+ return -ENODEV;
+ }
+
+ if (count > BULK_BUFFER_SIZE)
+ count = BULK_BUFFER_SIZE;
+
+ /* we will block until we're online */
+ pr_debug("acc_read: waiting for online\n");
+ ret = wait_event_interruptible(dev->read_wq, dev->online);
+ if (ret < 0) {
+ r = ret;
+ goto done;
+ }
+
+ if (dev->rx_done) {
+ /* a previously queued request has already completed; reuse its data */
+ req = dev->rx_req[0];
+ goto copy_data;
+ }
+
+requeue_req:
+ /* queue a request */
+ req = dev->rx_req[0];
+ req->length = count;
+ dev->rx_done = 0;
+ ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL);
+ if (ret < 0) {
+ r = -EIO;
+ goto done;
+ } else {
+ pr_debug("rx %p queue\n", req);
+ }
+
+ /* wait for a request to complete */
+ ret = wait_event_interruptible(dev->read_wq, dev->rx_done);
+ if (ret < 0) {
+ r = ret;
+ ret = usb_ep_dequeue(dev->ep_out, req);
+ if (ret != 0) {
+ /* cancel failed; data may already have been received and
+ * will be returned by the next read
+ */
+ pr_debug("acc_read: cancelling failed %d", ret);
+ }
+ goto done;
+ }
+
+copy_data:
+ dev->rx_done = 0;
+ if (dev->online) {
+ /* If we got a 0-len packet, throw it back and try again. */
+ if (req->actual == 0)
+ goto requeue_req;
+
+ pr_debug("rx %p %u\n", req, req->actual);
+ xfer = (req->actual < count) ? req->actual : count;
+ r = xfer;
+ if (copy_to_user(buf, req->buf, xfer))
+ r = -EFAULT;
+ } else
+ r = -EIO;
+
+done:
+ pr_debug("acc_read returning %zd\n", r);
+ return r;
+}
+
+static ssize_t acc_write(struct file *fp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct acc_dev *dev = fp->private_data;
+ struct usb_request *req = 0;
+ ssize_t r = count;
+ unsigned xfer;
+ int ret;
+
+ pr_debug("acc_write(%zu)\n", count);
+
+ if (!dev->online || dev->disconnected) {
+ pr_debug("acc_write disconnected or not online");
+ return -ENODEV;
+ }
+
+ while (count > 0) {
+ if (!dev->online) {
+ pr_debug("acc_write dev->error\n");
+ r = -EIO;
+ break;
+ }
+
+ /* get an idle tx request to use */
+ req = 0;
+ ret = wait_event_interruptible(dev->write_wq,
+ ((req = req_get(dev, &dev->tx_idle)) || !dev->online));
+ if (!req) {
+ r = ret;
+ break;
+ }
+
+ if (count > BULK_BUFFER_SIZE) {
+ xfer = BULK_BUFFER_SIZE;
+ /* there will be more TX requests, so don't send a ZLP yet */
+ req->zero = 0;
+ } else {
+ xfer = count;
+ /* If the data length is a multiple of the
+ * maxpacket size then send a zero length packet (ZLP).
+ */
+ req->zero = ((xfer % dev->ep_in->maxpacket) == 0);
+ }
+ if (copy_from_user(req->buf, buf, xfer)) {
+ r = -EFAULT;
+ break;
+ }
+
+ req->length = xfer;
+ ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+ if (ret < 0) {
+ pr_debug("acc_write: xfer error %d\n", ret);
+ r = -EIO;
+ break;
+ }
+
+ buf += xfer;
+ count -= xfer;
+
+ /* zero this so we don't try to free it on error exit */
+ req = 0;
+ }
+
+ if (req)
+ req_put(dev, &dev->tx_idle, req);
+
+ pr_debug("acc_write returning %zd\n", r);
+ return r;
+}
+
+static long acc_ioctl(struct file *fp, unsigned code, unsigned long value)
+{
+ struct acc_dev *dev = fp->private_data;
+ char *src = NULL;
+ int ret;
+
+ switch (code) {
+ case ACCESSORY_GET_STRING_MANUFACTURER:
+ src = dev->manufacturer;
+ break;
+ case ACCESSORY_GET_STRING_MODEL:
+ src = dev->model;
+ break;
+ case ACCESSORY_GET_STRING_DESCRIPTION:
+ src = dev->description;
+ break;
+ case ACCESSORY_GET_STRING_VERSION:
+ src = dev->version;
+ break;
+ case ACCESSORY_GET_STRING_URI:
+ src = dev->uri;
+ break;
+ case ACCESSORY_GET_STRING_SERIAL:
+ src = dev->serial;
+ break;
+ case ACCESSORY_IS_START_REQUESTED:
+ return dev->start_requested;
+ case ACCESSORY_GET_AUDIO_MODE:
+ return dev->audio_mode;
+ }
+ if (!src)
+ return -EINVAL;
+
+ ret = strlen(src) + 1;
+ if (copy_to_user((void __user *)value, src, ret))
+ ret = -EFAULT;
+ return ret;
+}
+
+static int acc_open(struct inode *ip, struct file *fp)
+{
+ printk(KERN_INFO "acc_open\n");
+ if (atomic_xchg(&_acc_dev->open_excl, 1))
+ return -EBUSY;
+
+ _acc_dev->disconnected = 0;
+ fp->private_data = _acc_dev;
+ return 0;
+}
+
+static int acc_release(struct inode *ip, struct file *fp)
+{
+ printk(KERN_INFO "acc_release\n");
+
+ WARN_ON(!atomic_xchg(&_acc_dev->open_excl, 0));
+ _acc_dev->disconnected = 0;
+ return 0;
+}
+
+/* file operations for /dev/usb_accessory */
+static const struct file_operations acc_fops = {
+ .owner = THIS_MODULE,
+ .read = acc_read,
+ .write = acc_write,
+ .unlocked_ioctl = acc_ioctl,
+ .open = acc_open,
+ .release = acc_release,
+};
+
+static int acc_hid_probe(struct hid_device *hdev,
+ const struct hid_device_id *id)
+{
+ int ret;
+
+ ret = hid_parse(hdev);
+ if (ret)
+ return ret;
+ return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+}
+
+static struct miscdevice acc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "usb_accessory",
+ .fops = &acc_fops,
+};
+
+static const struct hid_device_id acc_hid_table[] = {
+ { HID_USB_DEVICE(HID_ANY_ID, HID_ANY_ID) },
+ { }
+};
+
+static struct hid_driver acc_hid_driver = {
+ .name = "USB accessory",
+ .id_table = acc_hid_table,
+ .probe = acc_hid_probe,
+};
+
+static int acc_ctrlrequest(struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *ctrl)
+{
+ struct acc_dev *dev = _acc_dev;
+ int value = -EOPNOTSUPP;
+ struct acc_hid_dev *hid;
+ int offset;
+ u8 b_requestType = ctrl->bRequestType;
+ u8 b_request = ctrl->bRequest;
+ u16 w_index = le16_to_cpu(ctrl->wIndex);
+ u16 w_value = le16_to_cpu(ctrl->wValue);
+ u16 w_length = le16_to_cpu(ctrl->wLength);
+ unsigned long flags;
+
+/*
+ printk(KERN_INFO "acc_ctrlrequest "
+ "%02x.%02x v%04x i%04x l%u\n",
+ b_requestType, b_request,
+ w_value, w_index, w_length);
+*/
+
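+ /* vendor-specific OUT requests implement the Android Open Accessory control protocol */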
+ if (b_requestType == (USB_DIR_OUT | USB_TYPE_VENDOR)) {
+ if (b_request == ACCESSORY_START) {
+ dev->start_requested = 1;
+ schedule_delayed_work(
+ &dev->start_work, msecs_to_jiffies(10));
+ value = 0;
+ } else if (b_request == ACCESSORY_SEND_STRING) {
+ dev->string_index = w_index;
+ cdev->gadget->ep0->driver_data = dev;
+ cdev->req->complete = acc_complete_set_string;
+ value = w_length;
+ } else if (b_request == ACCESSORY_SET_AUDIO_MODE &&
+ w_index == 0 && w_length == 0) {
+ dev->audio_mode = w_value;
+ value = 0;
+ } else if (b_request == ACCESSORY_REGISTER_HID) {
+ value = acc_register_hid(dev, w_value, w_index);
+ } else if (b_request == ACCESSORY_UNREGISTER_HID) {
+ value = acc_unregister_hid(dev, w_value);
+ } else if (b_request == ACCESSORY_SET_HID_REPORT_DESC) {
+ spin_lock_irqsave(&dev->lock, flags);
+ hid = acc_hid_get(&dev->new_hid_list, w_value);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ if (!hid) {
+ value = -EINVAL;
+ goto err;
+ }
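+ /* report descriptor data must arrive in order and fit the length given at registration */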
+ offset = w_index;
+ if (offset != hid->report_desc_offset
+ || offset + w_length > hid->report_desc_len) {
+ value = -EINVAL;
+ goto err;
+ }
+ cdev->req->context = hid;
+ cdev->req->complete = acc_complete_set_hid_report_desc;
+ value = w_length;
+ } else if (b_request == ACCESSORY_SEND_HID_EVENT) {
+ spin_lock_irqsave(&dev->lock, flags);
+ hid = acc_hid_get(&dev->hid_list, w_value);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ if (!hid) {
+ value = -EINVAL;
+ goto err;
+ }
+ cdev->req->context = hid;
+ cdev->req->complete = acc_complete_send_hid_event;
+ value = w_length;
+ }
+ } else if (b_requestType == (USB_DIR_IN | USB_TYPE_VENDOR)) {
+ if (b_request == ACCESSORY_GET_PROTOCOL) {
+ *((u16 *)cdev->req->buf) = PROTOCOL_VERSION;
+ value = sizeof(u16);
+
+ /* clear any string left over from a previous session */
+ memset(dev->manufacturer, 0, sizeof(dev->manufacturer));
+ memset(dev->model, 0, sizeof(dev->model));
+ memset(dev->description, 0, sizeof(dev->description));
+ memset(dev->version, 0, sizeof(dev->version));
+ memset(dev->uri, 0, sizeof(dev->uri));
+ memset(dev->serial, 0, sizeof(dev->serial));
+ dev->start_requested = 0;
+ dev->audio_mode = 0;
+ }
+ }
+
+ if (value >= 0) {
+ cdev->req->zero = 0;
+ cdev->req->length = value;
+ value = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC);
+ if (value < 0)
+ ERROR(cdev, "%s setup response queue error\n",
+ __func__);
+ }
+
+err:
+ if (value == -EOPNOTSUPP)
+ VDBG(cdev,
+ "unknown class-specific control req "
+ "%02x.%02x v%04x i%04x l%u\n",
+ ctrl->bRequestType, ctrl->bRequest,
+ w_value, w_index, w_length);
+ return value;
+}
+
+static int
+acc_function_bind(struct usb_configuration *c, struct usb_function *f)
+{
+ struct usb_composite_dev *cdev = c->cdev;
+ struct acc_dev *dev = func_to_dev(f);
+ int id;
+ int ret;
+
+ DBG(cdev, "acc_function_bind dev: %p\n", dev);
+
+ ret = hid_register_driver(&acc_hid_driver);
+ if (ret)
+ return ret;
+
+ dev->start_requested = 0;
+
+ /* allocate interface ID(s) */
+ id = usb_interface_id(c, f);
+ if (id < 0)
+ return id;
+ acc_interface_desc.bInterfaceNumber = id;
+
+ /* allocate endpoints */
+ ret = create_bulk_endpoints(dev, &acc_fullspeed_in_desc,
+ &acc_fullspeed_out_desc);
+ if (ret)
+ return ret;
+
+ /* support high speed hardware */
+ if (gadget_is_dualspeed(c->cdev->gadget)) {
+ acc_highspeed_in_desc.bEndpointAddress =
+ acc_fullspeed_in_desc.bEndpointAddress;
+ acc_highspeed_out_desc.bEndpointAddress =
+ acc_fullspeed_out_desc.bEndpointAddress;
+ }
+
+ DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
+ gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
+ f->name, dev->ep_in->name, dev->ep_out->name);
+ return 0;
+}
+
+static void
+kill_all_hid_devices(struct acc_dev *dev)
+{
+ struct acc_hid_dev *hid;
+ struct list_head *entry, *temp;
+ unsigned long flags;
+
+ /* do nothing if usb accessory device doesn't exist */
+ if (!dev)
+ return;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_for_each_safe(entry, temp, &dev->hid_list) {
+ hid = list_entry(entry, struct acc_hid_dev, list);
+ list_del(&hid->list);
+ list_add(&hid->list, &dev->dead_hid_list);
+ }
+ list_for_each_safe(entry, temp, &dev->new_hid_list) {
+ hid = list_entry(entry, struct acc_hid_dev, list);
+ list_del(&hid->list);
+ list_add(&hid->list, &dev->dead_hid_list);
+ }
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ schedule_work(&dev->hid_work);
+}
+
+static void
+acc_hid_unbind(struct acc_dev *dev)
+{
+ hid_unregister_driver(&acc_hid_driver);
+ kill_all_hid_devices(dev);
+}
+
+static void
+acc_function_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+ struct acc_dev *dev = func_to_dev(f);
+ struct usb_request *req;
+ int i;
+
+ while ((req = req_get(dev, &dev->tx_idle)))
+ acc_request_free(req, dev->ep_in);
+ for (i = 0; i < RX_REQ_MAX; i++)
+ acc_request_free(dev->rx_req[i], dev->ep_out);
+
+ acc_hid_unbind(dev);
+}
+
+static void acc_start_work(struct work_struct *data)
+{
+ char *envp[2] = { "ACCESSORY=START", NULL };
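+ /* notify userspace that the host has requested accessory mode */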
+ kobject_uevent_env(&acc_device.this_device->kobj, KOBJ_CHANGE, envp);
+}
+
+static int acc_hid_init(struct acc_hid_dev *hdev)
+{
+ struct hid_device *hid;
+ int ret;
+
+ hid = hid_allocate_device();
+ if (IS_ERR(hid))
+ return PTR_ERR(hid);
+
+ hid->ll_driver = &acc_hid_ll_driver;
+ hid->dev.parent = acc_device.this_device;
+
+ hid->bus = BUS_USB;
+ hid->vendor = HID_ANY_ID;
+ hid->product = HID_ANY_ID;
+ hid->driver_data = hdev;
+ ret = hid_add_device(hid);
+ if (ret) {
+ pr_err("can't add hid device: %d\n", ret);
+ hid_destroy_device(hid);
+ return ret;
+ }
+
+ hdev->hid = hid;
+ return 0;
+}
+
+static void acc_hid_delete(struct acc_hid_dev *hid)
+{
+ kfree(hid->report_desc);
+ kfree(hid);
+}
+
+static void acc_hid_work(struct work_struct *data)
+{
+ struct acc_dev *dev = _acc_dev;
+ struct list_head *entry, *temp;
+ struct acc_hid_dev *hid;
+ struct list_head new_list, dead_list;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&new_list);
+
+ spin_lock_irqsave(&dev->lock, flags);
+
+ /* copy hids that are ready for initialization to new_list */
+ list_for_each_safe(entry, temp, &dev->new_hid_list) {
+ hid = list_entry(entry, struct acc_hid_dev, list);
+ if (hid->report_desc_offset == hid->report_desc_len)
+ list_move(&hid->list, &new_list);
+ }
+
+ if (list_empty(&dev->dead_hid_list)) {
+ INIT_LIST_HEAD(&dead_list);
+ } else {
+ /* move all of dev->dead_hid_list to dead_list */
+ dead_list.prev = dev->dead_hid_list.prev;
+ dead_list.next = dev->dead_hid_list.next;
+ dead_list.next->prev = &dead_list;
+ dead_list.prev->next = &dead_list;
+ INIT_LIST_HEAD(&dev->dead_hid_list);
+ }
+
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ /* register new HID devices */
+ list_for_each_safe(entry, temp, &new_list) {
+ hid = list_entry(entry, struct acc_hid_dev, list);
+ if (acc_hid_init(hid)) {
+ pr_err("can't add HID device %p\n", hid);
+ acc_hid_delete(hid);
+ } else {
+ spin_lock_irqsave(&dev->lock, flags);
+ list_move(&hid->list, &dev->hid_list);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ }
+ }
+
+ /* remove dead HID devices */
+ list_for_each_safe(entry, temp, &dead_list) {
+ hid = list_entry(entry, struct acc_hid_dev, list);
+ list_del(&hid->list);
+ if (hid->hid)
+ hid_destroy_device(hid->hid);
+ acc_hid_delete(hid);
+ }
+}
+
+static int acc_function_set_alt(struct usb_function *f,
+ unsigned intf, unsigned alt)
+{
+ struct acc_dev *dev = func_to_dev(f);
+ struct usb_composite_dev *cdev = f->config->cdev;
+ int ret;
+
+ DBG(cdev, "acc_function_set_alt intf: %d alt: %d\n", intf, alt);
+
+ ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in);
+ if (ret)
+ return ret;
+
+ ret = usb_ep_enable(dev->ep_in);
+ if (ret)
+ return ret;
+
+ ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out);
+ if (ret)
+ return ret;
+
+ ret = usb_ep_enable(dev->ep_out);
+ if (ret) {
+ usb_ep_disable(dev->ep_in);
+ return ret;
+ }
+
+ dev->online = 1;
+
+ /* readers may be blocked waiting for us to go online */
+ wake_up(&dev->read_wq);
+ return 0;
+}
+
+static void acc_function_disable(struct usb_function *f)
+{
+ struct acc_dev *dev = func_to_dev(f);
+ struct usb_composite_dev *cdev = dev->cdev;
+
+ DBG(cdev, "acc_function_disable\n");
+ acc_set_disconnected(dev);
+ usb_ep_disable(dev->ep_in);
+ usb_ep_disable(dev->ep_out);
+
+ /* readers may be blocked waiting for us to go online */
+ wake_up(&dev->read_wq);
+
+ VDBG(cdev, "%s disabled\n", dev->function.name);
+}
+
+static int acc_bind_config(struct usb_configuration *c)
+{
+ struct acc_dev *dev = _acc_dev;
+ int ret;
+
+ printk(KERN_INFO "acc_bind_config\n");
+
+ /* allocate a string ID for our interface */
+ if (acc_string_defs[INTERFACE_STRING_INDEX].id == 0) {
+ ret = usb_string_id(c->cdev);
+ if (ret < 0)
+ return ret;
+ acc_string_defs[INTERFACE_STRING_INDEX].id = ret;
+ acc_interface_desc.iInterface = ret;
+ }
+
+ dev->cdev = c->cdev;
+ dev->function.name = "accessory";
+ dev->function.strings = acc_strings,
+ dev->function.fs_descriptors = fs_acc_descs;
+ dev->function.hs_descriptors = hs_acc_descs;
+ dev->function.bind = acc_function_bind;
+ dev->function.unbind = acc_function_unbind;
+ dev->function.set_alt = acc_function_set_alt;
+ dev->function.disable = acc_function_disable;
+
+ return usb_add_function(c, &dev->function);
+}
+
+static int acc_setup(void)
+{
+ struct acc_dev *dev;
+ int ret;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ spin_lock_init(&dev->lock);
+ init_waitqueue_head(&dev->read_wq);
+ init_waitqueue_head(&dev->write_wq);
+ atomic_set(&dev->open_excl, 0);
+ INIT_LIST_HEAD(&dev->tx_idle);
+ INIT_LIST_HEAD(&dev->hid_list);
+ INIT_LIST_HEAD(&dev->new_hid_list);
+ INIT_LIST_HEAD(&dev->dead_hid_list);
+ INIT_DELAYED_WORK(&dev->start_work, acc_start_work);
+ INIT_WORK(&dev->hid_work, acc_hid_work);
+
+ /* _acc_dev must be set before calling usb_gadget_register_driver */
+ _acc_dev = dev;
+
+ ret = misc_register(&acc_device);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ kfree(dev);
+ pr_err("USB accessory gadget driver failed to initialize\n");
+ return ret;
+}
+
+static void acc_disconnect(void)
+{
+ /* unregister all HID devices if USB is disconnected */
+ kill_all_hid_devices(_acc_dev);
+}
+
+static void acc_cleanup(void)
+{
+ misc_deregister(&acc_device);
+ kfree(_acc_dev);
+ _acc_dev = NULL;
+}
diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c
new file mode 100644
index 000000000000..07a1a3c28f3d
--- /dev/null
+++ b/drivers/usb/gadget/f_audio_source.c
@@ -0,0 +1,834 @@
+/*
+ * Gadget Function Driver for USB audio source device
+ *
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/usb/audio.h>
+#include <linux/wait.h>
+#include <sound/core.h>
+#include <sound/initval.h>
+#include <sound/pcm.h>
+
+#define SAMPLE_RATE 44100
+#define FRAMES_PER_MSEC (SAMPLE_RATE / 1000)
+
+#define IN_EP_MAX_PACKET_SIZE 256
+
+/* Number of requests to allocate */
+#define IN_EP_REQ_COUNT 4
+
+#define AUDIO_AC_INTERFACE 0
+#define AUDIO_AS_INTERFACE 1
+#define AUDIO_NUM_INTERFACES 2
+
+/* B.3.1 Standard AC Interface Descriptor */
+static struct usb_interface_descriptor audio_ac_interface_desc = {
+ .bLength = USB_DT_INTERFACE_SIZE,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 0,
+ .bInterfaceClass = USB_CLASS_AUDIO,
+ .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
+};
+
+DECLARE_UAC_AC_HEADER_DESCRIPTOR(2);
+
+#define UAC_DT_AC_HEADER_LENGTH UAC_DT_AC_HEADER_SIZE(AUDIO_NUM_INTERFACES)
+/* 1 input terminal, 1 output terminal and 1 feature unit */
+#define UAC_DT_TOTAL_LENGTH (UAC_DT_AC_HEADER_LENGTH \
+ + UAC_DT_INPUT_TERMINAL_SIZE + UAC_DT_OUTPUT_TERMINAL_SIZE \
+ + UAC_DT_FEATURE_UNIT_SIZE(0))
+/* B.3.2 Class-Specific AC Interface Descriptor */
+static struct uac1_ac_header_descriptor_2 audio_ac_header_desc = {
+ .bLength = UAC_DT_AC_HEADER_LENGTH,
+ .bDescriptorType = USB_DT_CS_INTERFACE,
+ .bDescriptorSubtype = UAC_HEADER,
+ .bcdADC = __constant_cpu_to_le16(0x0100),
+ .wTotalLength = __constant_cpu_to_le16(UAC_DT_TOTAL_LENGTH),
+ .bInCollection = AUDIO_NUM_INTERFACES,
+ .baInterfaceNr = {
+ [0] = AUDIO_AC_INTERFACE,
+ [1] = AUDIO_AS_INTERFACE,
+ }
+};
+
+#define INPUT_TERMINAL_ID 1
+static struct uac_input_terminal_descriptor audio_input_terminal_desc = {
+ .bLength = UAC_DT_INPUT_TERMINAL_SIZE,
+ .bDescriptorType = USB_DT_CS_INTERFACE,
+ .bDescriptorSubtype = UAC_INPUT_TERMINAL,
+ .bTerminalID = INPUT_TERMINAL_ID,
+ .wTerminalType = UAC_INPUT_TERMINAL_MICROPHONE,
+ .bAssocTerminal = 0,
+ .wChannelConfig = 0x3,
+};
+
+DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(0);
+
+#define FEATURE_UNIT_ID 2
+static struct uac_feature_unit_descriptor_0 audio_feature_unit_desc = {
+ .bLength = UAC_DT_FEATURE_UNIT_SIZE(0),
+ .bDescriptorType = USB_DT_CS_INTERFACE,
+ .bDescriptorSubtype = UAC_FEATURE_UNIT,
+ .bUnitID = FEATURE_UNIT_ID,
+ .bSourceID = INPUT_TERMINAL_ID,
+ .bControlSize = 2,
+};
+
+#define OUTPUT_TERMINAL_ID 3
+static struct uac1_output_terminal_descriptor audio_output_terminal_desc = {
+ .bLength = UAC_DT_OUTPUT_TERMINAL_SIZE,
+ .bDescriptorType = USB_DT_CS_INTERFACE,
+ .bDescriptorSubtype = UAC_OUTPUT_TERMINAL,
+ .bTerminalID = OUTPUT_TERMINAL_ID,
+ .wTerminalType = UAC_TERMINAL_STREAMING,
+ .bAssocTerminal = FEATURE_UNIT_ID,
+ .bSourceID = FEATURE_UNIT_ID,
+};
+
+/* B.4.1 Standard AS Interface Descriptor */
+static struct usb_interface_descriptor audio_as_interface_alt_0_desc = {
+ .bLength = USB_DT_INTERFACE_SIZE,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bAlternateSetting = 0,
+ .bNumEndpoints = 0,
+ .bInterfaceClass = USB_CLASS_AUDIO,
+ .bInterfaceSubClass = USB_SUBCLASS_AUDIOSTREAMING,
+};
+
+static struct usb_interface_descriptor audio_as_interface_alt_1_desc = {
+ .bLength = USB_DT_INTERFACE_SIZE,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bAlternateSetting = 1,
+ .bNumEndpoints = 1,
+ .bInterfaceClass = USB_CLASS_AUDIO,
+ .bInterfaceSubClass = USB_SUBCLASS_AUDIOSTREAMING,
+};
+
+/* B.4.2 Class-Specific AS Interface Descriptor */
+static struct uac1_as_header_descriptor audio_as_header_desc = {
+ .bLength = UAC_DT_AS_HEADER_SIZE,
+ .bDescriptorType = USB_DT_CS_INTERFACE,
+ .bDescriptorSubtype = UAC_AS_GENERAL,
+ .bTerminalLink = INPUT_TERMINAL_ID,
+ .bDelay = 1,
+ .wFormatTag = UAC_FORMAT_TYPE_I_PCM,
+};
+
+DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(1);
+
+static struct uac_format_type_i_discrete_descriptor_1 audio_as_type_i_desc = {
+ .bLength = UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(1),
+ .bDescriptorType = USB_DT_CS_INTERFACE,
+ .bDescriptorSubtype = UAC_FORMAT_TYPE,
+ .bFormatType = UAC_FORMAT_TYPE_I,
+ .bSubframeSize = 2,
+ .bBitResolution = 16,
+ .bSamFreqType = 1,
+};
+
+/* Standard ISO IN Endpoint Descriptor for highspeed */
+static struct usb_endpoint_descriptor audio_hs_as_in_ep_desc = {
+ .bLength = USB_DT_ENDPOINT_AUDIO_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_SYNC_SYNC
+ | USB_ENDPOINT_XFER_ISOC,
+ .wMaxPacketSize = __constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE),
+ .bInterval = 4, /* poll 1 per millisecond */
+};
+
+/* Standard ISO IN Endpoint Descriptor for fullspeed */
+static struct usb_endpoint_descriptor audio_fs_as_in_ep_desc = {
+ .bLength = USB_DT_ENDPOINT_AUDIO_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_SYNC_SYNC
+ | USB_ENDPOINT_XFER_ISOC,
+ .wMaxPacketSize = __constant_cpu_to_le16(IN_EP_MAX_PACKET_SIZE),
+ .bInterval = 1, /* poll 1 per millisecond */
+};
+
+/* Class-specific AS ISO IN Endpoint Descriptor */
+static struct uac_iso_endpoint_descriptor audio_as_iso_in_desc = {
+ .bLength = UAC_ISO_ENDPOINT_DESC_SIZE,
+ .bDescriptorType = USB_DT_CS_ENDPOINT,
+ .bDescriptorSubtype = UAC_EP_GENERAL,
+ .bmAttributes = 1,
+ .bLockDelayUnits = 1,
+ .wLockDelay = __constant_cpu_to_le16(1),
+};
+
+static struct usb_descriptor_header *hs_audio_desc[] = {
+ (struct usb_descriptor_header *)&audio_ac_interface_desc,
+ (struct usb_descriptor_header *)&audio_ac_header_desc,
+
+ (struct usb_descriptor_header *)&audio_input_terminal_desc,
+ (struct usb_descriptor_header *)&audio_output_terminal_desc,
+ (struct usb_descriptor_header *)&audio_feature_unit_desc,
+
+ (struct usb_descriptor_header *)&audio_as_interface_alt_0_desc,
+ (struct usb_descriptor_header *)&audio_as_interface_alt_1_desc,
+ (struct usb_descriptor_header *)&audio_as_header_desc,
+
+ (struct usb_descriptor_header *)&audio_as_type_i_desc,
+
+ (struct usb_descriptor_header *)&audio_hs_as_in_ep_desc,
+ (struct usb_descriptor_header *)&audio_as_iso_in_desc,
+ NULL,
+};
+
+static struct usb_descriptor_header *fs_audio_desc[] = {
+ (struct usb_descriptor_header *)&audio_ac_interface_desc,
+ (struct usb_descriptor_header *)&audio_ac_header_desc,
+
+ (struct usb_descriptor_header *)&audio_input_terminal_desc,
+ (struct usb_descriptor_header *)&audio_output_terminal_desc,
+ (struct usb_descriptor_header *)&audio_feature_unit_desc,
+
+ (struct usb_descriptor_header *)&audio_as_interface_alt_0_desc,
+ (struct usb_descriptor_header *)&audio_as_interface_alt_1_desc,
+ (struct usb_descriptor_header *)&audio_as_header_desc,
+
+ (struct usb_descriptor_header *)&audio_as_type_i_desc,
+
+ (struct usb_descriptor_header *)&audio_fs_as_in_ep_desc,
+ (struct usb_descriptor_header *)&audio_as_iso_in_desc,
+ NULL,
+};
+
+static struct snd_pcm_hardware audio_hw_info = {
+ .info = SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_MMAP_VALID |
+ SNDRV_PCM_INFO_BATCH |
+ SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_BLOCK_TRANSFER,
+
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .channels_min = 2,
+ .channels_max = 2,
+ .rate_min = SAMPLE_RATE,
+ .rate_max = SAMPLE_RATE,
+
+ .buffer_bytes_max = 1024 * 1024,
+ .period_bytes_min = 64,
+ .period_bytes_max = 512 * 1024,
+ .periods_min = 2,
+ .periods_max = 1024,
+};
+
+/*-------------------------------------------------------------------------*/
+
+struct audio_source_config {
+ int card;
+ int device;
+};
+
+struct audio_dev {
+ struct usb_function func;
+ struct snd_card *card;
+ struct snd_pcm *pcm;
+ struct snd_pcm_substream *substream;
+
+ struct list_head idle_reqs;
+ struct usb_ep *in_ep;
+
+ spinlock_t lock;
+
+ /* beginning, end and current position in our buffer */
+ void *buffer_start;
+ void *buffer_end;
+ void *buffer_pos;
+
+ /* byte size of a "period" */
+ unsigned int period;
+ /* bytes sent since last call to snd_pcm_period_elapsed */
+ unsigned int period_offset;
+ /* time we started playing */
+ ktime_t start_time;
+ /* number of frames sent since start_time */
+ s64 frames_sent;
+};
+
+static inline struct audio_dev *func_to_audio(struct usb_function *f)
+{
+ return container_of(f, struct audio_dev, func);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static struct usb_request *audio_request_new(struct usb_ep *ep, int buffer_size)
+{
+ struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+ if (!req)
+ return NULL;
+
+ req->buf = kmalloc(buffer_size, GFP_KERNEL);
+ if (!req->buf) {
+ usb_ep_free_request(ep, req);
+ return NULL;
+ }
+ req->length = buffer_size;
+ return req;
+}
+
+static void audio_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+ if (req) {
+ kfree(req->buf);
+ usb_ep_free_request(ep, req);
+ }
+}
+
+static void audio_req_put(struct audio_dev *audio, struct usb_request *req)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&audio->lock, flags);
+ list_add_tail(&req->list, &audio->idle_reqs);
+ spin_unlock_irqrestore(&audio->lock, flags);
+}
+
+static struct usb_request *audio_req_get(struct audio_dev *audio)
+{
+ unsigned long flags;
+ struct usb_request *req;
+
+ spin_lock_irqsave(&audio->lock, flags);
+ if (list_empty(&audio->idle_reqs)) {
+ req = 0;
+ } else {
+ req = list_first_entry(&audio->idle_reqs, struct usb_request,
+ list);
+ list_del(&req->list);
+ }
+ spin_unlock_irqrestore(&audio->lock, flags);
+ return req;
+}
+
+/* send the appropriate number of packets to match our bitrate */
+static void audio_send(struct audio_dev *audio)
+{
+ struct snd_pcm_runtime *runtime;
+ struct usb_request *req;
+ int length, length1, length2, ret;
+ s64 msecs;
+ s64 frames;
+ ktime_t now;
+
+ /* audio->substream will be null if we have been closed */
+ if (!audio->substream)
+ return;
+ /* audio->buffer_pos will be null if we have been stopped */
+ if (!audio->buffer_pos)
+ return;
+
+ runtime = audio->substream->runtime;
+
+ /* compute number of frames to send */
+ now = ktime_get();
+ msecs = ktime_to_ns(now) - ktime_to_ns(audio->start_time);
+ do_div(msecs, 1000000);
+ frames = msecs * SAMPLE_RATE;
+ do_div(frames, 1000);
+
+ /* Readjust our frames_sent if we fall too far behind.
+ * If we get too far behind it is better to drop some frames than
+ * to keep sending data too fast in an attempt to catch up.
+ */
+ if (frames - audio->frames_sent > 10 * FRAMES_PER_MSEC)
+ audio->frames_sent = frames - FRAMES_PER_MSEC;
+
+ frames -= audio->frames_sent;
+
+ /* We need to send something to keep the pipeline going */
+ if (frames <= 0)
+ frames = FRAMES_PER_MSEC;
+
+ while (frames > 0) {
+ req = audio_req_get(audio);
+ if (!req)
+ break;
+
+ length = frames_to_bytes(runtime, frames);
+ if (length > IN_EP_MAX_PACKET_SIZE)
+ length = IN_EP_MAX_PACKET_SIZE;
+
+ if (audio->buffer_pos + length > audio->buffer_end)
+ length1 = audio->buffer_end - audio->buffer_pos;
+ else
+ length1 = length;
+ memcpy(req->buf, audio->buffer_pos, length1);
+ if (length1 < length) {
+ /* Wrap around and copy remaining length
+ * at beginning of buffer.
+ */
+ length2 = length - length1;
+ memcpy(req->buf + length1, audio->buffer_start,
+ length2);
+ audio->buffer_pos = audio->buffer_start + length2;
+ } else {
+ audio->buffer_pos += length1;
+ if (audio->buffer_pos >= audio->buffer_end)
+ audio->buffer_pos = audio->buffer_start;
+ }
+
+ req->length = length;
+ ret = usb_ep_queue(audio->in_ep, req, GFP_ATOMIC);
+ if (ret < 0) {
+ pr_err("usb_ep_queue failed ret: %d\n", ret);
+ audio_req_put(audio, req);
+ break;
+ }
+
+ frames -= bytes_to_frames(runtime, length);
+ audio->frames_sent += bytes_to_frames(runtime, length);
+ }
+}
+
+static void audio_control_complete(struct usb_ep *ep, struct usb_request *req)
+{
+ /* nothing to do here */
+}
+
+static void audio_data_complete(struct usb_ep *ep, struct usb_request *req)
+{
+ struct audio_dev *audio = req->context;
+
+ pr_debug("audio_data_complete req->status %d req->actual %d\n",
+ req->status, req->actual);
+
+ audio_req_put(audio, req);
+
+ if (!audio->buffer_start || req->status)
+ return;
+
+ audio->period_offset += req->actual;
+ if (audio->period_offset >= audio->period) {
+ snd_pcm_period_elapsed(audio->substream);
+ audio->period_offset = 0;
+ }
+ audio_send(audio);
+}
+
+static int audio_set_endpoint_req(struct usb_function *f,
+ const struct usb_ctrlrequest *ctrl)
+{
+ int value = -EOPNOTSUPP;
+ u16 ep = le16_to_cpu(ctrl->wIndex);
+ u16 len = le16_to_cpu(ctrl->wLength);
+ u16 w_value = le16_to_cpu(ctrl->wValue);
+
+ pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n",
+ ctrl->bRequest, w_value, len, ep);
+
+ switch (ctrl->bRequest) {
+ case UAC_SET_CUR:
+ case UAC_SET_MIN:
+ case UAC_SET_MAX:
+ case UAC_SET_RES:
+ value = len;
+ break;
+ default:
+ break;
+ }
+
+ return value;
+}
+
+static int audio_get_endpoint_req(struct usb_function *f,
+ const struct usb_ctrlrequest *ctrl)
+{
+ struct usb_composite_dev *cdev = f->config->cdev;
+ int value = -EOPNOTSUPP;
+ u8 ep = ((le16_to_cpu(ctrl->wIndex) >> 8) & 0xFF);
+ u16 len = le16_to_cpu(ctrl->wLength);
+ u16 w_value = le16_to_cpu(ctrl->wValue);
+ u8 *buf = cdev->req->buf;
+
+ pr_debug("bRequest 0x%x, w_value 0x%04x, len %d, endpoint %d\n",
+ ctrl->bRequest, w_value, len, ep);
+
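+ /* the control selector is carried in the high byte of wValue; only the sample-rate control is handled */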
+ if (w_value == UAC_EP_CS_ATTR_SAMPLE_RATE << 8) {
+ switch (ctrl->bRequest) {
+ case UAC_GET_CUR:
+ case UAC_GET_MIN:
+ case UAC_GET_MAX:
+ case UAC_GET_RES:
+ /* return our sample rate */
+ buf[0] = (u8)SAMPLE_RATE;
+ buf[1] = (u8)(SAMPLE_RATE >> 8);
+ buf[2] = (u8)(SAMPLE_RATE >> 16);
+ value = 3;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return value;
+}
+
+static int
+audio_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
+{
+ struct usb_composite_dev *cdev = f->config->cdev;
+ struct usb_request *req = cdev->req;
+ int value = -EOPNOTSUPP;
+ u16 w_index = le16_to_cpu(ctrl->wIndex);
+ u16 w_value = le16_to_cpu(ctrl->wValue);
+ u16 w_length = le16_to_cpu(ctrl->wLength);
+
+ /* composite driver infrastructure handles everything; interface
+ * activation uses set_alt().
+ */
+ switch (ctrl->bRequestType) {
+ case USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
+ value = audio_set_endpoint_req(f, ctrl);
+ break;
+
+ case USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_ENDPOINT:
+ value = audio_get_endpoint_req(f, ctrl);
+ break;
+ }
+
+ /* respond with data transfer or status phase? */
+ if (value >= 0) {
+ pr_debug("audio req%02x.%02x v%04x i%04x l%d\n",
+ ctrl->bRequestType, ctrl->bRequest,
+ w_value, w_index, w_length);
+ req->zero = 0;
+ req->length = value;
+ req->complete = audio_control_complete;
+ value = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC);
+ if (value < 0)
+ pr_err("audio response queue error %d\n", value);
+ }
+
+ /* device either stalls (value < 0) or reports success */
+ return value;
+}
+
+static int audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
+{
+ struct audio_dev *audio = func_to_audio(f);
+ struct usb_composite_dev *cdev = f->config->cdev;
+ int ret;
+
+ pr_debug("audio_set_alt intf %d, alt %d\n", intf, alt);
+
+ ret = config_ep_by_speed(cdev->gadget, f, audio->in_ep);
+ if (ret)
+ return ret;
+
+ usb_ep_enable(audio->in_ep);
+ return 0;
+}
+
+static void audio_disable(struct usb_function *f)
+{
+ struct audio_dev *audio = func_to_audio(f);
+
+ pr_debug("audio_disable\n");
+ usb_ep_disable(audio->in_ep);
+}
+
+/*-------------------------------------------------------------------------*/
+
+static void audio_build_desc(struct audio_dev *audio)
+{
+ u8 *sam_freq;
+ int rate;
+
+ /* Set channel numbers */
+ audio_input_terminal_desc.bNrChannels = 2;
+ audio_as_type_i_desc.bNrChannels = 2;
+
+ /* Set sample rates */
+ rate = SAMPLE_RATE;
+ sam_freq = audio_as_type_i_desc.tSamFreq[0];
+ memcpy(sam_freq, &rate, 3);
+}
+
+/* audio function driver setup/binding */
+static int
+audio_bind(struct usb_configuration *c, struct usb_function *f)
+{
+ struct usb_composite_dev *cdev = c->cdev;
+ struct audio_dev *audio = func_to_audio(f);
+ int status;
+ struct usb_ep *ep;
+ struct usb_request *req;
+ int i;
+
+ audio_build_desc(audio);
+
+ /* allocate instance-specific interface IDs, and patch descriptors */
+ status = usb_interface_id(c, f);
+ if (status < 0)
+ goto fail;
+ audio_ac_interface_desc.bInterfaceNumber = status;
+
+ /* AUDIO_AC_INTERFACE */
+ audio_ac_header_desc.baInterfaceNr[0] = status;
+
+ status = usb_interface_id(c, f);
+ if (status < 0)
+ goto fail;
+ audio_as_interface_alt_0_desc.bInterfaceNumber = status;
+ audio_as_interface_alt_1_desc.bInterfaceNumber = status;
+
+ /* AUDIO_AS_INTERFACE */
+ audio_ac_header_desc.baInterfaceNr[1] = status;
+
+ status = -ENODEV;
+
+ /* allocate our endpoint */
+ ep = usb_ep_autoconfig(cdev->gadget, &audio_fs_as_in_ep_desc);
+ if (!ep)
+ goto fail;
+ audio->in_ep = ep;
+ ep->driver_data = audio; /* claim */
+
+ if (gadget_is_dualspeed(c->cdev->gadget))
+ audio_hs_as_in_ep_desc.bEndpointAddress =
+ audio_fs_as_in_ep_desc.bEndpointAddress;
+
+ f->fs_descriptors = fs_audio_desc;
+ f->hs_descriptors = hs_audio_desc;
+
+ for (i = 0, status = 0; i < IN_EP_REQ_COUNT && status == 0; i++) {
+ req = audio_request_new(ep, IN_EP_MAX_PACKET_SIZE);
+ if (req) {
+ req->context = audio;
+ req->complete = audio_data_complete;
+ audio_req_put(audio, req);
+ } else
+ status = -ENOMEM;
+ }
+
+fail:
+ return status;
+}
+
+static void
+audio_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+ struct audio_dev *audio = func_to_audio(f);
+ struct usb_request *req;
+
+ while ((req = audio_req_get(audio)))
+ audio_request_free(req, audio->in_ep);
+
+ snd_card_free_when_closed(audio->card);
+ audio->card = NULL;
+ audio->pcm = NULL;
+ audio->substream = NULL;
+ audio->in_ep = NULL;
+}
+
+static void audio_pcm_playback_start(struct audio_dev *audio)
+{
+ audio->start_time = ktime_get();
+ audio->frames_sent = 0;
+ audio_send(audio);
+}
+
+static void audio_pcm_playback_stop(struct audio_dev *audio)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&audio->lock, flags);
+ audio->buffer_start = 0;
+ audio->buffer_end = 0;
+ audio->buffer_pos = 0;
+ spin_unlock_irqrestore(&audio->lock, flags);
+}
+
+static int audio_pcm_open(struct snd_pcm_substream *substream)
+{
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ struct audio_dev *audio = substream->private_data;
+
+ runtime->private_data = audio;
+ runtime->hw = audio_hw_info;
+ snd_pcm_limit_hw_rates(runtime);
+ runtime->hw.channels_max = 2;
+
+ audio->substream = substream;
+ return 0;
+}
+
+static int audio_pcm_close(struct snd_pcm_substream *substream)
+{
+ struct audio_dev *audio = substream->private_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&audio->lock, flags);
+ audio->substream = NULL;
+ spin_unlock_irqrestore(&audio->lock, flags);
+
+ return 0;
+}
+
+static int audio_pcm_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *params)
+{
+ unsigned int channels = params_channels(params);
+ unsigned int rate = params_rate(params);
+
+ if (rate != SAMPLE_RATE)
+ return -EINVAL;
+ if (channels != 2)
+ return -EINVAL;
+
+ return snd_pcm_lib_alloc_vmalloc_buffer(substream,
+ params_buffer_bytes(params));
+}
+
+static int audio_pcm_hw_free(struct snd_pcm_substream *substream)
+{
+ return snd_pcm_lib_free_vmalloc_buffer(substream);
+}
+
+static int audio_pcm_prepare(struct snd_pcm_substream *substream)
+{
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ struct audio_dev *audio = runtime->private_data;
+
+ audio->period = snd_pcm_lib_period_bytes(substream);
+ audio->period_offset = 0;
+ audio->buffer_start = runtime->dma_area;
+ audio->buffer_end = audio->buffer_start
+ + snd_pcm_lib_buffer_bytes(substream);
+ audio->buffer_pos = audio->buffer_start;
+
+ return 0;
+}
+
+static snd_pcm_uframes_t audio_pcm_pointer(struct snd_pcm_substream *substream)
+{
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ struct audio_dev *audio = runtime->private_data;
+ ssize_t bytes = audio->buffer_pos - audio->buffer_start;
+
+ /* return offset of next frame to fill in our buffer */
+ return bytes_to_frames(runtime, bytes);
+}
+
+static int audio_pcm_playback_trigger(struct snd_pcm_substream *substream,
+ int cmd)
+{
+ struct audio_dev *audio = substream->runtime->private_data;
+ int ret = 0;
+
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ case SNDRV_PCM_TRIGGER_RESUME:
+ audio_pcm_playback_start(audio);
+ break;
+
+ case SNDRV_PCM_TRIGGER_STOP:
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ audio_pcm_playback_stop(audio);
+ break;
+
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static struct audio_dev _audio_dev = {
+ .func = {
+ .name = "audio_source",
+ .bind = audio_bind,
+ .unbind = audio_unbind,
+ .set_alt = audio_set_alt,
+ .setup = audio_setup,
+ .disable = audio_disable,
+ },
+ .lock = __SPIN_LOCK_UNLOCKED(_audio_dev.lock),
+ .idle_reqs = LIST_HEAD_INIT(_audio_dev.idle_reqs),
+};
+
+static struct snd_pcm_ops audio_playback_ops = {
+ .open = audio_pcm_open,
+ .close = audio_pcm_close,
+ .ioctl = snd_pcm_lib_ioctl,
+ .hw_params = audio_pcm_hw_params,
+ .hw_free = audio_pcm_hw_free,
+ .prepare = audio_pcm_prepare,
+ .trigger = audio_pcm_playback_trigger,
+ .pointer = audio_pcm_pointer,
+};
+
+int audio_source_bind_config(struct usb_configuration *c,
+ struct audio_source_config *config)
+{
+ struct audio_dev *audio;
+ struct snd_card *card;
+ struct snd_pcm *pcm;
+ int err;
+
+ config->card = -1;
+ config->device = -1;
+
+ audio = &_audio_dev;
+
+ err = snd_card_create(SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
+ THIS_MODULE, 0, &card);
+ if (err)
+ return err;
+
+ snd_card_set_dev(card, &c->cdev->gadget->dev);
+
+ err = snd_pcm_new(card, "USB audio source", 0, 1, 0, &pcm);
+ if (err)
+ goto pcm_fail;
+ pcm->private_data = audio;
+ pcm->info_flags = 0;
+ audio->pcm = pcm;
+
+ strlcpy(pcm->name, "USB gadget audio", sizeof(pcm->name));
+
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &audio_playback_ops);
+ snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+ NULL, 0, 64 * 1024);
+
+ strlcpy(card->driver, "audio_source", sizeof(card->driver));
+ strlcpy(card->shortname, card->driver, sizeof(card->shortname));
+ strlcpy(card->longname, "USB accessory audio source",
+ sizeof(card->longname));
+
+ err = snd_card_register(card);
+ if (err)
+ goto register_fail;
+
+ err = usb_add_function(c, &audio->func);
+ if (err)
+ goto add_fail;
+
+ config->card = pcm->card->number;
+ config->device = pcm->device;
+ audio->card = card;
+ return 0;
+
+add_fail:
+register_fail:
+pcm_fail:
+ snd_card_free(audio->card);
+ return err;
+}
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index 84219f656051..c36f660dcc2e 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -1563,7 +1563,12 @@ static int ffs_func_eps_enable(struct ffs_function *func)
spin_lock_irqsave(&func->ffs->eps_lock, flags);
do {
struct usb_endpoint_descriptor *ds;
- ds = ep->descs[ep->descs[1] ? 1 : 0];
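+ /* pick the descriptor matching the negotiated speed rather than always preferring the HS one */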
+ int desc_idx = ffs->gadget->speed == USB_SPEED_HIGH ? 1 : 0;
+ ds = ep->descs[desc_idx];
+ if (!ds) {
+ ret = -EINVAL;
+ break;
+ }
ep->ep->driver_data = ep;
ep->ep->desc = ds;
diff --git a/drivers/usb/gadget/f_midi.c b/drivers/usb/gadget/f_midi.c
index 263e721c2694..b0bce7ea31fa 100644
--- a/drivers/usb/gadget/f_midi.c
+++ b/drivers/usb/gadget/f_midi.c
@@ -65,6 +65,11 @@ struct gmidi_in_port {
uint8_t data[2];
};
+struct midi_alsa_config {
+ int card;
+ int device;
+};
+
struct f_midi {
struct usb_function func;
struct usb_gadget *gadget;
@@ -97,7 +102,7 @@ DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(1);
DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(16);
/* B.3.1 Standard AC Interface Descriptor */
-static struct usb_interface_descriptor ac_interface_desc __initdata = {
+static struct usb_interface_descriptor ac_interface_desc /* __initdata */ = {
.bLength = USB_DT_INTERFACE_SIZE,
.bDescriptorType = USB_DT_INTERFACE,
/* .bInterfaceNumber = DYNAMIC */
@@ -108,7 +113,7 @@ static struct usb_interface_descriptor ac_interface_desc __initdata = {
};
/* B.3.2 Class-Specific AC Interface Descriptor */
-static struct uac1_ac_header_descriptor_1 ac_header_desc __initdata = {
+static struct uac1_ac_header_descriptor_1 ac_header_desc /* __initdata */ = {
.bLength = UAC_DT_AC_HEADER_SIZE(1),
.bDescriptorType = USB_DT_CS_INTERFACE,
.bDescriptorSubtype = USB_MS_HEADER,
@@ -119,7 +124,7 @@ static struct uac1_ac_header_descriptor_1 ac_header_desc __initdata = {
};
/* B.4.1 Standard MS Interface Descriptor */
-static struct usb_interface_descriptor ms_interface_desc __initdata = {
+static struct usb_interface_descriptor ms_interface_desc /* __initdata */ = {
.bLength = USB_DT_INTERFACE_SIZE,
.bDescriptorType = USB_DT_INTERFACE,
/* .bInterfaceNumber = DYNAMIC */
@@ -130,7 +135,7 @@ static struct usb_interface_descriptor ms_interface_desc __initdata = {
};
/* B.4.2 Class-Specific MS Interface Descriptor */
-static struct usb_ms_header_descriptor ms_header_desc __initdata = {
+static struct usb_ms_header_descriptor ms_header_desc /* __initdata */ = {
.bLength = USB_DT_MS_HEADER_SIZE,
.bDescriptorType = USB_DT_CS_INTERFACE,
.bDescriptorSubtype = USB_MS_HEADER,
@@ -191,7 +196,7 @@ static struct usb_gadget_strings *midi_strings[] = {
NULL,
};
-static struct usb_request *alloc_ep_req(struct usb_ep *ep, unsigned length)
+static struct usb_request *midi_alloc_ep_req(struct usb_ep *ep, unsigned length)
{
struct usb_request *req;
@@ -207,7 +212,7 @@ static struct usb_request *alloc_ep_req(struct usb_ep *ep, unsigned length)
return req;
}
-static void free_ep_req(struct usb_ep *ep, struct usb_request *req)
+static void midi_free_ep_req(struct usb_ep *ep, struct usb_request *req)
{
kfree(req->buf);
usb_ep_free_request(ep, req);
@@ -278,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
if (ep == midi->out_ep)
f_midi_handle_out_data(ep, req);
- free_ep_req(ep, req);
+ midi_free_ep_req(ep, req);
return;
case -EOVERFLOW: /* buffer overrun on read means that
@@ -365,7 +370,7 @@ static int f_midi_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
/* allocate a bunch of read buffers and queue them all at once. */
for (i = 0; i < midi->qlen && err == 0; i++) {
struct usb_request *req =
- alloc_ep_req(midi->out_ep, midi->buflen);
+ midi_alloc_ep_req(midi->out_ep, midi->buflen);
if (req == NULL)
return -ENOMEM;
@@ -409,7 +414,7 @@ static void f_midi_unbind(struct usb_configuration *c, struct usb_function *f)
card = midi->card;
midi->card = NULL;
if (card)
- snd_card_free(card);
+ snd_card_free_when_closed(card);
kfree(midi->id);
midi->id = NULL;
@@ -546,10 +551,10 @@ static void f_midi_transmit(struct f_midi *midi, struct usb_request *req)
return;
if (!req)
- req = alloc_ep_req(ep, midi->buflen);
+ req = midi_alloc_ep_req(ep, midi->buflen);
if (!req) {
- ERROR(midi, "gmidi_transmit: alloc_ep_request failed\n");
+ ERROR(midi, "gmidi_transmit: midi_alloc_ep_request failed\n");
return;
}
req->length = 0;
@@ -575,7 +580,7 @@ static void f_midi_transmit(struct f_midi *midi, struct usb_request *req)
if (req->length > 0)
usb_ep_queue(ep, req, GFP_ATOMIC);
else
- free_ep_req(ep, req);
+ midi_free_ep_req(ep, req);
}
static void f_midi_in_tasklet(unsigned long data)
@@ -733,7 +738,7 @@ fail:
/* MIDI function driver setup/binding */
-static int __init
+static int /* __init */
f_midi_bind(struct usb_configuration *c, struct usb_function *f)
{
struct usb_descriptor_header **midi_function;
@@ -923,16 +928,22 @@ fail:
*
* Returns zero on success, else negative errno.
*/
-int __init f_midi_bind_config(struct usb_configuration *c,
+int /* __init */ f_midi_bind_config(struct usb_configuration *c,
int index, char *id,
unsigned int in_ports,
unsigned int out_ports,
unsigned int buflen,
- unsigned int qlen)
+ unsigned int qlen,
+ struct midi_alsa_config* config)
{
struct f_midi *midi;
int status, i;
+ if (config) {
+ config->card = -1;
+ config->device = -1;
+ }
+
/* sanity check */
if (in_ports > MAX_PORTS || out_ports > MAX_PORTS)
return -EINVAL;
@@ -961,6 +972,10 @@ int __init f_midi_bind_config(struct usb_configuration *c,
tasklet_init(&midi->tasklet, f_midi_in_tasklet, (unsigned long) midi);
/* set up ALSA midi devices */
+ midi->id = kstrdup(id, GFP_KERNEL);
+ midi->index = index;
+ midi->buflen = buflen;
+ midi->qlen = qlen;
midi->in_ports = in_ports;
midi->out_ports = out_ports;
status = f_midi_register_card(midi);
@@ -974,15 +989,16 @@ int __init f_midi_bind_config(struct usb_configuration *c,
midi->func.set_alt = f_midi_set_alt;
midi->func.disable = f_midi_disable;
- midi->id = kstrdup(id, GFP_KERNEL);
- midi->index = index;
- midi->buflen = buflen;
- midi->qlen = qlen;
-
status = usb_add_function(c, &midi->func);
if (status)
goto setup_fail;
+
+ if (config) {
+ config->card = midi->rmidi->card->number;
+ config->device = midi->rmidi->device;
+ }
+
return 0;
setup_fail:
diff --git a/drivers/usb/gadget/f_mtp.c b/drivers/usb/gadget/f_mtp.c
new file mode 100644
index 000000000000..620aeaaf2d72
--- /dev/null
+++ b/drivers/usb/gadget/f_mtp.c
@@ -0,0 +1,1287 @@
+/*
+ * Gadget Function Driver for MTP
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* #define DEBUG */
+/* #define VERBOSE_DEBUG */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+
+#include <linux/usb.h>
+#include <linux/usb_usual.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/f_mtp.h>
+
+#define MTP_BULK_BUFFER_SIZE 16384
+#define INTR_BUFFER_SIZE 28
+
+/* String IDs */
+#define INTERFACE_STRING_INDEX 0
+
+/* values for mtp_dev.state */
+#define STATE_OFFLINE 0 /* initial state, disconnected */
+#define STATE_READY 1 /* ready for userspace calls */
+#define STATE_BUSY 2 /* processing userspace calls */
+#define STATE_CANCELED 3 /* transaction canceled by host */
+#define STATE_ERROR 4 /* error from completion routine */
+
+/* number of tx and rx requests to allocate */
+#define TX_REQ_MAX 4
+#define RX_REQ_MAX 2
+#define INTR_REQ_MAX 5
+
+/* ID for Microsoft MTP OS String */
+#define MTP_OS_STRING_ID 0xEE
+
+/* MTP class requests */
+#define MTP_REQ_CANCEL 0x64
+#define MTP_REQ_GET_EXT_EVENT_DATA 0x65
+#define MTP_REQ_RESET 0x66
+#define MTP_REQ_GET_DEVICE_STATUS 0x67
+
+/* constants for device status */
+#define MTP_RESPONSE_OK 0x2001
+#define MTP_RESPONSE_DEVICE_BUSY 0x2019
+
+static const char mtp_shortname[] = "mtp_usb";
+
+struct mtp_dev {
+ struct usb_function function;
+ struct usb_composite_dev *cdev;
+ spinlock_t lock;
+
+ struct usb_ep *ep_in;
+ struct usb_ep *ep_out;
+ struct usb_ep *ep_intr;
+
+ int state;
+
+ /* synchronize access to our device file */
+ atomic_t open_excl;
+ /* to enforce only one ioctl at a time */
+ atomic_t ioctl_excl;
+
+ struct list_head tx_idle;
+ struct list_head intr_idle;
+
+ wait_queue_head_t read_wq;
+ wait_queue_head_t write_wq;
+ wait_queue_head_t intr_wq;
+ struct usb_request *rx_req[RX_REQ_MAX];
+ int rx_done;
+
+ /* for processing MTP_SEND_FILE, MTP_RECEIVE_FILE and
+ * MTP_SEND_FILE_WITH_HEADER ioctls on a work queue
+ */
+ struct workqueue_struct *wq;
+ struct work_struct send_file_work;
+ struct work_struct receive_file_work;
+ struct file *xfer_file;
+ loff_t xfer_file_offset;
+ int64_t xfer_file_length;
+ unsigned xfer_send_header;
+ uint16_t xfer_command;
+ uint32_t xfer_transaction_id;
+ int xfer_result;
+};
+
+static struct usb_interface_descriptor mtp_interface_desc = {
+ .bLength = USB_DT_INTERFACE_SIZE,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bInterfaceNumber = 0,
+ .bNumEndpoints = 3,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .bInterfaceSubClass = USB_SUBCLASS_VENDOR_SPEC,
+ .bInterfaceProtocol = 0,
+};
+
+static struct usb_interface_descriptor ptp_interface_desc = {
+ .bLength = USB_DT_INTERFACE_SIZE,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bInterfaceNumber = 0,
+ .bNumEndpoints = 3,
+ .bInterfaceClass = USB_CLASS_STILL_IMAGE,
+ .bInterfaceSubClass = 1,
+ .bInterfaceProtocol = 1,
+};
+
+static struct usb_endpoint_descriptor mtp_highspeed_in_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor mtp_highspeed_out_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = __constant_cpu_to_le16(512),
+};
+
+static struct usb_endpoint_descriptor mtp_fullspeed_in_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor mtp_fullspeed_out_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+};
+
+static struct usb_endpoint_descriptor mtp_intr_desc = {
+ .bLength = USB_DT_ENDPOINT_SIZE,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_INT,
+ .wMaxPacketSize = __constant_cpu_to_le16(INTR_BUFFER_SIZE),
+ .bInterval = 6,
+};
+
+static struct usb_descriptor_header *fs_mtp_descs[] = {
+ (struct usb_descriptor_header *) &mtp_interface_desc,
+ (struct usb_descriptor_header *) &mtp_fullspeed_in_desc,
+ (struct usb_descriptor_header *) &mtp_fullspeed_out_desc,
+ (struct usb_descriptor_header *) &mtp_intr_desc,
+ NULL,
+};
+
+static struct usb_descriptor_header *hs_mtp_descs[] = {
+ (struct usb_descriptor_header *) &mtp_interface_desc,
+ (struct usb_descriptor_header *) &mtp_highspeed_in_desc,
+ (struct usb_descriptor_header *) &mtp_highspeed_out_desc,
+ (struct usb_descriptor_header *) &mtp_intr_desc,
+ NULL,
+};
+
+static struct usb_descriptor_header *fs_ptp_descs[] = {
+ (struct usb_descriptor_header *) &ptp_interface_desc,
+ (struct usb_descriptor_header *) &mtp_fullspeed_in_desc,
+ (struct usb_descriptor_header *) &mtp_fullspeed_out_desc,
+ (struct usb_descriptor_header *) &mtp_intr_desc,
+ NULL,
+};
+
+static struct usb_descriptor_header *hs_ptp_descs[] = {
+ (struct usb_descriptor_header *) &ptp_interface_desc,
+ (struct usb_descriptor_header *) &mtp_highspeed_in_desc,
+ (struct usb_descriptor_header *) &mtp_highspeed_out_desc,
+ (struct usb_descriptor_header *) &mtp_intr_desc,
+ NULL,
+};
+
+static struct usb_string mtp_string_defs[] = {
+ /* Naming interface "MTP" so libmtp will recognize us */
+ [INTERFACE_STRING_INDEX].s = "MTP",
+ { }, /* end of list */
+};
+
+static struct usb_gadget_strings mtp_string_table = {
+ .language = 0x0409, /* en-US */
+ .strings = mtp_string_defs,
+};
+
+static struct usb_gadget_strings *mtp_strings[] = {
+ &mtp_string_table,
+ NULL,
+};
+
+/* Microsoft MTP OS String */
+static u8 mtp_os_string[] = {
+ 18, /* sizeof(mtp_os_string) */
+ USB_DT_STRING,
+ /* Signature field: "MSFT100" */
+ 'M', 0, 'S', 0, 'F', 0, 'T', 0, '1', 0, '0', 0, '0', 0,
+ /* vendor code */
+ 1,
+ /* padding */
+ 0
+};
+
+/* Microsoft Extended Configuration Descriptor Header Section */
+struct mtp_ext_config_desc_header {
+ __le32 dwLength;
+ __u16 bcdVersion;
+ __le16 wIndex;
+ __u8 bCount;
+ __u8 reserved[7];
+};
+
+/* Microsoft Extended Configuration Descriptor Function Section */
+struct mtp_ext_config_desc_function {
+ __u8 bFirstInterfaceNumber;
+ __u8 bInterfaceCount;
+ __u8 compatibleID[8];
+ __u8 subCompatibleID[8];
+ __u8 reserved[6];
+};
+
+/* MTP Extended Configuration Descriptor */
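+/* Returned from mtp_ctrlrequest() for the Microsoft OS vendor request
+ * (wIndex 4 or 5) so a Windows host can match its MTP driver.
+ */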
+struct {
+ struct mtp_ext_config_desc_header header;
+ struct mtp_ext_config_desc_function function;
+} mtp_ext_config_desc = {
+ .header = {
+ .dwLength = __constant_cpu_to_le32(sizeof(mtp_ext_config_desc)),
+ .bcdVersion = __constant_cpu_to_le16(0x0100),
+ .wIndex = __constant_cpu_to_le16(4),
+ .bCount = 1,
+ },
+ .function = {
+ .bFirstInterfaceNumber = 0,
+ .bInterfaceCount = 1,
+ .compatibleID = { 'M', 'T', 'P' },
+ },
+};
+
+struct mtp_device_status {
+ __le16 wLength;
+ __le16 wCode;
+};
+
+struct mtp_data_header {
+ /* length of packet, including this header */
+ __le32 length;
+ /* container type (2 for data packet) */
+ __le16 type;
+ /* MTP command code */
+ __le16 command;
+ /* MTP transaction ID */
+ __le32 transaction_id;
+};
+
+/* temporary variable used between mtp_open() and mtp_gadget_bind() */
+static struct mtp_dev *_mtp_dev;
+
+static inline struct mtp_dev *func_to_mtp(struct usb_function *f)
+{
+ return container_of(f, struct mtp_dev, function);
+}
+
+static struct usb_request *mtp_request_new(struct usb_ep *ep, int buffer_size)
+{
+ struct usb_request *req = usb_ep_alloc_request(ep, GFP_KERNEL);
+ if (!req)
+ return NULL;
+
+ /* now allocate buffers for the requests */
+ req->buf = kmalloc(buffer_size, GFP_KERNEL);
+ if (!req->buf) {
+ usb_ep_free_request(ep, req);
+ return NULL;
+ }
+
+ return req;
+}
+
+static void mtp_request_free(struct usb_request *req, struct usb_ep *ep)
+{
+ if (req) {
+ kfree(req->buf);
+ usb_ep_free_request(ep, req);
+ }
+}
+
+static inline int mtp_lock(atomic_t *excl)
+{
+ if (atomic_inc_return(excl) == 1) {
+ return 0;
+ } else {
+ atomic_dec(excl);
+ return -1;
+ }
+}
+
+static inline void mtp_unlock(atomic_t *excl)
+{
+ atomic_dec(excl);
+}
+
+/* add a request to the tail of a list */
+static void mtp_req_put(struct mtp_dev *dev, struct list_head *head,
+ struct usb_request *req)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_add_tail(&req->list, head);
+ spin_unlock_irqrestore(&dev->lock, flags);
+}
+
+/* remove a request from the head of a list */
+static struct usb_request
+*mtp_req_get(struct mtp_dev *dev, struct list_head *head)
+{
+ unsigned long flags;
+ struct usb_request *req;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ if (list_empty(head)) {
+ req = 0;
+ } else {
+ req = list_first_entry(head, struct usb_request, list);
+ list_del(&req->list);
+ }
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return req;
+}
+
+static void mtp_complete_in(struct usb_ep *ep, struct usb_request *req)
+{
+ struct mtp_dev *dev = _mtp_dev;
+
+ if (req->status != 0)
+ dev->state = STATE_ERROR;
+
+ mtp_req_put(dev, &dev->tx_idle, req);
+
+ wake_up(&dev->write_wq);
+}
+
+static void mtp_complete_out(struct usb_ep *ep, struct usb_request *req)
+{
+ struct mtp_dev *dev = _mtp_dev;
+
+ dev->rx_done = 1;
+ if (req->status != 0)
+ dev->state = STATE_ERROR;
+
+ wake_up(&dev->read_wq);
+}
+
+static void mtp_complete_intr(struct usb_ep *ep, struct usb_request *req)
+{
+ struct mtp_dev *dev = _mtp_dev;
+
+ if (req->status != 0)
+ dev->state = STATE_ERROR;
+
+ mtp_req_put(dev, &dev->intr_idle, req);
+
+ wake_up(&dev->intr_wq);
+}
+
+static int mtp_create_bulk_endpoints(struct mtp_dev *dev,
+ struct usb_endpoint_descriptor *in_desc,
+ struct usb_endpoint_descriptor *out_desc,
+ struct usb_endpoint_descriptor *intr_desc)
+{
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct usb_request *req;
+ struct usb_ep *ep;
+ int i;
+
+ DBG(cdev, "create_bulk_endpoints dev: %p\n", dev);
+
+ ep = usb_ep_autoconfig(cdev->gadget, in_desc);
+ if (!ep) {
+ DBG(cdev, "usb_ep_autoconfig for ep_in failed\n");
+ return -ENODEV;
+ }
+ DBG(cdev, "usb_ep_autoconfig for ep_in got %s\n", ep->name);
+ ep->driver_data = dev; /* claim the endpoint */
+ dev->ep_in = ep;
+
+ ep = usb_ep_autoconfig(cdev->gadget, out_desc);
+ if (!ep) {
+ DBG(cdev, "usb_ep_autoconfig for ep_out failed\n");
+ return -ENODEV;
+ }
+ DBG(cdev, "usb_ep_autoconfig for mtp ep_out got %s\n", ep->name);
+ ep->driver_data = dev; /* claim the endpoint */
+ dev->ep_out = ep;
+
+ ep = usb_ep_autoconfig(cdev->gadget, intr_desc);
+ if (!ep) {
+ DBG(cdev, "usb_ep_autoconfig for ep_intr failed\n");
+ return -ENODEV;
+ }
+ DBG(cdev, "usb_ep_autoconfig for mtp ep_intr got %s\n", ep->name);
+ ep->driver_data = dev; /* claim the endpoint */
+ dev->ep_intr = ep;
+
+ /* now allocate requests for our endpoints */
+ for (i = 0; i < TX_REQ_MAX; i++) {
+ req = mtp_request_new(dev->ep_in, MTP_BULK_BUFFER_SIZE);
+ if (!req)
+ goto fail;
+ req->complete = mtp_complete_in;
+ mtp_req_put(dev, &dev->tx_idle, req);
+ }
+ for (i = 0; i < RX_REQ_MAX; i++) {
+ req = mtp_request_new(dev->ep_out, MTP_BULK_BUFFER_SIZE);
+ if (!req)
+ goto fail;
+ req->complete = mtp_complete_out;
+ dev->rx_req[i] = req;
+ }
+ for (i = 0; i < INTR_REQ_MAX; i++) {
+ req = mtp_request_new(dev->ep_intr, INTR_BUFFER_SIZE);
+ if (!req)
+ goto fail;
+ req->complete = mtp_complete_intr;
+ mtp_req_put(dev, &dev->intr_idle, req);
+ }
+
+ return 0;
+
+fail:
+ printk(KERN_ERR "mtp_bind() could not allocate requests\n");
+ return -1;
+}
+
+static ssize_t mtp_read(struct file *fp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mtp_dev *dev = fp->private_data;
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct usb_request *req;
+ ssize_t r = count;
+ unsigned xfer;
+ int ret = 0;
+
+ DBG(cdev, "mtp_read(%zu)\n", count);
+
+ if (count > MTP_BULK_BUFFER_SIZE)
+ return -EINVAL;
+
+ /* we will block until we're online */
+ DBG(cdev, "mtp_read: waiting for online state\n");
+ ret = wait_event_interruptible(dev->read_wq,
+ dev->state != STATE_OFFLINE);
+ if (ret < 0) {
+ r = ret;
+ goto done;
+ }
+ spin_lock_irq(&dev->lock);
+ if (dev->state == STATE_CANCELED) {
+ /* report cancelation to userspace */
+ dev->state = STATE_READY;
+ spin_unlock_irq(&dev->lock);
+ return -ECANCELED;
+ }
+ dev->state = STATE_BUSY;
+ spin_unlock_irq(&dev->lock);
+
+requeue_req:
+ /* queue a request */
+ req = dev->rx_req[0];
+ req->length = count;
+ dev->rx_done = 0;
+ ret = usb_ep_queue(dev->ep_out, req, GFP_KERNEL);
+ if (ret < 0) {
+ r = -EIO;
+ goto done;
+ } else {
+ DBG(cdev, "rx %p queue\n", req);
+ }
+
+ /* wait for a request to complete */
+ ret = wait_event_interruptible(dev->read_wq, dev->rx_done);
+ if (ret < 0) {
+ r = ret;
+ usb_ep_dequeue(dev->ep_out, req);
+ goto done;
+ }
+ if (dev->state == STATE_BUSY) {
+ /* If we got a 0-len packet, throw it back and try again. */
+ if (req->actual == 0)
+ goto requeue_req;
+
+ DBG(cdev, "rx %p %d\n", req, req->actual);
+ xfer = (req->actual < count) ? req->actual : count;
+ r = xfer;
+ if (copy_to_user(buf, req->buf, xfer))
+ r = -EFAULT;
+ } else
+ r = -EIO;
+
+done:
+ spin_lock_irq(&dev->lock);
+ if (dev->state == STATE_CANCELED)
+ r = -ECANCELED;
+ else if (dev->state != STATE_OFFLINE)
+ dev->state = STATE_READY;
+ spin_unlock_irq(&dev->lock);
+
+ DBG(cdev, "mtp_read returning %zd\n", r);
+ return r;
+}
+
+static ssize_t mtp_write(struct file *fp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mtp_dev *dev = fp->private_data;
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct usb_request *req = 0;
+ ssize_t r = count;
+ unsigned xfer;
+ int sendZLP = 0;
+ int ret;
+
+ DBG(cdev, "mtp_write(%zu)\n", count);
+
+ spin_lock_irq(&dev->lock);
+ if (dev->state == STATE_CANCELED) {
+ /* report cancelation to userspace */
+ dev->state = STATE_READY;
+ spin_unlock_irq(&dev->lock);
+ return -ECANCELED;
+ }
+ if (dev->state == STATE_OFFLINE) {
+ spin_unlock_irq(&dev->lock);
+ return -ENODEV;
+ }
+ dev->state = STATE_BUSY;
+ spin_unlock_irq(&dev->lock);
+
+ /* we need to send a zero length packet to signal the end of transfer
+ * if the transfer size is aligned to a packet boundary.
+ */
+ if ((count & (dev->ep_in->maxpacket - 1)) == 0)
+ sendZLP = 1;
+
+ while (count > 0 || sendZLP) {
+ /* so we exit after sending ZLP */
+ if (count == 0)
+ sendZLP = 0;
+
+ if (dev->state != STATE_BUSY) {
+ DBG(cdev, "mtp_write dev->error\n");
+ r = -EIO;
+ break;
+ }
+
+ /* get an idle tx request to use */
+ req = 0;
+ ret = wait_event_interruptible(dev->write_wq,
+ ((req = mtp_req_get(dev, &dev->tx_idle))
+ || dev->state != STATE_BUSY));
+ if (!req) {
+ r = ret;
+ break;
+ }
+
+ if (count > MTP_BULK_BUFFER_SIZE)
+ xfer = MTP_BULK_BUFFER_SIZE;
+ else
+ xfer = count;
+ if (xfer && copy_from_user(req->buf, buf, xfer)) {
+ r = -EFAULT;
+ break;
+ }
+
+ req->length = xfer;
+ ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+ if (ret < 0) {
+ DBG(cdev, "mtp_write: xfer error %d\n", ret);
+ r = -EIO;
+ break;
+ }
+
+ buf += xfer;
+ count -= xfer;
+
+ /* zero this so we don't try to free it on error exit */
+ req = 0;
+ }
+
+ if (req)
+ mtp_req_put(dev, &dev->tx_idle, req);
+
+ spin_lock_irq(&dev->lock);
+ if (dev->state == STATE_CANCELED)
+ r = -ECANCELED;
+ else if (dev->state != STATE_OFFLINE)
+ dev->state = STATE_READY;
+ spin_unlock_irq(&dev->lock);
+
+ DBG(cdev, "mtp_write returning %zd\n", r);
+ return r;
+}
+
+/* read from a local file and write to USB */
+static void send_file_work(struct work_struct *data)
+{
+ struct mtp_dev *dev = container_of(data, struct mtp_dev,
+ send_file_work);
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct usb_request *req = 0;
+ struct mtp_data_header *header;
+ struct file *filp;
+ loff_t offset;
+ int64_t count;
+ int xfer, ret, hdr_size;
+ int r = 0;
+ int sendZLP = 0;
+
+ /* read our parameters */
+ smp_rmb();
+ filp = dev->xfer_file;
+ offset = dev->xfer_file_offset;
+ count = dev->xfer_file_length;
+
+ DBG(cdev, "send_file_work(%lld %lld)\n", offset, count);
+
+ if (dev->xfer_send_header) {
+ hdr_size = sizeof(struct mtp_data_header);
+ count += hdr_size;
+ } else {
+ hdr_size = 0;
+ }
+
+ /* we need to send a zero length packet to signal the end of transfer
+ * if the transfer size is aligned to a packet boundary.
+ */
+ if ((count & (dev->ep_in->maxpacket - 1)) == 0)
+ sendZLP = 1;
+
+ while (count > 0 || sendZLP) {
+ /* so we exit after sending ZLP */
+ if (count == 0)
+ sendZLP = 0;
+
+ /* get an idle tx request to use */
+ req = 0;
+ ret = wait_event_interruptible(dev->write_wq,
+ (req = mtp_req_get(dev, &dev->tx_idle))
+ || dev->state != STATE_BUSY);
+ if (dev->state == STATE_CANCELED) {
+ r = -ECANCELED;
+ break;
+ }
+ if (!req) {
+ r = ret;
+ break;
+ }
+
+ if (count > MTP_BULK_BUFFER_SIZE)
+ xfer = MTP_BULK_BUFFER_SIZE;
+ else
+ xfer = count;
+
+ if (hdr_size) {
+ /* prepend MTP data header */
+ header = (struct mtp_data_header *)req->buf;
+ header->length = __cpu_to_le32(count);
+ header->type = __cpu_to_le16(2); /* data packet */
+ header->command = __cpu_to_le16(dev->xfer_command);
+ header->transaction_id =
+ __cpu_to_le32(dev->xfer_transaction_id);
+ }
+
+ ret = vfs_read(filp, req->buf + hdr_size, xfer - hdr_size,
+ &offset);
+ if (ret < 0) {
+ r = ret;
+ break;
+ }
+ xfer = ret + hdr_size;
+ hdr_size = 0;
+
+ req->length = xfer;
+ ret = usb_ep_queue(dev->ep_in, req, GFP_KERNEL);
+ if (ret < 0) {
+ DBG(cdev, "send_file_work: xfer error %d\n", ret);
+ dev->state = STATE_ERROR;
+ r = -EIO;
+ break;
+ }
+
+ count -= xfer;
+
+ /* zero this so we don't try to free it on error exit */
+ req = 0;
+ }
+
+ if (req)
+ mtp_req_put(dev, &dev->tx_idle, req);
+
+ DBG(cdev, "send_file_work returning %d\n", r);
+ /* write the result */
+ dev->xfer_result = r;
+ smp_wmb();
+}
+
+/* read from USB and write to a local file */
+static void receive_file_work(struct work_struct *data)
+{
+ struct mtp_dev *dev = container_of(data, struct mtp_dev,
+ receive_file_work);
+ struct usb_composite_dev *cdev = dev->cdev;
+ struct usb_request *read_req = NULL, *write_req = NULL;
+ struct file *filp;
+ loff_t offset;
+ int64_t count;
+ int ret, cur_buf = 0;
+ int r = 0;
+
+ /* read our parameters */
+ smp_rmb();
+ filp = dev->xfer_file;
+ offset = dev->xfer_file_offset;
+ count = dev->xfer_file_length;
+
+ DBG(cdev, "receive_file_work(%lld)\n", count);
+
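+ /* Double-buffer through rx_req[]: queue the next USB read while the
+ * previously completed buffer is being written out to the file.
+ */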
+ while (count > 0 || write_req) {
+ if (count > 0) {
+ /* queue a request */
+ read_req = dev->rx_req[cur_buf];
+ cur_buf = (cur_buf + 1) % RX_REQ_MAX;
+
+ read_req->length = (count > MTP_BULK_BUFFER_SIZE
+ ? MTP_BULK_BUFFER_SIZE : count);
+ dev->rx_done = 0;
+ ret = usb_ep_queue(dev->ep_out, read_req, GFP_KERNEL);
+ if (ret < 0) {
+ r = -EIO;
+ dev->state = STATE_ERROR;
+ break;
+ }
+ }
+
+ if (write_req) {
+ DBG(cdev, "rx %p %d\n", write_req, write_req->actual);
+ ret = vfs_write(filp, write_req->buf, write_req->actual,
+ &offset);
+ DBG(cdev, "vfs_write %d\n", ret);
+ if (ret != write_req->actual) {
+ r = -EIO;
+ dev->state = STATE_ERROR;
+ break;
+ }
+ write_req = NULL;
+ }
+
+ if (read_req) {
+ /* wait for our last read to complete */
+ ret = wait_event_interruptible(dev->read_wq,
+ dev->rx_done || dev->state != STATE_BUSY);
+ if (dev->state == STATE_CANCELED) {
+ r = -ECANCELED;
+ if (!dev->rx_done)
+ usb_ep_dequeue(dev->ep_out, read_req);
+ break;
+ }
+ /* if xfer_file_length is 0xFFFFFFFF, then we read until
+ * we get a zero length packet
+ */
+ if (count != 0xFFFFFFFF)
+ count -= read_req->actual;
+ if (read_req->actual < read_req->length) {
+ /*
+ * short packet is used to signal EOF for
+ * sizes > 4 gig
+ */
+ DBG(cdev, "got short packet\n");
+ count = 0;
+ }
+
+ write_req = read_req;
+ read_req = NULL;
+ }
+ }
+
+ DBG(cdev, "receive_file_work returning %d\n", r);
+ /* write the result */
+ dev->xfer_result = r;
+ smp_wmb();
+}
+
+static int mtp_send_event(struct mtp_dev *dev, struct mtp_event *event)
+{
+ struct usb_request *req = NULL;
+ int ret;
+ int length = event->length;
+
+ DBG(dev->cdev, "mtp_send_event(%zu)\n", event->length);
+
+ if (length < 0 || length > INTR_BUFFER_SIZE)
+ return -EINVAL;
+ if (dev->state == STATE_OFFLINE)
+ return -ENODEV;
+
+ ret = wait_event_interruptible_timeout(dev->intr_wq,
+ (req = mtp_req_get(dev, &dev->intr_idle)),
+ msecs_to_jiffies(1000));
+ if (!req)
+ return -ETIME;
+
+ if (copy_from_user(req->buf, (void __user *)event->data, length)) {
+ mtp_req_put(dev, &dev->intr_idle, req);
+ return -EFAULT;
+ }
+ req->length = length;
+ ret = usb_ep_queue(dev->ep_intr, req, GFP_KERNEL);
+ if (ret)
+ mtp_req_put(dev, &dev->intr_idle, req);
+
+ return ret;
+}
+
+static long mtp_ioctl(struct file *fp, unsigned code, unsigned long value)
+{
+ struct mtp_dev *dev = fp->private_data;
+ struct file *filp = NULL;
+ int ret = -EINVAL;
+
+ if (mtp_lock(&dev->ioctl_excl))
+ return -EBUSY;
+
+ switch (code) {
+ case MTP_SEND_FILE:
+ case MTP_RECEIVE_FILE:
+ case MTP_SEND_FILE_WITH_HEADER:
+ {
+ struct mtp_file_range mfr;
+ struct work_struct *work;
+
+ spin_lock_irq(&dev->lock);
+ if (dev->state == STATE_CANCELED) {
+ /* report cancelation to userspace */
+ dev->state = STATE_READY;
+ spin_unlock_irq(&dev->lock);
+ ret = -ECANCELED;
+ goto out;
+ }
+ if (dev->state == STATE_OFFLINE) {
+ spin_unlock_irq(&dev->lock);
+ ret = -ENODEV;
+ goto out;
+ }
+ dev->state = STATE_BUSY;
+ spin_unlock_irq(&dev->lock);
+
+ if (copy_from_user(&mfr, (void __user *)value, sizeof(mfr))) {
+ ret = -EFAULT;
+ goto fail;
+ }
+ /* hold a reference to the file while we are working with it */
+ filp = fget(mfr.fd);
+ if (!filp) {
+ ret = -EBADF;
+ goto fail;
+ }
+
+ /* write the parameters */
+ dev->xfer_file = filp;
+ dev->xfer_file_offset = mfr.offset;
+ dev->xfer_file_length = mfr.length;
+ smp_wmb();
+
+ if (code == MTP_SEND_FILE_WITH_HEADER) {
+ work = &dev->send_file_work;
+ dev->xfer_send_header = 1;
+ dev->xfer_command = mfr.command;
+ dev->xfer_transaction_id = mfr.transaction_id;
+ } else if (code == MTP_SEND_FILE) {
+ work = &dev->send_file_work;
+ dev->xfer_send_header = 0;
+ } else {
+ work = &dev->receive_file_work;
+ }
+
+ /* We do the file transfer on a work queue so it will run
+ * in kernel context, which is necessary for vfs_read and
+ * vfs_write to use our buffers in the kernel address space.
+ */
+ queue_work(dev->wq, work);
+ /* wait for operation to complete */
+ flush_workqueue(dev->wq);
+ fput(filp);
+
+ /* read the result */
+ smp_rmb();
+ ret = dev->xfer_result;
+ break;
+ }
+ case MTP_SEND_EVENT:
+ {
+ struct mtp_event event;
+ /* return here so we don't change dev->state below,
+ * which would interfere with bulk transfer state.
+ */
+ if (copy_from_user(&event, (void __user *)value, sizeof(event)))
+ ret = -EFAULT;
+ else
+ ret = mtp_send_event(dev, &event);
+ goto out;
+ }
+ }
+
+fail:
+ spin_lock_irq(&dev->lock);
+ if (dev->state == STATE_CANCELED)
+ ret = -ECANCELED;
+ else if (dev->state != STATE_OFFLINE)
+ dev->state = STATE_READY;
+ spin_unlock_irq(&dev->lock);
+out:
+ mtp_unlock(&dev->ioctl_excl);
+ DBG(dev->cdev, "ioctl returning %d\n", ret);
+ return ret;
+}
+
+static int mtp_open(struct inode *ip, struct file *fp)
+{
+ printk(KERN_INFO "mtp_open\n");
+ if (mtp_lock(&_mtp_dev->open_excl))
+ return -EBUSY;
+
+ /* clear any error condition */
+ if (_mtp_dev->state != STATE_OFFLINE)
+ _mtp_dev->state = STATE_READY;
+
+ fp->private_data = _mtp_dev;
+ return 0;
+}
+
+static int mtp_release(struct inode *ip, struct file *fp)
+{
+ printk(KERN_INFO "mtp_release\n");
+
+ mtp_unlock(&_mtp_dev->open_excl);
+ return 0;
+}
+
+/* file operations for /dev/mtp_usb */
+static const struct file_operations mtp_fops = {
+ .owner = THIS_MODULE,
+ .read = mtp_read,
+ .write = mtp_write,
+ .unlocked_ioctl = mtp_ioctl,
+ .open = mtp_open,
+ .release = mtp_release,
+};
+
+static struct miscdevice mtp_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = mtp_shortname,
+ .fops = &mtp_fops,
+};
+
+static int mtp_ctrlrequest(struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *ctrl)
+{
+ struct mtp_dev *dev = _mtp_dev;
+ int value = -EOPNOTSUPP;
+ u16 w_index = le16_to_cpu(ctrl->wIndex);
+ u16 w_value = le16_to_cpu(ctrl->wValue);
+ u16 w_length = le16_to_cpu(ctrl->wLength);
+ unsigned long flags;
+
+ VDBG(cdev, "mtp_ctrlrequest "
+ "%02x.%02x v%04x i%04x l%u\n",
+ ctrl->bRequestType, ctrl->bRequest,
+ w_value, w_index, w_length);
+
+ /* Handle MTP OS string */
+ if (ctrl->bRequestType ==
+ (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE)
+ && ctrl->bRequest == USB_REQ_GET_DESCRIPTOR
+ && (w_value >> 8) == USB_DT_STRING
+ && (w_value & 0xFF) == MTP_OS_STRING_ID) {
+ value = (w_length < sizeof(mtp_os_string)
+ ? w_length : sizeof(mtp_os_string));
+ memcpy(cdev->req->buf, mtp_os_string, value);
+ } else if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) {
+ /* Handle MTP OS descriptor */
+ DBG(cdev, "vendor request: %d index: %d value: %d length: %d\n",
+ ctrl->bRequest, w_index, w_value, w_length);
+
+ if (ctrl->bRequest == 1
+ && (ctrl->bRequestType & USB_DIR_IN)
+ && (w_index == 4 || w_index == 5)) {
+ value = (w_length < sizeof(mtp_ext_config_desc) ?
+ w_length : sizeof(mtp_ext_config_desc));
+ memcpy(cdev->req->buf, &mtp_ext_config_desc, value);
+ }
+ } else if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) {
+ DBG(cdev, "class request: %d index: %d value: %d length: %d\n",
+ ctrl->bRequest, w_index, w_value, w_length);
+
+ if (ctrl->bRequest == MTP_REQ_CANCEL && w_index == 0
+ && w_value == 0) {
+ DBG(cdev, "MTP_REQ_CANCEL\n");
+
+ spin_lock_irqsave(&dev->lock, flags);
+ if (dev->state == STATE_BUSY) {
+ dev->state = STATE_CANCELED;
+ wake_up(&dev->read_wq);
+ wake_up(&dev->write_wq);
+ }
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ /* We need to queue a request to read the remaining
+ * bytes, but we don't actually need to look at
+ * the contents.
+ */
+ value = w_length;
+ } else if (ctrl->bRequest == MTP_REQ_GET_DEVICE_STATUS
+ && w_index == 0 && w_value == 0) {
+ struct mtp_device_status *status = cdev->req->buf;
+ status->wLength =
+ __constant_cpu_to_le16(sizeof(*status));
+
+ DBG(cdev, "MTP_REQ_GET_DEVICE_STATUS\n");
+ spin_lock_irqsave(&dev->lock, flags);
+ /* device status is "busy" until we report
+ * the cancelation to userspace
+ */
+ if (dev->state == STATE_CANCELED)
+ status->wCode =
+ __cpu_to_le16(MTP_RESPONSE_DEVICE_BUSY);
+ else
+ status->wCode =
+ __cpu_to_le16(MTP_RESPONSE_OK);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ value = sizeof(*status);
+ }
+ }
+
+ /* respond with data transfer or status phase? */
+ if (value >= 0) {
+ int rc;
+ cdev->req->zero = value < w_length;
+ cdev->req->length = value;
+ rc = usb_ep_queue(cdev->gadget->ep0, cdev->req, GFP_ATOMIC);
+ if (rc < 0)
+ ERROR(cdev, "%s: response queue error\n", __func__);
+ }
+ return value;
+}
+
+static int
+mtp_function_bind(struct usb_configuration *c, struct usb_function *f)
+{
+ struct usb_composite_dev *cdev = c->cdev;
+ struct mtp_dev *dev = func_to_mtp(f);
+ int id;
+ int ret;
+
+ dev->cdev = cdev;
+ DBG(cdev, "mtp_function_bind dev: %p\n", dev);
+
+ /* allocate interface ID(s) */
+ id = usb_interface_id(c, f);
+ if (id < 0)
+ return id;
+ mtp_interface_desc.bInterfaceNumber = id;
+
+ /* allocate endpoints */
+ ret = mtp_create_bulk_endpoints(dev, &mtp_fullspeed_in_desc,
+ &mtp_fullspeed_out_desc, &mtp_intr_desc);
+ if (ret)
+ return ret;
+
+ /* support high speed hardware */
+ if (gadget_is_dualspeed(c->cdev->gadget)) {
+ mtp_highspeed_in_desc.bEndpointAddress =
+ mtp_fullspeed_in_desc.bEndpointAddress;
+ mtp_highspeed_out_desc.bEndpointAddress =
+ mtp_fullspeed_out_desc.bEndpointAddress;
+ }
+
+ DBG(cdev, "%s speed %s: IN/%s, OUT/%s\n",
+ gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
+ f->name, dev->ep_in->name, dev->ep_out->name);
+ return 0;
+}
+
+static void
+mtp_function_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+ struct mtp_dev *dev = func_to_mtp(f);
+ struct usb_request *req;
+ int i;
+
+ while ((req = mtp_req_get(dev, &dev->tx_idle)))
+ mtp_request_free(req, dev->ep_in);
+ for (i = 0; i < RX_REQ_MAX; i++)
+ mtp_request_free(dev->rx_req[i], dev->ep_out);
+ while ((req = mtp_req_get(dev, &dev->intr_idle)))
+ mtp_request_free(req, dev->ep_intr);
+ dev->state = STATE_OFFLINE;
+}
+
+static int mtp_function_set_alt(struct usb_function *f,
+ unsigned intf, unsigned alt)
+{
+ struct mtp_dev *dev = func_to_mtp(f);
+ struct usb_composite_dev *cdev = f->config->cdev;
+ int ret;
+
+ DBG(cdev, "mtp_function_set_alt intf: %d alt: %d\n", intf, alt);
+
+ ret = config_ep_by_speed(cdev->gadget, f, dev->ep_in);
+ if (ret)
+ return ret;
+
+ ret = usb_ep_enable(dev->ep_in);
+ if (ret)
+ return ret;
+
+ ret = config_ep_by_speed(cdev->gadget, f, dev->ep_out);
+ if (ret)
+ return ret;
+
+ ret = usb_ep_enable(dev->ep_out);
+ if (ret) {
+ usb_ep_disable(dev->ep_in);
+ return ret;
+ }
+
+ ret = config_ep_by_speed(cdev->gadget, f, dev->ep_intr);
+ if (ret)
+ return ret;
+
+ ret = usb_ep_enable(dev->ep_intr);
+ if (ret) {
+ usb_ep_disable(dev->ep_out);
+ usb_ep_disable(dev->ep_in);
+ return ret;
+ }
+ dev->state = STATE_READY;
+
+ /* readers may be blocked waiting for us to go online */
+ wake_up(&dev->read_wq);
+ return 0;
+}
+
+static void mtp_function_disable(struct usb_function *f)
+{
+ struct mtp_dev *dev = func_to_mtp(f);
+ struct usb_composite_dev *cdev = dev->cdev;
+
+ DBG(cdev, "mtp_function_disable\n");
+ dev->state = STATE_OFFLINE;
+ usb_ep_disable(dev->ep_in);
+ usb_ep_disable(dev->ep_out);
+ usb_ep_disable(dev->ep_intr);
+
+ /* readers may be blocked waiting for us to go online */
+ wake_up(&dev->read_wq);
+
+ VDBG(cdev, "%s disabled\n", dev->function.name);
+}
+
+static int mtp_bind_config(struct usb_configuration *c, bool ptp_config)
+{
+ struct mtp_dev *dev = _mtp_dev;
+ int ret = 0;
+
+ printk(KERN_INFO "mtp_bind_config\n");
+
+ /* allocate a string ID for our interface */
+ if (mtp_string_defs[INTERFACE_STRING_INDEX].id == 0) {
+ ret = usb_string_id(c->cdev);
+ if (ret < 0)
+ return ret;
+ mtp_string_defs[INTERFACE_STRING_INDEX].id = ret;
+ mtp_interface_desc.iInterface = ret;
+ }
+
+ dev->cdev = c->cdev;
+ dev->function.name = "mtp";
+ dev->function.strings = mtp_strings;
+ if (ptp_config) {
+ dev->function.fs_descriptors = fs_ptp_descs;
+ dev->function.hs_descriptors = hs_ptp_descs;
+ } else {
+ dev->function.fs_descriptors = fs_mtp_descs;
+ dev->function.hs_descriptors = hs_mtp_descs;
+ }
+ dev->function.bind = mtp_function_bind;
+ dev->function.unbind = mtp_function_unbind;
+ dev->function.set_alt = mtp_function_set_alt;
+ dev->function.disable = mtp_function_disable;
+
+ return usb_add_function(c, &dev->function);
+}
+
+static int mtp_setup(void)
+{
+ struct mtp_dev *dev;
+ int ret;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ spin_lock_init(&dev->lock);
+ init_waitqueue_head(&dev->read_wq);
+ init_waitqueue_head(&dev->write_wq);
+ init_waitqueue_head(&dev->intr_wq);
+ atomic_set(&dev->open_excl, 0);
+ atomic_set(&dev->ioctl_excl, 0);
+ INIT_LIST_HEAD(&dev->tx_idle);
+ INIT_LIST_HEAD(&dev->intr_idle);
+
+ dev->wq = create_singlethread_workqueue("f_mtp");
+ if (!dev->wq) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ INIT_WORK(&dev->send_file_work, send_file_work);
+ INIT_WORK(&dev->receive_file_work, receive_file_work);
+
+ _mtp_dev = dev;
+
+ ret = misc_register(&mtp_device);
+ if (ret)
+ goto err2;
+
+ return 0;
+
+err2:
+ destroy_workqueue(dev->wq);
+err1:
+ _mtp_dev = NULL;
+ kfree(dev);
+ printk(KERN_ERR "mtp gadget driver failed to initialize\n");
+ return ret;
+}
+
+static void mtp_cleanup(void)
+{
+ struct mtp_dev *dev = _mtp_dev;
+
+ if (!dev)
+ return;
+
+ misc_deregister(&mtp_device);
+ destroy_workqueue(dev->wq);
+ _mtp_dev = NULL;
+ kfree(dev);
+}
diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 36e8c44d8e5e..5c274f1d7b7a 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -25,7 +25,6 @@
#include "u_ether.h"
#include "rndis.h"
-
/*
* This function is an RNDIS Ethernet port -- a Microsoft protocol that's
* been promoted instead of the standard CDC Ethernet. The published RNDIS
@@ -67,6 +66,16 @@
* - MS-Windows drivers sometimes emit undocumented requests.
*/
+static unsigned int rndis_dl_max_pkt_per_xfer = 3;
+module_param(rndis_dl_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(rndis_dl_max_pkt_per_xfer,
+ "Maximum packets per transfer for DL aggregation");
+
+static unsigned int rndis_ul_max_pkt_per_xfer = 3;
+module_param(rndis_ul_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer,
+ "Maximum packets per transfer for UL aggregation");
+
struct f_rndis {
struct gether port;
u8 ctrl_id, data_id;
@@ -448,6 +457,7 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
{
struct f_rndis *rndis = req->context;
int status;
+ rndis_init_msg_type *buf;
/* received RNDIS command from USB_CDC_SEND_ENCAPSULATED_COMMAND */
// spin_lock(&dev->lock);
@@ -455,6 +465,21 @@ static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req)
if (status < 0)
pr_err("RNDIS command error %d, %d/%d\n",
status, req->actual, req->length);
+
+ buf = (rndis_init_msg_type *)req->buf;
+
+ if (buf->MessageType == RNDIS_MSG_INIT) {
+ if (buf->MaxTransferSize > 2048)
+ rndis->port.multi_pkt_xfer = 1;
+ else
+ rndis->port.multi_pkt_xfer = 0;
+ pr_info("%s: MaxTransferSize: %d : Multi_pkt_txr: %s\n",
+ __func__, buf->MaxTransferSize,
+ rndis->port.multi_pkt_xfer ? "enabled" :
+ "disabled");
+ if (rndis_dl_max_pkt_per_xfer <= 1)
+ rndis->port.multi_pkt_xfer = 0;
+ }
// spin_unlock(&dev->lock);
}
@@ -748,6 +773,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f)
rndis_set_param_medium(rndis->config, RNDIS_MEDIUM_802_3, 0);
rndis_set_host_mac(rndis->config, rndis->ethaddr);
+ rndis_set_max_pkt_xfer(rndis->config, rndis_ul_max_pkt_per_xfer);
if (rndis->manufacturer && rndis->vendorID &&
rndis_set_param_vendor(rndis->config, rndis->vendorID,
@@ -821,12 +847,12 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN],
if (!can_support_rndis(c) || !ethaddr)
return -EINVAL;
- if (rndis_string_defs[0].id == 0) {
- /* ... and setup RNDIS itself */
- status = rndis_init();
- if (status < 0)
- return status;
+ /* setup RNDIS itself */
+ status = rndis_init();
+ if (status < 0)
+ return status;
+ if (rndis_string_defs[0].id == 0) {
status = usb_string_ids_tab(c->cdev, rndis_string_defs);
if (status)
return status;
@@ -854,6 +880,8 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN],
rndis->port.header_len = sizeof(struct rndis_packet_msg_type);
rndis->port.wrap = rndis_add_header;
rndis->port.unwrap = rndis_rm_hdr;
+ rndis->port.ul_max_pkts_per_xfer = rndis_ul_max_pkt_per_xfer;
+ rndis->port.dl_max_pkts_per_xfer = rndis_dl_max_pkt_per_xfer;
rndis->port.func.name = "rndis";
rndis->port.func.strings = rndis_strings;
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index 1e4cfb05f70b..cb2767df3fba 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -59,6 +59,16 @@ MODULE_PARM_DESC (rndis_debug, "enable debugging");
#define RNDIS_MAX_CONFIGS 1
+int rndis_ul_max_pkt_per_xfer_rcvd;
+module_param(rndis_ul_max_pkt_per_xfer_rcvd, int, S_IRUGO);
+MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer_rcvd,
+ "Max num of REMOTE_NDIS_PACKET_MSGs received in a single transfer");
+
+int rndis_ul_max_xfer_size_rcvd;
+module_param(rndis_ul_max_xfer_size_rcvd, int, S_IRUGO);
+MODULE_PARM_DESC(rndis_ul_max_xfer_size_rcvd,
+ "Max size of bus transfer received");
+
static rndis_params rndis_per_dev_params[RNDIS_MAX_CONFIGS];
@@ -585,12 +595,12 @@ static int rndis_init_response(int configNr, rndis_init_msg_type *buf)
resp->MinorVersion = cpu_to_le32(RNDIS_MINOR_VERSION);
resp->DeviceFlags = cpu_to_le32(RNDIS_DF_CONNECTIONLESS);
resp->Medium = cpu_to_le32(RNDIS_MEDIUM_802_3);
- resp->MaxPacketsPerTransfer = cpu_to_le32(1);
- resp->MaxTransferSize = cpu_to_le32(
- params->dev->mtu
+ resp->MaxPacketsPerTransfer = cpu_to_le32(params->max_pkt_per_xfer);
+ resp->MaxTransferSize = cpu_to_le32(params->max_pkt_per_xfer *
+ (params->dev->mtu
+ sizeof(struct ethhdr)
+ sizeof(struct rndis_packet_msg_type)
- + 22);
+ + 22));
resp->PacketAlignmentFactor = cpu_to_le32(0);
resp->AFListOffset = cpu_to_le32(0);
resp->AFListSize = cpu_to_le32(0);
@@ -686,6 +696,12 @@ static int rndis_reset_response(int configNr, rndis_reset_msg_type *buf)
rndis_reset_cmplt_type *resp;
rndis_resp_t *r;
struct rndis_params *params = rndis_per_dev_params + configNr;
+ u32 length;
+ u8 *xbuf;
+
+ /* drain the response queue */
+ while ((xbuf = rndis_get_next_response(configNr, &length)))
+ rndis_free_response(configNr, xbuf);
r = rndis_add_response(configNr, sizeof(rndis_reset_cmplt_type));
if (!r)
@@ -910,6 +926,8 @@ int rndis_set_param_dev(u8 configNr, struct net_device *dev, u16 *cdc_filter)
rndis_per_dev_params[configNr].dev = dev;
rndis_per_dev_params[configNr].filter = cdc_filter;
+ rndis_ul_max_xfer_size_rcvd = 0;
+ rndis_ul_max_pkt_per_xfer_rcvd = 0;
return 0;
}
@@ -936,6 +954,13 @@ int rndis_set_param_medium(u8 configNr, u32 medium, u32 speed)
return 0;
}
+void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer)
+{
+ pr_debug("%s:\n", __func__);
+
+ rndis_per_dev_params[configNr].max_pkt_per_xfer = max_pkt_per_xfer;
+}
+
void rndis_add_hdr(struct sk_buff *skb)
{
struct rndis_packet_msg_type *header;
@@ -1008,23 +1033,73 @@ int rndis_rm_hdr(struct gether *port,
struct sk_buff *skb,
struct sk_buff_head *list)
{
- /* tmp points to a struct rndis_packet_msg_type */
- __le32 *tmp = (void *)skb->data;
+ int num_pkts = 1;
- /* MessageType, MessageLength */
- if (cpu_to_le32(RNDIS_MSG_PACKET)
- != get_unaligned(tmp++)) {
- dev_kfree_skb_any(skb);
- return -EINVAL;
- }
- tmp++;
+ if (skb->len > rndis_ul_max_xfer_size_rcvd)
+ rndis_ul_max_xfer_size_rcvd = skb->len;
+
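+ /* A single bulk transfer may aggregate several REMOTE_NDIS_PACKET_MSGs;
+ * walk them one header at a time, queueing one skb per payload.
+ */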
+ while (skb->len) {
+ struct rndis_packet_msg_type *hdr;
+ struct sk_buff *skb2;
+ u32 msg_len, data_offset, data_len;
+
+ /* some rndis hosts send extra byte to avoid zlp, ignore it */
+ if (skb->len == 1) {
+ dev_kfree_skb_any(skb);
+ return 0;
+ }
+
+ if (skb->len < sizeof *hdr) {
+ pr_err("invalid rndis pkt: skblen:%u hdr_len:%u",
+ skb->len, sizeof *hdr);
+ dev_kfree_skb_any(skb);
+ return -EINVAL;
+ }
- /* DataOffset, DataLength */
- if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8)) {
- dev_kfree_skb_any(skb);
- return -EOVERFLOW;
+ hdr = (void *)skb->data;
+ msg_len = le32_to_cpu(hdr->MessageLength);
+ data_offset = le32_to_cpu(hdr->DataOffset);
+ data_len = le32_to_cpu(hdr->DataLength);
+
+ if (skb->len < msg_len ||
+ ((data_offset + data_len + 8) > msg_len)) {
+ pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n",
+ le32_to_cpu(hdr->MessageType),
+ msg_len, data_offset, data_len, skb->len);
+ dev_kfree_skb_any(skb);
+ return -EOVERFLOW;
+ }
+ if (le32_to_cpu(hdr->MessageType) != RNDIS_MSG_PACKET) {
+ pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n",
+ le32_to_cpu(hdr->MessageType),
+ msg_len, data_offset, data_len, skb->len);
+ dev_kfree_skb_any(skb);
+ return -EINVAL;
+ }
+
+ skb_pull(skb, data_offset + 8);
+
+ if (msg_len == skb->len) {
+ skb_trim(skb, data_len);
+ break;
+ }
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2) {
+ pr_err("%s:skb clone failed\n", __func__);
+ dev_kfree_skb_any(skb);
+ return -ENOMEM;
+ }
+
+ skb_pull(skb, msg_len - sizeof *hdr);
+ skb_trim(skb2, data_len);
+ skb_queue_tail(list, skb2);
+
+ num_pkts++;
}
- skb_trim(skb, get_unaligned_le32(tmp++));
+
+ if (num_pkts > rndis_ul_max_pkt_per_xfer_rcvd)
+ rndis_ul_max_pkt_per_xfer_rcvd = num_pkts;
skb_queue_tail(list, skb);
return 0;
@@ -1127,11 +1202,15 @@ static struct proc_dir_entry *rndis_connect_state [RNDIS_MAX_CONFIGS];
#endif /* CONFIG_USB_GADGET_DEBUG_FILES */
+static bool rndis_initialized;
int rndis_init(void)
{
u8 i;
+ if (rndis_initialized)
+ return 0;
+
for (i = 0; i < RNDIS_MAX_CONFIGS; i++) {
#ifdef CONFIG_USB_GADGET_DEBUG_FILES
char name [20];
@@ -1158,6 +1237,7 @@ int rndis_init(void)
INIT_LIST_HEAD(&(rndis_per_dev_params[i].resp_queue));
}
+ rndis_initialized = true;
return 0;
}
@@ -1166,7 +1246,13 @@ void rndis_exit(void)
#ifdef CONFIG_USB_GADGET_DEBUG_FILES
u8 i;
char name[20];
+#endif
+
+ if (!rndis_initialized)
+ return;
+ rndis_initialized = false;
+#ifdef CONFIG_USB_GADGET_DEBUG_FILES
for (i = 0; i < RNDIS_MAX_CONFIGS; i++) {
sprintf(name, NAME_TEMPLATE, i);
remove_proc_entry(name, NULL);
diff --git a/drivers/usb/gadget/rndis.h b/drivers/usb/gadget/rndis.h
index 0647f2f34e89..12045b31a311 100644
--- a/drivers/usb/gadget/rndis.h
+++ b/drivers/usb/gadget/rndis.h
@@ -189,6 +189,7 @@ typedef struct rndis_params
struct net_device *dev;
u32 vendorID;
+ u8 max_pkt_per_xfer;
const char *vendorDescr;
void (*resp_avail)(void *v);
void *v;
@@ -204,6 +205,7 @@ int rndis_set_param_dev (u8 configNr, struct net_device *dev,
int rndis_set_param_vendor (u8 configNr, u32 vendorID,
const char *vendorDescr);
int rndis_set_param_medium (u8 configNr, u32 medium, u32 speed);
+void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer);
void rndis_add_hdr (struct sk_buff *skb);
int rndis_rm_hdr(struct gether *port, struct sk_buff *skb,
struct sk_buff_head *list);
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 4b76124ce96b..14f587efc0f6 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -48,6 +48,8 @@
#define UETH__VERSION "29-May-2008"
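+/* single-threaded workqueue on which process_rx_w() drains rx_frames
+ * in process context
+ */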
+static struct workqueue_struct *uether_wq;
+
struct eth_dev {
/* lock is held while accessing port_usb
*/
@@ -59,17 +61,25 @@ struct eth_dev {
spinlock_t req_lock; /* guard {rx,tx}_reqs */
struct list_head tx_reqs, rx_reqs;
- atomic_t tx_qlen;
+ unsigned tx_qlen;
+/* Minimum number of TX USB request queued to UDC */
+#define TX_REQ_THRESHOLD 5
+ int no_tx_req_used;
+ int tx_skb_hold_count;
+ u32 tx_req_bufsize;
struct sk_buff_head rx_frames;
unsigned header_len;
+ unsigned ul_max_pkts_per_xfer;
+ unsigned dl_max_pkts_per_xfer;
struct sk_buff *(*wrap)(struct gether *, struct sk_buff *skb);
int (*unwrap)(struct gether *,
struct sk_buff *skb,
struct sk_buff_head *list);
struct work_struct work;
+ struct work_struct rx_work;
unsigned long todo;
#define WORK_RX_MEMORY 0
@@ -84,7 +94,7 @@ struct eth_dev {
#define DEFAULT_QLEN 2 /* double buffering by default */
-static unsigned qmult = 5;
+static unsigned qmult = 10;
module_param(qmult, uint, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(qmult, "queue length multiplier at high/super speed");
@@ -226,9 +236,13 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags)
size += out->maxpacket - 1;
size -= size % out->maxpacket;
+ if (dev->ul_max_pkts_per_xfer)
+ size *= dev->ul_max_pkts_per_xfer;
+
if (dev->port_usb->is_fixed)
size = max_t(size_t, size, dev->port_usb->fixed_out_len);
+ DBG(dev, "%s: size: %d\n", __func__, size);
skb = alloc_skb(size + NET_IP_ALIGN, gfp_flags);
if (skb == NULL) {
DBG(dev, "no rx skb\n");
@@ -254,18 +268,16 @@ enomem:
DBG(dev, "rx submit --> %d\n", retval);
if (skb)
dev_kfree_skb_any(skb);
- spin_lock_irqsave(&dev->req_lock, flags);
- list_add(&req->list, &dev->rx_reqs);
- spin_unlock_irqrestore(&dev->req_lock, flags);
}
return retval;
}
static void rx_complete(struct usb_ep *ep, struct usb_request *req)
{
- struct sk_buff *skb = req->context, *skb2;
+ struct sk_buff *skb = req->context;
struct eth_dev *dev = ep->driver_data;
int status = req->status;
+ bool queue = 0;
switch (status) {
@@ -281,6 +293,10 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req)
status = dev->unwrap(dev->port_usb,
skb,
&dev->rx_frames);
+ if (status == -EINVAL)
+ dev->net->stats.rx_errors++;
+ else if (status == -EOVERFLOW)
+ dev->net->stats.rx_over_errors++;
} else {
dev_kfree_skb_any(skb);
status = -ENOTCONN;
@@ -289,30 +305,9 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req)
} else {
skb_queue_tail(&dev->rx_frames, skb);
}
- skb = NULL;
-
- skb2 = skb_dequeue(&dev->rx_frames);
- while (skb2) {
- if (status < 0
- || ETH_HLEN > skb2->len
- || skb2->len > VLAN_ETH_FRAME_LEN) {
- dev->net->stats.rx_errors++;
- dev->net->stats.rx_length_errors++;
- DBG(dev, "rx length %d\n", skb2->len);
- dev_kfree_skb_any(skb2);
- goto next_frame;
- }
- skb2->protocol = eth_type_trans(skb2, dev->net);
- dev->net->stats.rx_packets++;
- dev->net->stats.rx_bytes += skb2->len;
- /* no buffer copies needed, unless hardware can't
- * use skb buffers.
- */
- status = netif_rx(skb2);
-next_frame:
- skb2 = skb_dequeue(&dev->rx_frames);
- }
+ if (!status)
+ queue = 1;
break;
/* software-driven interface shutdown */
@@ -335,22 +330,20 @@ quiesce:
/* FALLTHROUGH */
default:
+ queue = 1;
+ dev_kfree_skb_any(skb);
dev->net->stats.rx_errors++;
DBG(dev, "rx status %d\n", status);
break;
}
- if (skb)
- dev_kfree_skb_any(skb);
- if (!netif_running(dev->net)) {
clean:
- spin_lock(&dev->req_lock);
- list_add(&req->list, &dev->rx_reqs);
- spin_unlock(&dev->req_lock);
- req = NULL;
- }
- if (req)
- rx_submit(dev, req, GFP_ATOMIC);
+ spin_lock(&dev->req_lock);
+ list_add(&req->list, &dev->rx_reqs);
+ spin_unlock(&dev->req_lock);
+
+ if (queue)
+ queue_work(uether_wq, &dev->rx_work);
}
static int prealloc(struct list_head *list, struct usb_ep *ep, unsigned n)
@@ -415,16 +408,24 @@ static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags)
{
struct usb_request *req;
unsigned long flags;
+ int req_cnt = 0;
/* fill unused rxq slots with some skb */
spin_lock_irqsave(&dev->req_lock, flags);
while (!list_empty(&dev->rx_reqs)) {
+ /* break the nexus of continuous completion and re-submission */
+ if (++req_cnt > qlen(dev->gadget))
+ break;
+
req = container_of(dev->rx_reqs.next,
struct usb_request, list);
list_del_init(&req->list);
spin_unlock_irqrestore(&dev->req_lock, flags);
if (rx_submit(dev, req, gfp_flags) < 0) {
+ spin_lock_irqsave(&dev->req_lock, flags);
+ list_add(&req->list, &dev->rx_reqs);
+ spin_unlock_irqrestore(&dev->req_lock, flags);
defer_kevent(dev, WORK_RX_MEMORY);
return;
}
@@ -434,6 +435,36 @@ static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags)
spin_unlock_irqrestore(&dev->req_lock, flags);
}
+static void process_rx_w(struct work_struct *work)
+{
+ struct eth_dev *dev = container_of(work, struct eth_dev, rx_work);
+ struct sk_buff *skb;
+ int status = 0;
+
+ if (!dev->port_usb)
+ return;
+
+ while ((skb = skb_dequeue(&dev->rx_frames))) {
+ if (status < 0
+ || ETH_HLEN > skb->len
+ || skb->len > ETH_FRAME_LEN) {
+ dev->net->stats.rx_errors++;
+ dev->net->stats.rx_length_errors++;
+ DBG(dev, "rx length %d\n", skb->len);
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+ skb->protocol = eth_type_trans(skb, dev->net);
+ dev->net->stats.rx_packets++;
+ dev->net->stats.rx_bytes += skb->len;
+
+ status = netif_rx_ni(skb);
+ }
+
+ if (netif_running(dev->net))
+ rx_fill(dev, GFP_KERNEL);
+}
+
static void eth_work(struct work_struct *work)
{
struct eth_dev *dev = container_of(work, struct eth_dev, work);
@@ -451,6 +482,11 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req)
{
struct sk_buff *skb = req->context;
struct eth_dev *dev = ep->driver_data;
+ struct net_device *net = dev->net;
+ struct usb_request *new_req;
+ struct usb_ep *in;
+ int length;
+ int retval;
switch (req->status) {
default:
@@ -461,16 +497,74 @@ static void tx_complete(struct usb_ep *ep, struct usb_request *req)
case -ESHUTDOWN: /* disconnect etc */
break;
case 0:
- dev->net->stats.tx_bytes += skb->len;
+ if (!req->zero)
+ dev->net->stats.tx_bytes += req->length-1;
+ else
+ dev->net->stats.tx_bytes += req->length;
}
dev->net->stats.tx_packets++;
spin_lock(&dev->req_lock);
- list_add(&req->list, &dev->tx_reqs);
- spin_unlock(&dev->req_lock);
- dev_kfree_skb_any(skb);
+ list_add_tail(&req->list, &dev->tx_reqs);
+
+ if (dev->port_usb->multi_pkt_xfer) {
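+ /* With aggregation the request keeps its preallocated buffer; reset
+ * its length and, if the next idle request already holds batched
+ * frames, queue it to the IN endpoint from this completion.
+ */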
+ dev->no_tx_req_used--;
+ req->length = 0;
+ in = dev->port_usb->in_ep;
+
+ if (!list_empty(&dev->tx_reqs)) {
+ new_req = container_of(dev->tx_reqs.next,
+ struct usb_request, list);
+ list_del(&new_req->list);
+ spin_unlock(&dev->req_lock);
+ if (new_req->length > 0) {
+ length = new_req->length;
+
+ /* NCM requires no zlp if transfer is
+ * dwNtbInMaxSize */
+ if (dev->port_usb->is_fixed &&
+ length == dev->port_usb->fixed_in_len &&
+ (length % in->maxpacket) == 0)
+ new_req->zero = 0;
+ else
+ new_req->zero = 1;
+
+ /* use zlp framing on tx for strict CDC-Ether
+ * conformance, though any robust network rx
+ * path ignores extra padding. and some hardware
+ * doesn't like to write zlps.
+ */
+ if (new_req->zero && !dev->zlp &&
+ (length % in->maxpacket) == 0) {
+ new_req->zero = 0;
+ length++;
+ }
+
+ new_req->length = length;
+ retval = usb_ep_queue(in, new_req, GFP_ATOMIC);
+ switch (retval) {
+ default:
+ DBG(dev, "tx queue err %d\n", retval);
+ break;
+ case 0:
+ spin_lock(&dev->req_lock);
+ dev->no_tx_req_used++;
+ spin_unlock(&dev->req_lock);
+ net->trans_start = jiffies;
+ }
+ } else {
+ spin_lock(&dev->req_lock);
+ list_add(&new_req->list, &dev->tx_reqs);
+ spin_unlock(&dev->req_lock);
+ }
+ } else {
+ spin_unlock(&dev->req_lock);
+ }
+ } else {
+ spin_unlock(&dev->req_lock);
+ dev_kfree_skb_any(skb);
+ }
- atomic_dec(&dev->tx_qlen);
if (netif_carrier_ok(dev->net))
netif_wake_queue(dev->net);
}
@@ -480,6 +574,26 @@ static inline int is_promisc(u16 cdc_filter)
return cdc_filter & USB_CDC_PACKET_TYPE_PROMISCUOUS;
}
+static void alloc_tx_buffer(struct eth_dev *dev)
+{
+ struct list_head *act;
+ struct usb_request *req;
+
+ dev->tx_req_bufsize = (dev->dl_max_pkts_per_xfer *
+ (dev->net->mtu
+ + sizeof(struct ethhdr)
+ /* size of rndis_packet_msg_type */
+ + 44
+ + 22));
+
+ list_for_each(act, &dev->tx_reqs) {
+ req = container_of(act, struct usb_request, list);
+ if (!req->buf)
+ req->buf = kmalloc(dev->tx_req_bufsize,
+ GFP_ATOMIC);
+ }
+}
+
static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
struct net_device *net)
{
@@ -506,6 +620,10 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
+ /* Allocate memory for tx_reqs to support multi packet transfer */
+ if (dev->port_usb->multi_pkt_xfer && !dev->tx_req_bufsize)
+ alloc_tx_buffer(dev);
+
/* apply outgoing CDC or RNDIS filters */
if (!is_promisc(cdc_filter)) {
u8 *dest = skb->data;
@@ -560,11 +678,39 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
spin_unlock_irqrestore(&dev->lock, flags);
if (!skb)
goto drop;
+ }
+
+ spin_lock_irqsave(&dev->req_lock, flags);
+ dev->tx_skb_hold_count++;
+ spin_unlock_irqrestore(&dev->req_lock, flags);
+
+ if (dev->port_usb->multi_pkt_xfer) {
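+ /* Aggregation path: append the frame to the request buffer and free
+ * the skb; the request is held back for more frames only while fewer
+ * than dl_max_pkts_per_xfer frames are batched and more than
+ * TX_REQ_THRESHOLD requests are already queued to the UDC.
+ */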
+ memcpy(req->buf + req->length, skb->data, skb->len);
+ req->length = req->length + skb->len;
+ length = req->length;
+ dev_kfree_skb_any(skb);
+
+ spin_lock_irqsave(&dev->req_lock, flags);
+ if (dev->tx_skb_hold_count < dev->dl_max_pkts_per_xfer) {
+ if (dev->no_tx_req_used > TX_REQ_THRESHOLD) {
+ list_add(&req->list, &dev->tx_reqs);
+ spin_unlock_irqrestore(&dev->req_lock, flags);
+ goto success;
+ }
+ }
+
+ dev->no_tx_req_used++;
+ spin_unlock_irqrestore(&dev->req_lock, flags);
+ spin_lock_irqsave(&dev->lock, flags);
+ dev->tx_skb_hold_count = 0;
+ spin_unlock_irqrestore(&dev->lock, flags);
+ } else {
length = skb->len;
+ req->buf = skb->data;
+ req->context = skb;
}
- req->buf = skb->data;
- req->context = skb;
+
req->complete = tx_complete;
/* NCM requires no zlp if transfer is dwNtbInMaxSize */
@@ -579,17 +725,26 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
* though any robust network rx path ignores extra padding.
* and some hardware doesn't like to write zlps.
*/
- if (req->zero && !dev->zlp && (length % in->maxpacket) == 0)
+ if (req->zero && !dev->zlp && (length % in->maxpacket) == 0) {
+ req->zero = 0;
length++;
+ }
req->length = length;
- /* throttle high/super speed IRQ rate back slightly */
- if (gadget_is_dualspeed(dev->gadget))
- req->no_interrupt = (dev->gadget->speed == USB_SPEED_HIGH ||
- dev->gadget->speed == USB_SPEED_SUPER)
- ? ((atomic_read(&dev->tx_qlen) % qmult) != 0)
- : 0;
+ /* throttle highspeed IRQ rate back slightly */
+ if (gadget_is_dualspeed(dev->gadget) &&
+ (dev->gadget->speed == USB_SPEED_HIGH)) {
+ dev->tx_qlen++;
+ if (dev->tx_qlen == (qmult/2)) {
+ req->no_interrupt = 0;
+ dev->tx_qlen = 0;
+ } else {
+ req->no_interrupt = 1;
+ }
+ } else {
+ req->no_interrupt = 0;
+ }
retval = usb_ep_queue(in, req, GFP_ATOMIC);
switch (retval) {
@@ -598,11 +753,11 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
break;
case 0:
net->trans_start = jiffies;
- atomic_inc(&dev->tx_qlen);
}
if (retval) {
- dev_kfree_skb_any(skb);
+ if (!dev->port_usb->multi_pkt_xfer)
+ dev_kfree_skb_any(skb);
drop:
dev->net->stats.tx_dropped++;
spin_lock_irqsave(&dev->req_lock, flags);
@@ -611,6 +766,7 @@ drop:
list_add(&req->list, &dev->tx_reqs);
spin_unlock_irqrestore(&dev->req_lock, flags);
}
+success:
return NETDEV_TX_OK;
}
@@ -624,7 +780,7 @@ static void eth_start(struct eth_dev *dev, gfp_t gfp_flags)
rx_fill(dev, gfp_flags);
/* and open the tx floodgates */
- atomic_set(&dev->tx_qlen, 0);
+ dev->tx_qlen = 0;
netif_wake_queue(dev->net);
}
@@ -697,6 +853,8 @@ static int eth_stop(struct net_device *net)
/*-------------------------------------------------------------------------*/
+static u8 host_ethaddr[ETH_ALEN];
+
/* initial value, changed by "ifconfig usb0 hw ether xx:xx:xx:xx:xx:xx" */
static char *dev_addr;
module_param(dev_addr, charp, S_IRUGO);
@@ -728,6 +886,17 @@ static int get_ether_addr(const char *str, u8 *dev_addr)
return 1;
}
+static int get_host_ether_addr(u8 *str, u8 *dev_addr)
+{
+ memcpy(dev_addr, str, ETH_ALEN);
+ if (is_valid_ether_addr(dev_addr))
+ return 0;
+
+ random_ether_addr(dev_addr);
+ memcpy(str, dev_addr, ETH_ALEN);
+ return 1;
+}
+
static const struct net_device_ops eth_netdev_ops = {
.ndo_open = eth_open,
.ndo_stop = eth_stop,
@@ -770,6 +939,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN],
spin_lock_init(&dev->lock);
spin_lock_init(&dev->req_lock);
INIT_WORK(&dev->work, eth_work);
+ INIT_WORK(&dev->rx_work, process_rx_w);
INIT_LIST_HEAD(&dev->tx_reqs);
INIT_LIST_HEAD(&dev->rx_reqs);
@@ -782,9 +952,11 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, u8 ethaddr[ETH_ALEN],
if (get_ether_addr(dev_addr, net->dev_addr))
dev_warn(&g->dev,
"using random %s ethernet address\n", "self");
- if (get_ether_addr(host_addr, dev->host_mac))
- dev_warn(&g->dev,
- "using random %s ethernet address\n", "host");
+
+ if (get_host_ether_addr(host_ethaddr, dev->host_mac))
+ dev_warn(&g->dev, "using random %s ethernet address\n", "host");
+ else
+ dev_warn(&g->dev, "using previous %s ethernet address\n", "host");
if (ethaddr)
memcpy(ethaddr, dev->host_mac, ETH_ALEN);
@@ -882,8 +1054,13 @@ struct net_device *gether_connect(struct gether *link)
dev->header_len = link->header_len;
dev->unwrap = link->unwrap;
dev->wrap = link->wrap;
+ dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer;
+ dev->dl_max_pkts_per_xfer = link->dl_max_pkts_per_xfer;
spin_lock(&dev->lock);
+ dev->tx_skb_hold_count = 0;
+ dev->no_tx_req_used = 0;
+ dev->tx_req_bufsize = 0;
dev->port_usb = link;
if (netif_running(dev->net)) {
if (link->open)
@@ -927,6 +1104,7 @@ void gether_disconnect(struct gether *link)
{
struct eth_dev *dev = link->ioport;
struct usb_request *req;
+ struct sk_buff *skb;
WARN_ON(!dev);
if (!dev)
@@ -949,6 +1127,8 @@ void gether_disconnect(struct gether *link)
list_del(&req->list);
spin_unlock(&dev->req_lock);
+ if (link->multi_pkt_xfer)
+ kfree(req->buf);
usb_ep_free_request(link->in_ep, req);
spin_lock(&dev->req_lock);
}
@@ -968,6 +1148,12 @@ void gether_disconnect(struct gether *link)
spin_lock(&dev->req_lock);
}
spin_unlock(&dev->req_lock);
+
+ spin_lock(&dev->rx_frames.lock);
+ while ((skb = __skb_dequeue(&dev->rx_frames)))
+ dev_kfree_skb_any(skb);
+ spin_unlock(&dev->rx_frames.lock);
+
link->out_ep->driver_data = NULL;
link->out_ep->desc = NULL;
@@ -980,3 +1166,23 @@ void gether_disconnect(struct gether *link)
dev->port_usb = NULL;
spin_unlock(&dev->lock);
}
+
+static int __init gether_init(void)
+{
+ uether_wq = create_singlethread_workqueue("uether");
+ if (!uether_wq) {
+ pr_err("%s: Unable to create workqueue: uether\n", __func__);
+ return -ENOMEM;
+ }
+ return 0;
+}
+module_init(gether_init);
+
+static void __exit gether_exit(void)
+{
+ destroy_workqueue(uether_wq);
+}
+module_exit(gether_exit);
+MODULE_DESCRIPTION("ethernet over USB driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h
index 02522338a708..67eda50ae995 100644
--- a/drivers/usb/gadget/u_ether.h
+++ b/drivers/usb/gadget/u_ether.h
@@ -54,6 +54,9 @@ struct gether {
bool is_fixed;
u32 fixed_out_len;
u32 fixed_in_len;
+ unsigned ul_max_pkts_per_xfer;
+ unsigned dl_max_pkts_per_xfer;
+ bool multi_pkt_xfer;
struct sk_buff *(*wrap)(struct gether *port,
struct sk_buff *skb);
int (*unwrap)(struct gether *port,
diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c
index b369292d4b90..72068082e83c 100644
--- a/drivers/usb/gadget/u_serial.c
+++ b/drivers/usb/gadget/u_serial.c
@@ -1126,6 +1126,7 @@ int gserial_alloc_line(unsigned char *line_num)
tty_dev = tty_port_register_device(&ports[port_num].port->port,
gs_tty_driver, port_num, NULL);
+
if (IS_ERR(tty_dev)) {
struct gs_port *port;
pr_err("%s: failed to register tty for port %d, err %ld\n",
diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index 817a26cbfab1..58a861395ea0 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -335,7 +335,15 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
driver->unbind(udc->gadget);
goto err1;
}
- usb_gadget_connect(udc->gadget);
+ /*
+ * HACK: The Android gadget driver disconnects the gadget
+ * on bind and expects the gadget to stay disconnected until
+ * it calls usb_gadget_connect when userspace is ready. Remove
+ * the call to usb_gadget_connect below to avoid enabling the
+ * pullup before userspace is ready.
+ *
+ * usb_gadget_connect(udc->gadget);
+ */
kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
return 0;
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index be41733a5eac..0dd0da0d0ac5 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -16,6 +16,14 @@ menuconfig USB_PHY
If you're not sure if this applies to you, it probably doesn't;
say N here.
+config USB_OTG_WAKELOCK
+ bool "Hold a wakelock when USB connected"
+ depends on WAKELOCK
+ select USB_OTG_UTILS
+ help
+ Select this to automatically hold a wakelock when USB is
+ connected, preventing suspend.
+
if USB_PHY
#
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index a9169cb1e6fc..a0a6cbad8806 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -5,6 +5,7 @@
ccflags-$(CONFIG_USB_DEBUG) := -DDEBUG
obj-$(CONFIG_USB_PHY) += phy.o
+obj-$(CONFIG_USB_OTG_WAKELOCK) += otg-wakelock.o
# transceiver drivers, keep the list sorted
diff --git a/drivers/usb/phy/otg-wakelock.c b/drivers/usb/phy/otg-wakelock.c
new file mode 100644
index 000000000000..479376bfa484
--- /dev/null
+++ b/drivers/usb/phy/otg-wakelock.c
@@ -0,0 +1,173 @@
+/*
+ * otg-wakelock.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/wakelock.h>
+#include <linux/spinlock.h>
+#include <linux/usb/otg.h>
+
+#define TEMPORARY_HOLD_TIME 2000
+
+static bool enabled = true;
+static struct usb_phy *otgwl_xceiv;
+static struct notifier_block otgwl_nb;
+
+/*
+ * otgwl_spinlock is held while the VBUS lock is grabbed or dropped and the
+ * held field is updated to match.
+ */
+
+static DEFINE_SPINLOCK(otgwl_spinlock);
+
+/*
+ * Only one lock for now, but these three fields belong together, so they
+ * are grouped in a struct.
+ */
+
+struct otgwl_lock {
+ char name[40];
+ struct wake_lock wakelock;
+ bool held;
+};
+
+/*
+ * VBUS present lock. Also used as a timed lock on charger
+ * connect/disconnect and USB host disconnect, to allow the system
+ * to react to the change in power.
+ */
+
+static struct otgwl_lock vbus_lock;
+
+static void otgwl_hold(struct otgwl_lock *lock)
+{
+ if (!lock->held) {
+ wake_lock(&lock->wakelock);
+ lock->held = true;
+ }
+}
+
+static void otgwl_temporary_hold(struct otgwl_lock *lock)
+{
+ wake_lock_timeout(&lock->wakelock,
+ msecs_to_jiffies(TEMPORARY_HOLD_TIME));
+ lock->held = false;
+}
+
+static void otgwl_drop(struct otgwl_lock *lock)
+{
+ if (lock->held) {
+ wake_unlock(&lock->wakelock);
+ lock->held = false;
+ }
+}
+
+static void otgwl_handle_event(unsigned long event)
+{
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&otgwl_spinlock, irqflags);
+
+ if (!enabled) {
+ otgwl_drop(&vbus_lock);
+ spin_unlock_irqrestore(&otgwl_spinlock, irqflags);
+ return;
+ }
+
+ switch (event) {
+ case USB_EVENT_VBUS:
+ case USB_EVENT_ENUMERATED:
+ otgwl_hold(&vbus_lock);
+ break;
+
+ case USB_EVENT_NONE:
+ case USB_EVENT_ID:
+ case USB_EVENT_CHARGER:
+ otgwl_temporary_hold(&vbus_lock);
+ break;
+
+ default:
+ break;
+ }
+
+ spin_unlock_irqrestore(&otgwl_spinlock, irqflags);
+}
+
+static int otgwl_otg_notifications(struct notifier_block *nb,
+ unsigned long event, void *unused)
+{
+ otgwl_handle_event(event);
+ return NOTIFY_OK;
+}
+
+static int set_enabled(const char *val, const struct kernel_param *kp)
+{
+ int rv = param_set_bool(val, kp);
+
+ if (rv)
+ return rv;
+
+ if (otgwl_xceiv)
+ otgwl_handle_event(otgwl_xceiv->last_event);
+
+ return 0;
+}
+
+static struct kernel_param_ops enabled_param_ops = {
+ .set = set_enabled,
+ .get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0644);
+MODULE_PARM_DESC(enabled, "enable wakelock when VBUS present");
+
+static int __init otg_wakelock_init(void)
+{
+ int ret;
+ struct usb_phy *phy;
+
+ phy = usb_get_phy(USB_PHY_TYPE_USB2);
+
+ if (IS_ERR(phy)) {
+ pr_err("%s: No USB transceiver found\n", __func__);
+ return PTR_ERR(phy);
+ }
+ otgwl_xceiv = phy;
+
+ snprintf(vbus_lock.name, sizeof(vbus_lock.name), "vbus-%s",
+ dev_name(otgwl_xceiv->dev));
+ wake_lock_init(&vbus_lock.wakelock, WAKE_LOCK_SUSPEND,
+ vbus_lock.name);
+
+ otgwl_nb.notifier_call = otgwl_otg_notifications;
+ ret = usb_register_notifier(otgwl_xceiv, &otgwl_nb);
+
+ if (ret) {
+ pr_err("%s: usb_register_notifier on transceiver %s"
+ " failed\n", __func__,
+ dev_name(otgwl_xceiv->dev));
+ otgwl_xceiv = NULL;
+ wake_lock_destroy(&vbus_lock.wakelock);
+ return ret;
+ }
+
+ otgwl_handle_event(otgwl_xceiv->last_event);
+ return ret;
+}
+
+late_initcall(otg_wakelock_init);
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 29a5121ce7fd..48b1de211e37 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -19,6 +19,8 @@ source "drivers/char/agp/Kconfig"
source "drivers/gpu/vga/Kconfig"
+source "drivers/gpu/arm/Kconfig"
+
source "drivers/gpu/drm/Kconfig"
source "drivers/gpu/host1x/Kconfig"
@@ -2496,6 +2498,7 @@ source "drivers/video/omap2/Kconfig"
source "drivers/video/exynos/Kconfig"
source "drivers/video/mmp/Kconfig"
source "drivers/video/backlight/Kconfig"
+source "drivers/video/adf/Kconfig"
if VT
source "drivers/video/console/Kconfig"
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 33869eea4981..3adbd32eb091 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -12,6 +12,7 @@ fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \
modedb.o fbcvt.o
fb-objs := $(fb-y)
+obj-$(CONFIG_ADF) += adf/
obj-$(CONFIG_VT) += console/
obj-$(CONFIG_LOGO) += logo/
obj-y += backlight/
diff --git a/drivers/video/adf/Kconfig b/drivers/video/adf/Kconfig
new file mode 100644
index 000000000000..33858b73d8bb
--- /dev/null
+++ b/drivers/video/adf/Kconfig
@@ -0,0 +1,14 @@
+menuconfig ADF
+ depends on SYNC
+ depends on DMA_SHARED_BUFFER
+ tristate "Atomic Display Framework"
+
+menuconfig ADF_FBDEV
+ depends on ADF
+ depends on FB
+ tristate "Helper for implementing the fbdev API in ADF drivers"
+
+menuconfig ADF_MEMBLOCK
+ depends on ADF
+ depends on HAVE_MEMBLOCK
+ tristate "Helper for using memblocks as buffers in ADF drivers"
diff --git a/drivers/video/adf/Makefile b/drivers/video/adf/Makefile
new file mode 100644
index 000000000000..78d0915122f4
--- /dev/null
+++ b/drivers/video/adf/Makefile
@@ -0,0 +1,15 @@
+ccflags-y := -Idrivers/staging/android
+
+CFLAGS_adf.o := -I$(src)
+
+obj-$(CONFIG_ADF) += adf.o \
+ adf_client.o \
+ adf_fops.o \
+ adf_format.o \
+ adf_sysfs.o
+
+obj-$(CONFIG_COMPAT) += adf_fops32.o
+
+obj-$(CONFIG_ADF_FBDEV) += adf_fbdev.o
+
+obj-$(CONFIG_ADF_MEMBLOCK) += adf_memblock.o
diff --git a/drivers/video/adf/adf.c b/drivers/video/adf/adf.c
new file mode 100644
index 000000000000..42c30c05826a
--- /dev/null
+++ b/drivers/video/adf/adf.c
@@ -0,0 +1,1188 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ * adf_modeinfo_{set_name,set_vrefresh} modified from
+ * drivers/gpu/drm/drm_modes.c
+ * adf_format_validate_yuv modified from framebuffer_check in
+ * drivers/gpu/drm/drm_crtc.c
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <video/adf_format.h>
+
+#include "sw_sync.h"
+#include "sync.h"
+
+#include "adf.h"
+#include "adf_fops.h"
+#include "adf_sysfs.h"
+
+#define CREATE_TRACE_POINTS
+#include "adf_trace.h"
+
+#define ADF_SHORT_FENCE_TIMEOUT (1 * MSEC_PER_SEC)
+#define ADF_LONG_FENCE_TIMEOUT (10 * MSEC_PER_SEC)
+
+static DEFINE_IDR(adf_devices);
+
+static void adf_fence_wait(struct adf_device *dev, struct sync_fence *fence)
+{
+ /* sync_fence_wait() dumps debug information on timeout. Experience
+ has shown that if the pipeline gets stuck, a short timeout followed
+ by a longer one provides useful information for debugging. */
+ int err = sync_fence_wait(fence, ADF_SHORT_FENCE_TIMEOUT);
+ if (err >= 0)
+ return;
+
+ if (err == -ETIME)
+ err = sync_fence_wait(fence, ADF_LONG_FENCE_TIMEOUT);
+
+ if (err < 0)
+ dev_warn(&dev->base.dev, "error waiting on fence: %d\n", err);
+}
+
+void adf_buffer_cleanup(struct adf_buffer *buf)
+{
+ size_t i;
+ for (i = 0; i < ARRAY_SIZE(buf->dma_bufs); i++)
+ if (buf->dma_bufs[i])
+ dma_buf_put(buf->dma_bufs[i]);
+
+ if (buf->acquire_fence)
+ sync_fence_put(buf->acquire_fence);
+}
+
+void adf_buffer_mapping_cleanup(struct adf_buffer_mapping *mapping,
+ struct adf_buffer *buf)
+{
+ /* calling adf_buffer_mapping_cleanup() is safe even if mapping is
+ uninitialized or partially-initialized, as long as it was
+ zeroed on allocation */
+ size_t i;
+ for (i = 0; i < ARRAY_SIZE(mapping->sg_tables); i++) {
+ if (mapping->sg_tables[i])
+ dma_buf_unmap_attachment(mapping->attachments[i],
+ mapping->sg_tables[i], DMA_TO_DEVICE);
+ if (mapping->attachments[i])
+ dma_buf_detach(buf->dma_bufs[i],
+ mapping->attachments[i]);
+ }
+}
+
+void adf_post_cleanup(struct adf_device *dev, struct adf_pending_post *post)
+{
+ size_t i;
+
+ if (post->state)
+ dev->ops->state_free(dev, post->state);
+
+ for (i = 0; i < post->config.n_bufs; i++) {
+ adf_buffer_mapping_cleanup(&post->config.mappings[i],
+ &post->config.bufs[i]);
+ adf_buffer_cleanup(&post->config.bufs[i]);
+ }
+
+ kfree(post->config.custom_data);
+ kfree(post->config.mappings);
+ kfree(post->config.bufs);
+ kfree(post);
+}
+
+static void adf_sw_advance_timeline(struct adf_device *dev)
+{
+#ifdef CONFIG_SW_SYNC
+ sw_sync_timeline_inc(dev->timeline, 1);
+#else
+ BUG();
+#endif
+}
+
+static void adf_post_work_func(struct kthread_work *work)
+{
+ struct adf_device *dev =
+ container_of(work, struct adf_device, post_work);
+ struct adf_pending_post *post, *next;
+ struct list_head saved_list;
+
+ mutex_lock(&dev->post_lock);
+ memcpy(&saved_list, &dev->post_list, sizeof(saved_list));
+ list_replace_init(&dev->post_list, &saved_list);
+ mutex_unlock(&dev->post_lock);
+
+ list_for_each_entry_safe(post, next, &saved_list, head) {
+ int i;
+
+ for (i = 0; i < post->config.n_bufs; i++) {
+ struct sync_fence *fence =
+ post->config.bufs[i].acquire_fence;
+ if (fence)
+ adf_fence_wait(dev, fence);
+ }
+
+ dev->ops->post(dev, &post->config, post->state);
+
+ if (dev->ops->advance_timeline)
+ dev->ops->advance_timeline(dev, &post->config,
+ post->state);
+ else
+ adf_sw_advance_timeline(dev);
+
+ list_del(&post->head);
+ if (dev->onscreen)
+ adf_post_cleanup(dev, dev->onscreen);
+ dev->onscreen = post;
+ }
+}
+
+void adf_attachment_free(struct adf_attachment_list *attachment)
+{
+ list_del(&attachment->head);
+ kfree(attachment);
+}
+
+struct adf_event_refcount *adf_obj_find_event_refcount(struct adf_obj *obj,
+ enum adf_event_type type)
+{
+ struct rb_root *root = &obj->event_refcount;
+ struct rb_node **new = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct adf_event_refcount *refcount;
+
+ while (*new) {
+ refcount = container_of(*new, struct adf_event_refcount, node);
+ parent = *new;
+
+ if (refcount->type > type)
+ new = &(*new)->rb_left;
+ else if (refcount->type < type)
+ new = &(*new)->rb_right;
+ else
+ return refcount;
+ }
+
+ refcount = kzalloc(sizeof(*refcount), GFP_KERNEL);
+ if (!refcount)
+ return NULL;
+ refcount->type = type;
+
+ rb_link_node(&refcount->node, parent, new);
+ rb_insert_color(&refcount->node, root);
+ return refcount;
+}
+
+/**
+ * adf_event_get - increase the refcount for an event
+ *
+ * @obj: the object that produces the event
+ * @type: the event type
+ *
+ * ADF will call the object's set_event() op if needed. ops are allowed
+ * to sleep, so adf_event_get() must NOT be called from an atomic context.
+ *
+ * Returns 0 if successful, or -%EINVAL if the object does not support the
+ * requested event type.
+ */
+int adf_event_get(struct adf_obj *obj, enum adf_event_type type)
+{
+ struct adf_event_refcount *refcount;
+ int old_refcount;
+ int ret;
+
+ ret = adf_obj_check_supports_event(obj, type);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&obj->event_lock);
+
+ refcount = adf_obj_find_event_refcount(obj, type);
+ if (!refcount) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ old_refcount = refcount->refcount++;
+
+ if (old_refcount == 0) {
+ obj->ops->set_event(obj, type, true);
+ trace_adf_event_enable(obj, type);
+ }
+
+done:
+ mutex_unlock(&obj->event_lock);
+ return ret;
+}
+EXPORT_SYMBOL(adf_event_get);
+
+/**
+ * adf_event_put - decrease the refcount for an event
+ *
+ * @obj: the object that produces the event
+ * @type: the event type
+ *
+ * ADF will call the object's set_event() op if needed. ops are allowed
+ * to sleep, so adf_event_put() must NOT be called from an atomic context.
+ *
+ * Returns 0 if successful, -%EINVAL if the object does not support the
+ * requested event type, or -%EALREADY if the refcount is already 0.
+ */
+int adf_event_put(struct adf_obj *obj, enum adf_event_type type)
+{
+ struct adf_event_refcount *refcount;
+ int old_refcount;
+ int ret;
+
+ ret = adf_obj_check_supports_event(obj, type);
+ if (ret < 0)
+ return ret;
+
+
+ mutex_lock(&obj->event_lock);
+
+ refcount = adf_obj_find_event_refcount(obj, type);
+ if (!refcount) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ old_refcount = refcount->refcount--;
+
+ if (WARN_ON(old_refcount == 0)) {
+ refcount->refcount++;
+ ret = -EALREADY;
+ } else if (old_refcount == 1) {
+ obj->ops->set_event(obj, type, false);
+ trace_adf_event_disable(obj, type);
+ }
+
+done:
+ mutex_unlock(&obj->event_lock);
+ return ret;
+}
+EXPORT_SYMBOL(adf_event_put);
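+
+/*
+ * Usage sketch (illustrative only, not part of this change): a client that
+ * needs vsync events takes a reference before waiting and drops it when it
+ * is done. "intf" below is a hypothetical, already-initialized interface.
+ *
+ *   err = adf_event_get(&intf->base, ADF_EVENT_VSYNC);
+ *   if (err < 0)
+ *           return err;
+ *   (... consume vsync events ...)
+ *   adf_event_put(&intf->base, ADF_EVENT_VSYNC);
+ */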
+
+/**
+ * adf_vsync_wait - wait for a vsync event on a display interface
+ *
+ * @intf: the display interface
+ * @timeout: timeout in milliseconds (0 = wait indefinitely)
+ *
+ * adf_vsync_wait() may sleep, so it must NOT be called from an atomic context.
+ *
+ * This function returns -%ERESTARTSYS if it is interrupted by a signal.
+ * If @timeout == 0 then this function returns 0 on vsync. If @timeout > 0 then
+ * this function returns the number of remaining jiffies or -%ETIMEDOUT on
+ * timeout.
+ */
+int adf_vsync_wait(struct adf_interface *intf, long timeout)
+{
+ ktime_t timestamp;
+ int ret;
+ unsigned long flags;
+
+ read_lock_irqsave(&intf->vsync_lock, flags);
+ timestamp = intf->vsync_timestamp;
+ read_unlock_irqrestore(&intf->vsync_lock, flags);
+
+ adf_vsync_get(intf);
+ if (timeout) {
+ ret = wait_event_interruptible_timeout(intf->vsync_wait,
+ !ktime_equal(timestamp,
+ intf->vsync_timestamp),
+ msecs_to_jiffies(timeout));
+ if (ret == 0 && ktime_equal(timestamp, intf->vsync_timestamp))
+ ret = -ETIMEDOUT;
+ } else {
+ ret = wait_event_interruptible(intf->vsync_wait,
+ !ktime_equal(timestamp,
+ intf->vsync_timestamp));
+ }
+ adf_vsync_put(intf);
+
+ return ret;
+}
+EXPORT_SYMBOL(adf_vsync_wait);
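+
+/*
+ * Usage sketch (illustrative; "intf" is a hypothetical interface): wait up to
+ * 100 ms for the next vsync and treat a timeout as non-fatal.
+ *
+ *   int ret = adf_vsync_wait(intf, 100);
+ *   if (ret == -ETIMEDOUT)
+ *           dev_dbg(&intf->base.dev, "no vsync within 100 ms\n");
+ *   else if (ret < 0)
+ *           return ret;
+ */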
+
+static void adf_event_queue(struct adf_obj *obj, struct adf_event *event)
+{
+ struct adf_file *file;
+ unsigned long flags;
+
+ trace_adf_event(obj, event->type);
+
+ spin_lock_irqsave(&obj->file_lock, flags);
+
+ list_for_each_entry(file, &obj->file_list, head)
+ if (test_bit(event->type, file->event_subscriptions))
+ adf_file_queue_event(file, event);
+
+ spin_unlock_irqrestore(&obj->file_lock, flags);
+}
+
+/**
+ * adf_event_notify - notify userspace of a driver-private event
+ *
+ * @obj: the ADF object that produced the event
+ * @event: the event
+ *
+ * adf_event_notify() may be called safely from an atomic context. It will
+ * copy @event if needed, so @event may point to a variable on the stack.
+ *
+ * Drivers must NOT call adf_event_notify() for vsync and hotplug events.
+ * ADF provides adf_vsync_notify() and
+ * adf_hotplug_notify_{connected,disconnected}() for these events.
+ */
+int adf_event_notify(struct adf_obj *obj, struct adf_event *event)
+{
+ if (WARN_ON(event->type == ADF_EVENT_VSYNC ||
+ event->type == ADF_EVENT_HOTPLUG))
+ return -EINVAL;
+
+ adf_event_queue(obj, event);
+ return 0;
+}
+EXPORT_SYMBOL(adf_event_notify);
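+
+/*
+ * Usage sketch (illustrative): forwarding a driver-private event whose type
+ * was defined above %ADF_EVENT_DEVICE_CUSTOM. "MY_EVENT_FOO" and "dev" are
+ * hypothetical names, not defined by this patch.
+ *
+ *   struct adf_event event = {
+ *           .type = MY_EVENT_FOO,
+ *           .length = sizeof(event),
+ *   };
+ *
+ *   adf_event_notify(&dev->base, &event);
+ */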
+
+/**
+ * adf_vsync_notify - notify ADF of a display interface's vsync event
+ *
+ * @intf: the display interface
+ * @timestamp: the time the vsync occurred
+ *
+ * adf_vsync_notify() may be called safely from an atomic context.
+ */
+void adf_vsync_notify(struct adf_interface *intf, ktime_t timestamp)
+{
+ unsigned long flags;
+ struct adf_vsync_event event;
+
+ write_lock_irqsave(&intf->vsync_lock, flags);
+ intf->vsync_timestamp = timestamp;
+ write_unlock_irqrestore(&intf->vsync_lock, flags);
+
+ wake_up_interruptible_all(&intf->vsync_wait);
+
+ event.base.type = ADF_EVENT_VSYNC;
+ event.base.length = sizeof(event);
+ event.timestamp = ktime_to_ns(timestamp);
+ adf_event_queue(&intf->base, &event.base);
+}
+EXPORT_SYMBOL(adf_vsync_notify);
+
+void adf_hotplug_notify(struct adf_interface *intf, bool connected,
+ struct drm_mode_modeinfo *modelist, size_t n_modes)
+{
+ unsigned long flags;
+ struct adf_hotplug_event event;
+ struct drm_mode_modeinfo *old_modelist;
+
+ write_lock_irqsave(&intf->hotplug_modelist_lock, flags);
+ old_modelist = intf->modelist;
+ intf->hotplug_detect = connected;
+ intf->modelist = modelist;
+ intf->n_modes = n_modes;
+ write_unlock_irqrestore(&intf->hotplug_modelist_lock, flags);
+
+ kfree(old_modelist);
+
+ event.base.length = sizeof(event);
+ event.base.type = ADF_EVENT_HOTPLUG;
+ event.connected = connected;
+ adf_event_queue(&intf->base, &event.base);
+}
+
+/**
+ * adf_hotplug_notify_connected - notify ADF of a display interface being
+ * connected to a display
+ *
+ * @intf: the display interface
+ * @modelist: hardware modes supported by display
+ * @n_modes: length of modelist
+ *
+ * @modelist is copied as needed, so it may point to a variable on the stack.
+ *
+ * adf_hotplug_notify_connected() may NOT be called safely from an atomic
+ * context.
+ *
+ * Returns 0 on success or error code (<0) on error.
+ */
+int adf_hotplug_notify_connected(struct adf_interface *intf,
+ struct drm_mode_modeinfo *modelist, size_t n_modes)
+{
+ struct drm_mode_modeinfo *modelist_copy;
+
+ if (n_modes > ADF_MAX_MODES)
+ return -ENOMEM;
+
+ modelist_copy = kzalloc(sizeof(modelist_copy[0]) * n_modes,
+ GFP_KERNEL);
+ if (!modelist_copy)
+ return -ENOMEM;
+ memcpy(modelist_copy, modelist, sizeof(modelist_copy[0]) * n_modes);
+
+ adf_hotplug_notify(intf, true, modelist_copy, n_modes);
+ return 0;
+}
+EXPORT_SYMBOL(adf_hotplug_notify_connected);
+
+/**
+ * adf_hotplug_notify_disconnected - notify ADF of a display interface being
+ * disconnected from a display
+ *
+ * @intf: the display interface
+ *
+ * adf_hotplug_notify_disconnected() may be called safely from an atomic
+ * context.
+ */
+void adf_hotplug_notify_disconnected(struct adf_interface *intf)
+{
+ adf_hotplug_notify(intf, false, NULL, 0);
+}
+EXPORT_SYMBOL(adf_hotplug_notify_disconnected);
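+
+/*
+ * Usage sketch (illustrative): a driver's hotplug handler might report the
+ * new connector state like this. "my_intf" and "my_read_modes" are
+ * hypothetical names, not defined by this patch.
+ *
+ *   struct drm_mode_modeinfo modes[4];
+ *   size_t n = my_read_modes(modes, ARRAY_SIZE(modes));
+ *
+ *   if (n)
+ *           adf_hotplug_notify_connected(my_intf, modes, n);
+ *   else
+ *           adf_hotplug_notify_disconnected(my_intf);
+ */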
+
+static int adf_obj_init(struct adf_obj *obj, enum adf_obj_type type,
+ struct idr *idr, struct adf_device *parent,
+ const struct adf_obj_ops *ops, const char *fmt, va_list args)
+{
+ int ret;
+
+ if (ops && ops->supports_event && !ops->set_event) {
+ pr_err("%s: %s implements supports_event but not set_event\n",
+ __func__, adf_obj_type_str(type));
+ return -EINVAL;
+ }
+
+ ret = idr_alloc(idr, obj, 0, 0, GFP_KERNEL);
+ if (ret < 0) {
+ pr_err("%s: allocating object id failed: %d\n", __func__, ret);
+ return ret;
+ }
+ obj->id = ret;
+
+ vscnprintf(obj->name, sizeof(obj->name), fmt, args);
+
+ obj->type = type;
+ obj->ops = ops;
+ obj->parent = parent;
+ mutex_init(&obj->event_lock);
+ obj->event_refcount = RB_ROOT;
+ spin_lock_init(&obj->file_lock);
+ INIT_LIST_HEAD(&obj->file_list);
+ return 0;
+}
+
+static void adf_obj_destroy(struct adf_obj *obj, struct idr *idr)
+{
+ struct rb_node *node = rb_first(&obj->event_refcount);
+
+ while (node) {
+ struct adf_event_refcount *refcount =
+ container_of(node, struct adf_event_refcount,
+ node);
+ rb_erase(&refcount->node, &obj->event_refcount);
+ kfree(refcount);
+ node = rb_first(&obj->event_refcount);
+ }
+
+ mutex_destroy(&obj->event_lock);
+ idr_remove(idr, obj->id);
+}
+
+/**
+ * adf_device_init - initialize ADF-internal data for a display device
+ * and create sysfs entries
+ *
+ * @dev: the display device
+ * @parent: the device's parent device
+ * @ops: the device's associated ops
+ * @fmt: formatting string for the display device's name
+ *
+ * @fmt specifies the device's sysfs filename and the name returned to
+ * userspace through the %ADF_GET_DEVICE_DATA ioctl.
+ *
+ * Returns 0 on success or error code (<0) on failure.
+ */
+int adf_device_init(struct adf_device *dev, struct device *parent,
+ const struct adf_device_ops *ops, const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+
+ if (!ops->validate || !ops->post) {
+ pr_err("%s: device must implement validate and post\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (!ops->complete_fence && !ops->advance_timeline) {
+ if (!IS_ENABLED(CONFIG_SW_SYNC)) {
+ pr_err("%s: device requires sw_sync but it is not enabled in the kernel\n",
+ __func__);
+ return -EINVAL;
+ }
+ } else if (!(ops->complete_fence && ops->advance_timeline)) {
+ pr_err("%s: device must implement both complete_fence and advance_timeline, or implement neither\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ memset(dev, 0, sizeof(*dev));
+
+ va_start(args, fmt);
+ ret = adf_obj_init(&dev->base, ADF_OBJ_DEVICE, &adf_devices, dev,
+ &ops->base, fmt, args);
+ va_end(args);
+ if (ret < 0)
+ return ret;
+
+ dev->dev = parent;
+ dev->ops = ops;
+ idr_init(&dev->overlay_engines);
+ idr_init(&dev->interfaces);
+ mutex_init(&dev->client_lock);
+ INIT_LIST_HEAD(&dev->post_list);
+ mutex_init(&dev->post_lock);
+ init_kthread_worker(&dev->post_worker);
+ INIT_LIST_HEAD(&dev->attached);
+ INIT_LIST_HEAD(&dev->attach_allowed);
+
+ dev->post_thread = kthread_run(kthread_worker_fn,
+ &dev->post_worker, dev->base.name);
+ if (IS_ERR(dev->post_thread)) {
+ ret = PTR_ERR(dev->post_thread);
+ dev->post_thread = NULL;
+
+ pr_err("%s: failed to run config posting thread: %d\n",
+ __func__, ret);
+ goto err;
+ }
+ init_kthread_work(&dev->post_work, adf_post_work_func);
+
+ ret = adf_device_sysfs_init(dev);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ adf_device_destroy(dev);
+ return ret;
+}
+EXPORT_SYMBOL(adf_device_init);
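+
+/*
+ * Usage sketch (illustrative): a platform driver probe might embed a
+ * struct adf_device in its private data and register it like this.
+ * "my_dev" and "my_adf_ops" are hypothetical, not defined by this patch.
+ *
+ *   err = adf_device_init(&my_dev->adf, &pdev->dev, &my_adf_ops, "%s",
+ *                         dev_name(&pdev->dev));
+ *   if (err < 0)
+ *           return err;
+ */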
+
+/**
+ * adf_device_destroy - clean up ADF-internal data for a display device
+ *
+ * @dev: the display device
+ */
+void adf_device_destroy(struct adf_device *dev)
+{
+ struct adf_attachment_list *entry, *next;
+
+ idr_destroy(&dev->interfaces);
+ idr_destroy(&dev->overlay_engines);
+
+ if (dev->post_thread) {
+ flush_kthread_worker(&dev->post_worker);
+ kthread_stop(dev->post_thread);
+ }
+
+ if (dev->onscreen)
+ adf_post_cleanup(dev, dev->onscreen);
+ adf_device_sysfs_destroy(dev);
+ list_for_each_entry_safe(entry, next, &dev->attach_allowed, head) {
+ adf_attachment_free(entry);
+ }
+ list_for_each_entry_safe(entry, next, &dev->attached, head) {
+ adf_attachment_free(entry);
+ }
+ mutex_destroy(&dev->post_lock);
+ mutex_destroy(&dev->client_lock);
+
+ if (dev->timeline)
+ sync_timeline_destroy(&dev->timeline->obj);
+
+ adf_obj_destroy(&dev->base, &adf_devices);
+}
+EXPORT_SYMBOL(adf_device_destroy);
+
+/**
+ * adf_interface_init - initialize ADF-internal data for a display interface
+ * and create sysfs entries
+ *
+ * @intf: the display interface
+ * @dev: the interface's "parent" display device
+ * @type: interface type (see enum @adf_interface_type)
+ * @idx: which interface of type @type;
+ * e.g. interface DSI.1 -> @type=%ADF_INTF_DSI, @idx=1
+ * @flags: informational flags (bitmask of %ADF_INTF_FLAG_* values)
+ * @ops: the interface's associated ops
+ * @fmt: formatting string for the display interface's name
+ *
+ * @dev must have previously been initialized with adf_device_init().
+ *
+ * @fmt affects the name returned to userspace through the
+ * %ADF_GET_INTERFACE_DATA ioctl. It does not affect the sysfs filename,
+ * which is derived from @dev's name.
+ *
+ * Returns 0 on success or error code (<0) on failure.
+ */
+int adf_interface_init(struct adf_interface *intf, struct adf_device *dev,
+ enum adf_interface_type type, u32 idx, u32 flags,
+ const struct adf_interface_ops *ops, const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+ const u32 allowed_flags = ADF_INTF_FLAG_PRIMARY |
+ ADF_INTF_FLAG_EXTERNAL;
+
+ if (dev->n_interfaces == ADF_MAX_INTERFACES) {
+ pr_err("%s: parent device %s has too many interfaces\n",
+ __func__, dev->base.name);
+ return -ENOMEM;
+ }
+
+ if (type >= ADF_INTF_MEMORY && type <= ADF_INTF_TYPE_DEVICE_CUSTOM) {
+ pr_err("%s: invalid interface type %u\n", __func__, type);
+ return -EINVAL;
+ }
+
+ if (flags & ~allowed_flags) {
+ pr_err("%s: invalid interface flags 0x%X\n", __func__,
+ flags & ~allowed_flags);
+ return -EINVAL;
+ }
+
+ memset(intf, 0, sizeof(*intf));
+
+ va_start(args, fmt);
+ ret = adf_obj_init(&intf->base, ADF_OBJ_INTERFACE, &dev->interfaces,
+ dev, ops ? &ops->base : NULL, fmt, args);
+ va_end(args);
+ if (ret < 0)
+ return ret;
+
+ intf->type = type;
+ intf->idx = idx;
+ intf->flags = flags;
+ intf->ops = ops;
+ intf->dpms_state = DRM_MODE_DPMS_OFF;
+ init_waitqueue_head(&intf->vsync_wait);
+ rwlock_init(&intf->vsync_lock);
+ rwlock_init(&intf->hotplug_modelist_lock);
+
+ ret = adf_interface_sysfs_init(intf);
+ if (ret < 0)
+ goto err;
+ dev->n_interfaces++;
+
+ return 0;
+
+err:
+ adf_obj_destroy(&intf->base, &dev->interfaces);
+ return ret;
+}
+EXPORT_SYMBOL(adf_interface_init);
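+
+/*
+ * Usage sketch (illustrative; "my_dev" and "my_intf_ops" are hypothetical):
+ * register the primary DSI panel of a device that was set up with
+ * adf_device_init() above.
+ *
+ *   err = adf_interface_init(&my_dev->intf, &my_dev->adf, ADF_INTF_DSI, 0,
+ *                            ADF_INTF_FLAG_PRIMARY, &my_intf_ops,
+ *                            "dsi.%u", 0);
+ *   if (err < 0)
+ *           goto err_intf;
+ */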
+
+/**
+ * adf_interface_destroy - clean up ADF-internal data for a display interface
+ *
+ * @intf: the display interface
+ */
+void adf_interface_destroy(struct adf_interface *intf)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+ struct adf_attachment_list *entry, *next;
+
+ mutex_lock(&dev->client_lock);
+ list_for_each_entry_safe(entry, next, &dev->attach_allowed, head) {
+ if (entry->attachment.interface == intf) {
+ adf_attachment_free(entry);
+ dev->n_attach_allowed--;
+ }
+ }
+ list_for_each_entry_safe(entry, next, &dev->attached, head) {
+ if (entry->attachment.interface == intf) {
+ adf_device_detach_op(dev,
+ entry->attachment.overlay_engine, intf);
+ adf_attachment_free(entry);
+ dev->n_attached--;
+ }
+ }
+ kfree(intf->modelist);
+ adf_interface_sysfs_destroy(intf);
+ adf_obj_destroy(&intf->base, &dev->interfaces);
+ dev->n_interfaces--;
+ mutex_unlock(&dev->client_lock);
+}
+EXPORT_SYMBOL(adf_interface_destroy);
+
+static bool adf_overlay_engine_has_custom_formats(
+ const struct adf_overlay_engine_ops *ops)
+{
+ size_t i;
+ for (i = 0; i < ops->n_supported_formats; i++)
+ if (!adf_format_is_standard(ops->supported_formats[i]))
+ return true;
+ return false;
+}
+
+/**
+ * adf_overlay_engine_init - initialize ADF-internal data for an
+ * overlay engine and create sysfs entries
+ *
+ * @eng: the overlay engine
+ * @dev: the overlay engine's "parent" display device
+ * @ops: the overlay engine's associated ops
+ * @fmt: formatting string for the overlay engine's name
+ *
+ * @dev must have previously been initialized with adf_device_init().
+ *
+ * @fmt affects the name returned to userspace through the
+ * %ADF_GET_OVERLAY_ENGINE_DATA ioctl. It does not affect the sysfs filename,
+ * which is derived from @dev's name.
+ *
+ * Returns 0 on success or error code (<0) on failure.
+ */
+int adf_overlay_engine_init(struct adf_overlay_engine *eng,
+ struct adf_device *dev,
+ const struct adf_overlay_engine_ops *ops, const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+
+ if (!ops->supported_formats) {
+ pr_err("%s: overlay engine must support at least one format\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (ops->n_supported_formats > ADF_MAX_SUPPORTED_FORMATS) {
+ pr_err("%s: overlay engine supports too many formats\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (adf_overlay_engine_has_custom_formats(ops) &&
+ !dev->ops->validate_custom_format) {
+ pr_err("%s: overlay engine has custom formats but parent device %s does not implement validate_custom_format\n",
+ __func__, dev->base.name);
+ return -EINVAL;
+ }
+
+ memset(eng, 0, sizeof(*eng));
+
+ va_start(args, fmt);
+ ret = adf_obj_init(&eng->base, ADF_OBJ_OVERLAY_ENGINE,
+ &dev->overlay_engines, dev, &ops->base, fmt, args);
+ va_end(args);
+ if (ret < 0)
+ return ret;
+
+ eng->ops = ops;
+
+ ret = adf_overlay_engine_sysfs_init(eng);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ adf_obj_destroy(&eng->base, &dev->overlay_engines);
+ return ret;
+}
+EXPORT_SYMBOL(adf_overlay_engine_init);
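+
+/*
+ * Usage sketch (illustrative; "my_dev" and "my_eng_ops" are hypothetical):
+ *
+ *   err = adf_overlay_engine_init(&my_dev->eng, &my_dev->adf, &my_eng_ops,
+ *                                 "overlay.%u", 0);
+ *   if (err < 0)
+ *           goto err_eng;
+ */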
+
+/**
+ * adf_overlay_engine_destroy - clean up ADF-internal data for an overlay engine
+ *
+ * @eng: the overlay engine
+ */
+void adf_overlay_engine_destroy(struct adf_overlay_engine *eng)
+{
+ struct adf_device *dev = adf_overlay_engine_parent(eng);
+ struct adf_attachment_list *entry, *next;
+
+ mutex_lock(&dev->client_lock);
+ list_for_each_entry_safe(entry, next, &dev->attach_allowed, head) {
+ if (entry->attachment.overlay_engine == eng) {
+ adf_attachment_free(entry);
+ dev->n_attach_allowed--;
+ }
+ }
+ list_for_each_entry_safe(entry, next, &dev->attached, head) {
+ if (entry->attachment.overlay_engine == eng) {
+ adf_device_detach_op(dev, eng,
+ entry->attachment.interface);
+ adf_attachment_free(entry);
+ dev->n_attached--;
+ }
+ }
+ adf_overlay_engine_sysfs_destroy(eng);
+ adf_obj_destroy(&eng->base, &dev->overlay_engines);
+ mutex_unlock(&dev->client_lock);
+}
+EXPORT_SYMBOL(adf_overlay_engine_destroy);
+
+struct adf_attachment_list *adf_attachment_find(struct list_head *list,
+ struct adf_overlay_engine *eng, struct adf_interface *intf)
+{
+ struct adf_attachment_list *entry;
+ list_for_each_entry(entry, list, head) {
+ if (entry->attachment.interface == intf &&
+ entry->attachment.overlay_engine == eng)
+ return entry;
+ }
+ return NULL;
+}
+
+int adf_attachment_validate(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf)
+{
+ struct adf_device *intf_dev = adf_interface_parent(intf);
+ struct adf_device *eng_dev = adf_overlay_engine_parent(eng);
+
+ if (intf_dev != dev) {
+ dev_err(&dev->base.dev, "can't attach interface %s belonging to device %s\n",
+ intf->base.name, intf_dev->base.name);
+ return -EINVAL;
+ }
+
+ if (eng_dev != dev) {
+ dev_err(&dev->base.dev, "can't attach overlay engine %s belonging to device %s\n",
+ eng->base.name, eng_dev->base.name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * adf_attachment_allow - add a new entry to the list of allowed
+ * attachments
+ *
+ * @dev: the parent device
+ * @eng: the overlay engine
+ * @intf: the interface
+ *
+ * adf_attachment_allow() indicates that the underlying display hardware allows
+ * @intf to scan out @eng's output. It is intended to be called at
+ * driver initialization for each supported overlay engine + interface pair.
+ *
+ * Returns 0 on success, -%EALREADY if the entry already exists, or -errno on
+ * any other failure.
+ */
+int adf_attachment_allow(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf)
+{
+ int ret;
+ struct adf_attachment_list *entry = NULL;
+
+ ret = adf_attachment_validate(dev, eng, intf);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&dev->client_lock);
+
+ if (dev->n_attach_allowed == ADF_MAX_ATTACHMENTS) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ if (adf_attachment_find(&dev->attach_allowed, eng, intf)) {
+ ret = -EALREADY;
+ goto done;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ entry->attachment.interface = intf;
+ entry->attachment.overlay_engine = eng;
+ list_add_tail(&entry->head, &dev->attach_allowed);
+ dev->n_attach_allowed++;
+
+done:
+ mutex_unlock(&dev->client_lock);
+ if (ret < 0)
+ kfree(entry);
+
+ return ret;
+}
+EXPORT_SYMBOL(adf_attachment_allow);
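+
+/*
+ * Usage sketch (illustrative): after registering the objects above, a driver
+ * declares which engine/interface pairs the hardware can scan out.
+ * "my_dev" is a hypothetical driver-private structure.
+ *
+ *   err = adf_attachment_allow(&my_dev->adf, &my_dev->eng, &my_dev->intf);
+ *   if (err < 0 && err != -EALREADY)
+ *           goto err_attach;
+ */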
+
+/**
+ * adf_obj_type_str - string representation of an adf_obj_type
+ *
+ * @type: the object type
+ */
+const char *adf_obj_type_str(enum adf_obj_type type)
+{
+ switch (type) {
+ case ADF_OBJ_OVERLAY_ENGINE:
+ return "overlay engine";
+
+ case ADF_OBJ_INTERFACE:
+ return "interface";
+
+ case ADF_OBJ_DEVICE:
+ return "device";
+
+ default:
+ return "unknown";
+ }
+}
+EXPORT_SYMBOL(adf_obj_type_str);
+
+/**
+ * adf_interface_type_str - string representation of an adf_interface's type
+ *
+ * @intf: the interface
+ */
+const char *adf_interface_type_str(struct adf_interface *intf)
+{
+ switch (intf->type) {
+ case ADF_INTF_DSI:
+ return "DSI";
+
+ case ADF_INTF_eDP:
+ return "eDP";
+
+ case ADF_INTF_DPI:
+ return "DPI";
+
+ case ADF_INTF_VGA:
+ return "VGA";
+
+ case ADF_INTF_DVI:
+ return "DVI";
+
+ case ADF_INTF_HDMI:
+ return "HDMI";
+
+ case ADF_INTF_MEMORY:
+ return "memory";
+
+ default:
+ if (intf->type >= ADF_INTF_TYPE_DEVICE_CUSTOM) {
+ if (intf->ops && intf->ops->type_str)
+ return intf->ops->type_str(intf);
+ return "custom";
+ }
+ return "unknown";
+ }
+}
+EXPORT_SYMBOL(adf_interface_type_str);
+
+/**
+ * adf_event_type_str - string representation of an adf_event_type
+ *
+ * @obj: ADF object that produced the event
+ * @type: event type
+ */
+const char *adf_event_type_str(struct adf_obj *obj, enum adf_event_type type)
+{
+ switch (type) {
+ case ADF_EVENT_VSYNC:
+ return "vsync";
+
+ case ADF_EVENT_HOTPLUG:
+ return "hotplug";
+
+ default:
+ if (type >= ADF_EVENT_DEVICE_CUSTOM) {
+ if (obj->ops && obj->ops->event_type_str)
+ return obj->ops->event_type_str(obj, type);
+ return "custom";
+ }
+ return "unknown";
+ }
+}
+EXPORT_SYMBOL(adf_event_type_str);
+
+/**
+ * adf_format_str - string representation of an ADF/DRM fourcc format
+ *
+ * @format: format fourcc
+ * @buf: target buffer for the format's string representation
+ */
+void adf_format_str(u32 format, char buf[ADF_FORMAT_STR_SIZE])
+{
+ buf[0] = format & 0xFF;
+ buf[1] = (format >> 8) & 0xFF;
+ buf[2] = (format >> 16) & 0xFF;
+ buf[3] = (format >> 24) & 0xFF;
+ buf[4] = '\0';
+}
+EXPORT_SYMBOL(adf_format_str);
+
+/**
+ * adf_format_validate_yuv - validate the number and size of planes in buffers
+ * with a custom YUV format.
+ *
+ * @dev: ADF device performing the validation
+ * @buf: buffer to validate
+ * @num_planes: expected number of planes
+ * @hsub: expected horizontal chroma subsampling factor, in pixels
+ * @vsub: expected vertical chroma subsampling factor, in pixels
+ * @cpp: expected bytes per pixel for each plane (length @num_planes)
+ *
+ * adf_format_validate_yuv() is intended to be called as a helper from @dev's
+ * validate_custom_format() op.
+ *
+ * Returns 0 if @buf has the expected number of planes and each plane
+ * has sufficient size, or -EINVAL otherwise.
+ */
+int adf_format_validate_yuv(struct adf_device *dev, struct adf_buffer *buf,
+ u8 num_planes, u8 hsub, u8 vsub, u8 cpp[])
+{
+ u8 i;
+
+ if (num_planes != buf->n_planes) {
+ char format_str[ADF_FORMAT_STR_SIZE];
+ adf_format_str(buf->format, format_str);
+ dev_err(&dev->base.dev, "%u planes expected for format %s but %u planes provided\n",
+ num_planes, format_str, buf->n_planes);
+ return -EINVAL;
+ }
+
+ if (buf->w == 0 || buf->w % hsub) {
+ dev_err(&dev->base.dev, "bad buffer width %u\n", buf->w);
+ return -EINVAL;
+ }
+
+ if (buf->h == 0 || buf->h % vsub) {
+ dev_err(&dev->base.dev, "bad buffer height %u\n", buf->h);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < num_planes; i++) {
+ u32 width = buf->w / (i != 0 ? hsub : 1);
+ u32 height = buf->h / (i != 0 ? vsub : 1);
+ u8 cpp = adf_format_plane_cpp(buf->format, i);
+ u32 last_line_size;
+
+ if (buf->pitch[i] < (u64) width * cpp) {
+ dev_err(&dev->base.dev, "plane %u pitch is shorter than buffer width (pitch = %u, width = %u, bpp = %u)\n",
+ i, buf->pitch[i], width, cpp * 8);
+ return -EINVAL;
+ }
+
+ switch (dev->ops->quirks.buffer_padding) {
+ case ADF_BUFFER_PADDED_TO_PITCH:
+ last_line_size = buf->pitch[i];
+ break;
+
+ case ADF_BUFFER_UNPADDED:
+ last_line_size = width * cpp;
+ break;
+
+ default:
+ BUG();
+ }
+
+ if ((u64) (height - 1) * buf->pitch[i] + last_line_size +
+ buf->offset[i] > buf->dma_bufs[i]->size) {
+ dev_err(&dev->base.dev, "plane %u buffer too small (height = %u, pitch = %u, offset = %u, size = %zu)\n",
+ i, height, buf->pitch[i],
+ buf->offset[i], buf->dma_bufs[i]->size);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(adf_format_validate_yuv);
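+
+/*
+ * Usage sketch (illustrative): a validate_custom_format() op for a
+ * hypothetical two-plane YUV layout with 2x2 chroma subsampling, one byte
+ * per luma pixel and two bytes per interleaved chroma sample, could simply
+ * delegate to the helper.
+ *
+ *   static int my_validate_custom_format(struct adf_device *dev,
+ *                                        struct adf_buffer *buf)
+ *   {
+ *           u8 cpp[] = { 1, 2 };
+ *
+ *           return adf_format_validate_yuv(dev, buf, 2, 2, 2, cpp);
+ *   }
+ */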
+
+/**
+ * adf_modeinfo_set_name - sets the name of a mode from its display resolution
+ *
+ * @mode: mode
+ *
+ * adf_modeinfo_set_name() fills in @mode->name in the format
+ * "[hdisplay]x[vdisplay](i)". It is intended to help drivers create
+ * ADF/DRM-style modelists from other mode formats.
+ */
+void adf_modeinfo_set_name(struct drm_mode_modeinfo *mode)
+{
+ bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE;
+
+ snprintf(mode->name, DRM_DISPLAY_MODE_LEN, "%dx%d%s",
+ mode->hdisplay, mode->vdisplay,
+ interlaced ? "i" : "");
+}
+EXPORT_SYMBOL(adf_modeinfo_set_name);
+
+/**
+ * adf_modeinfo_set_vrefresh - sets the vrefresh of a mode from its other
+ * timing data
+ *
+ * @mode: mode
+ *
+ * adf_modeinfo_set_vrefresh() calculates @mode->vrefresh from
+ * @mode->{h,v}display and @mode->flags. It is intended to help drivers
+ * create ADF/DRM-style modelists from other mode formats.
+ */
+void adf_modeinfo_set_vrefresh(struct drm_mode_modeinfo *mode)
+{
+ int refresh = 0;
+ unsigned int calc_val;
+
+ if (mode->vrefresh > 0)
+ return;
+
+ if (mode->htotal <= 0 || mode->vtotal <= 0)
+ return;
+
+ /* work out vrefresh the value will be x1000 */
+ calc_val = (mode->clock * 1000);
+ calc_val /= mode->htotal;
+ refresh = (calc_val + mode->vtotal / 2) / mode->vtotal;
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ refresh *= 2;
+ if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+ refresh /= 2;
+ if (mode->vscan > 1)
+ refresh /= mode->vscan;
+
+ mode->vrefresh = refresh;
+}
+EXPORT_SYMBOL(adf_modeinfo_set_vrefresh);
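+
+/*
+ * Usage sketch (illustrative): fill in the raw timings of a 1080p mode and
+ * let the helpers derive the name and refresh rate. The timings below are
+ * example values only.
+ *
+ *   struct drm_mode_modeinfo mode = {
+ *           .clock = 148500,
+ *           .hdisplay = 1920, .htotal = 2200,
+ *           .vdisplay = 1080, .vtotal = 1125,
+ *   };
+ *
+ *   adf_modeinfo_set_name(&mode);      (-> name "1920x1080")
+ *   adf_modeinfo_set_vrefresh(&mode);  (-> vrefresh == 60)
+ */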
+
+static int __init adf_init(void)
+{
+ int err;
+
+ err = adf_sysfs_init();
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit adf_exit(void)
+{
+ adf_sysfs_destroy();
+}
+
+module_init(adf_init);
+module_exit(adf_exit);
diff --git a/drivers/video/adf/adf.h b/drivers/video/adf/adf.h
new file mode 100644
index 000000000000..3bcf1fabc23c
--- /dev/null
+++ b/drivers/video/adf/adf.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __VIDEO_ADF_ADF_H
+#define __VIDEO_ADF_ADF_H
+
+#include <linux/idr.h>
+#include <linux/list.h>
+#include <video/adf.h>
+#include "sync.h"
+
+struct adf_event_refcount {
+ struct rb_node node;
+ enum adf_event_type type;
+ int refcount;
+};
+
+void adf_buffer_cleanup(struct adf_buffer *buf);
+void adf_buffer_mapping_cleanup(struct adf_buffer_mapping *mapping,
+ struct adf_buffer *buf);
+void adf_post_cleanup(struct adf_device *dev, struct adf_pending_post *post);
+
+struct adf_attachment_list *adf_attachment_find(struct list_head *list,
+ struct adf_overlay_engine *eng, struct adf_interface *intf);
+int adf_attachment_validate(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf);
+void adf_attachment_free(struct adf_attachment_list *attachment);
+
+struct adf_event_refcount *adf_obj_find_event_refcount(struct adf_obj *obj,
+ enum adf_event_type type);
+
+static inline int adf_obj_check_supports_event(struct adf_obj *obj,
+ enum adf_event_type type)
+{
+ if (!obj->ops || !obj->ops->supports_event)
+ return -EOPNOTSUPP;
+ if (!obj->ops->supports_event(obj, type))
+ return -EINVAL;
+ return 0;
+}
+
+static inline int adf_device_attach_op(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf)
+{
+ if (!dev->ops->attach)
+ return 0;
+
+ return dev->ops->attach(dev, eng, intf);
+}
+
+static inline int adf_device_detach_op(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf)
+{
+ if (!dev->ops->detach)
+ return 0;
+
+ return dev->ops->detach(dev, eng, intf);
+}
+
+#endif /* __VIDEO_ADF_ADF_H */
diff --git a/drivers/video/adf/adf_client.c b/drivers/video/adf/adf_client.c
new file mode 100644
index 000000000000..8061d8e6b9fb
--- /dev/null
+++ b/drivers/video/adf/adf_client.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+#include "sw_sync.h"
+
+#include <video/adf.h>
+#include <video/adf_client.h>
+#include <video/adf_format.h>
+
+#include "adf.h"
+
+static inline bool vsync_active(u8 state)
+{
+ return state == DRM_MODE_DPMS_ON || state == DRM_MODE_DPMS_STANDBY;
+}
+
+/**
+ * adf_interface_blank - set interface's DPMS state
+ *
+ * @intf: the interface
+ * @state: one of %DRM_MODE_DPMS_*
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+int adf_interface_blank(struct adf_interface *intf, u8 state)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+ u8 prev_state;
+ bool disable_vsync;
+ bool enable_vsync;
+ int ret = 0;
+ struct adf_event_refcount *vsync_refcount;
+
+ if (!intf->ops || !intf->ops->blank)
+ return -EOPNOTSUPP;
+
+ if (state > DRM_MODE_DPMS_OFF)
+ return -EINVAL;
+
+ mutex_lock(&dev->client_lock);
+ if (state != DRM_MODE_DPMS_ON)
+ flush_kthread_worker(&dev->post_worker);
+ mutex_lock(&intf->base.event_lock);
+
+ vsync_refcount = adf_obj_find_event_refcount(&intf->base,
+ ADF_EVENT_VSYNC);
+ if (!vsync_refcount) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ prev_state = intf->dpms_state;
+ if (prev_state == state) {
+ ret = -EBUSY;
+ goto done;
+ }
+
+ disable_vsync = vsync_active(prev_state) &&
+ !vsync_active(state) &&
+ vsync_refcount->refcount;
+ enable_vsync = !vsync_active(prev_state) &&
+ vsync_active(state) &&
+ vsync_refcount->refcount;
+
+ if (disable_vsync)
+ intf->base.ops->set_event(&intf->base, ADF_EVENT_VSYNC,
+ false);
+
+ ret = intf->ops->blank(intf, state);
+ if (ret < 0) {
+ if (disable_vsync)
+ intf->base.ops->set_event(&intf->base, ADF_EVENT_VSYNC,
+ true);
+ goto done;
+ }
+
+ if (enable_vsync)
+ intf->base.ops->set_event(&intf->base, ADF_EVENT_VSYNC,
+ true);
+
+ intf->dpms_state = state;
+done:
+ mutex_unlock(&intf->base.event_lock);
+ mutex_unlock(&dev->client_lock);
+ return ret;
+}
+EXPORT_SYMBOL(adf_interface_blank);
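+
+/*
+ * Usage sketch (illustrative; "intf" is a hypothetical interface): blank the
+ * display, ignoring the "already in this state" case.
+ *
+ *   err = adf_interface_blank(intf, DRM_MODE_DPMS_OFF);
+ *   if (err < 0 && err != -EBUSY)
+ *           return err;
+ */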
+
+/**
+ * adf_interface_dpms_state - get interface's current DPMS state
+ *
+ * @intf: the interface
+ *
+ * Returns one of %DRM_MODE_DPMS_*.
+ */
+u8 adf_interface_dpms_state(struct adf_interface *intf)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+ u8 dpms_state;
+
+ mutex_lock(&dev->client_lock);
+ dpms_state = intf->dpms_state;
+ mutex_unlock(&dev->client_lock);
+
+ return dpms_state;
+}
+EXPORT_SYMBOL(adf_interface_dpms_state);
+
+/**
+ * adf_interface_current_mode - get interface's current display mode
+ *
+ * @intf: the interface
+ * @mode: returns the current mode
+ */
+void adf_interface_current_mode(struct adf_interface *intf,
+ struct drm_mode_modeinfo *mode)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+
+ mutex_lock(&dev->client_lock);
+ memcpy(mode, &intf->current_mode, sizeof(*mode));
+ mutex_unlock(&dev->client_lock);
+}
+EXPORT_SYMBOL(adf_interface_current_mode);
+
+/**
+ * adf_interface_modelist - get interface's modelist
+ *
+ * @intf: the interface
+ * @modelist: storage for the modelist (optional)
+ * @n_modes: length of @modelist
+ *
+ * If @modelist is not NULL, adf_interface_modelist() will copy up to @n_modes
+ * modelist entries into @modelist.
+ *
+ * Returns the length of the modelist.
+ */
+size_t adf_interface_modelist(struct adf_interface *intf,
+ struct drm_mode_modeinfo *modelist, size_t n_modes)
+{
+ unsigned long flags;
+ size_t retval;
+
+ read_lock_irqsave(&intf->hotplug_modelist_lock, flags);
+ if (modelist)
+ memcpy(modelist, intf->modelist, sizeof(modelist[0]) *
+ min(n_modes, intf->n_modes));
+ retval = intf->n_modes;
+ read_unlock_irqrestore(&intf->hotplug_modelist_lock, flags);
+
+ return retval;
+}
+EXPORT_SYMBOL(adf_interface_modelist);
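+
+/*
+ * Usage sketch (illustrative): the usual two-call pattern, first querying the
+ * modelist length and then copying it. "intf" is a hypothetical interface.
+ *
+ *   size_t n_modes = adf_interface_modelist(intf, NULL, 0);
+ *   struct drm_mode_modeinfo *modes = kcalloc(n_modes, sizeof(*modes),
+ *                                             GFP_KERNEL);
+ *
+ *   if (!modes)
+ *           return -ENOMEM;
+ *   n_modes = adf_interface_modelist(intf, modes, n_modes);
+ */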
+
+/**
+ * adf_interface_set_mode - set interface's display mode
+ *
+ * @intf: the interface
+ * @mode: the new mode
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+int adf_interface_set_mode(struct adf_interface *intf,
+ struct drm_mode_modeinfo *mode)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+ int ret = 0;
+
+ if (!intf->ops || !intf->ops->modeset)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&dev->client_lock);
+ flush_kthread_worker(&dev->post_worker);
+
+ ret = intf->ops->modeset(intf, mode);
+ if (ret < 0)
+ goto done;
+
+ memcpy(&intf->current_mode, mode, sizeof(*mode));
+done:
+ mutex_unlock(&dev->client_lock);
+ return ret;
+}
+EXPORT_SYMBOL(adf_interface_set_mode);
+
+/**
+ * adf_interface_screen_size - get size of screen connected to interface
+ *
+ * @intf: the interface
+ * @width_mm: returns the screen width in mm
+ * @height_mm: returns the screen height in mm
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+int adf_interface_get_screen_size(struct adf_interface *intf, u16 *width_mm,
+ u16 *height_mm)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+ int ret;
+
+ if (!intf->ops || !intf->ops->screen_size)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&dev->client_lock);
+ ret = intf->ops->screen_size(intf, width_mm, height_mm);
+ mutex_unlock(&dev->client_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(adf_interface_get_screen_size);
+
+/**
+ * adf_overlay_engine_supports_format - returns whether a format is in an
+ * overlay engine's supported list
+ *
+ * @eng: the overlay engine
+ * @format: format fourcc
+ */
+bool adf_overlay_engine_supports_format(struct adf_overlay_engine *eng,
+ u32 format)
+{
+ size_t i;
+ for (i = 0; i < eng->ops->n_supported_formats; i++)
+ if (format == eng->ops->supported_formats[i])
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(adf_overlay_engine_supports_format);
+
+static int adf_buffer_validate(struct adf_buffer *buf)
+{
+ struct adf_overlay_engine *eng = buf->overlay_engine;
+ struct device *dev = &eng->base.dev;
+ struct adf_device *parent = adf_overlay_engine_parent(eng);
+ u8 hsub, vsub, num_planes, cpp[ADF_MAX_PLANES], i;
+
+ if (!adf_overlay_engine_supports_format(eng, buf->format)) {
+ char format_str[ADF_FORMAT_STR_SIZE];
+ adf_format_str(buf->format, format_str);
+ dev_err(dev, "unsupported format %s\n", format_str);
+ return -EINVAL;
+ }
+
+ if (!adf_format_is_standard(buf->format))
+ return parent->ops->validate_custom_format(parent, buf);
+
+ hsub = adf_format_horz_chroma_subsampling(buf->format);
+ vsub = adf_format_vert_chroma_subsampling(buf->format);
+ num_planes = adf_format_num_planes(buf->format);
+ for (i = 0; i < num_planes; i++)
+ cpp[i] = adf_format_plane_cpp(buf->format, i);
+
+ return adf_format_validate_yuv(parent, buf, num_planes, hsub, vsub,
+ cpp);
+}
+
+static int adf_buffer_map(struct adf_device *dev, struct adf_buffer *buf,
+ struct adf_buffer_mapping *mapping)
+{
+ int ret = 0;
+ size_t i;
+
+ for (i = 0; i < buf->n_planes; i++) {
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sg_table;
+
+ attachment = dma_buf_attach(buf->dma_bufs[i], dev->dev);
+ if (IS_ERR(attachment)) {
+ ret = PTR_ERR(attachment);
+ dev_err(&dev->base.dev, "attaching plane %zu failed: %d\n",
+ i, ret);
+ goto done;
+ }
+ mapping->attachments[i] = attachment;
+
+ sg_table = dma_buf_map_attachment(attachment, DMA_TO_DEVICE);
+ if (IS_ERR(sg_table)) {
+ ret = PTR_ERR(sg_table);
+ dev_err(&dev->base.dev, "mapping plane %zu failed: %d",
+ i, ret);
+ goto done;
+ } else if (!sg_table) {
+ ret = -ENOMEM;
+ dev_err(&dev->base.dev, "mapping plane %zu failed\n",
+ i);
+ goto done;
+ }
+ mapping->sg_tables[i] = sg_table;
+ }
+
+done:
+ if (ret < 0)
+ adf_buffer_mapping_cleanup(mapping, buf);
+
+ return ret;
+}
+
+static struct sync_fence *adf_sw_complete_fence(struct adf_device *dev)
+{
+ struct sync_pt *pt;
+ struct sync_fence *complete_fence;
+
+ if (!dev->timeline) {
+ dev->timeline = sw_sync_timeline_create(dev->base.name);
+ if (!dev->timeline)
+ return ERR_PTR(-ENOMEM);
+ dev->timeline_max = 1;
+ }
+
+ dev->timeline_max++;
+ pt = sw_sync_pt_create(dev->timeline, dev->timeline_max);
+ if (!pt)
+ goto err_pt_create;
+ complete_fence = sync_fence_create(dev->base.name, pt);
+ if (!complete_fence)
+ goto err_fence_create;
+
+ return complete_fence;
+
+err_fence_create:
+ sync_pt_free(pt);
+err_pt_create:
+ dev->timeline_max--;
+ return ERR_PTR(-ENOSYS);
+}
+
+/**
+ * adf_device_post - flip to a new set of buffers
+ *
+ * @dev: device targeted by the flip
+ * @intfs: interfaces targeted by the flip
+ * @n_intfs: number of targeted interfaces
+ * @bufs: description of buffers displayed
+ * @n_bufs: number of buffers displayed
+ * @custom_data: driver-private data
+ * @custom_data_size: size of driver-private data
+ *
+ * adf_device_post() will copy @intfs, @bufs, and @custom_data, so they may
+ * point to variables on the stack. adf_device_post() also takes its own
+ * reference on each of the dma-bufs in @bufs. The adf_device_post_nocopy()
+ * variant transfers ownership of these resources to ADF instead.
+ *
+ * On success, returns a sync fence which signals when the buffers are removed
+ * from the screen. On failure, returns ERR_PTR(-errno).
+ */
+struct sync_fence *adf_device_post(struct adf_device *dev,
+ struct adf_interface **intfs, size_t n_intfs,
+ struct adf_buffer *bufs, size_t n_bufs, void *custom_data,
+ size_t custom_data_size)
+{
+ struct adf_interface **intfs_copy = NULL;
+ struct adf_buffer *bufs_copy = NULL;
+ void *custom_data_copy = NULL;
+ struct sync_fence *ret;
+ size_t i;
+
+ intfs_copy = kzalloc(sizeof(intfs_copy[0]) * n_intfs, GFP_KERNEL);
+ if (!intfs_copy)
+ return ERR_PTR(-ENOMEM);
+
+ bufs_copy = kzalloc(sizeof(bufs_copy[0]) * n_bufs, GFP_KERNEL);
+ if (!bufs_copy) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err_alloc;
+ }
+
+ custom_data_copy = kzalloc(custom_data_size, GFP_KERNEL);
+ if (!custom_data_copy) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err_alloc;
+ }
+
+ for (i = 0; i < n_bufs; i++) {
+ size_t j;
+ for (j = 0; j < bufs[i].n_planes; j++)
+ get_dma_buf(bufs[i].dma_bufs[j]);
+ }
+
+ memcpy(intfs_copy, intfs, sizeof(intfs_copy[0]) * n_intfs);
+ memcpy(bufs_copy, bufs, sizeof(bufs_copy[0]) * n_bufs);
+ memcpy(custom_data_copy, custom_data, custom_data_size);
+
+ ret = adf_device_post_nocopy(dev, intfs_copy, n_intfs, bufs_copy,
+ n_bufs, custom_data_copy, custom_data_size);
+ if (IS_ERR(ret))
+ goto err_post;
+
+ return ret;
+
+err_post:
+ for (i = 0; i < n_bufs; i++) {
+ size_t j;
+ for (j = 0; j < bufs[i].n_planes; j++)
+ dma_buf_put(bufs[i].dma_bufs[j]);
+ }
+err_alloc:
+ kfree(custom_data_copy);
+ kfree(bufs_copy);
+ kfree(intfs_copy);
+ return ret;
+}
+EXPORT_SYMBOL(adf_device_post);
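+
+/*
+ * Usage sketch (illustrative; "dev", "intf" and "buf" are hypothetical,
+ * already-initialized objects): flip a single buffer and wait for it to
+ * leave the screen before reusing it.
+ *
+ *   struct adf_interface *intfs[] = { intf };
+ *   struct sync_fence *fence;
+ *
+ *   fence = adf_device_post(dev, intfs, 1, &buf, 1, NULL, 0);
+ *   if (IS_ERR(fence))
+ *           return PTR_ERR(fence);
+ *   sync_fence_wait(fence, -1);
+ *   sync_fence_put(fence);
+ */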
+
+/**
+ * adf_device_post_nocopy - flip to a new set of buffers
+ *
+ * adf_device_post_nocopy() has the same behavior as adf_device_post(),
+ * except ADF does not copy @intfs, @bufs, or @custom_data, and it does
+ * not take an extra reference on the dma-bufs in @bufs.
+ *
+ * @intfs, @bufs, and @custom_data must point to buffers allocated by
+ * kmalloc(). On success, ADF takes ownership of these buffers and the dma-bufs
+ * in @bufs, and will kfree()/dma_buf_put() them when they are no longer needed.
+ * On failure, adf_device_post_nocopy() does NOT take ownership of these
+ * buffers or the dma-bufs, and the caller must clean them up.
+ *
+ * adf_device_post_nocopy() is mainly intended for implementing ADF's ioctls.
+ * Clients may find the nocopy variant useful in limited cases, but most should
+ * call adf_device_post() instead.
+ */
+struct sync_fence *adf_device_post_nocopy(struct adf_device *dev,
+ struct adf_interface **intfs, size_t n_intfs,
+ struct adf_buffer *bufs, size_t n_bufs,
+ void *custom_data, size_t custom_data_size)
+{
+ struct adf_pending_post *cfg;
+ struct adf_buffer_mapping *mappings;
+ struct sync_fence *ret;
+ size_t i;
+ int err;
+
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return ERR_PTR(-ENOMEM);
+
+ mappings = kzalloc(sizeof(mappings[0]) * n_bufs, GFP_KERNEL);
+ if (!mappings) {
+ ret = ERR_PTR(-ENOMEM);
+ goto err_alloc;
+ }
+
+ mutex_lock(&dev->client_lock);
+
+ for (i = 0; i < n_bufs; i++) {
+ err = adf_buffer_validate(&bufs[i]);
+ if (err < 0) {
+ ret = ERR_PTR(err);
+ goto err_buf;
+ }
+
+ err = adf_buffer_map(dev, &bufs[i], &mappings[i]);
+ if (err < 0) {
+ ret = ERR_PTR(err);
+ goto err_buf;
+ }
+ }
+
+ INIT_LIST_HEAD(&cfg->head);
+ cfg->config.n_bufs = n_bufs;
+ cfg->config.bufs = bufs;
+ cfg->config.mappings = mappings;
+ cfg->config.custom_data = custom_data;
+ cfg->config.custom_data_size = custom_data_size;
+
+ err = dev->ops->validate(dev, &cfg->config, &cfg->state);
+ if (err < 0) {
+ ret = ERR_PTR(err);
+ goto err_buf;
+ }
+
+ mutex_lock(&dev->post_lock);
+
+ if (dev->ops->complete_fence)
+ ret = dev->ops->complete_fence(dev, &cfg->config,
+ cfg->state);
+ else
+ ret = adf_sw_complete_fence(dev);
+
+ if (IS_ERR(ret))
+ goto err_fence;
+
+ list_add_tail(&cfg->head, &dev->post_list);
+ queue_kthread_work(&dev->post_worker, &dev->post_work);
+ mutex_unlock(&dev->post_lock);
+ mutex_unlock(&dev->client_lock);
+ kfree(intfs);
+ return ret;
+
+err_fence:
+ mutex_unlock(&dev->post_lock);
+
+err_buf:
+ for (i = 0; i < n_bufs; i++)
+ adf_buffer_mapping_cleanup(&mappings[i], &bufs[i]);
+
+ mutex_unlock(&dev->client_lock);
+ kfree(mappings);
+
+err_alloc:
+ kfree(cfg);
+ return ret;
+}
+EXPORT_SYMBOL(adf_device_post_nocopy);
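+
+/*
+ * Example (sketch of a hypothetical caller): error handling with the nocopy
+ * variant.  The arrays must come from kmalloc(); ADF frees them only on
+ * success, so on failure the caller still owns them and any dma-buf
+ * references it holds, much like adf_device_post_config() in adf_fops.c
+ * does on its error path.
+ *
+ *	fence = adf_device_post_nocopy(dev, intfs, n_intfs, bufs, n_bufs,
+ *			custom_data, custom_data_size);
+ *	if (IS_ERR(fence)) {
+ *		for (i = 0; i < n_bufs; i++)
+ *			adf_buffer_cleanup(&bufs[i]);
+ *		kfree(custom_data);
+ *		kfree(bufs);
+ *		kfree(intfs);
+ *		return PTR_ERR(fence);
+ *	}
+ */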
+
+static void adf_attachment_list_to_array(struct adf_device *dev,
+ struct list_head *src, struct adf_attachment *dst, size_t size)
+{
+ struct adf_attachment_list *entry;
+ size_t i = 0;
+
+ if (!dst)
+ return;
+
+ list_for_each_entry(entry, src, head) {
+ if (i == size)
+ return;
+ dst[i] = entry->attachment;
+ i++;
+ }
+}
+
+/**
+ * adf_device_attachments - get device's list of active attachments
+ *
+ * @dev: the device
+ * @attachments: storage for the attachment list (optional)
+ * @n_attachments: length of @attachments
+ *
+ * If @attachments is not NULL, adf_device_attachments() will copy up to
+ * @n_attachments entries into @attachments.
+ *
+ * Returns the length of the active attachment list.
+ */
+size_t adf_device_attachments(struct adf_device *dev,
+ struct adf_attachment *attachments, size_t n_attachments)
+{
+ size_t retval;
+
+ mutex_lock(&dev->client_lock);
+ adf_attachment_list_to_array(dev, &dev->attached, attachments,
+ n_attachments);
+ retval = dev->n_attached;
+ mutex_unlock(&dev->client_lock);
+
+ return retval;
+}
+EXPORT_SYMBOL(adf_device_attachments);
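+
+/*
+ * Example (sketch): the usual two-step query.  Call once with a NULL buffer
+ * to learn the length, then again to copy the entries; "dev" is a
+ * hypothetical client-owned device, and the list may change between calls,
+ * hence the min().
+ *
+ *	size_t n = adf_device_attachments(dev, NULL, 0);
+ *	struct adf_attachment *list = kcalloc(n, sizeof(*list), GFP_KERNEL);
+ *
+ *	if (list)
+ *		n = min(n, adf_device_attachments(dev, list, n));
+ */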
+
+/**
+ * adf_device_attachments_allowed - get device's list of allowed attachments
+ *
+ * @dev: the device
+ * @attachments: storage for the attachment list (optional)
+ * @n_attachments: length of @attachments
+ *
+ * If @attachments is not NULL, adf_device_attachments_allowed() will copy up to
+ * @n_attachments entries into @attachments.
+ *
+ * Returns the length of the allowed attachment list.
+ */
+size_t adf_device_attachments_allowed(struct adf_device *dev,
+ struct adf_attachment *attachments, size_t n_attachments)
+{
+ size_t retval;
+
+ mutex_lock(&dev->client_lock);
+ adf_attachment_list_to_array(dev, &dev->attach_allowed, attachments,
+ n_attachments);
+ retval = dev->n_attach_allowed;
+ mutex_unlock(&dev->client_lock);
+
+ return retval;
+}
+EXPORT_SYMBOL(adf_device_attachments_allowed);
+
+/**
+ * adf_device_attached - return whether an overlay engine and interface are
+ * attached
+ *
+ * @dev: the parent device
+ * @eng: the overlay engine
+ * @intf: the interface
+ */
+bool adf_device_attached(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf)
+{
+ struct adf_attachment_list *attachment;
+
+ mutex_lock(&dev->client_lock);
+ attachment = adf_attachment_find(&dev->attached, eng, intf);
+ mutex_unlock(&dev->client_lock);
+
+ return attachment != NULL;
+}
+EXPORT_SYMBOL(adf_device_attached);
+
+/**
+ * adf_device_attach_allowed - return whether the ADF device supports attaching
+ * an overlay engine and interface
+ *
+ * @dev: the parent device
+ * @eng: the overlay engine
+ * @intf: the interface
+ */
+bool adf_device_attach_allowed(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf)
+{
+ struct adf_attachment_list *attachment;
+
+ mutex_lock(&dev->client_lock);
+ attachment = adf_attachment_find(&dev->attach_allowed, eng, intf);
+ mutex_unlock(&dev->client_lock);
+
+ return attachment != NULL;
+}
+EXPORT_SYMBOL(adf_device_attach_allowed);
+
+/**
+ * adf_device_attach - attach an overlay engine to an interface
+ *
+ * @dev: the parent device
+ * @eng: the overlay engine
+ * @intf: the interface
+ *
+ * Returns 0 on success, -%EINVAL if attaching @intf and @eng is not allowed,
+ * -%EALREADY if @intf and @eng are already attached, or -errno on any other
+ * failure.
+ */
+int adf_device_attach(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf)
+{
+ int ret;
+ struct adf_attachment_list *attachment = NULL;
+
+ ret = adf_attachment_validate(dev, eng, intf);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&dev->client_lock);
+
+ if (dev->n_attached == ADF_MAX_ATTACHMENTS) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ if (!adf_attachment_find(&dev->attach_allowed, eng, intf)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ if (adf_attachment_find(&dev->attached, eng, intf)) {
+ ret = -EALREADY;
+ goto done;
+ }
+
+ ret = adf_device_attach_op(dev, eng, intf);
+ if (ret < 0)
+ goto done;
+
+ attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
+ if (!attachment) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ attachment->attachment.interface = intf;
+ attachment->attachment.overlay_engine = eng;
+ list_add_tail(&attachment->head, &dev->attached);
+ dev->n_attached++;
+
+done:
+ mutex_unlock(&dev->client_lock);
+ if (ret < 0)
+ kfree(attachment);
+
+ return ret;
+}
+EXPORT_SYMBOL(adf_device_attach);
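+
+/*
+ * Example (sketch): clients that only need the attachment to exist can
+ * treat -EALREADY as success, as adf_fbdev_open() in adf_fbdev.c does.
+ *
+ *	err = adf_device_attach(dev, eng, intf);
+ *	if (err < 0 && err != -EALREADY)
+ *		return err;
+ */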
+
+/**
+ * adf_device_detach - detach an overlay engine from an interface
+ *
+ * @dev: the parent device
+ * @eng: the overlay engine
+ * @intf: the interface
+ *
+ * Returns 0 on success, -%EINVAL if @intf and @eng are not attached,
+ * or -errno on any other failure.
+ */
+int adf_device_detach(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf)
+{
+ int ret;
+ struct adf_attachment_list *attachment;
+
+ ret = adf_attachment_validate(dev, eng, intf);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&dev->client_lock);
+
+ attachment = adf_attachment_find(&dev->attached, eng, intf);
+ if (!attachment) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ ret = adf_device_detach_op(dev, eng, intf);
+ if (ret < 0)
+ goto done;
+
+ adf_attachment_free(attachment);
+ dev->n_attached--;
+done:
+ mutex_unlock(&dev->client_lock);
+ return ret;
+}
+EXPORT_SYMBOL(adf_device_detach);
+
+/**
+ * adf_interface_simple_buffer_alloc - allocate a simple buffer
+ *
+ * @intf: target interface
+ * @w: width in pixels
+ * @h: height in pixels
+ * @format: format fourcc
+ * @dma_buf: returns the allocated buffer
+ * @offset: returns the byte offset of the allocated buffer's first pixel
+ * @pitch: returns the allocated buffer's pitch
+ *
+ * See &struct adf_simple_buffer_alloc for a description of simple buffers and
+ * their limitations.
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+int adf_interface_simple_buffer_alloc(struct adf_interface *intf, u16 w, u16 h,
+ u32 format, struct dma_buf **dma_buf, u32 *offset, u32 *pitch)
+{
+ if (!intf->ops || !intf->ops->alloc_simple_buffer)
+ return -EOPNOTSUPP;
+
+ if (!adf_format_is_rgb(format))
+ return -EINVAL;
+
+ return intf->ops->alloc_simple_buffer(intf, w, h, format, dma_buf,
+ offset, pitch);
+}
+EXPORT_SYMBOL(adf_interface_simple_buffer_alloc);
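+
+/*
+ * Example (sketch): allocating a scanout buffer and mapping it into the
+ * kernel, roughly what adf_fb_alloc() in adf_fbdev.c does.  Names are
+ * hypothetical; the first pixel lives @offset bytes into the dma-buf and
+ * rows are @pitch bytes apart.
+ *
+ *	err = adf_interface_simple_buffer_alloc(intf, xres, yres,
+ *			DRM_FORMAT_RGB565, &dma_buf, &offset, &pitch);
+ *	if (err < 0)
+ *		return err;
+ *	vaddr = dma_buf_vmap(dma_buf);
+ */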
+
+/**
+ * adf_interface_simple_post - flip to a single buffer
+ *
+ * @intf: interface targeted by the flip
+ * @buf: buffer to display
+ *
+ * adf_interface_simple_post() can be used generically for simple display
+ * configurations, since the client does not need to provide any driver-private
+ * configuration data.
+ *
+ * adf_interface_simple_post() has the same copying semantics as
+ * adf_device_post().
+ *
+ * On success, returns a sync fence which signals when the buffer is removed
+ * from the screen. On failure, returns ERR_PTR(-errno).
+ */
+struct sync_fence *adf_interface_simple_post(struct adf_interface *intf,
+ struct adf_buffer *buf)
+{
+ size_t custom_data_size = 0;
+ void *custom_data = NULL;
+ struct sync_fence *ret;
+
+ if (intf->ops && intf->ops->describe_simple_post) {
+ int err;
+
+ custom_data = kzalloc(ADF_MAX_CUSTOM_DATA_SIZE, GFP_KERNEL);
+ if (!custom_data) {
+ ret = ERR_PTR(-ENOMEM);
+ goto done;
+ }
+
+ err = intf->ops->describe_simple_post(intf, buf, custom_data,
+ &custom_data_size);
+ if (err < 0) {
+ ret = ERR_PTR(err);
+ goto done;
+ }
+ }
+
+ ret = adf_device_post(adf_interface_parent(intf), &intf, 1, buf, 1,
+ custom_data, custom_data_size);
+done:
+ kfree(custom_data);
+ return ret;
+}
+EXPORT_SYMBOL(adf_interface_simple_post);
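+
+/*
+ * Example (sketch): flipping a single buffer and dropping the completion
+ * fence once it is no longer needed, as adf_fbdev_post() in adf_fbdev.c
+ * does.  "intf" and "buf" are hypothetical.
+ *
+ *	fence = adf_interface_simple_post(intf, &buf);
+ *	if (IS_ERR(fence))
+ *		return PTR_ERR(fence);
+ *	sync_fence_put(fence);
+ */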
diff --git a/drivers/video/adf/adf_fbdev.c b/drivers/video/adf/adf_fbdev.c
new file mode 100644
index 000000000000..a5b53bc08c3f
--- /dev/null
+++ b/drivers/video/adf/adf_fbdev.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/vmalloc.h>
+
+#include <video/adf.h>
+#include <video/adf_client.h>
+#include <video/adf_fbdev.h>
+#include <video/adf_format.h>
+
+#include "adf.h"
+
+struct adf_fbdev_format {
+ u32 fourcc;
+ u32 bpp;
+ u32 r_length;
+ u32 g_length;
+ u32 b_length;
+ u32 a_length;
+ u32 r_offset;
+ u32 g_offset;
+ u32 b_offset;
+ u32 a_offset;
+};
+
+static const struct adf_fbdev_format format_table[] = {
+ {DRM_FORMAT_RGB332, 8, 3, 3, 2, 0, 5, 2, 0, 0},
+ {DRM_FORMAT_BGR233, 8, 3, 3, 2, 0, 0, 3, 5, 0},
+
+ {DRM_FORMAT_XRGB4444, 16, 4, 4, 4, 0, 8, 4, 0, 0},
+ {DRM_FORMAT_XBGR4444, 16, 4, 4, 4, 0, 0, 4, 8, 0},
+ {DRM_FORMAT_RGBX4444, 16, 4, 4, 4, 0, 12, 8, 4, 0},
+ {DRM_FORMAT_BGRX4444, 16, 4, 4, 4, 0, 0, 4, 8, 0},
+
+ {DRM_FORMAT_ARGB4444, 16, 4, 4, 4, 4, 8, 4, 0, 12},
+ {DRM_FORMAT_ABGR4444, 16, 4, 4, 4, 4, 0, 4, 8, 12},
+ {DRM_FORMAT_RGBA4444, 16, 4, 4, 4, 4, 12, 8, 4, 0},
+ {DRM_FORMAT_BGRA4444, 16, 4, 4, 4, 4, 0, 4, 8, 0},
+
+ {DRM_FORMAT_XRGB1555, 16, 5, 5, 5, 0, 10, 5, 0, 0},
+ {DRM_FORMAT_XBGR1555, 16, 5, 5, 5, 0, 0, 5, 10, 0},
+ {DRM_FORMAT_RGBX5551, 16, 5, 5, 5, 0, 11, 6, 1, 0},
+ {DRM_FORMAT_BGRX5551, 16, 5, 5, 5, 0, 1, 6, 11, 0},
+
+ {DRM_FORMAT_ARGB1555, 16, 5, 5, 5, 1, 10, 5, 0, 15},
+ {DRM_FORMAT_ABGR1555, 16, 5, 5, 5, 1, 0, 5, 10, 15},
+ {DRM_FORMAT_RGBA5551, 16, 5, 5, 5, 1, 11, 6, 1, 0},
+ {DRM_FORMAT_BGRA5551, 16, 5, 5, 5, 1, 1, 6, 11, 0},
+
+ {DRM_FORMAT_RGB565, 16, 5, 6, 5, 0, 11, 5, 0, 0},
+ {DRM_FORMAT_BGR565, 16, 5, 6, 5, 0, 0, 5, 11, 0},
+
+ {DRM_FORMAT_RGB888, 24, 8, 8, 8, 0, 16, 8, 0, 0},
+ {DRM_FORMAT_BGR888, 24, 8, 8, 8, 0, 0, 8, 16, 0},
+
+ {DRM_FORMAT_XRGB8888, 32, 8, 8, 8, 0, 16, 8, 0, 0},
+ {DRM_FORMAT_XBGR8888, 32, 8, 8, 8, 0, 0, 8, 16, 0},
+ {DRM_FORMAT_RGBX8888, 32, 8, 8, 8, 0, 24, 16, 8, 0},
+ {DRM_FORMAT_BGRX8888, 32, 8, 8, 8, 0, 8, 16, 24, 0},
+
+ {DRM_FORMAT_ARGB8888, 32, 8, 8, 8, 8, 16, 8, 0, 24},
+ {DRM_FORMAT_ABGR8888, 32, 8, 8, 8, 8, 0, 8, 16, 24},
+ {DRM_FORMAT_RGBA8888, 32, 8, 8, 8, 8, 24, 16, 8, 0},
+ {DRM_FORMAT_BGRA8888, 32, 8, 8, 8, 8, 8, 16, 24, 0},
+
+ {DRM_FORMAT_XRGB2101010, 32, 10, 10, 10, 0, 20, 10, 0, 0},
+ {DRM_FORMAT_XBGR2101010, 32, 10, 10, 10, 0, 0, 10, 20, 0},
+ {DRM_FORMAT_RGBX1010102, 32, 10, 10, 10, 0, 22, 12, 2, 0},
+ {DRM_FORMAT_BGRX1010102, 32, 10, 10, 10, 0, 2, 12, 22, 0},
+
+ {DRM_FORMAT_ARGB2101010, 32, 10, 10, 10, 2, 20, 10, 0, 30},
+ {DRM_FORMAT_ABGR2101010, 32, 10, 10, 10, 2, 0, 10, 20, 30},
+ {DRM_FORMAT_RGBA1010102, 32, 10, 10, 10, 2, 22, 12, 2, 0},
+ {DRM_FORMAT_BGRA1010102, 32, 10, 10, 10, 2, 2, 12, 22, 0},
+};
+
+static u32 drm_fourcc_from_fb_var(struct fb_var_screeninfo *var)
+{
+ size_t i;
+ for (i = 0; i < ARRAY_SIZE(format_table); i++) {
+ const struct adf_fbdev_format *f = &format_table[i];
+ if (var->red.length == f->r_length &&
+ var->red.offset == f->r_offset &&
+ var->green.length == f->g_length &&
+ var->green.offset == f->g_offset &&
+ var->blue.length == f->b_length &&
+ var->blue.offset == f->b_offset &&
+ var->transp.length == f->a_length &&
+ (var->transp.length == 0 ||
+ var->transp.offset == f->a_offset))
+ return f->fourcc;
+ }
+
+ return 0;
+}
+
+static const struct adf_fbdev_format *fbdev_format_info(u32 format)
+{
+ size_t i;
+ for (i = 0; i < ARRAY_SIZE(format_table); i++) {
+ const struct adf_fbdev_format *f = &format_table[i];
+ if (f->fourcc == format)
+ return f;
+ }
+
+ BUG();
+}
+
+void adf_modeinfo_to_fb_videomode(const struct drm_mode_modeinfo *mode,
+ struct fb_videomode *vmode)
+{
+ memset(vmode, 0, sizeof(*vmode));
+
+ vmode->refresh = mode->vrefresh;
+
+ vmode->xres = mode->hdisplay;
+ vmode->yres = mode->vdisplay;
+
+ vmode->pixclock = mode->clock ? KHZ2PICOS(mode->clock) : 0;
+ vmode->left_margin = mode->htotal - mode->hsync_end;
+ vmode->right_margin = mode->hsync_start - mode->hdisplay;
+ vmode->upper_margin = mode->vtotal - mode->vsync_end;
+ vmode->lower_margin = mode->vsync_start - mode->vdisplay;
+ vmode->hsync_len = mode->hsync_end - mode->hsync_start;
+ vmode->vsync_len = mode->vsync_end - mode->vsync_start;
+
+ vmode->sync = 0;
+ if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+ vmode->sync |= FB_SYNC_HOR_HIGH_ACT;
+ if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+ vmode->sync |= FB_SYNC_VERT_HIGH_ACT;
+ if (mode->flags & DRM_MODE_FLAG_PCSYNC)
+ vmode->sync |= FB_SYNC_COMP_HIGH_ACT;
+ if (mode->flags & DRM_MODE_FLAG_BCAST)
+ vmode->sync |= FB_SYNC_BROADCAST;
+
+ vmode->vmode = 0;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ vmode->vmode |= FB_VMODE_INTERLACED;
+ if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+ vmode->vmode |= FB_VMODE_DOUBLE;
+}
+EXPORT_SYMBOL(adf_modeinfo_to_fb_videomode);
+
+void adf_modeinfo_from_fb_videomode(const struct fb_videomode *vmode,
+ struct drm_mode_modeinfo *mode)
+{
+ memset(mode, 0, sizeof(*mode));
+
+ mode->hdisplay = vmode->xres;
+ mode->hsync_start = mode->hdisplay + vmode->right_margin;
+ mode->hsync_end = mode->hsync_start + vmode->hsync_len;
+ mode->htotal = mode->hsync_end + vmode->left_margin;
+
+ mode->vdisplay = vmode->yres;
+ mode->vsync_start = mode->vdisplay + vmode->lower_margin;
+ mode->vsync_end = mode->vsync_start + vmode->vsync_len;
+ mode->vtotal = mode->vsync_end + vmode->upper_margin;
+
+ mode->clock = vmode->pixclock ? PICOS2KHZ(vmode->pixclock) : 0;
+
+ mode->flags = 0;
+ if (vmode->sync & FB_SYNC_HOR_HIGH_ACT)
+ mode->flags |= DRM_MODE_FLAG_PHSYNC;
+ if (vmode->sync & FB_SYNC_VERT_HIGH_ACT)
+ mode->flags |= DRM_MODE_FLAG_PVSYNC;
+ if (vmode->sync & FB_SYNC_COMP_HIGH_ACT)
+ mode->flags |= DRM_MODE_FLAG_PCSYNC;
+ if (vmode->sync & FB_SYNC_BROADCAST)
+ mode->flags |= DRM_MODE_FLAG_BCAST;
+ if (vmode->vmode & FB_VMODE_INTERLACED)
+ mode->flags |= DRM_MODE_FLAG_INTERLACE;
+ if (vmode->vmode & FB_VMODE_DOUBLE)
+ mode->flags |= DRM_MODE_FLAG_DBLSCAN;
+
+ if (vmode->refresh)
+ mode->vrefresh = vmode->refresh;
+ else
+ adf_modeinfo_set_vrefresh(mode);
+
+ if (vmode->name)
+ strlcpy(mode->name, vmode->name, sizeof(mode->name));
+ else
+ adf_modeinfo_set_name(mode);
+}
+EXPORT_SYMBOL(adf_modeinfo_from_fb_videomode);
+
+static int adf_fbdev_post(struct adf_fbdev *fbdev)
+{
+ struct adf_buffer buf;
+ struct sync_fence *complete_fence;
+ int ret = 0;
+
+ memset(&buf, 0, sizeof(buf));
+ buf.overlay_engine = fbdev->eng;
+ buf.w = fbdev->info->var.xres;
+ buf.h = fbdev->info->var.yres;
+ buf.format = fbdev->format;
+ buf.dma_bufs[0] = fbdev->dma_buf;
+ buf.offset[0] = fbdev->offset +
+ fbdev->info->var.yoffset * fbdev->pitch +
+ fbdev->info->var.xoffset *
+ (fbdev->info->var.bits_per_pixel / 8);
+ buf.pitch[0] = fbdev->pitch;
+ buf.n_planes = 1;
+
+ complete_fence = adf_interface_simple_post(fbdev->intf, &buf);
+ if (IS_ERR(complete_fence)) {
+ ret = PTR_ERR(complete_fence);
+ goto done;
+ }
+
+ sync_fence_put(complete_fence);
+done:
+ return ret;
+}
+
+static const u16 vga_palette[][3] = {
+ {0x0000, 0x0000, 0x0000},
+ {0x0000, 0x0000, 0xAAAA},
+ {0x0000, 0xAAAA, 0x0000},
+ {0x0000, 0xAAAA, 0xAAAA},
+ {0xAAAA, 0x0000, 0x0000},
+ {0xAAAA, 0x0000, 0xAAAA},
+ {0xAAAA, 0x5555, 0x0000},
+ {0xAAAA, 0xAAAA, 0xAAAA},
+ {0x5555, 0x5555, 0x5555},
+ {0x5555, 0x5555, 0xFFFF},
+ {0x5555, 0xFFFF, 0x5555},
+ {0x5555, 0xFFFF, 0xFFFF},
+ {0xFFFF, 0x5555, 0x5555},
+ {0xFFFF, 0x5555, 0xFFFF},
+ {0xFFFF, 0xFFFF, 0x5555},
+ {0xFFFF, 0xFFFF, 0xFFFF},
+};
+
+static int adf_fb_alloc(struct adf_fbdev *fbdev)
+{
+ int ret;
+
+ ret = adf_interface_simple_buffer_alloc(fbdev->intf,
+ fbdev->default_xres_virtual,
+ fbdev->default_yres_virtual,
+ fbdev->default_format,
+ &fbdev->dma_buf, &fbdev->offset, &fbdev->pitch);
+ if (ret < 0) {
+ dev_err(fbdev->info->dev, "allocating fb failed: %d\n", ret);
+ return ret;
+ }
+
+ fbdev->vaddr = dma_buf_vmap(fbdev->dma_buf);
+ if (!fbdev->vaddr) {
+ ret = -ENOMEM;
+ dev_err(fbdev->info->dev, "vmapping fb failed\n");
+ goto err_vmap;
+ }
+ fbdev->info->fix.line_length = fbdev->pitch;
+ fbdev->info->var.xres_virtual = fbdev->default_xres_virtual;
+ fbdev->info->var.yres_virtual = fbdev->default_yres_virtual;
+ fbdev->info->fix.smem_len = fbdev->dma_buf->size;
+ fbdev->info->screen_base = fbdev->vaddr;
+
+ return 0;
+
+err_vmap:
+ dma_buf_put(fbdev->dma_buf);
+ return ret;
+}
+
+static void adf_fb_destroy(struct adf_fbdev *fbdev)
+{
+ dma_buf_vunmap(fbdev->dma_buf, fbdev->vaddr);
+ dma_buf_put(fbdev->dma_buf);
+}
+
+static void adf_fbdev_set_format(struct adf_fbdev *fbdev, u32 format)
+{
+ size_t i;
+ const struct adf_fbdev_format *info = fbdev_format_info(format);
+ for (i = 0; i < ARRAY_SIZE(vga_palette); i++) {
+ u16 r = vga_palette[i][0];
+ u16 g = vga_palette[i][1];
+ u16 b = vga_palette[i][2];
+
+ r >>= (16 - info->r_length);
+ g >>= (16 - info->g_length);
+ b >>= (16 - info->b_length);
+
+ fbdev->pseudo_palette[i] =
+ (r << info->r_offset) |
+ (g << info->g_offset) |
+ (b << info->b_offset);
+
+ if (info->a_length) {
+ u16 a = BIT(info->a_length) - 1;
+ fbdev->pseudo_palette[i] |= (a << info->a_offset);
+ }
+ }
+
+ fbdev->info->var.bits_per_pixel = adf_format_bpp(format);
+ fbdev->info->var.red.length = info->r_length;
+ fbdev->info->var.red.offset = info->r_offset;
+ fbdev->info->var.green.length = info->g_length;
+ fbdev->info->var.green.offset = info->g_offset;
+ fbdev->info->var.blue.length = info->b_length;
+ fbdev->info->var.blue.offset = info->b_offset;
+ fbdev->info->var.transp.length = info->a_length;
+ fbdev->info->var.transp.offset = info->a_offset;
+ fbdev->format = format;
+}
+
+static void adf_fbdev_fill_modelist(struct adf_fbdev *fbdev)
+{
+ struct drm_mode_modeinfo *modelist;
+ struct fb_videomode fbmode;
+ size_t n_modes, i;
+ int ret = 0;
+
+ n_modes = adf_interface_modelist(fbdev->intf, NULL, 0);
+ modelist = kzalloc(sizeof(modelist[0]) * n_modes, GFP_KERNEL);
+ if (!modelist) {
+ dev_warn(fbdev->info->dev, "allocating new modelist failed; keeping old modelist\n");
+ return;
+ }
+ adf_interface_modelist(fbdev->intf, modelist, n_modes);
+
+ fb_destroy_modelist(&fbdev->info->modelist);
+
+ for (i = 0; i < n_modes; i++) {
+ adf_modeinfo_to_fb_videomode(&modelist[i], &fbmode);
+ ret = fb_add_videomode(&fbmode, &fbdev->info->modelist);
+ if (ret < 0)
+ dev_warn(fbdev->info->dev, "adding mode %s to modelist failed: %d\n",
+ modelist[i].name, ret);
+ }
+
+ kfree(modelist);
+}
+
+/**
+ * adf_fbdev_open - default implementation of fbdev open op
+ */
+int adf_fbdev_open(struct fb_info *info, int user)
+{
+ struct adf_fbdev *fbdev = info->par;
+ int ret;
+
+ mutex_lock(&fbdev->refcount_lock);
+
+ if (unlikely(fbdev->refcount == UINT_MAX)) {
+ ret = -EMFILE;
+ goto done;
+ }
+
+ if (!fbdev->refcount) {
+ struct drm_mode_modeinfo mode;
+ struct fb_videomode fbmode;
+ struct adf_device *dev = adf_interface_parent(fbdev->intf);
+
+ ret = adf_device_attach(dev, fbdev->eng, fbdev->intf);
+ if (ret < 0 && ret != -EALREADY)
+ goto done;
+
+ ret = adf_fb_alloc(fbdev);
+ if (ret < 0)
+ goto done;
+
+ adf_interface_current_mode(fbdev->intf, &mode);
+ adf_modeinfo_to_fb_videomode(&mode, &fbmode);
+ fb_videomode_to_var(&fbdev->info->var, &fbmode);
+
+ adf_fbdev_set_format(fbdev, fbdev->default_format);
+ adf_fbdev_fill_modelist(fbdev);
+ }
+
+ ret = adf_fbdev_post(fbdev);
+ if (ret < 0) {
+ if (!fbdev->refcount)
+ adf_fb_destroy(fbdev);
+ goto done;
+ }
+
+ fbdev->refcount++;
+done:
+ mutex_unlock(&fbdev->refcount_lock);
+ return ret;
+}
+EXPORT_SYMBOL(adf_fbdev_open);
+
+/**
+ * adf_fbdev_release - default implementation of fbdev release op
+ */
+int adf_fbdev_release(struct fb_info *info, int user)
+{
+ struct adf_fbdev *fbdev = info->par;
+ mutex_lock(&fbdev->refcount_lock);
+ BUG_ON(!fbdev->refcount);
+ fbdev->refcount--;
+ if (!fbdev->refcount)
+ adf_fb_destroy(fbdev);
+ mutex_unlock(&fbdev->refcount_lock);
+ return 0;
+}
+EXPORT_SYMBOL(adf_fbdev_release);
+
+/**
+ * adf_fbdev_check_var - default implementation of fbdev check_var op
+ */
+int adf_fbdev_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ struct adf_fbdev *fbdev = info->par;
+ bool valid_format = true;
+ u32 format = drm_fourcc_from_fb_var(var);
+ u32 pitch = var->xres_virtual * var->bits_per_pixel / 8;
+
+ if (!format) {
+ dev_dbg(info->dev, "%s: unrecognized format\n", __func__);
+ valid_format = false;
+ }
+
+ if (valid_format && var->grayscale) {
+ dev_dbg(info->dev, "%s: grayscale modes not supported\n",
+ __func__);
+ valid_format = false;
+ }
+
+ if (valid_format && var->nonstd) {
+ dev_dbg(info->dev, "%s: nonstandard formats not supported\n",
+ __func__);
+ valid_format = false;
+ }
+
+ if (valid_format && !adf_overlay_engine_supports_format(fbdev->eng,
+ format)) {
+ char format_str[ADF_FORMAT_STR_SIZE];
+ adf_format_str(format, format_str);
+ dev_dbg(info->dev, "%s: format %s not supported by overlay engine %s\n",
+ __func__, format_str, fbdev->eng->base.name);
+ valid_format = false;
+ }
+
+ if (valid_format && pitch > fbdev->pitch) {
+ dev_dbg(info->dev, "%s: fb pitch too small for var (pitch = %u, xres_virtual = %u, bits_per_pixel = %u)\n",
+ __func__, fbdev->pitch, var->xres_virtual,
+ var->bits_per_pixel);
+ valid_format = false;
+ }
+
+ if (valid_format && var->yres_virtual > fbdev->default_yres_virtual) {
+ dev_dbg(info->dev, "%s: fb height too small for var (h = %u, yres_virtual = %u)\n",
+ __func__, fbdev->default_yres_virtual,
+ var->yres_virtual);
+ valid_format = false;
+ }
+
+ if (valid_format) {
+ var->activate = info->var.activate;
+ var->height = info->var.height;
+ var->width = info->var.width;
+ var->accel_flags = info->var.accel_flags;
+ var->rotate = info->var.rotate;
+ var->colorspace = info->var.colorspace;
+ /* userspace can't change these */
+ } else {
+ /* if any part of the format is invalid then fixing it up is
+ impractical, so save just the modesetting bits and
+ overwrite everything else */
+ struct fb_videomode mode;
+ fb_var_to_videomode(&mode, var);
+ memcpy(var, &info->var, sizeof(*var));
+ fb_videomode_to_var(var, &mode);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(adf_fbdev_check_var);
+
+/**
+ * adf_fbdev_set_par - default implementation of fbdev set_par op
+ */
+int adf_fbdev_set_par(struct fb_info *info)
+{
+ struct adf_fbdev *fbdev = info->par;
+ struct adf_interface *intf = fbdev->intf;
+ struct fb_videomode vmode;
+ struct drm_mode_modeinfo mode;
+ int ret;
+ u32 format = drm_fourcc_from_fb_var(&info->var);
+
+ fb_var_to_videomode(&vmode, &info->var);
+ adf_modeinfo_from_fb_videomode(&vmode, &mode);
+ ret = adf_interface_set_mode(intf, &mode);
+ if (ret < 0)
+ return ret;
+
+ ret = adf_fbdev_post(fbdev);
+ if (ret < 0)
+ return ret;
+
+ if (format != fbdev->format)
+ adf_fbdev_set_format(fbdev, format);
+
+ return 0;
+}
+EXPORT_SYMBOL(adf_fbdev_set_par);
+
+/**
+ * adf_fbdev_blank - default implementation of fbdev blank op
+ */
+int adf_fbdev_blank(int blank, struct fb_info *info)
+{
+ struct adf_fbdev *fbdev = info->par;
+ struct adf_interface *intf = fbdev->intf;
+ u8 dpms_state;
+
+ switch (blank) {
+ case FB_BLANK_UNBLANK:
+ dpms_state = DRM_MODE_DPMS_ON;
+ break;
+ case FB_BLANK_NORMAL:
+ dpms_state = DRM_MODE_DPMS_STANDBY;
+ break;
+ case FB_BLANK_VSYNC_SUSPEND:
+ dpms_state = DRM_MODE_DPMS_SUSPEND;
+ break;
+ case FB_BLANK_HSYNC_SUSPEND:
+ dpms_state = DRM_MODE_DPMS_STANDBY;
+ break;
+ case FB_BLANK_POWERDOWN:
+ dpms_state = DRM_MODE_DPMS_OFF;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return adf_interface_blank(intf, dpms_state);
+}
+EXPORT_SYMBOL(adf_fbdev_blank);
+
+/**
+ * adf_fbdev_pan_display - default implementation of fbdev pan_display op
+ */
+int adf_fbdev_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ struct adf_fbdev *fbdev = info->par;
+ return adf_fbdev_post(fbdev);
+}
+EXPORT_SYMBOL(adf_fbdev_pan_display);
+
+/**
+ * adf_fbdev_mmap - default implementation of fbdev mmap op
+ */
+int adf_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+ struct adf_fbdev *fbdev = info->par;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ return dma_buf_mmap(fbdev->dma_buf, vma, 0);
+}
+EXPORT_SYMBOL(adf_fbdev_mmap);
+
+/**
+ * adf_fbdev_init - initialize helper to wrap ADF device in fbdev API
+ *
+ * @fbdev: the fbdev helper
+ * @interface: the ADF interface that will display the framebuffer
+ * @eng: the ADF overlay engine that will scan out the framebuffer
+ * @xres_virtual: the virtual width of the framebuffer
+ * @yres_virtual: the virtual height of the framebuffer
+ * @format: the format of the framebuffer
+ * @fbops: the device's fbdev ops
+ * @fmt: formatting for the framebuffer identification string
+ * @...: variable arguments
+ *
+ * @format must be a standard, non-indexed RGB format, i.e.,
+ * adf_format_is_rgb(@format) && @format != %DRM_FORMAT_C8.
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+int adf_fbdev_init(struct adf_fbdev *fbdev, struct adf_interface *interface,
+ struct adf_overlay_engine *eng,
+ u16 xres_virtual, u16 yres_virtual, u32 format,
+ struct fb_ops *fbops, const char *fmt, ...)
+{
+ struct adf_device *parent = adf_interface_parent(interface);
+ struct device *dev = &parent->base.dev;
+ u16 width_mm, height_mm;
+ va_list args;
+ int ret;
+
+ if (!adf_format_is_rgb(format) ||
+ format == DRM_FORMAT_C8) {
+ dev_err(dev, "fbdev helper does not support format %u\n",
+ format);
+ return -EINVAL;
+ }
+
+ memset(fbdev, 0, sizeof(*fbdev));
+ fbdev->intf = interface;
+ fbdev->eng = eng;
+ fbdev->info = framebuffer_alloc(0, dev);
+ if (!fbdev->info) {
+ dev_err(dev, "allocating framebuffer device failed\n");
+ return -ENOMEM;
+ }
+ mutex_init(&fbdev->refcount_lock);
+ fbdev->default_xres_virtual = xres_virtual;
+ fbdev->default_yres_virtual = yres_virtual;
+ fbdev->default_format = format;
+
+ fbdev->info->flags = FBINFO_FLAG_DEFAULT;
+ ret = adf_interface_get_screen_size(interface, &width_mm, &height_mm);
+ if (ret < 0) {
+ width_mm = 0;
+ height_mm = 0;
+ }
+ fbdev->info->var.width = width_mm;
+ fbdev->info->var.height = height_mm;
+ fbdev->info->var.activate = FB_ACTIVATE_VBL;
+ va_start(args, fmt);
+ vsnprintf(fbdev->info->fix.id, sizeof(fbdev->info->fix.id), fmt, args);
+ va_end(args);
+ fbdev->info->fix.type = FB_TYPE_PACKED_PIXELS;
+ fbdev->info->fix.visual = FB_VISUAL_TRUECOLOR;
+ fbdev->info->fix.xpanstep = 1;
+ fbdev->info->fix.ypanstep = 1;
+ INIT_LIST_HEAD(&fbdev->info->modelist);
+ fbdev->info->fbops = fbops;
+ fbdev->info->pseudo_palette = fbdev->pseudo_palette;
+ fbdev->info->par = fbdev;
+
+ ret = register_framebuffer(fbdev->info);
+ if (ret < 0) {
+ dev_err(dev, "registering framebuffer failed: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(adf_fbdev_init);
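+
+/*
+ * Example (sketch): a driver wrapping its ADF objects in the fbdev API.
+ * "myfb_ops", "myfb_fbdev", "myfb_intf" and "myfb_eng" are hypothetical
+ * driver-owned objects; the fb_ops entries point back at the default
+ * implementations above.
+ *
+ *	static struct fb_ops myfb_ops = {
+ *		.owner = THIS_MODULE,
+ *		.fb_open = adf_fbdev_open,
+ *		.fb_release = adf_fbdev_release,
+ *		.fb_check_var = adf_fbdev_check_var,
+ *		.fb_set_par = adf_fbdev_set_par,
+ *		.fb_blank = adf_fbdev_blank,
+ *		.fb_pan_display = adf_fbdev_pan_display,
+ *		.fb_mmap = adf_fbdev_mmap,
+ *	};
+ *
+ *	err = adf_fbdev_init(&myfb_fbdev, &myfb_intf, &myfb_eng,
+ *			xres, yres, DRM_FORMAT_RGB565, &myfb_ops,
+ *			"myfb fb");
+ */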
+
+/**
+ * adf_fbdev_destroy - destroy helper to wrap ADF device in fbdev API
+ *
+ * @fbdev: the fbdev helper
+ */
+void adf_fbdev_destroy(struct adf_fbdev *fbdev)
+{
+ unregister_framebuffer(fbdev->info);
+ BUG_ON(fbdev->refcount);
+ mutex_destroy(&fbdev->refcount_lock);
+ framebuffer_release(fbdev->info);
+}
+EXPORT_SYMBOL(adf_fbdev_destroy);
diff --git a/drivers/video/adf/adf_fops.c b/drivers/video/adf/adf_fops.c
new file mode 100644
index 000000000000..7fbf33e1cb39
--- /dev/null
+++ b/drivers/video/adf/adf_fops.c
@@ -0,0 +1,957 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/circ_buf.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <video/adf_client.h>
+#include <video/adf_format.h>
+
+#include "sw_sync.h"
+#include "sync.h"
+
+#include "adf.h"
+#include "adf_fops.h"
+#include "adf_sysfs.h"
+
+#ifdef CONFIG_COMPAT
+#include "adf_fops32.h"
+#endif
+
+static int adf_obj_set_event(struct adf_obj *obj, struct adf_file *file,
+ struct adf_set_event __user *arg)
+{
+ struct adf_set_event data;
+ bool enabled;
+ unsigned long flags;
+ int err;
+
+ if (copy_from_user(&data, arg, sizeof(data)))
+ return -EFAULT;
+
+ err = adf_obj_check_supports_event(obj, data.type);
+ if (err < 0)
+ return err;
+
+ spin_lock_irqsave(&obj->file_lock, flags);
+ if (data.enabled)
+ enabled = test_and_set_bit(data.type,
+ file->event_subscriptions);
+ else
+ enabled = test_and_clear_bit(data.type,
+ file->event_subscriptions);
+ spin_unlock_irqrestore(&obj->file_lock, flags);
+
+ if (data.enabled == enabled)
+ return -EALREADY;
+
+ if (data.enabled)
+ adf_event_get(obj, data.type);
+ else
+ adf_event_put(obj, data.type);
+
+ return 0;
+}
+
+static int adf_obj_copy_custom_data_to_user(struct adf_obj *obj,
+ void __user *dst, size_t *dst_size)
+{
+ void *custom_data;
+ size_t custom_data_size;
+ int ret;
+
+ if (!obj->ops || !obj->ops->custom_data) {
+ dev_dbg(&obj->dev, "%s: no custom_data op\n", __func__);
+ return 0;
+ }
+
+ custom_data = kzalloc(ADF_MAX_CUSTOM_DATA_SIZE, GFP_KERNEL);
+ if (!custom_data)
+ return -ENOMEM;
+
+ ret = obj->ops->custom_data(obj, custom_data, &custom_data_size);
+ if (ret < 0)
+ goto done;
+
+ if (copy_to_user(dst, custom_data, min(*dst_size, custom_data_size))) {
+ ret = -EFAULT;
+ goto done;
+ }
+ *dst_size = custom_data_size;
+
+done:
+ kfree(custom_data);
+ return ret;
+}
+
+static int adf_eng_get_data(struct adf_overlay_engine *eng,
+ struct adf_overlay_engine_data __user *arg)
+{
+ struct adf_device *dev = adf_overlay_engine_parent(eng);
+ struct adf_overlay_engine_data data;
+ size_t n_supported_formats;
+ u32 *supported_formats = NULL;
+ int ret = 0;
+
+ if (copy_from_user(&data, arg, sizeof(data)))
+ return -EFAULT;
+
+ strlcpy(data.name, eng->base.name, sizeof(data.name));
+
+ if (data.n_supported_formats > ADF_MAX_SUPPORTED_FORMATS)
+ return -EINVAL;
+
+ n_supported_formats = data.n_supported_formats;
+ data.n_supported_formats = eng->ops->n_supported_formats;
+
+ if (n_supported_formats) {
+ supported_formats = kzalloc(n_supported_formats *
+ sizeof(supported_formats[0]), GFP_KERNEL);
+ if (!supported_formats)
+ return -ENOMEM;
+ }
+
+ memcpy(supported_formats, eng->ops->supported_formats,
+ sizeof(u32) * min(n_supported_formats,
+ eng->ops->n_supported_formats));
+
+ mutex_lock(&dev->client_lock);
+ ret = adf_obj_copy_custom_data_to_user(&eng->base, arg->custom_data,
+ &data.custom_data_size);
+ mutex_unlock(&dev->client_lock);
+
+ if (ret < 0)
+ goto done;
+
+ if (copy_to_user(arg, &data, sizeof(data))) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ if (supported_formats && copy_to_user(arg->supported_formats,
+ supported_formats,
+ n_supported_formats * sizeof(supported_formats[0])))
+ ret = -EFAULT;
+
+done:
+ kfree(supported_formats);
+ return ret;
+}
+
+static int adf_buffer_import(struct adf_device *dev,
+ struct adf_buffer_config __user *cfg, struct adf_buffer *buf)
+{
+ struct adf_buffer_config user_buf;
+ size_t i;
+ int ret = 0;
+
+ if (copy_from_user(&user_buf, cfg, sizeof(user_buf)))
+ return -EFAULT;
+
+ memset(buf, 0, sizeof(*buf));
+
+ if (user_buf.n_planes > ADF_MAX_PLANES) {
+ dev_err(&dev->base.dev, "invalid plane count %u\n",
+ user_buf.n_planes);
+ return -EINVAL;
+ }
+
+ buf->overlay_engine = idr_find(&dev->overlay_engines,
+ user_buf.overlay_engine);
+ if (!buf->overlay_engine) {
+ dev_err(&dev->base.dev, "invalid overlay engine id %u\n",
+ user_buf.overlay_engine);
+ return -ENOENT;
+ }
+
+ buf->w = user_buf.w;
+ buf->h = user_buf.h;
+ buf->format = user_buf.format;
+ for (i = 0; i < user_buf.n_planes; i++) {
+ buf->dma_bufs[i] = dma_buf_get(user_buf.fd[i]);
+ if (IS_ERR(buf->dma_bufs[i])) {
+ ret = PTR_ERR(buf->dma_bufs[i]);
+ dev_err(&dev->base.dev, "importing dma_buf fd %d failed: %d\n",
+ user_buf.fd[i], ret);
+ buf->dma_bufs[i] = NULL;
+ goto done;
+ }
+ buf->offset[i] = user_buf.offset[i];
+ buf->pitch[i] = user_buf.pitch[i];
+ }
+ buf->n_planes = user_buf.n_planes;
+
+ if (user_buf.acquire_fence >= 0) {
+ buf->acquire_fence = sync_fence_fdget(user_buf.acquire_fence);
+ if (!buf->acquire_fence) {
+ dev_err(&dev->base.dev, "getting fence fd %d failed\n",
+ user_buf.acquire_fence);
+ ret = -EINVAL;
+ goto done;
+ }
+ }
+
+done:
+ if (ret < 0)
+ adf_buffer_cleanup(buf);
+ return ret;
+}
+
+static int adf_device_post_config(struct adf_device *dev,
+ struct adf_post_config __user *arg)
+{
+ struct sync_fence *complete_fence;
+ int complete_fence_fd;
+ struct adf_buffer *bufs = NULL;
+ struct adf_interface **intfs = NULL;
+ size_t n_intfs, n_bufs, i;
+ void *custom_data = NULL;
+ size_t custom_data_size;
+ int ret = 0;
+
+ complete_fence_fd = get_unused_fd();
+ if (complete_fence_fd < 0)
+ return complete_fence_fd;
+
+ if (get_user(n_intfs, &arg->n_interfaces)) {
+ ret = -EFAULT;
+ goto err_get_user;
+ }
+
+ if (n_intfs > ADF_MAX_INTERFACES) {
+ ret = -EINVAL;
+ goto err_get_user;
+ }
+
+ if (get_user(n_bufs, &arg->n_bufs)) {
+ ret = -EFAULT;
+ goto err_get_user;
+ }
+
+ if (n_bufs > ADF_MAX_BUFFERS) {
+ ret = -EINVAL;
+ goto err_get_user;
+ }
+
+ if (get_user(custom_data_size, &arg->custom_data_size)) {
+ ret = -EFAULT;
+ goto err_get_user;
+ }
+
+ if (custom_data_size > ADF_MAX_CUSTOM_DATA_SIZE) {
+ ret = -EINVAL;
+ goto err_get_user;
+ }
+
+ if (n_intfs) {
+ intfs = kmalloc(sizeof(intfs[0]) * n_intfs, GFP_KERNEL);
+ if (!intfs) {
+ ret = -ENOMEM;
+ goto err_get_user;
+ }
+ }
+
+ for (i = 0; i < n_intfs; i++) {
+ u32 intf_id;
+ if (get_user(intf_id, &arg->interfaces[i])) {
+ ret = -EFAULT;
+ goto err_get_user;
+ }
+
+ intfs[i] = idr_find(&dev->interfaces, intf_id);
+ if (!intfs[i]) {
+ ret = -EINVAL;
+ goto err_get_user;
+ }
+ }
+
+ if (n_bufs) {
+ bufs = kzalloc(sizeof(bufs[0]) * n_bufs, GFP_KERNEL);
+ if (!bufs) {
+ ret = -ENOMEM;
+ goto err_get_user;
+ }
+ }
+
+ for (i = 0; i < n_bufs; i++) {
+ ret = adf_buffer_import(dev, &arg->bufs[i], &bufs[i]);
+ if (ret < 0) {
+ memset(&bufs[i], 0, sizeof(bufs[i]));
+ goto err_import;
+ }
+ }
+
+ if (custom_data_size) {
+ custom_data = kzalloc(custom_data_size, GFP_KERNEL);
+ if (!custom_data) {
+ ret = -ENOMEM;
+ goto err_import;
+ }
+
+ if (copy_from_user(custom_data, arg->custom_data,
+ custom_data_size)) {
+ ret = -EFAULT;
+ goto err_import;
+ }
+ }
+
+ if (put_user(complete_fence_fd, &arg->complete_fence)) {
+ ret = -EFAULT;
+ goto err_import;
+ }
+
+ complete_fence = adf_device_post_nocopy(dev, intfs, n_intfs, bufs,
+ n_bufs, custom_data, custom_data_size);
+ if (IS_ERR(complete_fence)) {
+ ret = PTR_ERR(complete_fence);
+ goto err_import;
+ }
+
+ sync_fence_install(complete_fence, complete_fence_fd);
+ return 0;
+
+err_import:
+ for (i = 0; i < n_bufs; i++)
+ adf_buffer_cleanup(&bufs[i]);
+
+err_get_user:
+ kfree(custom_data);
+ kfree(bufs);
+ kfree(intfs);
+ put_unused_fd(complete_fence_fd);
+ return ret;
+}
+
+static int adf_intf_simple_post_config(struct adf_interface *intf,
+ struct adf_simple_post_config __user *arg)
+{
+ struct adf_device *dev = intf->base.parent;
+ struct sync_fence *complete_fence;
+ int complete_fence_fd;
+ struct adf_buffer buf;
+ int ret = 0;
+
+ complete_fence_fd = get_unused_fd();
+ if (complete_fence_fd < 0)
+ return complete_fence_fd;
+
+ ret = adf_buffer_import(dev, &arg->buf, &buf);
+ if (ret < 0)
+ goto err_import;
+
+ if (put_user(complete_fence_fd, &arg->complete_fence)) {
+ ret = -EFAULT;
+ goto err_put_user;
+ }
+
+ complete_fence = adf_interface_simple_post(intf, &buf);
+ if (IS_ERR(complete_fence)) {
+ ret = PTR_ERR(complete_fence);
+ goto err_put_user;
+ }
+
+ sync_fence_install(complete_fence, complete_fence_fd);
+ return 0;
+
+err_put_user:
+ adf_buffer_cleanup(&buf);
+err_import:
+ put_unused_fd(complete_fence_fd);
+ return ret;
+}
+
+static int adf_intf_simple_buffer_alloc(struct adf_interface *intf,
+ struct adf_simple_buffer_alloc __user *arg)
+{
+ struct adf_simple_buffer_alloc data;
+ struct dma_buf *dma_buf;
+ int ret = 0;
+
+ if (copy_from_user(&data, arg, sizeof(data)))
+ return -EFAULT;
+
+ data.fd = get_unused_fd_flags(O_CLOEXEC);
+ if (data.fd < 0)
+ return data.fd;
+
+ ret = adf_interface_simple_buffer_alloc(intf, data.w, data.h,
+ data.format, &dma_buf, &data.offset, &data.pitch);
+ if (ret < 0)
+ goto err_alloc;
+
+ if (copy_to_user(arg, &data, sizeof(*arg))) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ fd_install(data.fd, dma_buf->file);
+ return 0;
+
+err_copy:
+ dma_buf_put(dma_buf);
+
+err_alloc:
+ put_unused_fd(data.fd);
+ return ret;
+}
+
+static int adf_copy_attachment_list_to_user(
+ struct adf_attachment_config __user *to, size_t n_to,
+ struct adf_attachment *from, size_t n_from)
+{
+ struct adf_attachment_config *temp;
+ size_t n = min(n_to, n_from);
+ size_t i;
+ int ret = 0;
+
+ if (!n)
+ return 0;
+
+ temp = kzalloc(n * sizeof(temp[0]), GFP_KERNEL);
+ if (!temp)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ temp[i].interface = from[i].interface->base.id;
+ temp[i].overlay_engine = from[i].overlay_engine->base.id;
+ }
+
+ if (copy_to_user(to, temp, n * sizeof(to[0]))) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+done:
+ kfree(temp);
+ return ret;
+}
+
+static int adf_device_get_data(struct adf_device *dev,
+ struct adf_device_data __user *arg)
+{
+ struct adf_device_data data;
+ size_t n_attach;
+ struct adf_attachment *attach = NULL;
+ size_t n_allowed_attach;
+ struct adf_attachment *allowed_attach = NULL;
+ int ret = 0;
+
+ if (copy_from_user(&data, arg, sizeof(data)))
+ return -EFAULT;
+
+ if (data.n_attachments > ADF_MAX_ATTACHMENTS ||
+ data.n_allowed_attachments > ADF_MAX_ATTACHMENTS)
+ return -EINVAL;
+
+ strlcpy(data.name, dev->base.name, sizeof(data.name));
+
+ if (data.n_attachments) {
+ attach = kzalloc(data.n_attachments * sizeof(attach[0]),
+ GFP_KERNEL);
+ if (!attach)
+ return -ENOMEM;
+ }
+ n_attach = adf_device_attachments(dev, attach, data.n_attachments);
+
+ if (data.n_allowed_attachments) {
+ allowed_attach = kzalloc(data.n_allowed_attachments *
+ sizeof(allowed_attach[0]), GFP_KERNEL);
+ if (!allowed_attach) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ }
+ n_allowed_attach = adf_device_attachments_allowed(dev, allowed_attach,
+ data.n_allowed_attachments);
+
+ mutex_lock(&dev->client_lock);
+ ret = adf_obj_copy_custom_data_to_user(&dev->base, arg->custom_data,
+ &data.custom_data_size);
+ mutex_unlock(&dev->client_lock);
+
+ if (ret < 0)
+ goto done;
+
+ ret = adf_copy_attachment_list_to_user(arg->attachments,
+ data.n_attachments, attach, n_attach);
+ if (ret < 0)
+ goto done;
+
+ ret = adf_copy_attachment_list_to_user(arg->allowed_attachments,
+ data.n_allowed_attachments, allowed_attach,
+ n_allowed_attach);
+ if (ret < 0)
+ goto done;
+
+ data.n_attachments = n_attach;
+ data.n_allowed_attachments = n_allowed_attach;
+
+ if (copy_to_user(arg, &data, sizeof(data)))
+ ret = -EFAULT;
+
+done:
+ kfree(allowed_attach);
+ kfree(attach);
+ return ret;
+}
+
+static int adf_device_handle_attachment(struct adf_device *dev,
+ struct adf_attachment_config __user *arg, bool attach)
+{
+ struct adf_attachment_config data;
+ struct adf_overlay_engine *eng;
+ struct adf_interface *intf;
+
+ if (copy_from_user(&data, arg, sizeof(data)))
+ return -EFAULT;
+
+ eng = idr_find(&dev->overlay_engines, data.overlay_engine);
+ if (!eng) {
+ dev_err(&dev->base.dev, "invalid overlay engine id %u\n",
+ data.overlay_engine);
+ return -EINVAL;
+ }
+
+ intf = idr_find(&dev->interfaces, data.interface);
+ if (!intf) {
+ dev_err(&dev->base.dev, "invalid interface id %u\n",
+ data.interface);
+ return -EINVAL;
+ }
+
+ if (attach)
+ return adf_device_attach(dev, eng, intf);
+ else
+ return adf_device_detach(dev, eng, intf);
+}
+
+static int adf_intf_set_mode(struct adf_interface *intf,
+ struct drm_mode_modeinfo __user *arg)
+{
+ struct drm_mode_modeinfo mode;
+
+ if (copy_from_user(&mode, arg, sizeof(mode)))
+ return -EFAULT;
+
+ return adf_interface_set_mode(intf, &mode);
+}
+
+static int adf_intf_get_data(struct adf_interface *intf,
+ struct adf_interface_data __user *arg)
+{
+ struct adf_device *dev = adf_interface_parent(intf);
+ struct adf_interface_data data;
+ struct drm_mode_modeinfo *modelist;
+ size_t modelist_size;
+ int err;
+ int ret = 0;
+ unsigned long flags;
+
+ if (copy_from_user(&data, arg, sizeof(data)))
+ return -EFAULT;
+
+ strlcpy(data.name, intf->base.name, sizeof(data.name));
+
+ data.type = intf->type;
+ data.id = intf->idx;
+ data.flags = intf->flags;
+
+ err = adf_interface_get_screen_size(intf, &data.width_mm,
+ &data.height_mm);
+ if (err < 0) {
+ data.width_mm = 0;
+ data.height_mm = 0;
+ }
+
+ modelist = kmalloc(sizeof(modelist[0]) * ADF_MAX_MODES, GFP_KERNEL);
+ if (!modelist)
+ return -ENOMEM;
+
+ mutex_lock(&dev->client_lock);
+ read_lock_irqsave(&intf->hotplug_modelist_lock, flags);
+ data.hotplug_detect = intf->hotplug_detect;
+ modelist_size = min(data.n_available_modes, intf->n_modes) *
+ sizeof(intf->modelist[0]);
+ memcpy(modelist, intf->modelist, modelist_size);
+ data.n_available_modes = intf->n_modes;
+ read_unlock_irqrestore(&intf->hotplug_modelist_lock, flags);
+
+ if (copy_to_user(arg->available_modes, modelist, modelist_size)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ data.dpms_state = intf->dpms_state;
+ memcpy(&data.current_mode, &intf->current_mode,
+ sizeof(intf->current_mode));
+
+ ret = adf_obj_copy_custom_data_to_user(&intf->base, arg->custom_data,
+ &data.custom_data_size);
+done:
+ mutex_unlock(&dev->client_lock);
+ kfree(modelist);
+
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(arg, &data, sizeof(data)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static inline long adf_obj_custom_ioctl(struct adf_obj *obj, unsigned int cmd,
+ unsigned long arg)
+{
+ if (obj->ops && obj->ops->ioctl)
+ return obj->ops->ioctl(obj, cmd, arg);
+ return -ENOTTY;
+}
+
+static long adf_overlay_engine_ioctl(struct adf_overlay_engine *eng,
+ struct adf_file *file, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case ADF_SET_EVENT:
+ return adf_obj_set_event(&eng->base, file,
+ (struct adf_set_event __user *)arg);
+
+ case ADF_GET_OVERLAY_ENGINE_DATA:
+ return adf_eng_get_data(eng,
+ (struct adf_overlay_engine_data __user *)arg);
+
+ case ADF_BLANK:
+ case ADF_POST_CONFIG:
+ case ADF_SET_MODE:
+ case ADF_GET_DEVICE_DATA:
+ case ADF_GET_INTERFACE_DATA:
+ case ADF_SIMPLE_POST_CONFIG:
+ case ADF_SIMPLE_BUFFER_ALLOC:
+ case ADF_ATTACH:
+ case ADF_DETACH:
+ return -EINVAL;
+
+ default:
+ return adf_obj_custom_ioctl(&eng->base, cmd, arg);
+ }
+}
+
+static long adf_interface_ioctl(struct adf_interface *intf,
+ struct adf_file *file, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case ADF_SET_EVENT:
+ return adf_obj_set_event(&intf->base, file,
+ (struct adf_set_event __user *)arg);
+
+ case ADF_BLANK:
+ return adf_interface_blank(intf, arg);
+
+ case ADF_SET_MODE:
+ return adf_intf_set_mode(intf,
+ (struct drm_mode_modeinfo __user *)arg);
+
+ case ADF_GET_INTERFACE_DATA:
+ return adf_intf_get_data(intf,
+ (struct adf_interface_data __user *)arg);
+
+ case ADF_SIMPLE_POST_CONFIG:
+ return adf_intf_simple_post_config(intf,
+ (struct adf_simple_post_config __user *)arg);
+
+ case ADF_SIMPLE_BUFFER_ALLOC:
+ return adf_intf_simple_buffer_alloc(intf,
+ (struct adf_simple_buffer_alloc __user *)arg);
+
+ case ADF_POST_CONFIG:
+ case ADF_GET_DEVICE_DATA:
+ case ADF_GET_OVERLAY_ENGINE_DATA:
+ case ADF_ATTACH:
+ case ADF_DETACH:
+ return -EINVAL;
+
+ default:
+ return adf_obj_custom_ioctl(&intf->base, cmd, arg);
+ }
+}
+
+static long adf_device_ioctl(struct adf_device *dev, struct adf_file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case ADF_SET_EVENT:
+ return adf_obj_set_event(&dev->base, file,
+ (struct adf_set_event __user *)arg);
+
+ case ADF_POST_CONFIG:
+ return adf_device_post_config(dev,
+ (struct adf_post_config __user *)arg);
+
+ case ADF_GET_DEVICE_DATA:
+ return adf_device_get_data(dev,
+ (struct adf_device_data __user *)arg);
+
+ case ADF_ATTACH:
+ return adf_device_handle_attachment(dev,
+ (struct adf_attachment_config __user *)arg,
+ true);
+
+ case ADF_DETACH:
+ return adf_device_handle_attachment(dev,
+ (struct adf_attachment_config __user *)arg,
+ false);
+
+ case ADF_BLANK:
+ case ADF_SET_MODE:
+ case ADF_GET_INTERFACE_DATA:
+ case ADF_GET_OVERLAY_ENGINE_DATA:
+ case ADF_SIMPLE_POST_CONFIG:
+ case ADF_SIMPLE_BUFFER_ALLOC:
+ return -EINVAL;
+
+ default:
+ return adf_obj_custom_ioctl(&dev->base, cmd, arg);
+ }
+}
+
+static int adf_file_open(struct inode *inode, struct file *file)
+{
+ struct adf_obj *obj;
+ struct adf_file *fpriv = NULL;
+ unsigned long flags;
+ int ret = 0;
+
+ obj = adf_obj_sysfs_find(iminor(inode));
+ if (!obj)
+ return -ENODEV;
+
+ dev_dbg(&obj->dev, "opening %s\n", dev_name(&obj->dev));
+
+ if (!try_module_get(obj->parent->ops->owner)) {
+ dev_err(&obj->dev, "getting owner module failed\n");
+ return -ENODEV;
+ }
+
+ fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+ if (!fpriv) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ INIT_LIST_HEAD(&fpriv->head);
+ fpriv->obj = obj;
+ init_waitqueue_head(&fpriv->event_wait);
+
+ file->private_data = fpriv;
+
+ if (obj->ops && obj->ops->open) {
+ ret = obj->ops->open(obj, inode, file);
+ if (ret < 0)
+ goto done;
+ }
+
+ spin_lock_irqsave(&obj->file_lock, flags);
+ list_add_tail(&fpriv->head, &obj->file_list);
+ spin_unlock_irqrestore(&obj->file_lock, flags);
+
+done:
+ if (ret < 0) {
+ kfree(fpriv);
+ module_put(obj->parent->ops->owner);
+ }
+ return ret;
+}
+
+static int adf_file_release(struct inode *inode, struct file *file)
+{
+ struct adf_file *fpriv = file->private_data;
+ struct adf_obj *obj = fpriv->obj;
+ enum adf_event_type event_type;
+ unsigned long flags;
+
+ if (obj->ops && obj->ops->release)
+ obj->ops->release(obj, inode, file);
+
+ spin_lock_irqsave(&obj->file_lock, flags);
+ list_del(&fpriv->head);
+ spin_unlock_irqrestore(&obj->file_lock, flags);
+
+ for_each_set_bit(event_type, fpriv->event_subscriptions,
+ ADF_EVENT_TYPE_MAX) {
+ adf_event_put(obj, event_type);
+ }
+
+ kfree(fpriv);
+ module_put(obj->parent->ops->owner);
+
+ dev_dbg(&obj->dev, "released %s\n", dev_name(&obj->dev));
+ return 0;
+}
+
+long adf_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct adf_file *fpriv = file->private_data;
+ struct adf_obj *obj = fpriv->obj;
+ long ret = -EINVAL;
+
+ dev_dbg(&obj->dev, "%s ioctl %u\n", dev_name(&obj->dev), _IOC_NR(cmd));
+
+ switch (obj->type) {
+ case ADF_OBJ_OVERLAY_ENGINE:
+ ret = adf_overlay_engine_ioctl(adf_obj_to_overlay_engine(obj),
+ fpriv, cmd, arg);
+ break;
+
+ case ADF_OBJ_INTERFACE:
+ ret = adf_interface_ioctl(adf_obj_to_interface(obj), fpriv, cmd,
+ arg);
+ break;
+
+ case ADF_OBJ_DEVICE:
+ ret = adf_device_ioctl(adf_obj_to_device(obj), fpriv, cmd, arg);
+ break;
+ }
+
+ return ret;
+}
+
+static inline bool adf_file_event_available(struct adf_file *fpriv)
+{
+ int head = fpriv->event_head;
+ int tail = fpriv->event_tail;
+ return CIRC_CNT(head, tail, sizeof(fpriv->event_buf)) != 0;
+}
+
+void adf_file_queue_event(struct adf_file *fpriv, struct adf_event *event)
+{
+ int head = fpriv->event_head;
+ int tail = fpriv->event_tail;
+ size_t space = CIRC_SPACE(head, tail, sizeof(fpriv->event_buf));
+ size_t space_to_end =
+ CIRC_SPACE_TO_END(head, tail, sizeof(fpriv->event_buf));
+
+ if (space < event->length) {
+ dev_dbg(&fpriv->obj->dev,
+ "insufficient buffer space for event %u\n",
+ event->type);
+ return;
+ }
+
+ if (space_to_end >= event->length) {
+ memcpy(fpriv->event_buf + head, event, event->length);
+ } else {
+ memcpy(fpriv->event_buf + head, event, space_to_end);
+ memcpy(fpriv->event_buf, (u8 *)event + space_to_end,
+ event->length - space_to_end);
+ }
+
+ smp_wmb();
+ fpriv->event_head = (fpriv->event_head + event->length) &
+ (sizeof(fpriv->event_buf) - 1);
+ wake_up_interruptible_all(&fpriv->event_wait);
+}
+
+static ssize_t adf_file_copy_to_user(struct adf_file *fpriv,
+ char __user *buffer, size_t buffer_size)
+{
+ int head, tail;
+ u8 *event_buf;
+ size_t cnt, cnt_to_end, copy_size = 0;
+ ssize_t ret = 0;
+ unsigned long flags;
+
+ event_buf = kmalloc(min(buffer_size, sizeof(fpriv->event_buf)),
+ GFP_KERNEL);
+ if (!event_buf)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&fpriv->obj->file_lock, flags);
+
+ if (!adf_file_event_available(fpriv))
+ goto out;
+
+ head = fpriv->event_head;
+ tail = fpriv->event_tail;
+
+ cnt = CIRC_CNT(head, tail, sizeof(fpriv->event_buf));
+ cnt_to_end = CIRC_CNT_TO_END(head, tail, sizeof(fpriv->event_buf));
+ copy_size = min(buffer_size, cnt);
+
+ if (cnt_to_end >= copy_size) {
+ memcpy(event_buf, fpriv->event_buf + tail, copy_size);
+ } else {
+ memcpy(event_buf, fpriv->event_buf + tail, cnt_to_end);
+ memcpy(event_buf + cnt_to_end, fpriv->event_buf,
+ copy_size - cnt_to_end);
+ }
+
+ fpriv->event_tail = (fpriv->event_tail + copy_size) &
+ (sizeof(fpriv->event_buf) - 1);
+
+out:
+ spin_unlock_irqrestore(&fpriv->obj->file_lock, flags);
+ if (copy_size) {
+ if (copy_to_user(buffer, event_buf, copy_size))
+ ret = -EFAULT;
+ else
+ ret = copy_size;
+ }
+ kfree(event_buf);
+ return ret;
+}
+
+ssize_t adf_file_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *offset)
+{
+ struct adf_file *fpriv = filp->private_data;
+ int err;
+
+ err = wait_event_interruptible(fpriv->event_wait,
+ adf_file_event_available(fpriv));
+ if (err < 0)
+ return err;
+
+ return adf_file_copy_to_user(fpriv, buffer, count);
+}
+
+unsigned int adf_file_poll(struct file *filp, struct poll_table_struct *wait)
+{
+ struct adf_file *fpriv = filp->private_data;
+ unsigned int mask = 0;
+
+ poll_wait(filp, &fpriv->event_wait, wait);
+
+ if (adf_file_event_available(fpriv))
+ mask |= POLLIN | POLLRDNORM;
+
+ return mask;
+}
+
+const struct file_operations adf_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = adf_file_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = adf_file_compat_ioctl,
+#endif
+ .open = adf_file_open,
+ .release = adf_file_release,
+ .llseek = default_llseek,
+ .read = adf_file_read,
+ .poll = adf_file_poll,
+};
diff --git a/drivers/video/adf/adf_fops.h b/drivers/video/adf/adf_fops.h
new file mode 100644
index 000000000000..90a3a74796d6
--- /dev/null
+++ b/drivers/video/adf/adf_fops.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __VIDEO_ADF_ADF_FOPS_H
+#define __VIDEO_ADF_ADF_FOPS_H
+
+#include <linux/bitmap.h>
+#include <linux/fs.h>
+
+extern const struct file_operations adf_fops;
+
+struct adf_file {
+ struct list_head head;
+ struct adf_obj *obj;
+
+ DECLARE_BITMAP(event_subscriptions, ADF_EVENT_TYPE_MAX);
+ u8 event_buf[4096];
+ int event_head;
+ int event_tail;
+ wait_queue_head_t event_wait;
+};
+
+void adf_file_queue_event(struct adf_file *file, struct adf_event *event);
+long adf_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+#endif /* __VIDEO_ADF_ADF_FOPS_H */
diff --git a/drivers/video/adf/adf_fops32.c b/drivers/video/adf/adf_fops32.c
new file mode 100644
index 000000000000..d299a8161491
--- /dev/null
+++ b/drivers/video/adf/adf_fops32.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <video/adf.h>
+
+#include "adf_fops.h"
+#include "adf_fops32.h"
+
+long adf_compat_post_config(struct file *file,
+ struct adf_post_config32 __user *arg)
+{
+ struct adf_post_config32 cfg32;
+ struct adf_post_config __user *cfg;
+ int ret;
+
+ if (copy_from_user(&cfg32, arg, sizeof(cfg32)))
+ return -EFAULT;
+
+ cfg = compat_alloc_user_space(sizeof(*cfg));
+ if (!access_ok(VERIFY_WRITE, cfg, sizeof(*cfg)))
+ return -EFAULT;
+
+ if (put_user(cfg32.n_interfaces, &cfg->n_interfaces) ||
+ put_user(compat_ptr(cfg32.interfaces),
+ &cfg->interfaces) ||
+ put_user(cfg32.n_bufs, &cfg->n_bufs) ||
+ put_user(compat_ptr(cfg32.bufs), &cfg->bufs) ||
+ put_user(cfg32.custom_data_size,
+ &cfg->custom_data_size) ||
+ put_user(compat_ptr(cfg32.custom_data),
+ &cfg->custom_data))
+ return -EFAULT;
+
+ ret = adf_file_ioctl(file, ADF_POST_CONFIG, (unsigned long)cfg);
+ if (ret < 0)
+ return ret;
+
+ if (copy_in_user(&arg->complete_fence, &cfg->complete_fence,
+ sizeof(cfg->complete_fence)))
+ return -EFAULT;
+
+ return 0;
+}
+
+long adf_compat_get_device_data(struct file *file,
+ struct adf_device_data32 __user *arg)
+{
+ struct adf_device_data32 data32;
+ struct adf_device_data __user *data;
+ int ret;
+
+ if (copy_from_user(&data32, arg, sizeof(data32)))
+ return -EFAULT;
+
+ data = compat_alloc_user_space(sizeof(*data));
+ if (!access_ok(VERIFY_WRITE, data, sizeof(*data)))
+ return -EFAULT;
+
+ if (put_user(data32.n_attachments, &data->n_attachments) ||
+ put_user(compat_ptr(data32.attachments),
+ &data->attachments) ||
+ put_user(data32.n_allowed_attachments,
+ &data->n_allowed_attachments) ||
+ put_user(compat_ptr(data32.allowed_attachments),
+ &data->allowed_attachments) ||
+ put_user(data32.custom_data_size,
+ &data->custom_data_size) ||
+ put_user(compat_ptr(data32.custom_data),
+ &data->custom_data))
+ return -EFAULT;
+
+ ret = adf_file_ioctl(file, ADF_GET_DEVICE_DATA, (unsigned long)data);
+ if (ret < 0)
+ return ret;
+
+ if (copy_in_user(arg->name, data->name, sizeof(arg->name)) ||
+ copy_in_user(&arg->n_attachments, &data->n_attachments,
+ sizeof(arg->n_attachments)) ||
+ copy_in_user(&arg->n_allowed_attachments,
+ &data->n_allowed_attachments,
+ sizeof(arg->n_allowed_attachments)) ||
+ copy_in_user(&arg->custom_data_size,
+ &data->custom_data_size,
+ sizeof(arg->custom_data_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
+long adf_compat_get_interface_data(struct file *file,
+ struct adf_interface_data32 __user *arg)
+{
+ struct adf_interface_data32 data32;
+ struct adf_interface_data __user *data;
+ int ret;
+
+ if (copy_from_user(&data32, arg, sizeof(data32)))
+ return -EFAULT;
+
+ data = compat_alloc_user_space(sizeof(*data));
+ if (!access_ok(VERIFY_WRITE, data, sizeof(*data)))
+ return -EFAULT;
+
+ if (put_user(data32.n_available_modes, &data->n_available_modes) ||
+ put_user(compat_ptr(data32.available_modes),
+ &data->available_modes) ||
+ put_user(data32.custom_data_size,
+ &data->custom_data_size) ||
+ put_user(compat_ptr(data32.custom_data),
+ &data->custom_data))
+ return -EFAULT;
+
+ ret = adf_file_ioctl(file, ADF_GET_INTERFACE_DATA, (unsigned long)data);
+ if (ret < 0)
+ return ret;
+
+ if (copy_in_user(arg->name, data->name, sizeof(arg->name)) ||
+ copy_in_user(&arg->type, &data->type,
+ sizeof(arg->type)) ||
+ copy_in_user(&arg->id, &data->id, sizeof(arg->id)) ||
+ copy_in_user(&arg->flags, &data->flags,
+ sizeof(arg->flags)) ||
+ copy_in_user(&arg->dpms_state, &data->dpms_state,
+ sizeof(arg->dpms_state)) ||
+ copy_in_user(&arg->hotplug_detect,
+ &data->hotplug_detect,
+ sizeof(arg->hotplug_detect)) ||
+ copy_in_user(&arg->width_mm, &data->width_mm,
+ sizeof(arg->width_mm)) ||
+ copy_in_user(&arg->height_mm, &data->height_mm,
+ sizeof(arg->height_mm)) ||
+ copy_in_user(&arg->current_mode, &data->current_mode,
+ sizeof(arg->current_mode)) ||
+ copy_in_user(&arg->n_available_modes,
+ &data->n_available_modes,
+ sizeof(arg->n_available_modes)) ||
+ copy_in_user(&arg->custom_data_size,
+ &data->custom_data_size,
+ sizeof(arg->custom_data_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
+long adf_compat_get_overlay_engine_data(struct file *file,
+ struct adf_overlay_engine_data32 __user *arg)
+{
+ struct adf_overlay_engine_data32 data32;
+ struct adf_overlay_engine_data __user *data;
+ int ret;
+
+ if (copy_from_user(&data32, arg, sizeof(data32)))
+ return -EFAULT;
+
+ data = compat_alloc_user_space(sizeof(*data));
+ if (!access_ok(VERIFY_WRITE, data, sizeof(*data)))
+ return -EFAULT;
+
+ if (put_user(data32.n_supported_formats, &data->n_supported_formats) ||
+ put_user(compat_ptr(data32.supported_formats),
+ &data->supported_formats) ||
+ put_user(data32.custom_data_size,
+ &data->custom_data_size) ||
+ put_user(compat_ptr(data32.custom_data),
+ &data->custom_data))
+ return -EFAULT;
+
+ ret = adf_file_ioctl(file, ADF_GET_OVERLAY_ENGINE_DATA,
+ (unsigned long)data);
+ if (ret < 0)
+ return ret;
+
+ if (copy_in_user(arg->name, data->name, sizeof(arg->name)) ||
+ copy_in_user(&arg->n_supported_formats,
+ &data->n_supported_formats,
+ sizeof(arg->n_supported_formats)) ||
+ copy_in_user(&arg->custom_data_size,
+ &data->custom_data_size,
+ sizeof(arg->custom_data_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
+long adf_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case ADF_POST_CONFIG32:
+ return adf_compat_post_config(file, compat_ptr(arg));
+
+ case ADF_GET_DEVICE_DATA32:
+ return adf_compat_get_device_data(file, compat_ptr(arg));
+
+ case ADF_GET_INTERFACE_DATA32:
+ return adf_compat_get_interface_data(file, compat_ptr(arg));
+
+ case ADF_GET_OVERLAY_ENGINE_DATA32:
+ return adf_compat_get_overlay_engine_data(file,
+ compat_ptr(arg));
+
+ default:
+ return adf_file_ioctl(file, cmd, arg);
+ }
+}
diff --git a/drivers/video/adf/adf_fops32.h b/drivers/video/adf/adf_fops32.h
new file mode 100644
index 000000000000..64034ce33a6b
--- /dev/null
+++ b/drivers/video/adf/adf_fops32.h
@@ -0,0 +1,78 @@
+#ifndef __VIDEO_ADF_ADF_FOPS32_H
+#define __VIDEO_ADF_ADF_FOPS32_H
+
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+
+#include <video/adf.h>
+
+#define ADF_POST_CONFIG32 \
+ _IOW(ADF_IOCTL_TYPE, 2, struct adf_post_config32)
+#define ADF_GET_DEVICE_DATA32 \
+ _IOR(ADF_IOCTL_TYPE, 4, struct adf_device_data32)
+#define ADF_GET_INTERFACE_DATA32 \
+ _IOR(ADF_IOCTL_TYPE, 5, struct adf_interface_data32)
+#define ADF_GET_OVERLAY_ENGINE_DATA32 \
+ _IOR(ADF_IOCTL_TYPE, 6, struct adf_overlay_engine_data32)
+
+struct adf_post_config32 {
+ compat_size_t n_interfaces;
+ compat_uptr_t interfaces;
+
+ compat_size_t n_bufs;
+ compat_uptr_t bufs;
+
+ compat_size_t custom_data_size;
+ compat_uptr_t custom_data;
+
+ __s32 complete_fence;
+};
+
+struct adf_device_data32 {
+ char name[ADF_NAME_LEN];
+
+ compat_size_t n_attachments;
+ compat_uptr_t attachments;
+
+ compat_size_t n_allowed_attachments;
+ compat_uptr_t allowed_attachments;
+
+ compat_size_t custom_data_size;
+ compat_uptr_t custom_data;
+};
+
+struct adf_interface_data32 {
+ char name[ADF_NAME_LEN];
+
+ __u8 type;
+ __u32 id;
+ /* e.g. type=ADF_INTF_TYPE_DSI, id=1 => DSI.1 */
+ __u32 flags;
+
+ __u8 dpms_state;
+ __u8 hotplug_detect;
+ __u16 width_mm;
+ __u16 height_mm;
+
+ struct drm_mode_modeinfo current_mode;
+ compat_size_t n_available_modes;
+ compat_uptr_t available_modes;
+
+ compat_size_t custom_data_size;
+ compat_uptr_t custom_data;
+};
+
+struct adf_overlay_engine_data32 {
+ char name[ADF_NAME_LEN];
+
+ compat_size_t n_supported_formats;
+ compat_uptr_t supported_formats;
+
+ compat_size_t custom_data_size;
+ compat_uptr_t custom_data;
+};
+
+long adf_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+
+#endif /* __VIDEO_ADF_ADF_FOPS32_H */
diff --git a/drivers/video/adf/adf_format.c b/drivers/video/adf/adf_format.c
new file mode 100644
index 000000000000..e3f22c7c85d9
--- /dev/null
+++ b/drivers/video/adf/adf_format.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ * modified from drivers/gpu/drm/drm_crtc.c
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <drm/drm_fourcc.h>
+#include <video/adf_format.h>
+
+bool adf_format_is_standard(u32 format)
+{
+ switch (format) {
+ case DRM_FORMAT_C8:
+ case DRM_FORMAT_RGB332:
+ case DRM_FORMAT_BGR233:
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_XBGR4444:
+ case DRM_FORMAT_RGBX4444:
+ case DRM_FORMAT_BGRX4444:
+ case DRM_FORMAT_ARGB4444:
+ case DRM_FORMAT_ABGR4444:
+ case DRM_FORMAT_RGBA4444:
+ case DRM_FORMAT_BGRA4444:
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_XBGR1555:
+ case DRM_FORMAT_RGBX5551:
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_ABGR1555:
+ case DRM_FORMAT_RGBA5551:
+ case DRM_FORMAT_BGRA5551:
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_BGR565:
+ case DRM_FORMAT_RGB888:
+ case DRM_FORMAT_BGR888:
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_RGBX8888:
+ case DRM_FORMAT_BGRX8888:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_ABGR8888:
+ case DRM_FORMAT_RGBA8888:
+ case DRM_FORMAT_BGRA8888:
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_XBGR2101010:
+ case DRM_FORMAT_RGBX1010102:
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_ARGB2101010:
+ case DRM_FORMAT_ABGR2101010:
+ case DRM_FORMAT_RGBA1010102:
+ case DRM_FORMAT_BGRA1010102:
+ case DRM_FORMAT_YUYV:
+ case DRM_FORMAT_YVYU:
+ case DRM_FORMAT_UYVY:
+ case DRM_FORMAT_VYUY:
+ case DRM_FORMAT_AYUV:
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV61:
+ case DRM_FORMAT_YUV410:
+ case DRM_FORMAT_YVU410:
+ case DRM_FORMAT_YUV411:
+ case DRM_FORMAT_YVU411:
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_YUV422:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YUV444:
+ case DRM_FORMAT_YVU444:
+ return true;
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL(adf_format_is_standard);
+
+bool adf_format_is_rgb(u32 format)
+{
+ switch (format) {
+ case DRM_FORMAT_C8:
+ case DRM_FORMAT_RGB332:
+ case DRM_FORMAT_BGR233:
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_XBGR1555:
+ case DRM_FORMAT_RGBX5551:
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_ABGR1555:
+ case DRM_FORMAT_RGBA5551:
+ case DRM_FORMAT_BGRA5551:
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_BGR565:
+ case DRM_FORMAT_RGB888:
+ case DRM_FORMAT_BGR888:
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_RGBX8888:
+ case DRM_FORMAT_BGRX8888:
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_XBGR2101010:
+ case DRM_FORMAT_RGBX1010102:
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_ARGB2101010:
+ case DRM_FORMAT_ABGR2101010:
+ case DRM_FORMAT_RGBA1010102:
+ case DRM_FORMAT_BGRA1010102:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_ABGR8888:
+ case DRM_FORMAT_RGBA8888:
+ case DRM_FORMAT_BGRA8888:
+ return true;
+
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL(adf_format_is_rgb);
+
+u8 adf_format_num_planes(u32 format)
+{
+ switch (format) {
+ case DRM_FORMAT_YUV410:
+ case DRM_FORMAT_YVU410:
+ case DRM_FORMAT_YUV411:
+ case DRM_FORMAT_YVU411:
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_YUV422:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YUV444:
+ case DRM_FORMAT_YVU444:
+ return 3;
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV61:
+ return 2;
+ default:
+ return 1;
+ }
+}
+EXPORT_SYMBOL(adf_format_num_planes);
+
+u8 adf_format_bpp(u32 format)
+{
+ switch (format) {
+ case DRM_FORMAT_C8:
+ case DRM_FORMAT_RGB332:
+ case DRM_FORMAT_BGR233:
+ return 8;
+
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_XBGR1555:
+ case DRM_FORMAT_RGBX5551:
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_ABGR1555:
+ case DRM_FORMAT_RGBA5551:
+ case DRM_FORMAT_BGRA5551:
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_BGR565:
+ return 16;
+
+ case DRM_FORMAT_RGB888:
+ case DRM_FORMAT_BGR888:
+ return 24;
+
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_RGBX8888:
+ case DRM_FORMAT_BGRX8888:
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_XBGR2101010:
+ case DRM_FORMAT_RGBX1010102:
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_ARGB2101010:
+ case DRM_FORMAT_ABGR2101010:
+ case DRM_FORMAT_RGBA1010102:
+ case DRM_FORMAT_BGRA1010102:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_ABGR8888:
+ case DRM_FORMAT_RGBA8888:
+ case DRM_FORMAT_BGRA8888:
+ return 32;
+
+ default:
+ pr_debug("%s: unsupported pixel format %u\n", __func__, format);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(adf_format_bpp);
+
+u8 adf_format_plane_cpp(u32 format, int plane)
+{
+ if (plane >= adf_format_num_planes(format))
+ return 0;
+
+ switch (format) {
+ case DRM_FORMAT_YUYV:
+ case DRM_FORMAT_YVYU:
+ case DRM_FORMAT_UYVY:
+ case DRM_FORMAT_VYUY:
+ return 2;
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV61:
+ return plane ? 2 : 1;
+ case DRM_FORMAT_YUV410:
+ case DRM_FORMAT_YVU410:
+ case DRM_FORMAT_YUV411:
+ case DRM_FORMAT_YVU411:
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_YUV422:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YUV444:
+ case DRM_FORMAT_YVU444:
+ return 1;
+ default:
+ return adf_format_bpp(format) / 8;
+ }
+}
+EXPORT_SYMBOL(adf_format_plane_cpp);
+
+u8 adf_format_horz_chroma_subsampling(u32 format)
+{
+ switch (format) {
+ case DRM_FORMAT_YUV411:
+ case DRM_FORMAT_YVU411:
+ case DRM_FORMAT_YUV410:
+ case DRM_FORMAT_YVU410:
+ return 4;
+ case DRM_FORMAT_YUYV:
+ case DRM_FORMAT_YVYU:
+ case DRM_FORMAT_UYVY:
+ case DRM_FORMAT_VYUY:
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ case DRM_FORMAT_NV16:
+ case DRM_FORMAT_NV61:
+ case DRM_FORMAT_YUV422:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ return 2;
+ default:
+ return 1;
+ }
+}
+EXPORT_SYMBOL(adf_format_horz_chroma_subsampling);
+
+u8 adf_format_vert_chroma_subsampling(u32 format)
+{
+ switch (format) {
+ case DRM_FORMAT_YUV410:
+ case DRM_FORMAT_YVU410:
+ return 4;
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV21:
+ return 2;
+ default:
+ return 1;
+ }
+}
+EXPORT_SYMBOL(adf_format_vert_chroma_subsampling);
diff --git a/drivers/video/adf/adf_memblock.c b/drivers/video/adf/adf_memblock.c
new file mode 100644
index 000000000000..e73a7d59f1e6
--- /dev/null
+++ b/drivers/video/adf/adf_memblock.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+
+struct adf_memblock_pdata {
+ phys_addr_t base;
+};
+
+static struct sg_table *adf_memblock_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction direction)
+{
+ struct adf_memblock_pdata *pdata = attach->dmabuf->priv;
+ unsigned long pfn = PFN_DOWN(pdata->base);
+ struct page *page = pfn_to_page(pfn);
+ struct sg_table *table;
+ int nents, ret;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return ERR_PTR(-ENOMEM);
+
+ ret = sg_alloc_table(table, 1, GFP_KERNEL);
+ if (ret < 0)
+ goto err_alloc;
+
+ sg_set_page(table->sgl, page, attach->dmabuf->size, 0);
+
+ nents = dma_map_sg(attach->dev, table->sgl, 1, direction);
+ if (!nents) {
+ ret = -EINVAL;
+ goto err_map;
+ }
+
+ return table;
+
+err_map:
+ sg_free_table(table);
+err_alloc:
+ kfree(table);
+ return ERR_PTR(ret);
+}
+
+static void adf_memblock_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *table, enum dma_data_direction direction)
+{
+ dma_unmap_sg(attach->dev, table->sgl, 1, direction);
+ sg_free_table(table);
+}
+
+static void __init_memblock adf_memblock_release(struct dma_buf *buf)
+{
+ struct adf_memblock_pdata *pdata = buf->priv;
+ int err = memblock_free(pdata->base, buf->size);
+
+ if (err < 0)
+ pr_warn("%s: freeing memblock failed: %d\n", __func__, err);
+ kfree(pdata);
+}
+
+static void *adf_memblock_do_kmap(struct dma_buf *buf, unsigned long pgoffset,
+ bool atomic)
+{
+ struct adf_memblock_pdata *pdata = buf->priv;
+ unsigned long pfn = PFN_DOWN(pdata->base) + pgoffset;
+ struct page *page = pfn_to_page(pfn);
+
+ if (atomic)
+ return kmap_atomic(page);
+ else
+ return kmap(page);
+}
+
+static void *adf_memblock_kmap_atomic(struct dma_buf *buf,
+ unsigned long pgoffset)
+{
+ return adf_memblock_do_kmap(buf, pgoffset, true);
+}
+
+static void adf_memblock_kunmap_atomic(struct dma_buf *buf,
+ unsigned long pgoffset, void *vaddr)
+{
+ kunmap_atomic(vaddr);
+}
+
+static void *adf_memblock_kmap(struct dma_buf *buf, unsigned long pgoffset)
+{
+ return adf_memblock_do_kmap(buf, pgoffset, false);
+}
+
+static void adf_memblock_kunmap(struct dma_buf *buf, unsigned long pgoffset,
+ void *vaddr)
+{
+ kunmap(kmap_to_page(vaddr));
+}
+
+static int adf_memblock_mmap(struct dma_buf *buf, struct vm_area_struct *vma)
+{
+ struct adf_memblock_pdata *pdata = buf->priv;
+
+ return remap_pfn_range(vma, vma->vm_start, PFN_DOWN(pdata->base),
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
+struct dma_buf_ops adf_memblock_ops = {
+ .map_dma_buf = adf_memblock_map,
+ .unmap_dma_buf = adf_memblock_unmap,
+ .release = adf_memblock_release,
+ .kmap_atomic = adf_memblock_kmap_atomic,
+ .kunmap_atomic = adf_memblock_kunmap_atomic,
+ .kmap = adf_memblock_kmap,
+ .kunmap = adf_memblock_kunmap,
+ .mmap = adf_memblock_mmap,
+};
+
+/**
+ * adf_memblock_export - export a memblock reserved area as a dma-buf
+ *
+ * @base: base physical address
+ * @size: memblock size
+ * @flags: mode flags for the dma-buf's file
+ *
+ * @base and @size must be page-aligned.
+ *
+ * Returns a dma-buf on success or ERR_PTR(-errno) on failure.
+ */
+struct dma_buf *adf_memblock_export(phys_addr_t base, size_t size, int flags)
+{
+ struct adf_memblock_pdata *pdata;
+ struct dma_buf *buf;
+
+ if (PAGE_ALIGN(base) != base || PAGE_ALIGN(size) != size)
+ return ERR_PTR(-EINVAL);
+
+ pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return ERR_PTR(-ENOMEM);
+
+ pdata->base = base;
+ buf = dma_buf_export(pdata, &adf_memblock_ops, size, flags);
+ if (IS_ERR(buf))
+ kfree(pdata);
+
+ return buf;
+}
+EXPORT_SYMBOL(adf_memblock_export);
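
A minimal usage sketch, for context only (editorial addition, not part of the commit): a platform display driver would typically carve a framebuffer out of memblock during early memory reservation and later wrap that carveout in a dma-buf via adf_memblock_export(). The example_* names, the 8 MiB size, and the <video/adf_memblock.h> prototype location are assumptions for illustration; memblock_alloc() is shown with its 3.x-era signature (returning a phys_addr_t).

#include <linux/dma-buf.h>
#include <linux/err.h>
#include <linux/fcntl.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/sizes.h>
#include <video/adf_memblock.h>	/* assumed location of the prototype */

static phys_addr_t example_fb_base;			/* hypothetical carveout */
static const phys_addr_t example_fb_size = SZ_8M;	/* page aligned by construction */

/*
 * Reserve a physically contiguous, page-aligned framebuffer. This must run
 * from the platform's early memory-reservation hook (e.g. machine_desc->reserve
 * on ARM), while memblock is still the active allocator.
 */
static int __init example_reserve_fb(void)
{
	example_fb_base = memblock_alloc(example_fb_size, PAGE_SIZE);
	return example_fb_base ? 0 : -ENOMEM;
}

/* Later, from the display driver: hand the carveout out as a dma-buf. */
static struct dma_buf *example_export_fb(void)
{
	struct dma_buf *buf;

	/* adf_memblock_export() rejects a non-page-aligned base or size. */
	buf = adf_memblock_export(example_fb_base, example_fb_size, O_RDWR);
	if (IS_ERR(buf))
		pr_err("exporting framebuffer failed: %ld\n", PTR_ERR(buf));
	return buf;
}
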
diff --git a/drivers/video/adf/adf_sysfs.c b/drivers/video/adf/adf_sysfs.c
new file mode 100644
index 000000000000..8c659c71ffa8
--- /dev/null
+++ b/drivers/video/adf/adf_sysfs.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <video/adf_client.h>
+
+#include "adf.h"
+#include "adf_fops.h"
+#include "adf_sysfs.h"
+
+static struct class *adf_class;
+static int adf_major;
+static DEFINE_IDR(adf_minors);
+
+#define dev_to_adf_interface(p) \
+ adf_obj_to_interface(container_of(p, struct adf_obj, dev))
+
+static ssize_t dpms_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct adf_interface *intf = dev_to_adf_interface(dev);
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ adf_interface_dpms_state(intf));
+}
+
+static ssize_t dpms_state_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct adf_interface *intf = dev_to_adf_interface(dev);
+ u8 dpms_state;
+ int err;
+
+ err = kstrtou8(buf, 0, &dpms_state);
+ if (err < 0)
+ return err;
+
+ err = adf_interface_blank(intf, dpms_state);
+ if (err < 0)
+ return err;
+
+ return count;
+}
+
+static ssize_t current_mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct adf_interface *intf = dev_to_adf_interface(dev);
+ struct drm_mode_modeinfo mode;
+
+ adf_interface_current_mode(intf, &mode);
+
+ if (mode.name[0]) {
+ return scnprintf(buf, PAGE_SIZE, "%s\n", mode.name);
+ } else {
+ bool interlaced = !!(mode.flags & DRM_MODE_FLAG_INTERLACE);
+ return scnprintf(buf, PAGE_SIZE, "%ux%u%s\n", mode.hdisplay,
+ mode.vdisplay, interlaced ? "i" : "");
+ }
+}
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct adf_interface *intf = dev_to_adf_interface(dev);
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ adf_interface_type_str(intf));
+}
+
+static ssize_t vsync_timestamp_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct adf_interface *intf = dev_to_adf_interface(dev);
+ ktime_t timestamp;
+ unsigned long flags;
+
+ read_lock_irqsave(&intf->vsync_lock, flags);
+ memcpy(&timestamp, &intf->vsync_timestamp, sizeof(timestamp));
+ read_unlock_irqrestore(&intf->vsync_lock, flags);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", ktime_to_ns(timestamp));
+}
+
+static ssize_t hotplug_detect_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct adf_interface *intf = dev_to_adf_interface(dev);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", intf->hotplug_detect);
+}
+
+static struct device_attribute adf_interface_attrs[] = {
+ __ATTR(dpms_state, S_IRUGO|S_IWUSR, dpms_state_show, dpms_state_store),
+ __ATTR_RO(current_mode),
+ __ATTR_RO(hotplug_detect),
+ __ATTR_RO(type),
+ __ATTR_RO(vsync_timestamp),
+};
+
+int adf_obj_sysfs_init(struct adf_obj *obj, struct device *parent)
+{
+ int ret = idr_alloc(&adf_minors, obj, 0, 0, GFP_KERNEL);
+ if (ret < 0) {
+ pr_err("%s: allocating adf minor failed: %d\n", __func__,
+ ret);
+ return ret;
+ }
+
+ obj->minor = ret;
+ obj->dev.parent = parent;
+ obj->dev.class = adf_class;
+ obj->dev.devt = MKDEV(adf_major, obj->minor);
+
+ ret = device_register(&obj->dev);
+ if (ret < 0) {
+ pr_err("%s: registering adf object failed: %d\n", __func__,
+ ret);
+ goto err_device_register;
+ }
+
+ return 0;
+
+err_device_register:
+ idr_remove(&adf_minors, obj->minor);
+ return ret;
+}
+
+static char *adf_device_devnode(struct device *dev, umode_t *mode,
+ kuid_t *uid, kgid_t *gid)
+{
+ struct adf_obj *obj = container_of(dev, struct adf_obj, dev);
+ return kasprintf(GFP_KERNEL, "adf%d", obj->id);
+}
+
+static char *adf_interface_devnode(struct device *dev, umode_t *mode,
+ kuid_t *uid, kgid_t *gid)
+{
+ struct adf_obj *obj = container_of(dev, struct adf_obj, dev);
+ struct adf_interface *intf = adf_obj_to_interface(obj);
+ struct adf_device *parent = adf_interface_parent(intf);
+ return kasprintf(GFP_KERNEL, "adf-interface%d.%d",
+ parent->base.id, intf->base.id);
+}
+
+static char *adf_overlay_engine_devnode(struct device *dev, umode_t *mode,
+ kuid_t *uid, kgid_t *gid)
+{
+ struct adf_obj *obj = container_of(dev, struct adf_obj, dev);
+ struct adf_overlay_engine *eng = adf_obj_to_overlay_engine(obj);
+ struct adf_device *parent = adf_overlay_engine_parent(eng);
+ return kasprintf(GFP_KERNEL, "adf-overlay-engine%d.%d",
+ parent->base.id, eng->base.id);
+}
+
+static void adf_noop_release(struct device *dev)
+{
+}
+
+static struct device_type adf_device_type = {
+ .name = "adf_device",
+ .devnode = adf_device_devnode,
+ .release = adf_noop_release,
+};
+
+static struct device_type adf_interface_type = {
+ .name = "adf_interface",
+ .devnode = adf_interface_devnode,
+ .release = adf_noop_release,
+};
+
+static struct device_type adf_overlay_engine_type = {
+ .name = "adf_overlay_engine",
+ .devnode = adf_overlay_engine_devnode,
+ .release = adf_noop_release,
+};
+
+int adf_device_sysfs_init(struct adf_device *dev)
+{
+ dev->base.dev.type = &adf_device_type;
+ dev_set_name(&dev->base.dev, "%s", dev->base.name);
+ return adf_obj_sysfs_init(&dev->base, dev->dev);
+}
+
+int adf_interface_sysfs_init(struct adf_interface *intf)
+{
+ struct adf_device *parent = adf_interface_parent(intf);
+ size_t i, j;
+ int ret;
+
+ intf->base.dev.type = &adf_interface_type;
+ dev_set_name(&intf->base.dev, "%s-interface%d", parent->base.name,
+ intf->base.id);
+
+ ret = adf_obj_sysfs_init(&intf->base, &parent->base.dev);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(adf_interface_attrs); i++) {
+ ret = device_create_file(&intf->base.dev,
+ &adf_interface_attrs[i]);
+ if (ret < 0) {
+ dev_err(&intf->base.dev, "creating sysfs attribute %s failed: %d\n",
+ adf_interface_attrs[i].attr.name, ret);
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ for (j = 0; j < i; j++)
+ device_remove_file(&intf->base.dev, &adf_interface_attrs[j]);
+ return ret;
+}
+
+int adf_overlay_engine_sysfs_init(struct adf_overlay_engine *eng)
+{
+ struct adf_device *parent = adf_overlay_engine_parent(eng);
+
+ eng->base.dev.type = &adf_overlay_engine_type;
+ dev_set_name(&eng->base.dev, "%s-overlay-engine%d", parent->base.name,
+ eng->base.id);
+
+ return adf_obj_sysfs_init(&eng->base, &parent->base.dev);
+}
+
+struct adf_obj *adf_obj_sysfs_find(int minor)
+{
+ return idr_find(&adf_minors, minor);
+}
+
+void adf_obj_sysfs_destroy(struct adf_obj *obj)
+{
+ idr_remove(&adf_minors, obj->minor);
+ device_unregister(&obj->dev);
+}
+
+void adf_device_sysfs_destroy(struct adf_device *dev)
+{
+ adf_obj_sysfs_destroy(&dev->base);
+}
+
+void adf_interface_sysfs_destroy(struct adf_interface *intf)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(adf_interface_attrs); i++)
+ device_remove_file(&intf->base.dev, &adf_interface_attrs[i]);
+ adf_obj_sysfs_destroy(&intf->base);
+}
+
+void adf_overlay_engine_sysfs_destroy(struct adf_overlay_engine *eng)
+{
+ adf_obj_sysfs_destroy(&eng->base);
+}
+
+int adf_sysfs_init(void)
+{
+ struct class *class;
+ int ret;
+
+ class = class_create(THIS_MODULE, "adf");
+ if (IS_ERR(class)) {
+ ret = PTR_ERR(class);
+ pr_err("%s: creating class failed: %d\n", __func__, ret);
+ return ret;
+ }
+
+ ret = register_chrdev(0, "adf", &adf_fops);
+ if (ret < 0) {
+ pr_err("%s: registering device failed: %d\n", __func__, ret);
+ goto err_chrdev;
+ }
+
+ adf_class = class;
+ adf_major = ret;
+ return 0;
+
+err_chrdev:
+ class_destroy(class);
+ return ret;
+}
+
+void adf_sysfs_destroy(void)
+{
+ idr_destroy(&adf_minors);
+ class_destroy(adf_class);
+}
diff --git a/drivers/video/adf/adf_sysfs.h b/drivers/video/adf/adf_sysfs.h
new file mode 100644
index 000000000000..0613ac364f8d
--- /dev/null
+++ b/drivers/video/adf/adf_sysfs.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __VIDEO_ADF_ADF_SYSFS_H
+#define __VIDEO_ADF_ADF_SYSFS_H
+
+struct adf_device;
+struct adf_interface;
+struct adf_overlay_engine;
+
+int adf_device_sysfs_init(struct adf_device *dev);
+void adf_device_sysfs_destroy(struct adf_device *dev);
+int adf_interface_sysfs_init(struct adf_interface *intf);
+void adf_interface_sysfs_destroy(struct adf_interface *intf);
+int adf_overlay_engine_sysfs_init(struct adf_overlay_engine *eng);
+void adf_overlay_engine_sysfs_destroy(struct adf_overlay_engine *eng);
+struct adf_obj *adf_obj_sysfs_find(int minor);
+
+int adf_sysfs_init(void);
+void adf_sysfs_destroy(void);
+
+#endif /* __VIDEO_ADF_ADF_SYSFS_H */
diff --git a/drivers/video/adf/adf_trace.h b/drivers/video/adf/adf_trace.h
new file mode 100644
index 000000000000..3cb2a84d728c
--- /dev/null
+++ b/drivers/video/adf/adf_trace.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM adf
+
+#if !defined(__VIDEO_ADF_ADF_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VIDEO_ADF_ADF_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <video/adf.h>
+
+TRACE_EVENT(adf_event,
+ TP_PROTO(struct adf_obj *obj, enum adf_event_type type),
+ TP_ARGS(obj, type),
+
+ TP_STRUCT__entry(
+ __string(name, obj->name)
+ __field(enum adf_event_type, type)
+ __array(char, type_str, 32)
+ ),
+ TP_fast_assign(
+ __assign_str(name, obj->name);
+ __entry->type = type;
+ strlcpy(__entry->type_str, adf_event_type_str(obj, type),
+ sizeof(__entry->type_str));
+ ),
+ TP_printk("obj=%s type=%u (%s)",
+ __get_str(name),
+ __entry->type,
+ __entry->type_str)
+);
+
+TRACE_EVENT(adf_event_enable,
+ TP_PROTO(struct adf_obj *obj, enum adf_event_type type),
+ TP_ARGS(obj, type),
+
+ TP_STRUCT__entry(
+ __string(name, obj->name)
+ __field(enum adf_event_type, type)
+ __array(char, type_str, 32)
+ ),
+ TP_fast_assign(
+ __assign_str(name, obj->name);
+ __entry->type = type;
+ strlcpy(__entry->type_str, adf_event_type_str(obj, type),
+ sizeof(__entry->type_str));
+ ),
+ TP_printk("obj=%s type=%u (%s)",
+ __get_str(name),
+ __entry->type,
+ __entry->type_str)
+);
+
+TRACE_EVENT(adf_event_disable,
+ TP_PROTO(struct adf_obj *obj, enum adf_event_type type),
+ TP_ARGS(obj, type),
+
+ TP_STRUCT__entry(
+ __string(name, obj->name)
+ __field(enum adf_event_type, type)
+ __array(char, type_str, 32)
+ ),
+ TP_fast_assign(
+ __assign_str(name, obj->name);
+ __entry->type = type;
+ strlcpy(__entry->type_str, adf_event_type_str(obj, type),
+ sizeof(__entry->type_str));
+ ),
+ TP_printk("obj=%s type=%u (%s)",
+ __get_str(name),
+ __entry->type,
+ __entry->type_str)
+);
+
+#endif /* __VIDEO_ADF_ADF_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE adf_trace
+#include <trace/define_trace.h>
diff --git a/drivers/w1/masters/ds2482.c b/drivers/w1/masters/ds2482.c
index e033491fe308..ab29939ea4cd 100644
--- a/drivers/w1/masters/ds2482.c
+++ b/drivers/w1/masters/ds2482.c
@@ -18,6 +18,8 @@
#include <linux/slab.h>
#include <linux/i2c.h>
#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/platform_data/ds2482.h>
#include <asm/delay.h>
#include "../w1.h"
@@ -84,7 +86,8 @@ static const u8 ds2482_chan_rd[8] =
static int ds2482_probe(struct i2c_client *client,
const struct i2c_device_id *id);
static int ds2482_remove(struct i2c_client *client);
-
+static int ds2482_suspend(struct device *dev);
+static int ds2482_resume(struct device *dev);
/**
* Driver data (common to all clients)
@@ -94,10 +97,16 @@ static const struct i2c_device_id ds2482_id[] = {
{ }
};
+static const struct dev_pm_ops ds2482_pm_ops = {
+ .suspend = ds2482_suspend,
+ .resume = ds2482_resume,
+};
+
static struct i2c_driver ds2482_driver = {
.driver = {
.owner = THIS_MODULE,
.name = "ds2482",
+ .pm = &ds2482_pm_ops,
},
.probe = ds2482_probe,
.remove = ds2482_remove,
@@ -119,6 +128,7 @@ struct ds2482_w1_chan {
struct ds2482_data {
struct i2c_client *client;
struct mutex access_lock;
+ int slpz_gpio;
/* 1-wire interface(s) */
int w1_count; /* 1 or 8 */
@@ -444,11 +454,31 @@ static u8 ds2482_w1_set_pullup(void *data, int delay)
return retval;
}
+static int ds2482_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ds2482_data *data = i2c_get_clientdata(client);
+
+ if (data->slpz_gpio >= 0)
+ gpio_set_value(data->slpz_gpio, 0);
+ return 0;
+}
+
+static int ds2482_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ds2482_data *data = i2c_get_clientdata(client);
+
+ if (data->slpz_gpio >= 0)
+ gpio_set_value(data->slpz_gpio, 1);
+ return 0;
+}
static int ds2482_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
struct ds2482_data *data;
+ struct ds2482_platform_data *pdata;
int err = -ENODEV;
int temp1;
int idx;
@@ -515,6 +545,16 @@ static int ds2482_probe(struct i2c_client *client,
}
}
+ pdata = client->dev.platform_data;
+ data->slpz_gpio = pdata ? pdata->slpz_gpio : -1;
+
+ if (data->slpz_gpio >= 0) {
+ err = gpio_request_one(data->slpz_gpio, GPIOF_OUT_INIT_HIGH,
+ "ds2482.slpz");
+ if (err < 0)
+ goto exit_w1_remove;
+ }
+
return 0;
exit_w1_remove:
@@ -539,6 +579,11 @@ static int ds2482_remove(struct i2c_client *client)
w1_remove_master_device(&data->w1_ch[idx].w1_bm);
}
+ if (data->slpz_gpio >= 0) {
+ gpio_set_value(data->slpz_gpio, 0);
+ gpio_free(data->slpz_gpio);
+ }
+
/* Free the memory */
kfree(data);
return 0;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 0ff4bae2c2a2..479ef5a9c5d3 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -213,6 +213,7 @@ static int parse_options(struct super_block *sb, char *options)
static int adfs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_NODIRATIME;
return parse_options(sb, data);
}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 45161a832bbc..01c6e07acbff 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -549,6 +549,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
+ sync_filesystem(sb);
*flags |= MS_NODIRATIME;
memcpy(volume, sbi->s_volume, 32);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index f95dddced968..7192a7e99373 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -908,6 +908,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
static int
befs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
if (!(*flags & MS_RDONLY))
return -EINVAL;
return 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f0857e092a3c..461731c7fa3a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1239,6 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
unsigned int old_metadata_ratio = fs_info->metadata_ratio;
int ret;
+ sync_filesystem(sb);
btrfs_remount_prepare(fs_info);
ret = btrfs_parse_options(root, data);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3752b9f6d9e4..0d7daf714c4b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -521,6 +521,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
static int cifs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_NODIRATIME;
return 0;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bfbf4700d160..b70aa7c91394 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
int error;
- error = wait_event_freezekillable(server->response_q,
+ error = wait_event_freezekillable_unsafe(server->response_q,
midQ->mid_state != MID_REQUEST_SUBMITTED);
if (error < 0)
return -ERESTARTSYS;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index b7d3a05c062c..fc66861b3598 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -391,8 +391,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
if (!host_file->f_op)
return -ENOTDIR;
- if (host_file->f_op->readdir)
- {
+ if (host_file->f_op->readdir) {
/* potemkin case: we were handed a directory inode.
* We can't use vfs_readdir because we have to keep the file
* position in sync between the coda_file and the host_file.
@@ -410,8 +409,20 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
coda_file->f_pos = host_file->f_pos;
mutex_unlock(&host_inode->i_mutex);
- }
- else /* Venus: we must read Venus dirents from a file */
+ } else if (host_file->f_op->iterate) {
+ struct inode *host_inode = file_inode(host_file);
+ struct dir_context *ctx = buf;
+
+ mutex_lock(&host_inode->i_mutex);
+ ret = -ENOENT;
+ if (!IS_DEADDIR(host_inode)) {
+ ret = host_file->f_op->iterate(host_file, ctx);
+ file_accessed(host_file);
+ }
+ mutex_unlock(&host_inode->i_mutex);
+
+ coda_file->f_pos = ctx->pos;
+ } else /* Venus: we must read Venus dirents from a file */
ret = coda_venus_readdir(coda_file, buf, filldir);
return ret;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 4dcc0d81a7aa..0aa4c4d75ec6 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -96,6 +96,7 @@ void coda_destroy_inodecache(void)
static int coda_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_NOATIME;
return 0;
}
diff --git a/fs/compat.c b/fs/compat.c
index fc3b55dce184..6af20de2c1a3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -832,6 +832,7 @@ struct compat_old_linux_dirent {
};
struct compat_readdir_callback {
+ struct dir_context ctx;
struct compat_old_linux_dirent __user *dirent;
int result;
};
@@ -873,15 +874,15 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
{
int error;
struct fd f = fdget(fd);
- struct compat_readdir_callback buf;
+ struct compat_readdir_callback buf = {
+ .ctx.actor = compat_fillonedir,
+ .dirent = dirent
+ };
if (!f.file)
return -EBADF;
- buf.result = 0;
- buf.dirent = dirent;
-
- error = vfs_readdir(f.file, compat_fillonedir, &buf);
+ error = iterate_dir(f.file, &buf.ctx);
if (buf.result)
error = buf.result;
@@ -897,6 +898,7 @@ struct compat_linux_dirent {
};
struct compat_getdents_callback {
+ struct dir_context ctx;
struct compat_linux_dirent __user *current_dir;
struct compat_linux_dirent __user *previous;
int count;
@@ -951,7 +953,11 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
{
struct fd f;
struct compat_linux_dirent __user * lastdirent;
- struct compat_getdents_callback buf;
+ struct compat_getdents_callback buf = {
+ .ctx.actor = compat_filldir,
+ .current_dir = dirent,
+ .count = count
+ };
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
@@ -961,17 +967,12 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
if (!f.file)
return -EBADF;
- buf.current_dir = dirent;
- buf.previous = NULL;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(f.file, compat_filldir, &buf);
+ error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- if (put_user(f.file->f_pos, &lastdirent->d_off))
+ if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
@@ -983,6 +984,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64
struct compat_getdents_callback64 {
+ struct dir_context ctx;
struct linux_dirent64 __user *current_dir;
struct linux_dirent64 __user *previous;
int count;
@@ -1036,7 +1038,11 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
{
struct fd f;
struct linux_dirent64 __user * lastdirent;
- struct compat_getdents_callback64 buf;
+ struct compat_getdents_callback64 buf = {
+ .ctx.actor = compat_filldir64,
+ .current_dir = dirent,
+ .count = count
+ };
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
@@ -1046,17 +1052,12 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
if (!f.file)
return -EBADF;
- buf.current_dir = dirent;
- buf.previous = NULL;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(f.file, compat_filldir64, &buf);
+ error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- typeof(lastdirent->d_off) d_off = f.file->f_pos;
+ typeof(lastdirent->d_off) d_off = buf.ctx.pos;
if (__put_user_unaligned(d_off, &lastdirent->d_off))
error = -EFAULT;
else
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 35b1c7bd18b7..c0148585670d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -227,6 +227,7 @@ static void cramfs_put_super(struct super_block *sb)
static int cramfs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_RDONLY;
return 0;
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 26d7fff8d78e..84378f0ffd6d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data)
int err;
struct debugfs_fs_info *fsi = sb->s_fs_info;
+ sync_filesystem(sb);
err = debugfs_parse_options(data, &fsi->mount_opts);
if (err)
goto fail;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index a726b9f29cb7..c71038079b47 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
+ sync_filesystem(sb);
err = parse_mount_options(data, PARSE_REMOUNT, opts);
/*
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9ff3664bb3ea..4912bf47226c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -68,6 +68,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
}
struct ecryptfs_getdents_callback {
+ struct dir_context ctx;
void *dirent;
struct dentry *dentry;
filldir_t filldir;
@@ -115,18 +116,19 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
int rc;
struct file *lower_file;
struct inode *inode;
- struct ecryptfs_getdents_callback buf;
+ struct ecryptfs_getdents_callback buf = {
+ .dirent = dirent,
+ .dentry = file->f_path.dentry,
+ .filldir = filldir,
+ .filldir_called = 0,
+ .entries_written = 0,
+ .ctx.actor = ecryptfs_filldir
+ };
lower_file = ecryptfs_file_to_lower(file);
lower_file->f_pos = file->f_pos;
inode = file_inode(file);
- memset(&buf, 0, sizeof(buf));
- buf.dirent = dirent;
- buf.dentry = file->f_path.dentry;
- buf.filldir = filldir;
- buf.filldir_called = 0;
- buf.entries_written = 0;
- rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf);
+ rc = iterate_dir(lower_file, &buf.ctx);
file->f_pos = lower_file->f_pos;
if (rc < 0)
goto out;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index c6f57a74a559..4709692cd7a0 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -113,6 +113,7 @@ static void efs_put_super(struct super_block *s)
static int efs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_RDONLY;
return 0;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index deecc7294a67..db10e00c971a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -34,6 +34,7 @@
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
#include <linux/device.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/mman.h>
@@ -1602,7 +1603,8 @@ fetch_events:
}
spin_unlock_irqrestore(&ep->lock, flags);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+ if (!freezable_schedule_hrtimeout_range(to, slack,
+ HRTIMER_MODE_ABS))
timed_out = 1;
spin_lock_irqsave(&ep->lock, flags);
@@ -1817,8 +1819,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
- if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
- epds.events &= ~EPOLLWAKEUP;
+ ep_take_care_of_epollwakeup(&epds);
/*
* We have to check that the file structure underneath the file descriptor
diff --git a/fs/exec.c b/fs/exec.c
index dd6aa61c8548..cb7f31c71c6b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds);
/*
* determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against
- * PTRACE_ATTACH
+ * PTRACE_ATTACH or seccomp thread-sync
*/
static int check_unsafe_exec(struct linux_binprm *bprm)
{
@@ -1239,7 +1239,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
* This isn't strictly necessary, but it makes it harder for LSMs to
* mess up.
*/
- if (current->no_new_privs)
+ if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
n_fs = 1;
@@ -1286,7 +1286,7 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !current->no_new_privs &&
+ !task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b4eec4c9a790..dc4784b52d92 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -212,6 +212,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
}
struct getdents_callback {
+ struct dir_context ctx;
char *name; /* name that was found. It already points to a
buffer NAME_MAX+1 is size */
unsigned long ino; /* the inum we are looking for */
@@ -254,7 +255,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
struct inode *dir = path->dentry->d_inode;
int error;
struct file *file;
- struct getdents_callback buffer;
+ struct getdents_callback buffer = {
+ .ctx.actor = filldir_one,
+ .name = name,
+ .ino = child->d_inode->i_ino
+ };
error = -ENOTDIR;
if (!dir || !S_ISDIR(dir->i_mode))
@@ -271,17 +276,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
goto out;
error = -EINVAL;
- if (!file->f_op->readdir)
+ if (!file->f_op->readdir && !file->f_op->iterate)
goto out_close;
- buffer.name = name;
- buffer.ino = child->d_inode->i_ino;
- buffer.found = 0;
buffer.sequence = 0;
while (1) {
int old_seq = buffer.sequence;
- error = vfs_readdir(file, filldir_one, &buffer);
+ error = iterate_dir(file, &buffer.ctx);
if (buffer.found) {
error = 0;
break;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 20d6697bd638..d260115c0350 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
unsigned long old_sb_flags;
int err;
+ sync_filesystem(sb);
spin_lock(&sbi->s_lock);
/* Store the old options */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 882d4bdfd428..b1e30be1945b 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2588,6 +2588,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
int i;
#endif
+ sync_filesystem(sb);
+
/* Store the original options */
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2a71466b0115..e5a77bb30ba3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2063,7 +2063,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
+ unsigned long blkdev_flags);
/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d4fd81c44f55..8855db670425 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -605,11 +605,13 @@ resizefs_out:
return err;
}
+ case FIDTRIM:
case FITRIM:
{
struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
+ int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -617,13 +619,15 @@ resizefs_out:
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
+ if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
+ return -EOPNOTSUPP;
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
range.minlen = max((unsigned int)range.minlen,
q->limits.discard_granularity);
- ret = ext4_trim_fs(sb, &range);
+ ret = ext4_trim_fs(sb, &range, flags);
if (ret < 0)
return ret;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index df5050f9080b..aed1a67ab3a2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2707,7 +2707,8 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ unsigned long flags)
{
ext4_fsblk_t discard_block;
@@ -2716,7 +2717,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
}
/*
@@ -2738,7 +2739,7 @@ static void ext4_free_data_callback(struct super_block *sb,
if (test_opt(sb, DISCARD)) {
err = ext4_issue_discard(sb, entry->efd_group,
entry->efd_start_cluster,
- entry->efd_count);
+ entry->efd_count, 0);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%d failed"
@@ -4790,7 +4791,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count);
+ err = ext4_issue_discard(sb, block_group, bit, count,
+ 0);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -4997,13 +4999,15 @@ error_return:
* @count: number of blocks to TRIM
* @group: alloc. group we are working with
* @e4b: ext4 buddy for the group
+ * @blkdev_flags: flags for the block device
*
* Trim "count" blocks starting at "start" in the "group". To assure that no
* one will allocate those blocks, mark it as used in buddy bitmap. This must
* be called with under the group lock.
*/
static int ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
+ ext4_group_t group, struct ext4_buddy *e4b,
+ unsigned long blkdev_flags)
{
struct ext4_free_extent ex;
int ret = 0;
@@ -5022,7 +5026,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
*/
mb_mark_used(e4b, &ex);
ext4_unlock_group(sb, group);
- ret = ext4_issue_discard(sb, group, start, count);
+ ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
return ret;
@@ -5035,6 +5039,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
* @start: first group block to examine
* @max: last group block to examine
* @minblocks: minimum extent block count
+ * @blkdev_flags: flags for the block device
*
* ext4_trim_all_free walks through group's buddy bitmap searching for free
* extents. When the free block is found, ext4_trim_extent is called to TRIM
@@ -5049,7 +5054,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_grpblk_t start, ext4_grpblk_t max,
- ext4_grpblk_t minblocks)
+ ext4_grpblk_t minblocks, unsigned long blkdev_flags)
{
void *bitmap;
ext4_grpblk_t next, count = 0, free_count = 0;
@@ -5082,7 +5087,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
if ((next - start) >= minblocks) {
ret = ext4_trim_extent(sb, start,
- next - start, group, &e4b);
+ next - start, group, &e4b,
+ blkdev_flags);
if (ret && ret != -EOPNOTSUPP)
break;
ret = 0;
@@ -5124,6 +5130,7 @@ out:
* ext4_trim_fs() -- trim ioctl handle function
* @sb: superblock for filesystem
* @range: fstrim_range structure
+ * @blkdev_flags: flags for the block device
*
* start: First Byte to trim
* len: number of Bytes to trim from start
@@ -5132,7 +5139,8 @@ out:
* start to start+len. For each such a group ext4_trim_all_free function
* is invoked to trim all free space.
*/
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+ unsigned long blkdev_flags)
{
struct ext4_group_info *grp;
ext4_group_t group, first_group, last_group;
@@ -5188,7 +5196,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen);
+ end, minlen, blkdev_flags);
if (cnt < 0) {
ret = cnt;
break;
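
For context, a minimal userspace sketch (editorial addition, not part of the commit) of how this trim path is driven: FITRIM takes a struct fstrim_range describing the region to trim, and ext4 raises range.minlen to the device's discard granularity as shown in the ioctl.c hunk above. The FIDTRIM variant added by this patch is invoked the same way (its definition lives elsewhere in the patch set) but is refused unless the queue supports secure discard; the program below exercises only the standard FITRIM, which requires CAP_SYS_ADMIN.

#include <fcntl.h>
#include <limits.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct fstrim_range range;
	int fd, ret;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&range, 0, sizeof(range));
	range.start = 0;		/* first byte to trim */
	range.len = ULLONG_MAX;		/* trim the whole filesystem */
	range.minlen = 0;		/* ext4 clamps this to discard_granularity */

	ret = ioctl(fd, FITRIM, &range);
	if (ret < 0)
		perror("FITRIM");
	else
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);

	close(fd);
	return ret < 0 ? 1 : 0;
}
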
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 21a0b43a7d31..913a6b279220 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4622,6 +4622,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
#endif
char *orig_data = kstrdup(data, GFP_KERNEL);
+ sync_filesystem(sb);
+
/* Store the original options */
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8555f7df82c7..03ab8b830940 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -239,7 +239,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
seq_puts(seq, ",disable_ext_identify");
- seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 7a6f02caf286..4b775e606fc8 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -776,6 +776,13 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
return ret;
}
+static int fat_ioctl_volume_id(struct inode *dir)
+{
+ struct super_block *sb = dir->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ return sbi->vol_id;
+}
+
static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -792,6 +799,8 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
short_only = 0;
both = 1;
break;
+ case VFAT_IOCTL_GET_VOLUME_ID:
+ return fat_ioctl_volume_id(inode);
default:
return fat_generic_ioctl(filp, cmd, arg);
}
@@ -832,6 +841,8 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
short_only = 0;
both = 1;
break;
+ case VFAT_IOCTL_GET_VOLUME_ID:
+ return fat_ioctl_volume_id(inode);
default:
return fat_generic_ioctl(filp, cmd, (unsigned long)arg);
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 21664fcf3616..8180ecef59fc 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -86,6 +86,7 @@ struct msdos_sb_info {
const void *dir_ops; /* Opaque; default directory operations */
int dir_per_block; /* dir entries per block */
int dir_per_block_bits; /* log2(dir_per_block) */
+ unsigned long vol_id; /* volume ID */
int fatent_shift;
struct fatent_operations *fatent_ops;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5d4513cb1b3c..ebdc6656c067 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -632,6 +632,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data)
struct msdos_sb_info *sbi = MSDOS_SB(sb);
*flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
+ sync_filesystem(sb);
+
/* make sure we update state on remount. */
new_rdonly = *flags & MS_RDONLY;
if (new_rdonly != (sb->s_flags & MS_RDONLY)) {
@@ -1252,6 +1254,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
struct inode *fsinfo_inode = NULL;
struct buffer_head *bh;
struct fat_boot_sector *b;
+ struct fat_boot_bsx *bsx;
struct msdos_sb_info *sbi;
u16 logical_sector_size;
u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
@@ -1398,6 +1401,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
goto out_fail;
}
+ bsx = (struct fat_boot_bsx *)(bh->b_data + FAT32_BSX_OFFSET);
+
fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
if (!IS_FSINFO(fsinfo)) {
fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
@@ -1413,8 +1418,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
}
brelse(fsinfo_bh);
+ } else {
+ bsx = (struct fat_boot_bsx *)(bh->b_data + FAT16_BSX_OFFSET);
}
+ /* interpret volume ID as a little endian 32 bit integer */
+ sbi->vol_id = (((u32)bsx->vol_id[0]) | ((u32)bsx->vol_id[1] << 8) |
+ ((u32)bsx->vol_id[2] << 16) | ((u32)bsx->vol_id[3] << 24));
+
sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry);
sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index e37eb274e492..7ca8c75d50d3 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp)
static int vxfs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_RDONLY;
return 0;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b44306378193..62d426d1d5e3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1170,7 +1170,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if ((inode->i_state & flags) == flags)
return;
- if (unlikely(block_dump))
+ if (unlikely(block_dump > 1))
block_dump___mark_inode_dirty(inode);
spin_lock(&inode->i_lock);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b535008b6c4c..f4e875a29e7a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -20,6 +20,7 @@
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/aio.h>
+#include <linux/freezer.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -464,7 +465,10 @@ __acquires(fc->lock)
* Wait it out.
*/
spin_unlock(&fc->lock);
- wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
+
+ while (req->state != FUSE_REQ_FINISHED)
+ wait_event_freezable(req->waitq,
+ req->state == FUSE_REQ_FINISHED);
spin_lock(&fc->lock);
if (!req->aborted)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 39a986e1da9e..8aabbe2a3989 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode)
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
if (*flags & MS_MANDLOCK)
return -EINVAL;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9973df4ff565..7102c7a5af4d 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -88,7 +88,10 @@ static int gfs2_get_name(struct dentry *parent, char *name,
struct inode *dir = parent->d_inode;
struct inode *inode = child->d_inode;
struct gfs2_inode *dip, *ip;
- struct get_name_filldir gnfd;
+ struct get_name_filldir gnfd = {
+ .ctx.actor = get_name_filldir,
+ .name = name
+ };
struct gfs2_holder gh;
u64 offset = 0;
int error;
@@ -106,7 +109,6 @@ static int gfs2_get_name(struct dentry *parent, char *name,
*name = 0;
gnfd.inum.no_addr = ip->i_no_addr;
gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
- gnfd.name = name;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
if (error)
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e5639dec66c4..db7fff5a29c1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1142,6 +1142,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
struct gfs2_tune *gt = &sdp->sd_tune;
int error;
+ sync_filesystem(sb);
+
spin_lock(&gt->gt_spin);
args.ar_commit = gt->gt_logd_secs;
args.ar_quota_quantum = gt->gt_quota_quantum;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 2d2039e754cd..eee7206c38d1 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int hfs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_NODIRATIME;
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4c4d142cf890..1b9414f701e6 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
if (!(*flags & MS_RDONLY)) {
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 962e90c37aec..d37f6ff8e07f 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -395,6 +395,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
struct hpfs_sb_info *sbi = hpfs_sb(s);
char *new_opts = kstrdup(data, GFP_KERNEL);
+ sync_filesystem(s);
+
*flags |= MS_NOATIME;
hpfs_lock(s);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0defb1cc2a35..0918f0e2e266 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data)
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
int err;
+ sync_filesystem(sb);
err = jffs2_parse_options(c, data);
if (err)
return -EINVAL;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 788e0a9c1fb0..b7486dafa1cd 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -413,6 +413,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
int flag = JFS_SBI(sb)->flag;
int ret;
+ sync_filesystem(sb);
if (!parse_options(data, sb, &newLVSize, &flag)) {
return -EINVAL;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index df122496f328..a54d08865e59 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
struct minix_sb_info * sbi = minix_sb(sb);
struct minix_super_block * ms;
+ sync_filesystem(sb);
ms = sbi->s_ms;
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 26910c8154da..3f54348b926b 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -99,6 +99,7 @@ static void destroy_inodecache(void)
static int ncp_remount(struct super_block *sb, int *flags, char* data)
{
+ sync_filesystem(sb);
*flags |= MS_NODIRATIME;
return 0;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e9be01b2cc5a..e2bb3012d025 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- freezable_schedule();
+ freezable_schedule_unsafe();
return 0;
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 43ea96ced28c..ce90eb4775c2 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+ freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 20ebcfa3c92e..86390c3a95db 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -268,7 +268,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- freezable_schedule_timeout_killable(*timeout);
+ freezable_schedule_timeout_killable_unsafe(*timeout);
if (fatal_signal_pending(current))
res = -ERESTARTSYS;
*timeout <<= 1;
@@ -4558,7 +4558,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- freezable_schedule_timeout_killable(timeout);
+ freezable_schedule_timeout_killable_unsafe(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2d7525fbcf25..d85c1b819145 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2133,6 +2133,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
u32 nfsvers = nfss->nfs_client->rpc_ops->version;
+ sync_filesystem(sb);
+
/*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 4e9a21db867a..105a3b080d12 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -240,11 +240,16 @@ struct name_list {
struct list_head list;
};
+struct nfs4_dir_ctx {
+ struct dir_context ctx;
+ struct list_head names;
+};
+
static int
nfsd4_build_namelist(void *arg, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
- struct list_head *names = arg;
+ struct nfs4_dir_ctx *ctx = arg;
struct name_list *entry;
if (namlen != HEXDIR_LEN - 1)
@@ -254,7 +259,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen,
return -ENOMEM;
memcpy(entry->name, name, HEXDIR_LEN - 1);
entry->name[HEXDIR_LEN - 1] = '\0';
- list_add(&entry->list, names);
+ list_add(&entry->list, &ctx->names);
return 0;
}
@@ -263,7 +268,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
{
const struct cred *original_cred;
struct dentry *dir = nn->rec_file->f_path.dentry;
- LIST_HEAD(names);
+ struct nfs4_dir_ctx ctx = {
+ .ctx.actor = nfsd4_build_namelist,
+ .names = LIST_HEAD_INIT(ctx.names)
+ };
int status;
status = nfs4_save_creds(&original_cred);
@@ -276,11 +284,11 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
return status;
}
- status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names);
+ status = iterate_dir(nn->rec_file, &ctx.ctx);
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- while (!list_empty(&names)) {
+ while (!list_empty(&ctx.names)) {
struct name_list *entry;
- entry = list_entry(names.next, struct name_list, list);
+ entry = list_entry(ctx.names.next, struct name_list, list);
if (!status) {
struct dentry *dentry;
dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 81325ba8660a..11224fa5ff25 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1950,6 +1950,7 @@ struct buffered_dirent {
};
struct readdir_data {
+ struct dir_context ctx;
char *dirent;
size_t used;
int full;
@@ -1981,13 +1982,15 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
struct readdir_cd *cdp, loff_t *offsetp)
{
- struct readdir_data buf;
struct buffered_dirent *de;
int host_err;
int size;
loff_t offset;
+ struct readdir_data buf = {
+ .ctx.actor = nfsd_buffered_filldir,
+ .dirent = (void *)__get_free_page(GFP_KERNEL)
+ };
- buf.dirent = (void *)__get_free_page(GFP_KERNEL);
if (!buf.dirent)
return nfserrno(-ENOMEM);
@@ -2001,7 +2004,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
buf.used = 0;
buf.full = 0;
- host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
+ host_err = iterate_dir(file, &buf.ctx);
if (buf.full)
host_err = 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index c7d1f9f18b09..4b0a8d4a8345 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1114,6 +1114,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
unsigned long old_mount_opt;
int err;
+ sync_filesystem(sb);
old_sb_flags = sb->s_flags;
old_mount_opt = nilfs->ns_mount_opt;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 82650d52d916..bd5610d48242 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -468,6 +468,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
ntfs_debug("Entering with remount options string: %s", opt);
+ sync_filesystem(sb);
+
#ifndef NTFS_RW
/* For read-only compiled driver, enforce read-only flag. */
*flags |= MS_RDONLY;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 01b85165552b..616bde667d8d 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -632,6 +632,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
struct ocfs2_super *osb = OCFS2_SB(sb);
u32 tmp;
+ sync_filesystem(sb);
+
if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
!ocfs2_check_set_options(sb, &parsed_options)) {
ret = -EINVAL;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 75885ffde44e..f4026aba26f3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -375,6 +375,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
static int openprom_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_NOATIME;
return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8fc784aef0b8..809d92a68a50 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2749,8 +2749,8 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
- REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+ REG("oom_adj", S_IRUSR, proc_oom_adj_operations),
+ REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
@@ -3106,8 +3106,8 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
- REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+ REG("oom_adj", S_IRUSR, proc_oom_adj_operations),
+ REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9459710c55ae..8ead812d9897 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
int proc_remount(struct super_block *sb, int *flags, char *data)
{
struct pid_namespace *pid = sb->s_fs_info;
+
+ sync_filesystem(sb);
return !proc_parse_options(data, pid);
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9f285fb9bab3..0e2899492133 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -134,6 +134,56 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
+static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma)
+{
+ const char __user *name = vma_get_anon_name(vma);
+ struct mm_struct *mm = vma->vm_mm;
+
+ unsigned long page_start_vaddr;
+ unsigned long page_offset;
+ unsigned long num_pages;
+ unsigned long max_len = NAME_MAX;
+ int i;
+
+ page_start_vaddr = (unsigned long)name & PAGE_MASK;
+ page_offset = (unsigned long)name - page_start_vaddr;
+ num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE);
+
+ seq_puts(m, "[anon:");
+
+ for (i = 0; i < num_pages; i++) {
+ int len;
+ int write_len;
+ const char *kaddr;
+ long pages_pinned;
+ struct page *page;
+
+ pages_pinned = get_user_pages(current, mm, page_start_vaddr,
+ 1, 0, 0, &page, NULL);
+ if (pages_pinned < 1) {
+ seq_puts(m, "<fault>]");
+ return;
+ }
+
+ kaddr = (const char *)kmap(page);
+ len = min(max_len, PAGE_SIZE - page_offset);
+ write_len = strnlen(kaddr + page_offset, len);
+ seq_write(m, kaddr + page_offset, write_len);
+ kunmap(page);
+ put_page(page);
+
+ /* if strnlen hit a null terminator then we're done */
+ if (write_len != len)
+ break;
+
+ max_len -= len;
+ page_offset = 0;
+ page_start_vaddr += PAGE_SIZE;
+ }
+
+ seq_putc(m, ']');
+}
+
static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
if (vma && vma != priv->tail_vma) {
@@ -335,6 +385,12 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
pad_len_spaces(m, len);
seq_printf(m, "[stack:%d]", tid);
}
+ goto done;
+ }
+
+ if (vma_get_anon_name(vma)) {
+ pad_len_spaces(m, len);
+ seq_print_vma_name(m, vma);
}
}
@@ -634,6 +690,12 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
show_smap_vma_flags(m, vma);
+ if (vma_get_anon_name(vma)) {
+ seq_puts(m, "Name: ");
+ seq_print_vma_name(m, vma);
+ seq_putc(m, '\n');
+ }
+
if (m->count < m->size) /* vma is copied successfully */
m->version = (vma != get_gate_vma(task->mm))
? vma->vm_start : 0;
@@ -722,6 +784,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
#define CLEAR_REFS_ALL 1
#define CLEAR_REFS_ANON 2
#define CLEAR_REFS_MAPPED 3
+#define CLEAR_REFS_MM_HIWATER_RSS 5
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
@@ -741,7 +804,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
rv = kstrtoint(strstrip(buffer), 10, &type);
if (rv < 0)
return rv;
- if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
+ if ((type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) &&
+ type != CLEAR_REFS_MM_HIWATER_RSS)
return -EINVAL;
task = get_proc_task(file_inode(file));
if (!task)
@@ -752,6 +816,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
.pmd_entry = clear_refs_pte_range,
.mm = mm,
};
+
+ if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+ /*
+ * Writing 5 to /proc/pid/clear_refs resets the peak
+ * resident set size to this mm's current rss value.
+ */
+ down_write(&mm->mmap_sem);
+ reset_mm_hiwater_rss(mm);
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
+
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
@@ -775,6 +851,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
}
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
+out_mm:
mmput(mm);
}
put_task_struct(task);
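The task_mmu.c changes above add two user-visible features: anonymous VMAs may carry a name that shows up as "[anon:...]" in /proc/pid/maps and as a "Name:" line in smaps, and writing 5 to /proc/pid/clear_refs resets the peak RSS (VmHWM) to the current RSS. A minimal sketch exercising both from userspace; the prctl that actually names a VMA is defined elsewhere in the patch set, so this only reads back what the kernel exports:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f;
	char line[512];

	/* Reset the peak resident set size to the current RSS. */
	f = fopen("/proc/self/clear_refs", "w");
	if (f) {
		fputs("5\n", f);
		fclose(f);
	}

	/* List named anonymous mappings, if any have been labelled. */
	f = fopen("/proc/self/maps", "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[anon:"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}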
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index ca71db69da07..da12fd4e3d47 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -19,6 +19,16 @@ config PSTORE_CONSOLE
When the option is enabled, pstore will log all kernel
messages, even if no oops or panic happened.
+config PSTORE_PMSG
+ bool "Log user space messages"
+ depends on PSTORE
+ help
+ When the option is enabled, pstore will export a character
+ interface /dev/pmsg0 to log user space messages. On reboot
+ data can be retrieved from /sys/fs/pstore/pmsg-ramoops-[ID].
+
+ If unsure, say N.
+
config PSTORE_FTRACE
bool "Persistent function tracer"
depends on PSTORE
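The new PSTORE_PMSG option above exposes a write-only device node for user space logging. A minimal sketch of how a program might append a record that survives a reboot; the /dev/pmsg0 node name comes from the help text, and the record reappears under /sys/fs/pstore/pmsg-ramoops-[ID] after the next boot:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "last shutdown: user requested\n";
	/* /dev/pmsg0 is write-only (mode 0220); reads are not supported. */
	int fd = open("/dev/pmsg0", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, msg, strlen(msg));
	close(fd);
	return 0;
}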
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 4c9095c2781e..e647d8e81712 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -7,5 +7,7 @@ obj-y += pstore.o
pstore-objs += inode.o platform.o
obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o
+obj-$(CONFIG_PSTORE_PMSG) += pmsg.o
+
ramoops-objs += ram.o ram_core.o
obj-$(CONFIG_PSTORE_RAM) += ramoops.o
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 66c8c2fe86b7..a12e0829d8ac 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -249,6 +249,7 @@ static void parse_options(char *options)
static int pstore_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
parse_options(data);
return 0;
@@ -315,22 +316,27 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
switch (type) {
case PSTORE_TYPE_DMESG:
- sprintf(name, "dmesg-%s-%lld", psname, id);
+ scnprintf(name, sizeof(name), "dmesg-%s-%lld",
+ psname, id);
break;
case PSTORE_TYPE_CONSOLE:
- sprintf(name, "console-%s-%lld", psname, id);
+ scnprintf(name, sizeof(name), "console-%s", psname);
break;
case PSTORE_TYPE_FTRACE:
- sprintf(name, "ftrace-%s-%lld", psname, id);
+ scnprintf(name, sizeof(name), "ftrace-%s", psname);
break;
case PSTORE_TYPE_MCE:
- sprintf(name, "mce-%s-%lld", psname, id);
+ scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id);
+ break;
+ case PSTORE_TYPE_PMSG:
+ scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id);
break;
case PSTORE_TYPE_UNKNOWN:
- sprintf(name, "unknown-%s-%lld", psname, id);
+ scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id);
break;
default:
- sprintf(name, "type%d-%s-%lld", type, psname, id);
+ scnprintf(name, sizeof(name), "type%d-%s-%lld",
+ type, psname, id);
break;
}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 937d820f273c..fd8d248c285c 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -45,6 +45,12 @@ extern void pstore_register_ftrace(void);
static inline void pstore_register_ftrace(void) {}
#endif
+#ifdef CONFIG_PSTORE_PMSG
+extern void pstore_register_pmsg(void);
+#else
+static inline void pstore_register_pmsg(void) {}
+#endif
+
extern struct pstore_info *psinfo;
extern void pstore_set_kmsg_bytes(int);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 86d1038b5a12..4f11f2382e24 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -267,6 +267,7 @@ int pstore_register(struct pstore_info *psi)
kmsg_dump_register(&pstore_dumper);
pstore_register_console();
pstore_register_ftrace();
+ pstore_register_pmsg();
if (pstore_update_ms >= 0) {
pstore_timer.expires = jiffies +
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
new file mode 100644
index 000000000000..db47810a92d8
--- /dev/null
+++ b/fs/pstore/pmsg.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include "internal.h"
+
+static DEFINE_MUTEX(pmsg_lock);
+#define PMSG_MAX_BOUNCE_BUFFER_SIZE (2*PAGE_SIZE)
+
+static ssize_t write_pmsg(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t i, buffer_size;
+ char *buffer;
+
+ if (!count)
+ return 0;
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+
+ buffer_size = count;
+ if (buffer_size > PMSG_MAX_BOUNCE_BUFFER_SIZE)
+ buffer_size = PMSG_MAX_BOUNCE_BUFFER_SIZE;
+ buffer = vmalloc(buffer_size);
+ if (!buffer)
+ return -ENOMEM;
+
+ mutex_lock(&pmsg_lock);
+ for (i = 0; i < count; ) {
+ size_t c = min(count - i, buffer_size);
+ u64 id;
+ long ret;
+
+ ret = __copy_from_user(buffer, buf + i, c);
+ if (unlikely(ret != 0)) {
+ mutex_unlock(&pmsg_lock);
+ vfree(buffer);
+ return -EFAULT;
+ }
+ psinfo->write_buf(PSTORE_TYPE_PMSG, 0, &id, 0, buffer, c,
+ psinfo);
+
+ i += c;
+ }
+
+ mutex_unlock(&pmsg_lock);
+ vfree(buffer);
+ return count;
+}
+
+static const struct file_operations pmsg_fops = {
+ .owner = THIS_MODULE,
+ .llseek = noop_llseek,
+ .write = write_pmsg,
+};
+
+static struct class *pmsg_class;
+static int pmsg_major;
+#define PMSG_NAME "pmsg"
+#undef pr_fmt
+#define pr_fmt(fmt) PMSG_NAME ": " fmt
+
+static char *pmsg_devnode(struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0220;
+ return NULL;
+}
+
+void pstore_register_pmsg(void)
+{
+ struct device *pmsg_device;
+
+ pmsg_major = register_chrdev(0, PMSG_NAME, &pmsg_fops);
+ if (pmsg_major < 0) {
+ pr_err("register_chrdev failed\n");
+ goto err;
+ }
+
+ pmsg_class = class_create(THIS_MODULE, PMSG_NAME);
+ if (IS_ERR(pmsg_class)) {
+ pr_err("device class file already in use\n");
+ goto err_class;
+ }
+ pmsg_class->devnode = pmsg_devnode;
+
+ pmsg_device = device_create(pmsg_class, NULL, MKDEV(pmsg_major, 0),
+ NULL, "%s%d", PMSG_NAME, 0);
+ if (IS_ERR(pmsg_device)) {
+ pr_err("failed to create device\n");
+ goto err_device;
+ }
+ return;
+
+err_device:
+ class_destroy(pmsg_class);
+err_class:
+ unregister_chrdev(pmsg_major, PMSG_NAME);
+err:
+ return;
+}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index d3d37142bd93..f721e47db8ad 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -51,6 +51,10 @@ static ulong ramoops_ftrace_size = MIN_MEM_SIZE;
module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400);
MODULE_PARM_DESC(ftrace_size, "size of ftrace log");
+static ulong ramoops_pmsg_size = MIN_MEM_SIZE;
+module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400);
+MODULE_PARM_DESC(pmsg_size, "size of user space message log");
+
static ulong mem_address;
module_param(mem_address, ulong, 0400);
MODULE_PARM_DESC(mem_address,
@@ -82,12 +86,14 @@ struct ramoops_context {
struct persistent_ram_zone **przs;
struct persistent_ram_zone *cprz;
struct persistent_ram_zone *fprz;
+ struct persistent_ram_zone *mprz;
phys_addr_t phys_addr;
unsigned long size;
unsigned int memtype;
size_t record_size;
size_t console_size;
size_t ftrace_size;
+ size_t pmsg_size;
int dump_oops;
struct persistent_ram_ecc_info ecc_info;
unsigned int max_dump_cnt;
@@ -96,6 +102,7 @@ struct ramoops_context {
unsigned int dump_read_cnt;
unsigned int console_read_cnt;
unsigned int ftrace_read_cnt;
+ unsigned int pmsg_read_cnt;
struct pstore_info pstore;
};
@@ -109,6 +116,7 @@ static int ramoops_pstore_open(struct pstore_info *psi)
cxt->dump_read_cnt = 0;
cxt->console_read_cnt = 0;
cxt->ftrace_read_cnt = 0;
+ cxt->pmsg_read_cnt = 0;
return 0;
}
@@ -141,6 +149,12 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
return prz;
}
+static bool prz_ok(struct persistent_ram_zone *prz)
+{
+ return !!prz && !!(persistent_ram_old_size(prz) +
+ persistent_ram_ecc_string(prz, NULL, 0));
+}
+
static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *time,
char **buf, struct pstore_info *psi)
@@ -153,13 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt,
cxt->max_dump_cnt, id, type,
PSTORE_TYPE_DMESG, 1);
- if (!prz)
+ if (!prz_ok(prz))
prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
1, id, type, PSTORE_TYPE_CONSOLE, 0);
- if (!prz)
+ if (!prz_ok(prz))
prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
1, id, type, PSTORE_TYPE_FTRACE, 0);
- if (!prz)
+ if (!prz_ok(prz))
+ prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
+ 1, id, type, PSTORE_TYPE_PMSG, 0);
+ if (!prz_ok(prz))
return 0;
/* TODO(kees): Bogus time for the moment. */
@@ -222,6 +239,11 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
return -ENOMEM;
persistent_ram_write(cxt->fprz, buf, size);
return 0;
+ } else if (type == PSTORE_TYPE_PMSG) {
+ if (!cxt->mprz)
+ return -ENOMEM;
+ persistent_ram_write(cxt->mprz, buf, size);
+ return 0;
}
if (type != PSTORE_TYPE_DMESG)
@@ -279,6 +301,9 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
case PSTORE_TYPE_FTRACE:
prz = cxt->fprz;
break;
+ case PSTORE_TYPE_PMSG:
+ prz = cxt->mprz;
+ break;
default:
return -EINVAL;
}
@@ -388,6 +413,12 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return 0;
}
+void notrace ramoops_console_write_buf(const char *buf, size_t size)
+{
+ struct ramoops_context *cxt = &oops_cxt;
+ persistent_ram_write(cxt->cprz, buf, size);
+}
+
static int ramoops_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -404,7 +435,7 @@ static int ramoops_probe(struct platform_device *pdev)
goto fail_out;
if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size &&
- !pdata->ftrace_size)) {
+ !pdata->ftrace_size && !pdata->pmsg_size)) {
pr_err("The memory size and the record/console size must be "
"non-zero\n");
goto fail_out;
@@ -418,6 +449,8 @@ static int ramoops_probe(struct platform_device *pdev)
pdata->console_size = rounddown_pow_of_two(pdata->console_size);
if (!is_power_of_2(pdata->ftrace_size))
pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size);
+ if (pdata->pmsg_size && !is_power_of_2(pdata->pmsg_size))
+ pdata->pmsg_size = rounddown_pow_of_two(pdata->pmsg_size);
cxt->size = pdata->mem_size;
cxt->phys_addr = pdata->mem_address;
@@ -425,12 +458,14 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->record_size = pdata->record_size;
cxt->console_size = pdata->console_size;
cxt->ftrace_size = pdata->ftrace_size;
+ cxt->pmsg_size = pdata->pmsg_size;
cxt->dump_oops = pdata->dump_oops;
cxt->ecc_info = pdata->ecc_info;
paddr = cxt->phys_addr;
- dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size;
+ dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size
+ - cxt->pmsg_size;
err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz);
if (err)
goto fail_out;
@@ -445,13 +480,9 @@ static int ramoops_probe(struct platform_device *pdev)
if (err)
goto fail_init_fprz;
- if (!cxt->przs && !cxt->cprz && !cxt->fprz) {
- pr_err("memory size too small, minimum is %zu\n",
- cxt->console_size + cxt->record_size +
- cxt->ftrace_size);
- err = -EINVAL;
- goto fail_cnt;
- }
+ err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0);
+ if (err)
+ goto fail_init_mprz;
cxt->pstore.data = cxt;
/*
@@ -497,7 +528,8 @@ fail_buf:
fail_clear:
cxt->pstore.bufsize = 0;
cxt->max_dump_cnt = 0;
-fail_cnt:
+ kfree(cxt->mprz);
+fail_init_mprz:
kfree(cxt->fprz);
fail_init_fprz:
kfree(cxt->cprz);
@@ -556,6 +588,7 @@ static void ramoops_register_dummy(void)
dummy_data->record_size = record_size;
dummy_data->console_size = ramoops_console_size;
dummy_data->ftrace_size = ramoops_ftrace_size;
+ dummy_data->pmsg_size = ramoops_pmsg_size;
dummy_data->dump_oops = dump_oops;
/*
* For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
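With the ram.c changes above, a ramoops region can reserve space for the pmsg zone alongside the dmesg, console, and ftrace zones, either via the new ramoops.pmsg_size module parameter or via platform data. A hedged sketch of how a board file might size the zones; the struct name ramoops_platform_data and its header are assumptions here, and only fields referenced in this diff are used:

#include <linux/pstore_ram.h>	/* assumed header for ramoops_platform_data */

static struct ramoops_platform_data board_ramoops_data = {
	.mem_address	= 0x8f000000,	/* hypothetical reserved carveout */
	.mem_size	= 0x100000,
	.record_size	= 0x20000,	/* per-oops dmesg records */
	.console_size	= 0x20000,
	.ftrace_size	= 0x20000,
	.pmsg_size	= 0x20000,	/* new: user space message zone */
	.dump_oops	= 1,
};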
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2e8caa62da78..3410e9f657ca 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -46,6 +46,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data)
{
struct qnx4_sb_info *qs;
+ sync_filesystem(sb);
qs = qnx4_sb(sb);
qs->Version = QNX4_VERSION;
*flags |= MS_RDONLY;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 8d941edfefa1..65cdaab3ed49 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root)
static int qnx6_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_RDONLY;
return 0;
}
diff --git a/fs/readdir.c b/fs/readdir.c
index fee38e04fae4..d46eca8567a4 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -20,11 +20,11 @@
#include <asm/uaccess.h>
-int vfs_readdir(struct file *file, filldir_t filler, void *buf)
+int iterate_dir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
int res = -ENOTDIR;
- if (!file->f_op || !file->f_op->readdir)
+ if (!file->f_op || (!file->f_op->readdir && !file->f_op->iterate))
goto out;
res = security_file_permission(file, MAY_READ);
@@ -37,15 +37,21 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf)
res = -ENOENT;
if (!IS_DEADDIR(inode)) {
- res = file->f_op->readdir(file, buf, filler);
+ if (file->f_op->iterate) {
+ ctx->pos = file->f_pos;
+ res = file->f_op->iterate(file, ctx);
+ file->f_pos = ctx->pos;
+ } else {
+ res = file->f_op->readdir(file, ctx, ctx->actor);
+ ctx->pos = file->f_pos;
+ }
file_accessed(file);
}
mutex_unlock(&inode->i_mutex);
out:
return res;
}
-
-EXPORT_SYMBOL(vfs_readdir);
+EXPORT_SYMBOL(iterate_dir);
/*
* Traditional linux readdir() handling..
@@ -66,6 +72,7 @@ struct old_linux_dirent {
};
struct readdir_callback {
+ struct dir_context ctx;
struct old_linux_dirent __user * dirent;
int result;
};
@@ -73,7 +80,7 @@ struct readdir_callback {
static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
u64 ino, unsigned int d_type)
{
- struct readdir_callback * buf = (struct readdir_callback *) __buf;
+ struct readdir_callback *buf = (struct readdir_callback *) __buf;
struct old_linux_dirent __user * dirent;
unsigned long d_ino;
@@ -107,15 +114,15 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
{
int error;
struct fd f = fdget(fd);
- struct readdir_callback buf;
+ struct readdir_callback buf = {
+ .ctx.actor = fillonedir,
+ .dirent = dirent
+ };
if (!f.file)
return -EBADF;
- buf.result = 0;
- buf.dirent = dirent;
-
- error = vfs_readdir(f.file, fillonedir, &buf);
+ error = iterate_dir(f.file, &buf.ctx);
if (buf.result)
error = buf.result;
@@ -137,6 +144,7 @@ struct linux_dirent {
};
struct getdents_callback {
+ struct dir_context ctx;
struct linux_dirent __user * current_dir;
struct linux_dirent __user * previous;
int count;
@@ -191,7 +199,11 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
{
struct fd f;
struct linux_dirent __user * lastdirent;
- struct getdents_callback buf;
+ struct getdents_callback buf = {
+ .ctx.actor = filldir,
+ .count = count,
+ .current_dir = dirent
+ };
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
@@ -201,17 +213,12 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
if (!f.file)
return -EBADF;
- buf.current_dir = dirent;
- buf.previous = NULL;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(f.file, filldir, &buf);
+ error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- if (put_user(f.file->f_pos, &lastdirent->d_off))
+ if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
error = count - buf.count;
@@ -221,6 +228,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
}
struct getdents_callback64 {
+ struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
struct linux_dirent64 __user * previous;
int count;
@@ -271,7 +279,11 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
{
struct fd f;
struct linux_dirent64 __user * lastdirent;
- struct getdents_callback64 buf;
+ struct getdents_callback64 buf = {
+ .ctx.actor = filldir64,
+ .count = count,
+ .current_dir = dirent
+ };
int error;
if (!access_ok(VERIFY_WRITE, dirent, count))
@@ -281,17 +293,12 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
if (!f.file)
return -EBADF;
- buf.current_dir = dirent;
- buf.previous = NULL;
- buf.count = count;
- buf.error = 0;
-
- error = vfs_readdir(f.file, filldir64, &buf);
+ error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
lastdirent = buf.previous;
if (lastdirent) {
- typeof(lastdirent->d_off) d_off = f.file->f_pos;
+ typeof(lastdirent->d_off) d_off = buf.ctx.pos;
if (__put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
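The readdir.c conversion above replaces vfs_readdir() with iterate_dir(), which takes a struct dir_context that embeds the actor callback and the directory position. A minimal sketch of the pattern an in-kernel caller now follows, mirroring the getdents and nfsd conversions in this diff; the callback body is a placeholder:

#include <linux/fs.h>

struct my_readdir_buf {
	struct dir_context ctx;	/* carries the actor and the position */
	int seen;
};

static int my_actor(void *__buf, const char *name, int namlen,
		    loff_t offset, u64 ino, unsigned int d_type)
{
	struct my_readdir_buf *buf = __buf;

	buf->seen++;		/* placeholder: record or copy the entry */
	return 0;		/* non-zero stops the iteration */
}

static int my_list_dir(struct file *dir)
{
	struct my_readdir_buf buf = {
		.ctx.actor = my_actor,
	};

	return iterate_dir(dir, &buf.ctx);
}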
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e2e202a07b31..460f762a85c0 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1317,6 +1317,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
int i;
#endif
+ sync_filesystem(s);
reiserfs_write_lock(s);
#ifdef CONFIG_QUOTA
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 15cbc41ee365..ae839482c341 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -435,6 +435,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
*/
static int romfs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_RDONLY;
return 0;
}
diff --git a/fs/select.c b/fs/select.c
index 8c1c96c27062..6b14dc7df3a4 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -27,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
#include <linux/sched/rt.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
@@ -236,7 +237,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
set_current_state(state);
if (!pwq->triggered)
- rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
+ rc = freezable_schedule_hrtimeout_range(expires, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3dd44db1465e..d84ca5932c73 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/cred.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
m->count = m->size;
}
+static void *seq_buf_alloc(unsigned long size)
+{
+ void *buf;
+
+ buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!buf && size > PAGE_SIZE)
+ buf = vmalloc(size);
+ return buf;
+}
+
/**
* seq_open - initialize sequential file
* @file: file we initialize
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
return 0;
}
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
return -ENOMEM;
}
@@ -135,8 +147,8 @@ static int traverse(struct seq_file *m, loff_t offset)
Eoverflow:
m->op->stop(m, p);
- kfree(m->buf);
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ kvfree(m->buf);
+ m->buf = seq_buf_alloc(m->size <<= 1);
return !m->buf ? -ENOMEM : -EAGAIN;
}
@@ -191,7 +203,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* grab buffer if we didn't have one */
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
goto Enomem;
}
@@ -231,8 +243,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (m->count < m->size)
goto Fill;
m->op->stop(m, p);
- kfree(m->buf);
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ kvfree(m->buf);
+ m->buf = seq_buf_alloc(m->size <<= 1);
if (!m->buf)
goto Enomem;
m->count = 0;
@@ -349,7 +361,7 @@ EXPORT_SYMBOL(seq_lseek);
int seq_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
- kfree(m->buf);
+ kvfree(m->buf);
kfree(m);
return 0;
}
@@ -604,13 +616,13 @@ EXPORT_SYMBOL(single_open);
int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
void *data, size_t size)
{
- char *buf = kmalloc(size, GFP_KERNEL);
+ char *buf = seq_buf_alloc(size);
int ret;
if (!buf)
return -ENOMEM;
ret = single_open(file, show, data);
if (ret) {
- kfree(buf);
+ kvfree(buf);
return ret;
}
((struct seq_file *)file->private_data)->buf = buf;
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index c70111ebefd4..ffb093e72b6c 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,6 +25,78 @@ config SQUASHFS
If unsure, say N.
+choice
+ prompt "File decompression options"
+ depends on SQUASHFS
+ help
+ Squashfs now supports two options for decompressing file
+ data. Traditionally Squashfs has decompressed into an
+ intermediate buffer and then memcopied it into the page cache.
+ Squashfs now supports the ability to decompress directly into
+ the page cache.
+
+ If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+ bool "Decompress file data into an intermediate buffer"
+ help
+ Decompress file data into an intermediate buffer and then
+ memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+ bool "Decompress files directly into the page cache"
+ help
+ Directly decompress file data into the page cache.
+ Doing so can significantly improve performance because
+ it eliminates a memcpy and it also removes the lock contention
+ on the single buffer.
+
+endchoice
+
+choice
+ prompt "Decompressor parallelisation options"
+ depends on SQUASHFS
+ help
+ Squashfs now supports three parallelisation options for
+ decompression. Each one exhibits various trade-offs between
+ decompression performance and CPU and memory usage.
+
+ If in doubt, select "Single threaded decompression"
+
+config SQUASHFS_DECOMP_SINGLE
+ bool "Single threaded compression"
+ help
+ Traditionally Squashfs has used single-threaded decompression.
+ Only one block (data or metadata) can be decompressed at any
+ one time. This limits CPU and memory usage to a minimum.
+
+config SQUASHFS_DECOMP_MULTI
+ bool "Use multiple decompressors for parallel I/O"
+ help
+ By default Squashfs uses a single decompressor, but this gives
+ poor performance on parallel I/O workloads on multi-CPU machines
+ due to waiting on decompressor availability.
+
+ If you have a parallel I/O workload and your system has enough memory,
+ using this option may improve overall I/O performance.
+
+ This decompressor implementation uses up to two parallel
+ decompressors per core. It allocates decompressors dynamically,
+ on demand.
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+ bool "Use percpu multiple decompressors for parallel I/O"
+ help
+ By default Squashfs uses a single decompressor, but this gives
+ poor performance on parallel I/O workloads on multi-CPU machines
+ due to waiting on decompressor availability.
+
+ This decompressor implementation uses a maximum of one
+ decompressor per core. It uses percpu variables to ensure
+ decompression is load-balanced across the cores.
+
+endchoice
+
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
@@ -48,6 +120,21 @@ config SQUASHFS_ZLIB
If unsure, say Y.
+config SQUASHFS_LZ4
+ bool "Include support for LZ4 compressed file systems"
+ depends on SQUASHFS
+ select LZ4_DECOMPRESS
+ help
+ Saying Y here includes support for reading Squashfs file systems
+ compressed with LZ4 compression. LZ4 compression is mainly
+ aimed at embedded systems with slower CPUs where the overheads
+ of zlib are too high.
+
+ LZ4 is not the standard compression used in Squashfs and so most
+ file systems will be readable without selecting this option.
+
+ If unsure, say N.
+
config SQUASHFS_LZO
bool "Include support for LZO compressed file systems"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 110b0476f3b4..246a6f329d89 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,7 +5,13 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o decompressor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
+squashfs-$(CONFIG_SQUASHFS_LZ4) += lz4_wrapper.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index fb50652e4e11..0cea9b9236d0 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -36,6 +36,7 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
/*
* Read the metadata block length, this is stored in the first two
@@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with compression
* algorithms).
*/
-int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
- int length, u64 *next_index, int srclength, int pages)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+ u64 *next_index, struct squashfs_page_actor *output)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
int offset = index & ((1 << msblk->devblksize_log2) - 1);
u64 cur_index = index >> msblk->devblksize_log2;
- int bytes, compressed, b = 0, k = 0, page = 0, avail;
+ int bytes, compressed, b = 0, k = 0, avail, i;
- bh = kcalloc(((srclength + msblk->devblksize - 1)
+ bh = kcalloc(((output->length + msblk->devblksize - 1)
>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
return -ENOMEM;
@@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
*next_index = index + length;
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
- index, compressed ? "" : "un", length, srclength);
+ index, compressed ? "" : "un", length, output->length);
- if (length < 0 || length > srclength ||
+ if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto read_failure;
@@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
compressed ? "" : "un", length);
- if (length < 0 || length > srclength ||
+ if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto block_release;
@@ -158,35 +159,36 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
ll_rw_block(READ, b - 1, bh + 1);
}
+ for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+ }
+
if (compressed) {
- length = squashfs_decompress(msblk, buffer, bh, b, offset,
- length, srclength, pages);
+ length = squashfs_decompress(msblk, bh, b, offset, length,
+ output);
if (length < 0)
goto read_failure;
} else {
/*
* Block is uncompressed.
*/
- int i, in, pg_offset = 0;
-
- for (i = 0; i < b; i++) {
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- goto block_release;
- }
+ int in, pg_offset = 0;
+ void *data = squashfs_first_page(output);
for (bytes = length; k < b; k++) {
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
while (in) {
if (pg_offset == PAGE_CACHE_SIZE) {
- page++;
+ data = squashfs_next_page(output);
pg_offset = 0;
}
avail = min_t(int, in, PAGE_CACHE_SIZE -
pg_offset);
- memcpy(buffer[page] + pg_offset,
- bh[k]->b_data + offset, avail);
+ memcpy(data + pg_offset, bh[k]->b_data + offset,
+ avail);
in -= avail;
pg_offset += avail;
offset += avail;
@@ -194,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
offset = 0;
put_bh(bh[k]);
}
+ squashfs_finish_page(output);
}
kfree(bh);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index af0b73802592..1cb70a0b2168 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -56,6 +56,7 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
#include "squashfs.h"
+#include "page_actor.h"
/*
* Look-up block in cache, and increment usage count. If not in cache, read
@@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
entry->error = 0;
spin_unlock(&cache->lock);
- entry->length = squashfs_read_data(sb, entry->data,
- block, length, &entry->next_index,
- cache->block_size, cache->pages);
+ entry->length = squashfs_read_data(sb, block, length,
+ &entry->next_index, entry->actor);
spin_lock(&cache->lock);
@@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
kfree(cache->entry[i].data[j]);
kfree(cache->entry[i].data);
}
+ kfree(cache->entry[i].actor);
}
kfree(cache->entry);
@@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
goto cleanup;
}
}
+
+ entry->actor = squashfs_page_actor_init(entry->data,
+ cache->pages, 0);
+ if (entry->actor == NULL) {
+ ERROR("Failed to allocate %s cache entry\n", name);
+ goto cleanup;
+ }
}
return cache;
@@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
int i, res;
void *table, *buffer, **data;
+ struct squashfs_page_actor *actor;
table = buffer = kmalloc(length, GFP_KERNEL);
if (table == NULL)
@@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
goto failed;
}
+ actor = squashfs_page_actor_init(data, pages, length);
+ if (actor == NULL) {
+ res = -ENOMEM;
+ goto failed2;
+ }
+
for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
data[i] = buffer;
- res = squashfs_read_data(sb, data, block, length |
- SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
+ res = squashfs_read_data(sb, block, length |
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
kfree(data);
+ kfree(actor);
if (res < 0)
goto failed;
return table;
+failed2:
+ kfree(data);
failed:
kfree(table);
return ERR_PTR(res);
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 3f6271d86abc..e9034bf6e5ae 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -30,6 +30,7 @@
#include "squashfs_fs_sb.h"
#include "decompressor.h"
#include "squashfs.h"
+#include "page_actor.h"
/*
* This file (and decompressor.h) implements a decompressor framework for
@@ -37,33 +38,40 @@
*/
static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
- NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+ NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
};
+#ifndef CONFIG_SQUASHFS_LZ4
+static const struct squashfs_decompressor squashfs_lz4_comp_ops = {
+ NULL, NULL, NULL, NULL, LZ4_COMPRESSION, "lz4", 0
+};
+#endif
+
#ifndef CONFIG_SQUASHFS_LZO
static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
- NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+ NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
};
#endif
#ifndef CONFIG_SQUASHFS_XZ
static const struct squashfs_decompressor squashfs_xz_comp_ops = {
- NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+ NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
};
#endif
#ifndef CONFIG_SQUASHFS_ZLIB
static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
- NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+ NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
};
#endif
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
- NULL, NULL, NULL, 0, "unknown", 0
+ NULL, NULL, NULL, NULL, 0, "unknown", 0
};
static const struct squashfs_decompressor *decompressor[] = {
&squashfs_zlib_comp_ops,
+ &squashfs_lz4_comp_ops,
&squashfs_lzo_comp_ops,
&squashfs_xz_comp_ops,
&squashfs_lzma_unsupported_comp_ops,
@@ -83,10 +91,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
}
-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
+static void *get_comp_opts(struct super_block *sb, unsigned short flags)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- void *strm, *buffer = NULL;
+ void *buffer = NULL, *comp_opts;
+ struct squashfs_page_actor *actor = NULL;
int length = 0;
/*
@@ -94,23 +103,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
*/
if (SQUASHFS_COMP_OPTS(flags)) {
buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
- if (buffer == NULL)
- return ERR_PTR(-ENOMEM);
+ if (buffer == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
- length = squashfs_read_data(sb, &buffer,
- sizeof(struct squashfs_super_block), 0, NULL,
- PAGE_CACHE_SIZE, 1);
+ actor = squashfs_page_actor_init(&buffer, 1, 0);
+ if (actor == NULL) {
+ comp_opts = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ length = squashfs_read_data(sb,
+ sizeof(struct squashfs_super_block), 0, NULL, actor);
if (length < 0) {
- strm = ERR_PTR(length);
- goto finished;
+ comp_opts = ERR_PTR(length);
+ goto out;
}
}
- strm = msblk->decompressor->init(msblk, buffer, length);
+ comp_opts = squashfs_comp_opts(msblk, buffer, length);
-finished:
+out:
+ kfree(actor);
kfree(buffer);
+ return comp_opts;
+}
+
+
+void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ void *stream, *comp_opts = get_comp_opts(sb, flags);
+
+ if (IS_ERR(comp_opts))
+ return comp_opts;
+
+ stream = squashfs_decompressor_create(msblk, comp_opts);
+ if (IS_ERR(stream))
+ kfree(comp_opts);
- return strm;
+ return stream;
}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 330073e29029..a25713c031a5 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,34 +24,32 @@
*/
struct squashfs_decompressor {
- void *(*init)(struct squashfs_sb_info *, void *, int);
+ void *(*init)(struct squashfs_sb_info *, void *);
+ void *(*comp_opts)(struct squashfs_sb_info *, void *, int);
void (*free)(void *);
- int (*decompress)(struct squashfs_sb_info *, void **,
- struct buffer_head **, int, int, int, int, int);
+ int (*decompress)(struct squashfs_sb_info *, void *,
+ struct buffer_head **, int, int, int,
+ struct squashfs_page_actor *);
int id;
char *name;
int supported;
};
-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
- void *s)
+static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int length)
{
- if (msblk->decompressor)
- msblk->decompressor->free(s);
-}
-
-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
- void **buffer, struct buffer_head **bh, int b, int offset, int length,
- int srclength, int pages)
-{
- return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
- length, srclength, pages);
+ return msblk->decompressor->comp_opts ?
+ msblk->decompressor->comp_opts(msblk, buff, length) : NULL;
}
#ifdef CONFIG_SQUASHFS_XZ
extern const struct squashfs_decompressor squashfs_xz_comp_ops;
#endif
+#ifdef CONFIG_SQUASHFS_LZ4
+extern const struct squashfs_decompressor squashfs_lz4_comp_ops;
+#endif
+
#ifdef CONFIG_SQUASHFS_LZO
extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
#endif
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
new file mode 100644
index 000000000000..d6008a636479
--- /dev/null
+++ b/fs/squashfs/decompressor_multi.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2013
+ * Minchan Kim <minchan@kernel.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression in the
+ * decompressor framework
+ */
+
+
+/*
+ * The reason for multiplying by two is that a CPU can request new
+ * I/O while it is waiting for a previous request.
+ */
+#define MAX_DECOMPRESSOR (num_online_cpus() * 2)
+
+
+int squashfs_max_decompressors(void)
+{
+ return MAX_DECOMPRESSOR;
+}
+
+
+struct squashfs_stream {
+ void *comp_opts;
+ struct list_head strm_list;
+ struct mutex mutex;
+ int avail_decomp;
+ wait_queue_head_t wait;
+};
+
+
+struct decomp_stream {
+ void *stream;
+ struct list_head list;
+};
+
+
+static void put_decomp_stream(struct decomp_stream *decomp_strm,
+ struct squashfs_stream *stream)
+{
+ mutex_lock(&stream->mutex);
+ list_add(&decomp_strm->list, &stream->strm_list);
+ mutex_unlock(&stream->mutex);
+ wake_up(&stream->wait);
+}
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+ void *comp_opts)
+{
+ struct squashfs_stream *stream;
+ struct decomp_stream *decomp_strm = NULL;
+ int err = -ENOMEM;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (!stream)
+ goto out;
+
+ stream->comp_opts = comp_opts;
+ mutex_init(&stream->mutex);
+ INIT_LIST_HEAD(&stream->strm_list);
+ init_waitqueue_head(&stream->wait);
+
+ /*
+ * We need at least one decompressor as a default, so that if we
+ * later fail to allocate a new decompressor dynamically we can
+ * always fall back to it and the file system keeps working.
+ */
+ decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+ if (!decomp_strm)
+ goto out;
+
+ decomp_strm->stream = msblk->decompressor->init(msblk,
+ stream->comp_opts);
+ if (IS_ERR(decomp_strm->stream)) {
+ err = PTR_ERR(decomp_strm->stream);
+ goto out;
+ }
+
+ list_add(&decomp_strm->list, &stream->strm_list);
+ stream->avail_decomp = 1;
+ return stream;
+
+out:
+ kfree(decomp_strm);
+ kfree(stream);
+ return ERR_PTR(err);
+}
+
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_stream *stream = msblk->stream;
+ if (stream) {
+ struct decomp_stream *decomp_strm;
+
+ while (!list_empty(&stream->strm_list)) {
+ decomp_strm = list_entry(stream->strm_list.prev,
+ struct decomp_stream, list);
+ list_del(&decomp_strm->list);
+ msblk->decompressor->free(decomp_strm->stream);
+ kfree(decomp_strm);
+ stream->avail_decomp--;
+ }
+ WARN_ON(stream->avail_decomp);
+ kfree(stream->comp_opts);
+ kfree(stream);
+ }
+}
+
+
+static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
+ struct squashfs_stream *stream)
+{
+ struct decomp_stream *decomp_strm;
+
+ while (1) {
+ mutex_lock(&stream->mutex);
+
+ /* There is an available decomp_stream */
+ if (!list_empty(&stream->strm_list)) {
+ decomp_strm = list_entry(stream->strm_list.prev,
+ struct decomp_stream, list);
+ list_del(&decomp_strm->list);
+ mutex_unlock(&stream->mutex);
+ break;
+ }
+
+ /*
+ * If no decompressor is available and we have already allocated
+ * the maximum, wait for other users to release one.
+ */
+ if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+ goto wait;
+
+ /* Let's allocate new decomp */
+ decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+ if (!decomp_strm)
+ goto wait;
+
+ decomp_strm->stream = msblk->decompressor->init(msblk,
+ stream->comp_opts);
+ if (IS_ERR(decomp_strm->stream)) {
+ kfree(decomp_strm);
+ goto wait;
+ }
+
+ stream->avail_decomp++;
+ WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+
+ mutex_unlock(&stream->mutex);
+ break;
+wait:
+ /*
+ * If system memory is tight, wait for other users to release a
+ * decompressor instead of pressuring the VM, which could cause
+ * page cache thrashing.
+ */
+ mutex_unlock(&stream->mutex);
+ wait_event(stream->wait,
+ !list_empty(&stream->strm_list));
+ }
+
+ return decomp_strm;
+}
+
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+ int b, int offset, int length, struct squashfs_page_actor *output)
+{
+ int res;
+ struct squashfs_stream *stream = msblk->stream;
+ struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
+ res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
+ bh, b, offset, length, output);
+ put_decomp_stream(decomp_stream, stream);
+ if (res < 0)
+ ERROR("%s decompression failed, data probably corrupt\n",
+ msblk->decompressor->name);
+ return res;
+}
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
new file mode 100644
index 000000000000..23a9c28ad8ea
--- /dev/null
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression using percpu
+ * variables, one thread per cpu core.
+ */
+
+struct squashfs_stream {
+ void *stream;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+ void *comp_opts)
+{
+ struct squashfs_stream *stream;
+ struct squashfs_stream __percpu *percpu;
+ int err, cpu;
+
+ percpu = alloc_percpu(struct squashfs_stream);
+ if (percpu == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+ stream->stream = msblk->decompressor->init(msblk, comp_opts);
+ if (IS_ERR(stream->stream)) {
+ err = PTR_ERR(stream->stream);
+ goto out;
+ }
+ }
+
+ kfree(comp_opts);
+ return (__force void *) percpu;
+
+out:
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+ if (!IS_ERR_OR_NULL(stream->stream))
+ msblk->decompressor->free(stream->stream);
+ }
+ free_percpu(percpu);
+ return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_stream __percpu *percpu =
+ (struct squashfs_stream __percpu *) msblk->stream;
+ struct squashfs_stream *stream;
+ int cpu;
+
+ if (msblk->stream) {
+ for_each_possible_cpu(cpu) {
+ stream = per_cpu_ptr(percpu, cpu);
+ msblk->decompressor->free(stream->stream);
+ }
+ free_percpu(percpu);
+ }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+ int b, int offset, int length, struct squashfs_page_actor *output)
+{
+ struct squashfs_stream __percpu *percpu =
+ (struct squashfs_stream __percpu *) msblk->stream;
+ struct squashfs_stream *stream = get_cpu_ptr(percpu);
+ int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ offset, length, output);
+ put_cpu_ptr(stream);
+
+ if (res < 0)
+ ERROR("%s decompression failed, data probably corrupt\n",
+ msblk->decompressor->name);
+
+ return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+ return num_possible_cpus();
+}
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
new file mode 100644
index 000000000000..a6c75929a00e
--- /dev/null
+++ b/fs/squashfs/decompressor_single.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements single-threaded decompression in the
+ * decompressor framework
+ */
+
+struct squashfs_stream {
+ void *stream;
+ struct mutex mutex;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+ void *comp_opts)
+{
+ struct squashfs_stream *stream;
+ int err = -ENOMEM;
+
+ stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto out;
+
+ stream->stream = msblk->decompressor->init(msblk, comp_opts);
+ if (IS_ERR(stream->stream)) {
+ err = PTR_ERR(stream->stream);
+ goto out;
+ }
+
+ kfree(comp_opts);
+ mutex_init(&stream->mutex);
+ return stream;
+
+out:
+ kfree(stream);
+ return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+ struct squashfs_stream *stream = msblk->stream;
+
+ if (stream) {
+ msblk->decompressor->free(stream->stream);
+ kfree(stream);
+ }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+ int b, int offset, int length, struct squashfs_page_actor *output)
+{
+ int res;
+ struct squashfs_stream *stream = msblk->stream;
+
+ mutex_lock(&stream->mutex);
+ res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ offset, length, output);
+ mutex_unlock(&stream->mutex);
+
+ if (res < 0)
+ ERROR("%s decompression failed, data probably corrupt\n",
+ msblk->decompressor->name);
+
+ return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+ return 1;
+}
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 57dc70ebbb19..d8c2d747be28 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
int err, i, index, length = 0;
+ unsigned int size;
struct squashfs_dir_index dir_index;
TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
@@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb,
*/
break;
+ size = le32_to_cpu(dir_index.size) + 1;
+
+ /* size should never be larger than SQUASHFS_NAME_LEN */
+ if (size > SQUASHFS_NAME_LEN)
+ break;
+
err = squashfs_read_metadata(sb, NULL, &index_start,
- &index_offset, le32_to_cpu(dir_index.size) + 1);
+ &index_offset, size);
if (err < 0)
break;
@@ -100,14 +107,13 @@ static int get_dir_index_using_offset(struct super_block *sb,
}
-static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+static int squashfs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
u64 block = squashfs_i(inode)->start + msblk->directory_table;
- int offset = squashfs_i(inode)->offset, length, dir_count, size,
- type, err;
- unsigned int inode_number;
+ int offset = squashfs_i(inode)->offset, length, err;
+ unsigned int inode_number, dir_count, size, type;
struct squashfs_dir_header dirh;
struct squashfs_dir_entry *dire;
@@ -127,11 +133,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
* It also means that the external f_pos is offset by 3 from the
* on-disk directory f_pos.
*/
- while (file->f_pos < 3) {
+ while (ctx->pos < 3) {
char *name;
int i_ino;
- if (file->f_pos == 0) {
+ if (ctx->pos == 0) {
name = ".";
size = 1;
i_ino = inode->i_ino;
@@ -141,24 +147,18 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
i_ino = squashfs_i(inode)->parent;
}
- TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n",
- dirent, name, size, file->f_pos, i_ino,
- squashfs_filetype_table[1]);
-
- if (filldir(dirent, name, size, file->f_pos, i_ino,
- squashfs_filetype_table[1]) < 0) {
- TRACE("Filldir returned less than 0\n");
+ if (!dir_emit(ctx, name, size, i_ino,
+ squashfs_filetype_table[1]))
goto finish;
- }
- file->f_pos += size;
+ ctx->pos += size;
}
length = get_dir_index_using_offset(inode->i_sb, &block, &offset,
squashfs_i(inode)->dir_idx_start,
squashfs_i(inode)->dir_idx_offset,
squashfs_i(inode)->dir_idx_cnt,
- file->f_pos);
+ ctx->pos);
while (length < i_size_read(inode)) {
/*
@@ -198,7 +198,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
length += sizeof(*dire) + size;
- if (file->f_pos >= length)
+ if (ctx->pos >= length)
continue;
dire->name[size] = '\0';
@@ -206,22 +206,15 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
((short) le16_to_cpu(dire->inode_number));
type = le16_to_cpu(dire->type);
- TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)"
- "\n", dirent, dire->name, size,
- file->f_pos,
- le32_to_cpu(dirh.start_block),
- le16_to_cpu(dire->offset),
- inode_number,
- squashfs_filetype_table[type]);
+ if (type > SQUASHFS_MAX_DIR_TYPE)
+ goto failed_read;
- if (filldir(dirent, dire->name, size, file->f_pos,
+ if (!dir_emit(ctx, dire->name, size,
inode_number,
- squashfs_filetype_table[type]) < 0) {
- TRACE("Filldir returned less than 0\n");
+ squashfs_filetype_table[type]))
goto finish;
- }
- file->f_pos = length;
+ ctx->pos = length;
}
}
@@ -238,6 +231,6 @@ failed_read:
const struct file_operations squashfs_dir_ops = {
.read = generic_read_dir,
- .readdir = squashfs_readdir,
+ .iterate = squashfs_readdir,
.llseek = default_llseek,
};
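
The readdir rework above tracks the VFS-wide move from the filldir callback to struct dir_context: the method becomes ->iterate, dir_emit() replaces the filldir() call, and the position lives in ctx->pos instead of file->f_pos. A minimal sketch of the new interface for a hypothetical examplefs with a fixed directory table (all names below are illustrative, not Squashfs code):

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Illustrative fixed directory contents; a real filesystem reads these from disk. */
static const struct {
	const char *name;
	u64 ino;
	unsigned int type;
} entries[] = {
	{ "hello.txt", 10, DT_REG },
	{ "subdir",    11, DT_DIR },
};

static int examplefs_iterate(struct file *file, struct dir_context *ctx)
{
	/* Emits "." and "..", leaving ctx->pos at 2, or returns false if the buffer fills. */
	if (!dir_emit_dots(file, ctx))
		return 0;

	while (ctx->pos - 2 < (loff_t)ARRAY_SIZE(entries)) {
		unsigned int i = ctx->pos - 2;

		/* dir_emit() returns false once the user buffer is full. */
		if (!dir_emit(ctx, entries[i].name, strlen(entries[i].name),
				entries[i].ino, entries[i].type))
			return 0;
		ctx->pos++;
	}
	return 0;
}

static const struct file_operations examplefs_dir_ops = {
	.read		= generic_read_dir,
	.iterate	= examplefs_iterate,
	.llseek		= default_llseek,
};

The independent change in the same hunk, bounding size by SQUASHFS_NAME_LEN and type by SQUASHFS_MAX_DIR_TYPE, hardens the reader against corrupted or crafted directory metadata.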
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 8ca62c28fe12..e5c9689062ba 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
return le32_to_cpu(size);
}
-
-static int squashfs_readpage(struct file *file, struct page *page)
+/* Copy data into page cache */
+void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
+ int bytes, int offset)
{
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int bytes, i, offset = 0, sparse = 0;
- struct squashfs_cache_entry *buffer = NULL;
void *pageaddr;
-
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
- int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
- int start_index = page->index & ~mask;
- int end_index = start_index | mask;
- int file_end = i_size_read(inode) >> msblk->block_log;
-
- TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
- page->index, squashfs_i(inode)->start);
-
- if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
- goto out;
-
- if (index < file_end || squashfs_i(inode)->fragment_block ==
- SQUASHFS_INVALID_BLK) {
- /*
- * Reading a datablock from disk. Need to read block list
- * to get location and block size.
- */
- u64 block = 0;
- int bsize = read_blocklist(inode, index, &block);
- if (bsize < 0)
- goto error_out;
-
- if (bsize == 0) { /* hole */
- bytes = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
- sparse = 1;
- } else {
- /*
- * Read and decompress datablock.
- */
- buffer = squashfs_get_datablock(inode->i_sb,
- block, bsize);
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x"
- "\n", block, bsize);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = buffer->length;
- }
- } else {
- /*
- * Datablock is stored inside a fragment (tail-end packed
- * block).
- */
- buffer = squashfs_get_fragment(inode->i_sb,
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
-
- if (buffer->error) {
- ERROR("Unable to read page, block %llx, size %x\n",
- squashfs_i(inode)->fragment_block,
- squashfs_i(inode)->fragment_size);
- squashfs_cache_put(buffer);
- goto error_out;
- }
- bytes = i_size_read(inode) & (msblk->block_size - 1);
- offset = squashfs_i(inode)->fragment_offset;
- }
+ int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = page->index & ~mask, end_index = start_index | mask;
/*
* Loop copying datablock into pages. As the datablock likely covers
@@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page)
for (i = start_index; i <= end_index && bytes > 0; i++,
bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
struct page *push_page;
- int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+ int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
@@ -475,11 +413,75 @@ skip_page:
if (i != page->index)
page_cache_release(push_page);
}
+}
+
+/* Read datablock stored packed inside a fragment (tail-end packed block) */
+static int squashfs_readpage_fragment(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ int res = buffer->error;
+
+ if (res)
+ ERROR("Unable to read page, block %llx, size %x\n",
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ else
+ squashfs_copy_cache(page, buffer, i_size_read(inode) &
+ (msblk->block_size - 1),
+ squashfs_i(inode)->fragment_offset);
+
+ squashfs_cache_put(buffer);
+ return res;
+}
- if (!sparse)
- squashfs_cache_put(buffer);
+static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+ squashfs_copy_cache(page, NULL, bytes, 0);
return 0;
+}
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ int file_end = i_size_read(inode) >> msblk->block_log;
+ int res;
+ void *pageaddr;
+
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(inode)->start);
+
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
+
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
+
+ if (bsize == 0)
+ res = squashfs_readpage_sparse(page, index, file_end);
+ else
+ res = squashfs_readpage_block(page, block, bsize);
+ } else
+ res = squashfs_readpage_fragment(page);
+
+ if (!res)
+ return 0;
error_out:
SetPageError(page);
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644
index 000000000000..f2310d2a2019
--- /dev/null
+++ b/fs/squashfs/file_cache.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+ struct inode *i = page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int res = buffer->error;
+
+ if (res)
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ else
+ squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+ squashfs_cache_put(buffer);
+ return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
new file mode 100644
index 000000000000..43e7a7eddac0
--- /dev/null
+++ b/fs/squashfs/file_direct.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "page_actor.h"
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page);
+
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+
+{
+ struct inode *inode = target_page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+ int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int start_index = target_page->index & ~mask;
+ int end_index = start_index | mask;
+ int i, n, pages, missing_pages, bytes, res = -ENOMEM;
+ struct page **page;
+ struct squashfs_page_actor *actor;
+ void *pageaddr;
+
+ if (end_index > file_end)
+ end_index = file_end;
+
+ pages = end_index - start_index + 1;
+
+ page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
+ if (page == NULL)
+ return res;
+
+ /*
+ * Create a "page actor" which will kmap and kunmap the
+ * page cache pages appropriately within the decompressor
+ */
+ actor = squashfs_page_actor_init_special(page, pages, 0);
+ if (actor == NULL)
+ goto out;
+
+ /* Try to grab all the pages covered by the Squashfs block */
+ for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+ page[i] = (n == target_page->index) ? target_page :
+ grab_cache_page_nowait(target_page->mapping, n);
+
+ if (page[i] == NULL) {
+ missing_pages++;
+ continue;
+ }
+
+ if (PageUptodate(page[i])) {
+ unlock_page(page[i]);
+ page_cache_release(page[i]);
+ page[i] = NULL;
+ missing_pages++;
+ }
+ }
+
+ if (missing_pages) {
+ /*
+ * Couldn't get one or more pages, this page has either
+ * been VM reclaimed, but others are still in the page cache
+ * and uptodate, or we're racing with another thread in
+ * squashfs_readpage also trying to grab them. Fall back to
+ * using an intermediate buffer.
+ */
+ res = squashfs_read_cache(target_page, block, bsize, pages,
+ page);
+ if (res < 0)
+ goto mark_errored;
+
+ goto out;
+ }
+
+ /* Decompress directly into the page cache buffers */
+ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+ if (res < 0)
+ goto mark_errored;
+
+ /* Last page may have trailing bytes not filled */
+ bytes = res % PAGE_CACHE_SIZE;
+ if (bytes) {
+ pageaddr = kmap_atomic(page[pages - 1]);
+ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+ kunmap_atomic(pageaddr);
+ }
+
+ /* Mark pages as uptodate, unlock and release */
+ for (i = 0; i < pages; i++) {
+ flush_dcache_page(page[i]);
+ SetPageUptodate(page[i]);
+ unlock_page(page[i]);
+ if (page[i] != target_page)
+ page_cache_release(page[i]);
+ }
+
+ kfree(actor);
+ kfree(page);
+
+ return 0;
+
+mark_errored:
+ /* Decompression failed, mark pages as errored. Target_page is
+ * dealt with by the caller
+ */
+ for (i = 0; i < pages; i++) {
+ if (page[i] == NULL || page[i] == target_page)
+ continue;
+ flush_dcache_page(page[i]);
+ SetPageError(page[i]);
+ unlock_page(page[i]);
+ page_cache_release(page[i]);
+ }
+
+out:
+ kfree(actor);
+ kfree(page);
+ return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+ int pages, struct page **page)
+{
+ struct inode *i = target_page->mapping->host;
+ struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+ block, bsize);
+ int bytes = buffer->length, res = buffer->error, n, offset = 0;
+ void *pageaddr;
+
+ if (res) {
+ ERROR("Unable to read page, block %llx, size %x\n", block,
+ bsize);
+ goto out;
+ }
+
+ for (n = 0; n < pages && bytes > 0; n++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+ if (page[n] == NULL)
+ continue;
+
+ pageaddr = kmap_atomic(page[n]);
+ squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ kunmap_atomic(pageaddr);
+ flush_dcache_page(page[n]);
+ SetPageUptodate(page[n]);
+ unlock_page(page[n]);
+ if (page[n] != target_page)
+ page_cache_release(page[n]);
+ }
+
+out:
+ squashfs_cache_put(buffer);
+ return res;
+}
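
Both the direct path above and its squashfs_read_cache() fallback publish page cache pages the same way: map the page with kmap_atomic(), copy what is available and zero the rest, then mark the page up to date and unlock it. A stripped-down sketch of that per-page sequence (fill_one_page() and its arguments are illustrative, not Squashfs functions), assuming the caller already holds the page lock, e.g. from grab_cache_page_nowait():

#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/string.h>

/* Fill a locked page cache page from a kernel buffer; avail <= PAGE_CACHE_SIZE. */
static void fill_one_page(struct page *page, const void *src, int avail)
{
	void *pageaddr = kmap_atomic(page);

	memcpy(pageaddr, src, avail);
	memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
	kunmap_atomic(pageaddr);

	flush_dcache_page(page);	/* keep other mappings coherent */
	SetPageUptodate(page);
	unlock_page(page);
}

The direct path only decompresses straight into the page cache when every page covered by the block could be grabbed and none was already up to date; otherwise it falls back to the shared cache entry, exactly as file_cache.c does.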
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
new file mode 100644
index 000000000000..c31e2bc9c081
--- /dev/null
+++ b/fs/squashfs/lz4_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2013, 2014
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/lz4.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs.h"
+#include "decompressor.h"
+#include "page_actor.h"
+
+#define LZ4_LEGACY 1
+
+struct lz4_comp_opts {
+ __le32 version;
+ __le32 flags;
+};
+
+struct squashfs_lz4 {
+ void *input;
+ void *output;
+};
+
+
+static void *lz4_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int len)
+{
+ struct lz4_comp_opts *comp_opts = buff;
+
+ /* LZ4 compressed filesystems always have compression options */
+ if (comp_opts == NULL || len < sizeof(*comp_opts))
+ return ERR_PTR(-EIO);
+
+ if (le32_to_cpu(comp_opts->version) != LZ4_LEGACY) {
+ /* LZ4 format currently used by the kernel is the 'legacy'
+ * format */
+ ERROR("Unknown LZ4 version\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return NULL;
+}
+
+
+static void *lz4_init(struct squashfs_sb_info *msblk, void *buff)
+{
+ int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+ struct squashfs_lz4 *stream;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto failed;
+ stream->input = vmalloc(block_size);
+ if (stream->input == NULL)
+ goto failed2;
+ stream->output = vmalloc(block_size);
+ if (stream->output == NULL)
+ goto failed3;
+
+ return stream;
+
+failed3:
+ vfree(stream->input);
+failed2:
+ kfree(stream);
+failed:
+ ERROR("Failed to initialise LZ4 decompressor\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+
+static void lz4_free(void *strm)
+{
+ struct squashfs_lz4 *stream = strm;
+
+ if (stream) {
+ vfree(stream->input);
+ vfree(stream->output);
+ }
+ kfree(stream);
+}
+
+
+static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
+{
+ struct squashfs_lz4 *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t dest_len = output->length;
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset);
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0;
+ put_bh(bh[i]);
+ }
+
+ res = lz4_decompress_unknownoutputsize(stream->input, length,
+ stream->output, &dest_len);
+ if (res)
+ return -EIO;
+
+ bytes = dest_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ }
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ squashfs_finish_page(output);
+
+ return dest_len;
+}
+
+const struct squashfs_decompressor squashfs_lz4_comp_ops = {
+ .init = lz4_init,
+ .comp_opts = lz4_comp_opts,
+ .free = lz4_free,
+ .decompress = lz4_uncompress,
+ .id = LZ4_COMPRESSION,
+ .name = "lz4",
+ .supported = 1
+};
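
The ops structure above also shows the reshaped decompressor interface: ->comp_opts parses the on-disk compression options once (presumably called from squashfs_decompressor_setup() at mount time), its return value is handed to every ->init() call as the opts argument, and squashfs_decompressor_create() kfree()s it once all streams exist. A hedged sketch of just that options flow for a hypothetical "demo" compressor; every name and field here is illustrative:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
/* plus the usual squashfs_fs_sb.h / decompressor.h includes */

struct demo_disk_opts {		/* little-endian, as stored in the image */
	__le32 level;
};

struct demo_opts {		/* in-memory form handed to each ->init() */
	int level;
};

static void *demo_comp_opts(struct squashfs_sb_info *msblk, void *buff, int len)
{
	struct demo_disk_opts *disk = buff;
	struct demo_opts *opts;

	opts = kmalloc(sizeof(*opts), GFP_KERNEL);
	if (opts == NULL)
		return ERR_PTR(-ENOMEM);

	if (disk != NULL && len >= sizeof(*disk))
		opts->level = le32_to_cpu(disk->level);
	else
		opts->level = 1;	/* image carries no options: use a default */

	return opts;			/* freed by squashfs_decompressor_create() */
}

static void *demo_init(struct squashfs_sb_info *msblk, void *buff)
{
	struct demo_opts *opts = buff;
	void *workspace;

	/* One private workspace per stream, sized from the mount-time options. */
	workspace = vmalloc(opts->level * msblk->block_size);
	if (workspace == NULL)
		return ERR_PTR(-ENOMEM);

	return workspace;
}

The free and decompress callbacks would then mirror lz4_free()/lz4_uncompress() above, with vfree() releasing the per-stream workspace.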
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 00f4dfc5f088..244b9fbfff7b 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -31,13 +31,14 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
struct squashfs_lzo {
void *input;
void *output;
};
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
+static void *lzo_init(struct squashfs_sb_info *msblk, void *buff)
{
int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
@@ -74,22 +75,16 @@ static void lzo_free(void *strm)
}
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
{
- struct squashfs_lzo *stream = msblk->stream;
- void *buff = stream->input;
+ struct squashfs_lzo *stream = strm;
+ void *buff = stream->input, *data;
int avail, i, bytes = length, res;
- size_t out_len = srclength;
-
- mutex_lock(&msblk->read_data_mutex);
+ size_t out_len = output->length;
for (i = 0; i < b; i++) {
- wait_on_buffer(bh[i]);
- if (!buffer_uptodate(bh[i]))
- goto block_release;
-
avail = min(bytes, msblk->devblksize - offset);
memcpy(buff, bh[i]->b_data + offset, avail);
buff += avail;
@@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
goto failed;
res = bytes = (int)out_len;
- for (i = 0, buff = stream->output; bytes && i < pages; i++) {
- avail = min_t(int, bytes, PAGE_CACHE_SIZE);
- memcpy(buffer[i], buff, avail);
- buff += avail;
- bytes -= avail;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ } else {
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
}
+ squashfs_finish_page(output);
- mutex_unlock(&msblk->read_data_mutex);
return res;
-block_release:
- for (; i < b; i++)
- put_bh(bh[i]);
-
failed:
- mutex_unlock(&msblk->read_data_mutex);
-
- ERROR("lzo decompression failed, data probably corrupt\n");
return -EIO;
}
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 7834a517f7f4..67cad77fefb4 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -79,7 +79,8 @@ static int get_dir_index_using_name(struct super_block *sb,
int len)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
- int i, size, length = 0, err;
+ int i, length = 0, err;
+ unsigned int size;
struct squashfs_dir_index *index;
char *str;
@@ -103,6 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb,
size = le32_to_cpu(index->size) + 1;
+ if (size > SQUASHFS_NAME_LEN)
+ break;
err = squashfs_read_metadata(sb, index->name, &index_start,
&index_offset, size);
@@ -144,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
struct squashfs_dir_entry *dire;
u64 block = squashfs_i(dir)->start + msblk->directory_table;
int offset = squashfs_i(dir)->offset;
- int err, length, dir_count, size;
+ int err, length;
+ unsigned int dir_count, size;
TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
new file mode 100644
index 000000000000..5a1c11f56441
--- /dev/null
+++ b/fs/squashfs/page_actor.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include "page_actor.h"
+
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
+
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->buffer[0];
+}
+
+static void *cache_next_page(struct squashfs_page_actor *actor)
+{
+ if (actor->next_page == actor->pages)
+ return NULL;
+
+ return actor->buffer[actor->next_page++];
+}
+
+static void cache_finish_page(struct squashfs_page_actor *actor)
+{
+ /* empty */
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->buffer = buffer;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->squashfs_first_page = cache_first_page;
+ actor->squashfs_next_page = cache_next_page;
+ actor->squashfs_finish_page = cache_finish_page;
+ return actor;
+}
+
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->pageaddr = kmap_atomic(actor->page[0]);
+}
+
+static void *direct_next_page(struct squashfs_page_actor *actor)
+{
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
+
+ return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+ kmap_atomic(actor->page[actor->next_page++]);
+}
+
+static void direct_finish_page(struct squashfs_page_actor *actor)
+{
+ if (actor->pageaddr)
+ kunmap_atomic(actor->pageaddr);
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ actor->pageaddr = NULL;
+ actor->squashfs_first_page = direct_first_page;
+ actor->squashfs_next_page = direct_next_page;
+ actor->squashfs_finish_page = direct_finish_page;
+ return actor;
+}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
new file mode 100644
index 000000000000..26dd82008b82
--- /dev/null
+++ b/fs/squashfs/page_actor.h
@@ -0,0 +1,81 @@
+#ifndef PAGE_ACTOR_H
+#define PAGE_ACTOR_H
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
+struct squashfs_page_actor {
+ void **page;
+ int pages;
+ int length;
+ int next_page;
+};
+
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+ int pages, int length)
+{
+ struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+ if (actor == NULL)
+ return NULL;
+
+ actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->page = page;
+ actor->pages = pages;
+ actor->next_page = 0;
+ return actor;
+}
+
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+ actor->next_page = 1;
+ return actor->page[0];
+}
+
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+ return actor->next_page == actor->pages ? NULL :
+ actor->page[actor->next_page++];
+}
+
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+ /* empty */
+}
+#else
+struct squashfs_page_actor {
+ union {
+ void **buffer;
+ struct page **page;
+ };
+ void *pageaddr;
+ void *(*squashfs_first_page)(struct squashfs_page_actor *);
+ void *(*squashfs_next_page)(struct squashfs_page_actor *);
+ void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ int pages;
+ int length;
+ int next_page;
+};
+
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+ **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+ return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+ actor->squashfs_finish_page(actor);
+}
+#endif
+#endif
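
Whichever variant is built in, decompressor back-ends drive the actor the same way, and the lzo/lz4/xz/zlib conversions in this series all follow it: take the first page, fill at most PAGE_CACHE_SIZE bytes, move to the next page until either the data or the pages run out, then call squashfs_finish_page() exactly once. Because the CONFIG_SQUASHFS_FILE_DIRECT actor kmaps pages atomically, nothing in between may sleep. A condensed sketch of that consumer loop (copy_to_actor() is an illustrative name, not a Squashfs function):

#include <linux/pagemap.h>
#include <linux/string.h>
#include "page_actor.h"

/* Copy len bytes from a linear buffer into the actor's pages. */
static int copy_to_actor(struct squashfs_page_actor *actor, const void *src,
	int len)
{
	void *dst = squashfs_first_page(actor);
	int bytes = len;

	while (dst != NULL) {
		if (bytes <= PAGE_CACHE_SIZE) {
			memcpy(dst, src, bytes);	/* last, possibly partial, page */
			break;
		}
		memcpy(dst, src, PAGE_CACHE_SIZE);
		src += PAGE_CACHE_SIZE;
		bytes -= PAGE_CACHE_SIZE;
		dst = squashfs_next_page(actor);	/* NULL once all pages are used */
	}

	/* In the direct case this kunmaps the currently mapped page. */
	squashfs_finish_page(actor);
	return len;
}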
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index d1266516ed08..887d6d270080 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -25,11 +25,11 @@
#define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args)
-#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
+#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
/* block.c */
-extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
- int, int);
+extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
+ struct squashfs_page_actor *);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
/* decompressor.c */
extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
+
+/* decompressor_xxx.c */
+extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
+extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
+extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
+ int, int, int, struct squashfs_page_actor *);
+extern int squashfs_max_decompressors(void);
/* export.c */
extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
@@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
u64, u64, unsigned int);
+/* file.c */
+void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
+ int);
+
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
+
/* id.c */
extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 9e2349d07cb1..506f4ba5b983 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -87,7 +87,7 @@
#define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \
SQUASHFS_COMP_OPT)
-/* Max number of types and file types */
+/* Inode types including extended types */
#define SQUASHFS_DIR_TYPE 1
#define SQUASHFS_REG_TYPE 2
#define SQUASHFS_SYMLINK_TYPE 3
@@ -103,6 +103,9 @@
#define SQUASHFS_LFIFO_TYPE 13
#define SQUASHFS_LSOCKET_TYPE 14
+/* Max type value stored in directory entry */
+#define SQUASHFS_MAX_DIR_TYPE 7
+
/* Xattr types */
#define SQUASHFS_XATTR_USER 0
#define SQUASHFS_XATTR_TRUSTED 1
@@ -237,6 +240,7 @@ struct meta_index {
#define LZMA_COMPRESSION 2
#define LZO_COMPRESSION 3
#define XZ_COMPRESSION 4
+#define LZ4_COMPRESSION 5
struct squashfs_super_block {
__le32 s_magic;
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 52934a22f296..1da565cb50c3 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -50,6 +50,7 @@ struct squashfs_cache_entry {
wait_queue_head_t wait_queue;
struct squashfs_cache *cache;
void **data;
+ struct squashfs_page_actor *actor;
};
struct squashfs_sb_info {
@@ -63,10 +64,9 @@ struct squashfs_sb_info {
__le64 *id_table;
__le64 *fragment_index;
__le64 *xattr_id_table;
- struct mutex read_data_mutex;
struct mutex meta_index_mutex;
struct meta_index *meta_index;
- void *stream;
+ struct squashfs_stream *stream;
__le64 *inode_lookup_table;
u64 inode_table;
u64 directory_table;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 60553a9053ca..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
* the filesystem.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -98,7 +100,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
- mutex_init(&msblk->read_data_mutex);
mutex_init(&msblk->meta_index_mutex);
/*
@@ -206,13 +207,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
/* Allocate read_page block */
- msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
+ msblk->read_page = squashfs_cache_init("data",
+ squashfs_max_decompressors(), msblk->block_size);
if (msblk->read_page == NULL) {
ERROR("Failed to allocate read_page block\n");
goto failed_mount;
}
- msblk->stream = squashfs_decompressor_init(sb, flags);
+ msblk->stream = squashfs_decompressor_setup(sb, flags);
if (IS_ERR(msblk->stream)) {
err = PTR_ERR(msblk->stream);
msblk->stream = NULL;
@@ -336,7 +338,7 @@ failed_mount:
squashfs_cache_delete(msblk->block_cache);
squashfs_cache_delete(msblk->fragment_cache);
squashfs_cache_delete(msblk->read_page);
- squashfs_decompressor_free(msblk, msblk->stream);
+ squashfs_decompressor_destroy(msblk);
kfree(msblk->inode_lookup_table);
kfree(msblk->fragment_index);
kfree(msblk->id_table);
@@ -371,6 +373,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int squashfs_remount(struct super_block *sb, int *flags, char *data)
{
+ sync_filesystem(sb);
*flags |= MS_RDONLY;
return 0;
}
@@ -383,7 +386,7 @@ static void squashfs_put_super(struct super_block *sb)
squashfs_cache_delete(sbi->block_cache);
squashfs_cache_delete(sbi->fragment_cache);
squashfs_cache_delete(sbi->read_page);
- squashfs_decompressor_free(sbi, sbi->stream);
+ squashfs_decompressor_destroy(sbi);
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
@@ -447,8 +450,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
- "Phillip Lougher\n");
+ pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
return 0;
}
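
The pr_fmt() definition added at the top of super.c is what lets the old explicit "squashfs: " prefix disappear from the printk above: <linux/printk.h> prepends pr_fmt(fmt) to every pr_*() format string, and defining it to KBUILD_MODNAME before the includes tags all of the file's messages automatically. In miniature (module name and message are illustrative):

/* Must be defined before any header that pulls in <linux/printk.h>. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/printk.h>

static int __init demo_init(void)
{
	pr_info("version 4.0 loaded\n");	/* logged as "<modname>: version 4.0 loaded" */
	return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");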
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 1760b7d108f6..c609624e4b8a 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -32,44 +32,70 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
struct squashfs_xz {
struct xz_dec *state;
struct xz_buf buf;
};
-struct comp_opts {
+struct disk_comp_opts {
__le32 dictionary_size;
__le32 flags;
};
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
- int len)
+struct comp_opts {
+ int dict_size;
+};
+
+static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int len)
{
- struct comp_opts *comp_opts = buff;
- struct squashfs_xz *stream;
- int dict_size = msblk->block_size;
- int err, n;
+ struct disk_comp_opts *comp_opts = buff;
+ struct comp_opts *opts;
+ int err = 0, n;
+
+ opts = kmalloc(sizeof(*opts), GFP_KERNEL);
+ if (opts == NULL) {
+ err = -ENOMEM;
+ goto out2;
+ }
if (comp_opts) {
/* check compressor options are the expected length */
if (len < sizeof(*comp_opts)) {
err = -EIO;
- goto failed;
+ goto out;
}
- dict_size = le32_to_cpu(comp_opts->dictionary_size);
+ opts->dict_size = le32_to_cpu(comp_opts->dictionary_size);
/* the dictionary size should be 2^n or 2^n+2^(n+1) */
- n = ffs(dict_size) - 1;
- if (dict_size != (1 << n) && dict_size != (1 << n) +
+ n = ffs(opts->dict_size) - 1;
+ if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) +
(1 << (n + 1))) {
err = -EIO;
- goto failed;
+ goto out;
}
- }
+ } else
+ /* use defaults */
+ opts->dict_size = max_t(int, msblk->block_size,
+ SQUASHFS_METADATA_SIZE);
+
+ return opts;
+
+out:
+ kfree(opts);
+out2:
+ return ERR_PTR(err);
+}
+
- dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
+{
+ struct comp_opts *comp_opts = buff;
+ struct squashfs_xz *stream;
+ int err;
stream = kmalloc(sizeof(*stream), GFP_KERNEL);
if (stream == NULL) {
@@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
goto failed;
}
- stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+ stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size);
if (stream->state == NULL) {
kfree(stream);
err = -ENOMEM;
@@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm)
}
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
{
enum xz_ret xz_err;
- int avail, total = 0, k = 0, page = 0;
- struct squashfs_xz *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ int avail, total = 0, k = 0;
+ struct squashfs_xz *stream = strm;
xz_dec_reset(stream->state);
stream->buf.in_pos = 0;
stream->buf.in_size = 0;
stream->buf.out_pos = 0;
stream->buf.out_size = PAGE_CACHE_SIZE;
- stream->buf.out = buffer[page++];
+ stream->buf.out = squashfs_first_page(output);
do {
if (stream->buf.in_pos == stream->buf.in_size && k < b) {
avail = min(length, msblk->devblksize - offset);
length -= avail;
- wait_on_buffer(bh[k]);
- if (!buffer_uptodate(bh[k]))
- goto release_mutex;
-
stream->buf.in = bh[k]->b_data + offset;
stream->buf.in_size = avail;
stream->buf.in_pos = 0;
offset = 0;
}
- if (stream->buf.out_pos == stream->buf.out_size
- && page < pages) {
- stream->buf.out = buffer[page++];
- stream->buf.out_pos = 0;
- total += PAGE_CACHE_SIZE;
+ if (stream->buf.out_pos == stream->buf.out_size) {
+ stream->buf.out = squashfs_next_page(output);
+ if (stream->buf.out != NULL) {
+ stream->buf.out_pos = 0;
+ total += PAGE_CACHE_SIZE;
+ }
}
xz_err = xz_dec_run(stream->state, &stream->buf);
@@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
put_bh(bh[k++]);
} while (xz_err == XZ_OK);
- if (xz_err != XZ_STREAM_END) {
- ERROR("xz_dec_run error, data probably corrupt\n");
- goto release_mutex;
- }
-
- if (k < b) {
- ERROR("xz_uncompress error, input remaining\n");
- goto release_mutex;
- }
+ squashfs_finish_page(output);
- total += stream->buf.out_pos;
- mutex_unlock(&msblk->read_data_mutex);
- return total;
+ if (xz_err != XZ_STREAM_END || k < b)
+ goto out;
-release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+ return total + stream->buf.out_pos;
+out:
for (; k < b; k++)
put_bh(bh[k]);
@@ -172,6 +184,7 @@ release_mutex:
const struct squashfs_decompressor squashfs_xz_comp_ops = {
.init = squashfs_xz_init,
+ .comp_opts = squashfs_xz_comp_opts,
.free = squashfs_xz_free,
.decompress = squashfs_xz_uncompress,
.id = XZ_COMPRESSION,
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 55d918fd2d86..8727caba6882 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -32,8 +32,9 @@
#include "squashfs_fs_sb.h"
#include "squashfs.h"
#include "decompressor.h"
+#include "page_actor.h"
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff)
{
z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
if (stream == NULL)
@@ -61,44 +62,37 @@ static void zlib_free(void *strm)
}
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
- struct buffer_head **bh, int b, int offset, int length, int srclength,
- int pages)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
{
- int zlib_err, zlib_init = 0;
- int k = 0, page = 0;
- z_stream *stream = msblk->stream;
-
- mutex_lock(&msblk->read_data_mutex);
+ int zlib_err, zlib_init = 0, k = 0;
+ z_stream *stream = strm;
- stream->avail_out = 0;
+ stream->avail_out = PAGE_CACHE_SIZE;
+ stream->next_out = squashfs_first_page(output);
stream->avail_in = 0;
do {
if (stream->avail_in == 0 && k < b) {
int avail = min(length, msblk->devblksize - offset);
length -= avail;
- wait_on_buffer(bh[k]);
- if (!buffer_uptodate(bh[k]))
- goto release_mutex;
-
stream->next_in = bh[k]->b_data + offset;
stream->avail_in = avail;
offset = 0;
}
- if (stream->avail_out == 0 && page < pages) {
- stream->next_out = buffer[page++];
- stream->avail_out = PAGE_CACHE_SIZE;
+ if (stream->avail_out == 0) {
+ stream->next_out = squashfs_next_page(output);
+ if (stream->next_out != NULL)
+ stream->avail_out = PAGE_CACHE_SIZE;
}
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
- ERROR("zlib_inflateInit returned unexpected "
- "result 0x%x, srclength %d\n",
- zlib_err, srclength);
- goto release_mutex;
+ squashfs_finish_page(output);
+ goto out;
}
zlib_init = 1;
}
@@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
put_bh(bh[k++]);
} while (zlib_err == Z_OK);
- if (zlib_err != Z_STREAM_END) {
- ERROR("zlib_inflate error, data probably corrupt\n");
- goto release_mutex;
- }
+ squashfs_finish_page(output);
- zlib_err = zlib_inflateEnd(stream);
- if (zlib_err != Z_OK) {
- ERROR("zlib_inflate error, data probably corrupt\n");
- goto release_mutex;
- }
+ if (zlib_err != Z_STREAM_END)
+ goto out;
- if (k < b) {
- ERROR("zlib_uncompress error, data remaining\n");
- goto release_mutex;
- }
+ zlib_err = zlib_inflateEnd(stream);
+ if (zlib_err != Z_OK)
+ goto out;
- length = stream->total_out;
- mutex_unlock(&msblk->read_data_mutex);
- return length;
+ if (k < b)
+ goto out;
-release_mutex:
- mutex_unlock(&msblk->read_data_mutex);
+ return stream->total_out;
+out:
for (; k < b; k++)
put_bh(bh[k]);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c327d4ee1235..4742e58f3fc5 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
+ sync_filesystem(sb);
if (sbi->s_forced_ro)
*flags |= MS_RDONLY;
return 0;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 32b644f03690..0013142c0475 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -8,6 +8,7 @@
*
*/
+#include <linux/alarmtimer.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
@@ -26,7 +27,10 @@
#include <linux/rcupdate.h>
struct timerfd_ctx {
- struct hrtimer tmr;
+ union {
+ struct hrtimer tmr;
+ struct alarm alarm;
+ } t;
ktime_t tintv;
ktime_t moffs;
wait_queue_head_t wqh;
@@ -41,14 +45,19 @@ struct timerfd_ctx {
static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);
+static inline bool isalarm(struct timerfd_ctx *ctx)
+{
+ return ctx->clockid == CLOCK_REALTIME_ALARM ||
+ ctx->clockid == CLOCK_BOOTTIME_ALARM;
+}
+
/*
* This gets called when the timer event triggers. We set the "expired"
* flag, but we do not re-arm the timer (in case it's necessary,
* tintv.tv64 != 0) until the timer is accessed.
*/
-static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
+static void timerfd_triggered(struct timerfd_ctx *ctx)
{
- struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);
unsigned long flags;
spin_lock_irqsave(&ctx->wqh.lock, flags);
@@ -56,10 +65,25 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
ctx->ticks++;
wake_up_locked(&ctx->wqh);
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+}
+static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
+{
+ struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx,
+ t.tmr);
+ timerfd_triggered(ctx);
return HRTIMER_NORESTART;
}
+static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
+ ktime_t now)
+{
+ struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
+ t.alarm);
+ timerfd_triggered(ctx);
+ return ALARMTIMER_NORESTART;
+}
+
/*
* Called when the clock was set to cancel the timers in the cancel
* list. This will wake up processes waiting on these timers. The
@@ -107,8 +131,9 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
- if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
- (flags & TFD_TIMER_CANCEL_ON_SET)) {
+ if ((ctx->clockid == CLOCK_REALTIME ||
+ ctx->clockid == CLOCK_REALTIME_ALARM) &&
+ (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
if (!ctx->might_cancel) {
ctx->might_cancel = true;
spin_lock(&cancel_lock);
@@ -124,7 +149,11 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
ktime_t remaining;
- remaining = hrtimer_expires_remaining(&ctx->tmr);
+ if (isalarm(ctx))
+ remaining = alarm_expires_remaining(&ctx->t.alarm);
+ else
+ remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+
return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}
@@ -142,11 +171,28 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
ctx->expired = 0;
ctx->ticks = 0;
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
- hrtimer_init(&ctx->tmr, clockid, htmode);
- hrtimer_set_expires(&ctx->tmr, texp);
- ctx->tmr.function = timerfd_tmrproc;
+
+ if (isalarm(ctx)) {
+ alarm_init(&ctx->t.alarm,
+ ctx->clockid == CLOCK_REALTIME_ALARM ?
+ ALARM_REALTIME : ALARM_BOOTTIME,
+ timerfd_alarmproc);
+ } else {
+ hrtimer_init(&ctx->t.tmr, clockid, htmode);
+ hrtimer_set_expires(&ctx->t.tmr, texp);
+ ctx->t.tmr.function = timerfd_tmrproc;
+ }
+
if (texp.tv64 != 0) {
- hrtimer_start(&ctx->tmr, texp, htmode);
+ if (isalarm(ctx)) {
+ if (flags & TFD_TIMER_ABSTIME)
+ alarm_start(&ctx->t.alarm, texp);
+ else
+ alarm_start_relative(&ctx->t.alarm, texp);
+ } else {
+ hrtimer_start(&ctx->t.tmr, texp, htmode);
+ }
+
if (timerfd_canceled(ctx))
return -ECANCELED;
}
@@ -158,7 +204,11 @@ static int timerfd_release(struct inode *inode, struct file *file)
struct timerfd_ctx *ctx = file->private_data;
timerfd_remove_cancel(ctx);
- hrtimer_cancel(&ctx->tmr);
+
+ if (isalarm(ctx))
+ alarm_cancel(&ctx->t.alarm);
+ else
+ hrtimer_cancel(&ctx->t.tmr);
kfree_rcu(ctx, rcu);
return 0;
}
@@ -215,9 +265,15 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
* callback to avoid DoS attacks specifying a very
* short timer period.
*/
- ticks += hrtimer_forward_now(&ctx->tmr,
- ctx->tintv) - 1;
- hrtimer_restart(&ctx->tmr);
+ if (isalarm(ctx)) {
+ ticks += alarm_forward_now(
+ &ctx->t.alarm, ctx->tintv) - 1;
+ alarm_restart(&ctx->t.alarm);
+ } else {
+ ticks += hrtimer_forward_now(&ctx->t.tmr,
+ ctx->tintv) - 1;
+ hrtimer_restart(&ctx->t.tmr);
+ }
}
ctx->expired = 0;
ctx->ticks = 0;
@@ -259,7 +315,10 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
if ((flags & ~TFD_CREATE_FLAGS) ||
(clockid != CLOCK_MONOTONIC &&
- clockid != CLOCK_REALTIME))
+ clockid != CLOCK_REALTIME &&
+ clockid != CLOCK_REALTIME_ALARM &&
+ clockid != CLOCK_BOOTTIME &&
+ clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -268,7 +327,15 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
init_waitqueue_head(&ctx->wqh);
ctx->clockid = clockid;
- hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+
+ if (isalarm(ctx))
+ alarm_init(&ctx->t.alarm,
+ ctx->clockid == CLOCK_REALTIME_ALARM ?
+ ALARM_REALTIME : ALARM_BOOTTIME,
+ timerfd_alarmproc);
+ else
+ hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
+
ctx->moffs = ktime_get_monotonic_offset();
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
@@ -305,8 +372,14 @@ static int do_timerfd_settime(int ufd, int flags,
*/
for (;;) {
spin_lock_irq(&ctx->wqh.lock);
- if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
- break;
+
+ if (isalarm(ctx)) {
+ if (alarm_try_to_cancel(&ctx->t.alarm) >= 0)
+ break;
+ } else {
+ if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
+ break;
+ }
spin_unlock_irq(&ctx->wqh.lock);
cpu_relax();
}
@@ -317,8 +390,12 @@ static int do_timerfd_settime(int ufd, int flags,
* We do not update "ticks" and "expired" since the timer will be
* re-programmed again in the following timerfd_setup() call.
*/
- if (ctx->expired && ctx->tintv.tv64)
- hrtimer_forward_now(&ctx->tmr, ctx->tintv);
+ if (ctx->expired && ctx->tintv.tv64) {
+ if (isalarm(ctx))
+ alarm_forward_now(&ctx->t.alarm, ctx->tintv);
+ else
+ hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
+ }
old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
old->it_interval = ktime_to_timespec(ctx->tintv);
@@ -345,9 +422,18 @@ static int do_timerfd_gettime(int ufd, struct itimerspec *t)
spin_lock_irq(&ctx->wqh.lock);
if (ctx->expired && ctx->tintv.tv64) {
ctx->expired = 0;
- ctx->ticks +=
- hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
- hrtimer_restart(&ctx->tmr);
+
+ if (isalarm(ctx)) {
+ ctx->ticks +=
+ alarm_forward_now(
+ &ctx->t.alarm, ctx->tintv) - 1;
+ alarm_restart(&ctx->t.alarm);
+ } else {
+ ctx->ticks +=
+ hrtimer_forward_now(&ctx->t.tmr, ctx->tintv)
+ - 1;
+ hrtimer_restart(&ctx->t.tmr);
+ }
}
t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
t->it_interval = ktime_to_timespec(ctx->tintv);
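
With CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM accepted by timerfd_create(), a timerfd can now be backed by the alarmtimer layer, so it keeps counting across suspend and can wake the machine when it expires. A hedged userspace sketch of the new capability, assuming libc headers that already define the alarm clock IDs (newer kernels additionally require CAP_WAKE_ALARM for these clocks):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/timerfd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 30 },	/* first expiry in 30 seconds */
		.it_interval = { .tv_sec = 30 },	/* then every 30 seconds */
	};
	uint64_t expirations;
	int fd;

	fd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
	if (fd < 0) {
		perror("timerfd_create");	/* EINVAL on kernels without alarm support */
		return EXIT_FAILURE;
	}

	if (timerfd_settime(fd, 0, &its, NULL) < 0) {
		perror("timerfd_settime");
		return EXIT_FAILURE;
	}

	/* Blocks until the alarm fires, even if the system suspends meanwhile. */
	if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
		printf("timer expired %llu time(s)\n",
			(unsigned long long)expirations);

	close(fd);
	return 0;
}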
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 05115d719408..3bf098a82dac 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1840,6 +1840,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
int err;
struct ubifs_info *c = sb->s_fs_info;
+ sync_filesystem(sb);
dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
err = ubifs_parse_options(c, data, 1);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 839a2bad7f45..32f5297dfbab 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -629,6 +629,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
struct udf_options uopt;
struct udf_sb_info *sbi = UDF_SB(sb);
int error = 0;
+ sync_filesystem(sb);
if (sbi->s_lvid_bh) {
int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 329f2f53b7ed..b8c6791f046f 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1280,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
unsigned new_mount_opt, ufstype;
unsigned flags;
+ sync_filesystem(sb);
lock_ufs(sb);
mutex_lock(&UFS_SB(sb)->s_lock);
uspi = UFS_SB(sb)->s_uspi;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3033ba5e9762..478c0ad5e36f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1218,6 +1218,7 @@ xfs_fs_remount(
char *p;
int error;
+ sync_filesystem(sb);
while ((p = strsep(&options, ",")) != NULL) {
int token;
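
The recurring one-line change through sysv, ubifs, udf, ufs and xfs above is the same: each ->remount_fs() implementation now calls sync_filesystem(sb) itself before touching flags, presumably because the generic remount path no longer performs that writeback on the filesystems' behalf. The resulting pattern for any filesystem is simply (examplefs is illustrative):

#include <linux/fs.h>

static int examplefs_remount(struct super_block *sb, int *flags, char *data)
{
	/* Flush dirty data and metadata before the new mount flags take effect. */
	sync_filesystem(sb);

	/* ... parse 'data' and validate/apply *flags as before ... */
	*flags |= MS_RDONLY;		/* e.g. a read-only filesystem, as in squashfs */
	return 0;
}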
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 639d7a4d033b..01613b382b0e 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -46,5 +46,20 @@
#define read_barrier_depends() do {} while (0)
#define smp_read_barrier_depends() do {} while (0)
+#define smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ACCESS_ONCE(*p) = (v); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ smp_mb(); \
+ ___p1; \
+})
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */
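
The generic smp_store_release()/smp_load_acquire() fallbacks above provide one-way ordering with a full smp_mb(): every write before the release-store is visible to any reader whose acquire-load sees the stored value, and compiletime_assert_atomic_type() (added to <linux/compiler.h> later in this series) rejects operands wider than a native word. A minimal message-passing sketch, with hypothetical producer()/consumer() functions:

#include <linux/compiler.h>
#include <asm/barrier.h>

static int payload;
static int ready;

/* Publisher: make the payload visible, then raise the flag with release semantics. */
static void producer(void)
{
	payload = 42;
	smp_store_release(&ready, 1);
}

/* Reader: an acquire-load of the flag orders the subsequent read of payload. */
static int consumer(void)
{
	if (smp_load_acquire(&ready))
		return payload;		/* guaranteed to observe 42, never a stale value */
	return -1;			/* producer has not published yet */
}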
diff --git a/include/asm-generic/seccomp.h b/include/asm-generic/seccomp.h
new file mode 100644
index 000000000000..663ac3dc02ea
--- /dev/null
+++ b/include/asm-generic/seccomp.h
@@ -0,0 +1,29 @@
+/*
+ * include/asm-generic/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi <at> linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_GENERIC_SECCOMP_H
+#define _ASM_GENERIC_SECCOMP_H
+
+#include <asm-generic/unistd.h>
+
+#if defined(CONFIG_COMPAT) && !defined(__NR_seccomp_read_32)
+#define __NR_seccomp_read_32 __NR_read
+#define __NR_seccomp_write_32 __NR_write
+#define __NR_seccomp_exit_32 __NR_exit
+#define __NR_seccomp_sigreturn_32 __NR_rt_sigreturn
+#endif /* CONFIG_COMPAT && ! already defined */
+
+#define __NR_seccomp_read __NR_read
+#define __NR_seccomp_write __NR_write
+#define __NR_seccomp_exit __NR_exit
+#define __NR_seccomp_sigreturn __NR_rt_sigreturn
+
+#endif /* _ASM_GENERIC_SECCOMP_H */
+
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 5b09392db673..d401e5463fb0 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -144,8 +144,6 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
/**
* syscall_get_arch - return the AUDIT_ARCH for the current system call
- * @task: task of interest, must be in system call entry tracing
- * @regs: task_pt_regs() of @task
*
* Returns the AUDIT_ARCH_* based on the system call convention in use.
*
@@ -155,5 +153,5 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
* Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must
* provide an implementation of this.
*/
-int syscall_get_arch(struct task_struct *task, struct pt_regs *regs);
+int syscall_get_arch(void);
#endif /* _ASM_SYSCALL_H */
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 63d17ee9eb48..b836d131bb07 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1648,6 +1648,8 @@ int drm_gem_private_object_init(struct drm_device *dev,
void drm_gem_object_handle_free(struct drm_gem_object *obj);
void drm_gem_vm_open(struct vm_area_struct *vma);
void drm_gem_vm_close(struct vm_area_struct *vma);
+int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
+ struct vm_area_struct *vma);
int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
#include <drm/drm_global.h>
diff --git a/include/drm/i2c/tda998x.h b/include/drm/i2c/tda998x.h
new file mode 100644
index 000000000000..31757dff5e91
--- /dev/null
+++ b/include/drm/i2c/tda998x.h
@@ -0,0 +1,31 @@
+#ifndef __DRM_I2C_TDA998X_H__
+#define __DRM_I2C_TDA998X_H__
+
+struct tda998x_encoder_params {
+ u8 swap_b:3;
+ u8 mirr_b:1;
+ u8 swap_a:3;
+ u8 mirr_a:1;
+ u8 swap_d:3;
+ u8 mirr_d:1;
+ u8 swap_c:3;
+ u8 mirr_c:1;
+ u8 swap_f:3;
+ u8 mirr_f:1;
+ u8 swap_e:3;
+ u8 mirr_e:1;
+
+ u8 audio_cfg;
+ u8 audio_clk_cfg;
+ u8 audio_frame[6];
+
+ enum {
+ AFMT_NO_AUDIO = 0,
+ AFMT_SPDIF,
+ AFMT_I2S
+ } audio_format;
+
+ unsigned audio_sample_rate;
+};
+
+#endif
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
new file mode 100644
index 000000000000..a4608897a5f3
--- /dev/null
+++ b/include/linux/Kbuild
@@ -0,0 +1,2 @@
+header-y += if_pppolac.h
+header-y += if_pppopns.h
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 9069694e70eb..a899402a5a0e 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -44,10 +44,14 @@ struct alarm {
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
int alarm_start(struct alarm *alarm, ktime_t start);
+int alarm_start_relative(struct alarm *alarm, ktime_t start);
+void alarm_restart(struct alarm *alarm);
int alarm_try_to_cancel(struct alarm *alarm);
int alarm_cancel(struct alarm *alarm);
u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval);
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval);
+ktime_t alarm_expires_remaining(const struct alarm *alarm);
/* Provide way to access the rtc device being used by alarmtimers */
struct rtc_device *alarmtimer_get_rtcdev(void);
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 32a89cf5ec45..9d1d9caf2611 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -5,6 +5,15 @@
#define AMBA_MMCI_H
#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/sdio_func.h>
+
+struct embedded_sdio_data {
+ struct sdio_cis cis;
+ struct sdio_cccr cccr;
+ struct sdio_embedded_func *funcs;
+ int num_funcs;
+};
/*
@@ -73,6 +82,9 @@ struct mmci_platform_data {
bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
void *dma_rx_param;
void *dma_tx_param;
+ unsigned int status_irq;
+ struct embedded_sdio_data *embedded_sdio;
+ int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id);
};
#endif
diff --git a/include/linux/android_aid.h b/include/linux/android_aid.h
new file mode 100644
index 000000000000..06264b8be5f0
--- /dev/null
+++ b/include/linux/android_aid.h
@@ -0,0 +1,28 @@
+/* include/linux/android_aid.h
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_ANDROID_AID_H
+#define _LINUX_ANDROID_AID_H
+
+/* AIDs that the kernel treats differently */
+#define AID_OBSOLETE_000 3001 /* was NET_BT_ADMIN */
+#define AID_OBSOLETE_001 3002 /* was NET_BT */
+#define AID_INET 3003
+#define AID_NET_RAW 3004
+#define AID_NET_ADMIN 3005
+#define AID_NET_BW_STATS 3006 /* read bandwidth statistics */
+#define AID_NET_BW_ACCT 3007 /* change bandwidth statistics accounting */
+
+#endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8852d370c720..a3997d560114 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -578,6 +578,7 @@ struct cgroup_subsys {
void (*css_offline)(struct cgroup *cgrp);
void (*css_free)(struct cgroup *cgrp);
+ int (*allow_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
@@ -868,6 +869,17 @@ unsigned short css_id(struct cgroup_subsys_state *css);
unsigned short css_depth(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
+/*
+ * Default Android check for whether the current process is allowed to move a
+ * task across cgroups, either because CAP_SYS_NICE is set or because the uid
+ * of the calling process is the same as that of the moved task, or because we are
+ * running as root.
+ * Returns 0 if this is allowed, or -EACCES otherwise.
+ */
+int subsys_cgroup_allow_attach(struct cgroup *cgrp,
+ struct cgroup_taskset *tset);
+
+
#else /* !CONFIG_CGROUPS */
static inline int cgroup_init_early(void) { return 0; }
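A sketch of how a controller could opt in to the relaxed attach policy by pointing the new hook at the default helper; "demo" is a hypothetical subsystem, not one from this patch:

static struct cgroup_subsys demo_subsys = {
	.name		= "demo",			/* hypothetical controller */
	.allow_attach	= subsys_cgroup_allow_attach,	/* default Android policy */
	/* .css_alloc, .css_free, .can_attach, ... as usual */
};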
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index a2329c5e6206..2472740d7ab2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -302,6 +302,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif
+/* Is this type a native word size -- useful for atomic operations */
+#ifndef __native_word
+# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#endif
+
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
@@ -341,6 +346,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
#define compiletime_assert(condition, msg) \
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
+#define compiletime_assert_atomic_type(t) \
+ compiletime_assert(__native_word(t), \
+ "Need native word sized stores/loads for atomicity.")
+
/*
* Prevent the compiler from merging or refetching accesses. The compiler
* is also forbidden from reordering successive instances of ACCESS_ONCE(),
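A sketch of the kind of macro the new assertion is intended for: rejecting at compile time any type that cannot be loaded or stored in one access. This is a simplified illustration, not the kernel's actual store-release implementation:

#define demo_store_release(p, v)				\
do {								\
	compiletime_assert_atomic_type(*(p));			\
	smp_mb();	/* order earlier stores before the write */	\
	ACCESS_ONCE(*(p)) = (v);				\
} while (0)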
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 770c8047e998..a482811399a9 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -280,4 +280,11 @@ void arch_cpu_idle_enter(void);
void arch_cpu_idle_exit(void);
void arch_cpu_idle_dead(void);
+#define IDLE_START 1
+#define IDLE_END 2
+
+void idle_notifier_register(struct notifier_block *n);
+void idle_notifier_unregister(struct notifier_block *n);
+void idle_notifier_call_chain(unsigned long val);
+
#endif /* _LINUX_CPU_H_ */
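A sketch of a client of the idle notifier chain declared above; the names and debug messages are made up for illustration:

#include <linux/cpu.h>
#include <linux/notifier.h>

static int demo_idle_notify(struct notifier_block *nb, unsigned long val,
			    void *data)
{
	if (val == IDLE_START)
		pr_debug("entering idle\n");
	else if (val == IDLE_END)
		pr_debug("leaving idle\n");
	return NOTIFY_OK;
}

static struct notifier_block demo_idle_nb = {
	.notifier_call = demo_idle_notify,
};

/* in driver init:  idle_notifier_register(&demo_idle_nb); */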
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index c303d383def1..8ac033182419 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -28,6 +28,9 @@
#include <linux/thermal.h>
#include <linux/cpumask.h>
+typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
+ unsigned long voltage, u32 *power);
+
#ifdef CONFIG_CPU_THERMAL
/**
* cpufreq_cooling_register - function to create cpufreq cooling device.
@@ -36,6 +39,10 @@
struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus);
+struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+ u32 capacitance, get_static_t plat_static_func);
+
/**
* of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
* @np: a valid struct device_node to the cooling device device tree node.
@@ -45,6 +52,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus);
struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
const struct cpumask *clip_cpus);
+
+struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus,
+ u32 capacitance,
+ get_static_t plat_static_func);
#else
static inline struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
@@ -52,6 +65,15 @@ of_cpufreq_cooling_register(struct device_node *np,
{
return NULL;
}
+
+static inline struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus,
+ u32 capacitance,
+ get_static_t plat_static_func)
+{
+ return NULL;
+}
#endif
/**
@@ -68,11 +90,28 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus)
return NULL;
}
static inline struct thermal_cooling_device *
+cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+ u32 capacitance, get_static_t plat_static_func)
+{
+ return NULL;
+}
+
+static inline struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
const struct cpumask *clip_cpus)
{
return NULL;
}
+
+static inline struct thermal_cooling_device *
+of_cpufreq_power_cooling_register(struct device_node *np,
+ const struct cpumask *clip_cpus,
+ u32 capacitance,
+ get_static_t plat_static_func)
+{
+ return NULL;
+}
+
static inline
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1a81b7470bc4..df55b769f550 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -338,9 +338,11 @@ const char *cpufreq_get_current_driver(void);
/*********************************************************************
* CPUFREQ 2.6. INTERFACE *
*********************************************************************/
+u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu);
int cpufreq_update_policy(unsigned int cpu);
bool have_governor_per_policy(void);
+struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
#ifdef CONFIG_CPU_FREQ
/* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */
@@ -394,6 +396,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
extern struct cpufreq_governor cpufreq_gov_conservative;
#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative)
+#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE)
+extern struct cpufreq_governor cpufreq_gov_interactive;
+#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_interactive)
#endif
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 21ca773f77bf..822c1354f3a6 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -51,7 +51,7 @@ struct task_struct;
extern void debug_show_all_locks(void);
extern void debug_show_held_locks(struct task_struct *task);
extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-extern void debug_check_no_locks_held(struct task_struct *task);
+extern void debug_check_no_locks_held(void);
#else
static inline void debug_show_all_locks(void)
{
@@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len)
}
static inline void
-debug_check_no_locks_held(struct task_struct *task)
+debug_check_no_locks_held(void)
{
}
#endif
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index fe8c4476f7e4..e2976e4cf678 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -192,6 +192,9 @@ extern int devfreq_register_opp_notifier(struct device *dev,
extern int devfreq_unregister_opp_notifier(struct device *dev,
struct devfreq *devfreq);
+int devfreq_qos_set_min(struct devfreq *df, unsigned long value);
+int devfreq_qos_set_max(struct devfreq *df, unsigned long value);
+
#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
/**
* struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
new file mode 100644
index 000000000000..cf18afb458ac
--- /dev/null
+++ b/include/linux/devfreq_cooling.h
@@ -0,0 +1,93 @@
+/*
+ * devfreq_cooling: Thermal cooling device implementation for devices using
+ * devfreq
+ *
+ * Copyright (C) 2014 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __DEVFREQ_COOLING_H__
+#define __DEVFREQ_COOLING_H__
+
+#include <linux/devfreq.h>
+#include <linux/thermal.h>
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+
+/**
+ * struct devfreq_cooling_ops - Devfreq cooling power ops
+ * @get_static_power: Take voltage, in mV, and return the static power in mW.
+ * @get_dynamic_power: Take voltage, in mV, and frequency, in Hz, and return
+ * the dynamic power draw in mW.
+ */
+struct devfreq_cooling_ops {
+ unsigned long (*get_static_power)(unsigned long voltage);
+ unsigned long (*get_dynamic_power)(unsigned long freq,
+ unsigned long voltage);
+};
+
+/**
+ * struct devfreq_cooling_device - Devfreq cooling device
+ * @cdev: Pointer to associated thermal cooling device.
+ * @devfreq: Pointer to associated devfreq device.
+ * @cooling_state: Current cooling state.
+ * @power_table: Pointer to table with maximum power draw for each cooling
+ * state. State is the index into the table, and the power is
+ * stated in mW.
+ * @power_ops: Pointer to power operations, used to generate @power_table.
+ * @last_status: The devfreq status at last devfreq polling.
+ */
+struct devfreq_cooling_device {
+ struct thermal_cooling_device *cdev;
+ struct devfreq *devfreq;
+ unsigned long cooling_state;
+ u32 *power_table;
+ struct devfreq_cooling_ops *power_ops;
+ struct devfreq_dev_status last_status;
+};
+
+struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+ struct devfreq_cooling_ops *ops);
+struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
+struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df);
+void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc);
+
+#else /* !CONFIG_DEVFREQ_THERMAL */
+
+static inline struct devfreq_cooling_device *
+of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
+ struct devfreq_cooling_ops *ops)
+{
+ return NULL;
+}
+
+static inline struct devfreq_cooling_device *
+of_devfreq_cooling_register(struct device_node *np, struct devfreq *df)
+{
+ return NULL;
+}
+
+static inline struct devfreq_cooling_device *
+devfreq_cooling_register(struct devfreq *df)
+{
+ return NULL;
+}
+
+static inline void
+devfreq_cooling_unregister(struct devfreq_cooling_device *dfc)
+{
+ return;
+}
+
+#endif /* CONFIG_DEVFREQ_THERMAL */
+#endif /* __DEVFREQ_COOLING_H__ */
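A sketch of registering a devfreq cooling device with power ops; the power models below are invented placeholders, not measured data:

#include <linux/devfreq_cooling.h>

static unsigned long demo_static_power(unsigned long voltage_mv)
{
	return voltage_mv / 10;				/* placeholder, mW */
}

static unsigned long demo_dynamic_power(unsigned long freq_hz,
					 unsigned long voltage_mv)
{
	return (freq_hz / 1000000) * voltage_mv / 1000;	/* placeholder, mW */
}

static struct devfreq_cooling_ops demo_power_ops = {
	.get_static_power	= demo_static_power,
	.get_dynamic_power	= demo_dynamic_power,
};

/* in probe(), with 'df' the devfreq device and 'np' its OF node: */
/*	dfc = of_devfreq_cooling_register_power(np, df, &demo_power_ops); */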
diff --git a/include/linux/dma-buf-test-exporter.h b/include/linux/dma-buf-test-exporter.h
new file mode 100755
index 000000000000..db0e93898133
--- /dev/null
+++ b/include/linux/dma-buf-test-exporter.h
@@ -0,0 +1,78 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_
+#define _LINUX_DMA_BUF_TEST_EXPORTER_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#define DMA_BUF_TE_VER_MAJOR 1
+#define DMA_BUF_TE_VER_MINOR 0
+#define DMA_BUF_TE_ENQ 0x642d7465
+#define DMA_BUF_TE_ACK 0x68692100
+
+struct dma_buf_te_ioctl_version
+{
+ int op; /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */
+ int major; /**< Major version */
+ int minor; /**< Minor version */
+};
+
+struct dma_buf_te_ioctl_alloc
+{
+ __u64 size; /* size of buffer to allocate, in pages */
+};
+
+struct dma_buf_te_ioctl_status
+{
+ /* in */
+	int fd; /* the dma_buf to query, only dma_buf objects exported by this driver are supported */
+ /* out */
+ int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */
+ int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */
+ int cpu_mappings; /* number of cpu mappings (active 'mmap's) */
+};
+
+struct dma_buf_te_ioctl_set_failing
+{
+ /* in */
+	int fd; /* the dma_buf to set failure mode for, only dma_buf objects exported by this driver are supported */
+
+ /* zero = no fail injection, non-zero = inject failure */
+ int fail_attach;
+ int fail_map;
+ int fail_mmap;
+};
+
+struct dma_buf_te_ioctl_fill
+{
+ int fd;
+ unsigned int value;
+};
+
+#define DMA_BUF_TE_IOCTL_BASE 'E'
+/* All ioctls below return 0 on success or -errcode, except DMA_BUF_TE_ALLOC which returns an fd or -errcode */
+#define DMA_BUF_TE_VERSION _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version)
+#define DMA_BUF_TE_ALLOC _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_QUERY _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status)
+#define DMA_BUF_TE_SET_FAILING _IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing)
+#define DMA_BUF_TE_ALLOC_CONT _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc)
+#define DMA_BUF_TE_FILL _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill)
+
+#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */
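A userspace sketch of driving the test-exporter ioctls; the /dev/dma_buf_te node name is an assumption of this example, not something stated by the header:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/dma-buf-test-exporter.h>

static int demo_alloc_and_query(void)
{
	struct dma_buf_te_ioctl_alloc alloc = { .size = 4 };	/* 4 pages */
	struct dma_buf_te_ioctl_status status;
	int te, buf_fd;

	te = open("/dev/dma_buf_te", O_RDWR);	/* device node name assumed */
	if (te < 0)
		return -1;

	buf_fd = ioctl(te, DMA_BUF_TE_ALLOC, &alloc);	/* returns fd or -errcode */
	if (buf_fd < 0)
		return -1;

	status.fd = buf_fd;
	ioctl(te, DMA_BUF_TE_QUERY, &status);	/* fills attached_devices etc. */

	return buf_fd;
}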
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index e70df40d84f6..7fd81b8c4897 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -3,6 +3,7 @@
#ifndef FREEZER_H_INCLUDED
#define FREEZER_H_INCLUDED
+#include <linux/debug_locks.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/atomic.h>
@@ -46,7 +47,11 @@ extern int freeze_kernel_threads(void);
extern void thaw_processes(void);
extern void thaw_kernel_threads(void);
-static inline bool try_to_freeze(void)
+/*
+ * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION
+ * If try_to_freeze causes a lockdep warning it means the caller may deadlock
+ */
+static inline bool try_to_freeze_unsafe(void)
{
might_sleep();
if (likely(!freezing(current)))
@@ -54,6 +59,13 @@ static inline bool try_to_freeze(void)
return __refrigerator(false);
}
+static inline bool try_to_freeze(void)
+{
+ if (!(current->flags & PF_NOFREEZE))
+ debug_check_no_locks_held();
+ return try_to_freeze_unsafe();
+}
+
extern bool freeze_task(struct task_struct *p);
extern bool set_freezable(void);
@@ -115,6 +127,14 @@ static inline void freezer_count(void)
try_to_freeze();
}
+/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
+static inline void freezer_count_unsafe(void)
+{
+ current->flags &= ~PF_FREEZER_SKIP;
+ smp_mb();
+ try_to_freeze_unsafe();
+}
+
/**
* freezer_should_skip - whether to skip a task when determining frozen
* state is reached
@@ -139,28 +159,86 @@ static inline bool freezer_should_skip(struct task_struct *p)
}
/*
- * These macros are intended to be used whenever you want allow a sleeping
+ * These functions are intended to be used whenever you want to allow a sleeping
* task to be frozen. Note that neither return any clear indication of
* whether a freeze event happened while in this function.
*/
/* Like schedule(), but should not block the freezer. */
-#define freezable_schedule() \
-({ \
- freezer_do_not_count(); \
- schedule(); \
- freezer_count(); \
-})
+static inline void freezable_schedule(void)
+{
+ freezer_do_not_count();
+ schedule();
+ freezer_count();
+}
+
+/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
+static inline void freezable_schedule_unsafe(void)
+{
+ freezer_do_not_count();
+ schedule();
+ freezer_count_unsafe();
+}
+
+/*
+ * Like schedule_timeout(), but should not block the freezer.  Do not
+ * call this with locks held.
+ */
+static inline long freezable_schedule_timeout(long timeout)
+{
+ long __retval;
+ freezer_do_not_count();
+ __retval = schedule_timeout(timeout);
+ freezer_count();
+ return __retval;
+}
+
+/*
+ * Like schedule_timeout_interruptible(), but should not block the freezer. Do not
+ * call this with locks held.
+ */
+static inline long freezable_schedule_timeout_interruptible(long timeout)
+{
+ long __retval;
+ freezer_do_not_count();
+ __retval = schedule_timeout_interruptible(timeout);
+ freezer_count();
+ return __retval;
+}
/* Like schedule_timeout_killable(), but should not block the freezer. */
-#define freezable_schedule_timeout_killable(timeout) \
-({ \
- long __retval; \
- freezer_do_not_count(); \
- __retval = schedule_timeout_killable(timeout); \
- freezer_count(); \
- __retval; \
-})
+static inline long freezable_schedule_timeout_killable(long timeout)
+{
+ long __retval;
+ freezer_do_not_count();
+ __retval = schedule_timeout_killable(timeout);
+ freezer_count();
+ return __retval;
+}
+
+/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
+static inline long freezable_schedule_timeout_killable_unsafe(long timeout)
+{
+ long __retval;
+ freezer_do_not_count();
+ __retval = schedule_timeout_killable(timeout);
+ freezer_count_unsafe();
+ return __retval;
+}
+
+/*
+ * Like schedule_hrtimeout_range(), but should not block the freezer. Do not
+ * call this with locks held.
+ */
+static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
+ unsigned long delta, const enum hrtimer_mode mode)
+{
+ int __retval;
+ freezer_do_not_count();
+ __retval = schedule_hrtimeout_range(expires, delta, mode);
+ freezer_count();
+ return __retval;
+}
/*
* Freezer-friendly wrappers around wait_event_interruptible(),
@@ -177,33 +255,45 @@ static inline bool freezer_should_skip(struct task_struct *p)
__retval; \
})
+/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
+#define wait_event_freezekillable_unsafe(wq, condition) \
+({ \
+ int __retval; \
+ freezer_do_not_count(); \
+ __retval = wait_event_killable(wq, (condition)); \
+ freezer_count_unsafe(); \
+ __retval; \
+})
+
#define wait_event_freezable(wq, condition) \
({ \
int __retval; \
- for (;;) { \
- __retval = wait_event_interruptible(wq, \
- (condition) || freezing(current)); \
- if (__retval || (condition)) \
- break; \
- try_to_freeze(); \
- } \
+ freezer_do_not_count(); \
+ __retval = wait_event_interruptible(wq, (condition)); \
+ freezer_count(); \
__retval; \
})
#define wait_event_freezable_timeout(wq, condition, timeout) \
({ \
long __retval = timeout; \
- for (;;) { \
- __retval = wait_event_interruptible_timeout(wq, \
- (condition) || freezing(current), \
- __retval); \
- if (__retval <= 0 || (condition)) \
- break; \
- try_to_freeze(); \
- } \
+ freezer_do_not_count(); \
+ __retval = wait_event_interruptible_timeout(wq, (condition), \
+ __retval); \
+ freezer_count(); \
__retval; \
})
+#define wait_event_freezable_exclusive(wq, condition) \
+({ \
+ int __retval; \
+ freezer_do_not_count(); \
+ __retval = wait_event_interruptible_exclusive(wq, condition); \
+ freezer_count(); \
+ __retval; \
+})
+
+
#else /* !CONFIG_FREEZER */
static inline bool frozen(struct task_struct *p) { return false; }
static inline bool freezing(struct task_struct *p) { return false; }
@@ -225,18 +315,37 @@ static inline void set_freezable(void) {}
#define freezable_schedule() schedule()
+#define freezable_schedule_unsafe() schedule()
+
+#define freezable_schedule_timeout(timeout) schedule_timeout(timeout)
+
+#define freezable_schedule_timeout_interruptible(timeout) \
+ schedule_timeout_interruptible(timeout)
+
#define freezable_schedule_timeout_killable(timeout) \
schedule_timeout_killable(timeout)
+#define freezable_schedule_timeout_killable_unsafe(timeout) \
+ schedule_timeout_killable(timeout)
+
+#define freezable_schedule_hrtimeout_range(expires, delta, mode) \
+ schedule_hrtimeout_range(expires, delta, mode)
+
#define wait_event_freezable(wq, condition) \
wait_event_interruptible(wq, condition)
#define wait_event_freezable_timeout(wq, condition, timeout) \
wait_event_interruptible_timeout(wq, condition, timeout)
+#define wait_event_freezable_exclusive(wq, condition) \
+ wait_event_interruptible_exclusive(wq, condition)
+
#define wait_event_freezekillable(wq, condition) \
wait_event_killable(wq, condition)
+#define wait_event_freezekillable_unsafe(wq, condition) \
+ wait_event_killable(wq, condition)
+
#endif /* !CONFIG_FREEZER */
#endif /* FREEZER_H_INCLUDED */
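A sketch of a freezable kthread using the new sleep helper, so the freezer is never blocked while the thread waits; the loop body is illustrative:

#include <linux/freezer.h>
#include <linux/kthread.h>

static int demo_thread_fn(void *data)
{
	set_freezable();

	while (!kthread_should_stop()) {
		/* ... periodic work ... */

		/* sleep ~1s; the freezer may freeze us here without stalling */
		freezable_schedule_timeout_interruptible(HZ);
	}
	return 0;
}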
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d57bc5df7225..9a3437377f71 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1506,6 +1506,17 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
* to have different dirent layouts depending on the binary type.
*/
typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
+struct dir_context {
+ const filldir_t actor;
+ loff_t pos;
+};
+
+static inline bool dir_emit(struct dir_context *ctx,
+ const char *name, int namelen,
+ u64 ino, unsigned type)
+{
+ return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+}
struct block_device_operations;
/* These macros are for out of kernel modules to test that
@@ -1522,6 +1533,7 @@ struct file_operations {
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
int (*readdir) (struct file *, void *, filldir_t);
+ int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
@@ -2495,6 +2507,7 @@ loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
extern int vfs_readdir(struct file *, filldir_t, void *);
+extern int iterate_dir(struct file *, struct dir_context *);
extern int vfs_stat(const char __user *, struct kstat *);
extern int vfs_lstat(const char __user *, struct kstat *);
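A sketch of walking a directory with the new iterate_dir()/dir_context interface instead of vfs_readdir(); the names are illustrative:

#include <linux/fs.h>

static int demo_filldir(void *ctx, const char *name, int namelen,
			loff_t offset, u64 ino, unsigned int d_type)
{
	pr_debug("entry: %.*s\n", namelen, name);
	return 0;	/* keep iterating */
}

static int demo_list_dir(struct file *dir)
{
	struct dir_context ctx = { .actor = demo_filldir, .pos = 0 };

	return iterate_dir(dir, &ctx);
}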
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b5e36017acd7..8685e2084591 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -36,9 +36,16 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
*symbol_array);
#endif
+const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+ unsigned int bitmask_size);
+
const char *ftrace_print_hex_seq(struct trace_seq *p,
const unsigned char *buf, int len);
+const char *ftrace_print_array_seq(struct trace_seq *p,
+ const void *buf, int buf_len,
+ size_t el_size);
+
struct trace_iterator;
struct trace_event;
diff --git a/include/linux/gpio_event.h b/include/linux/gpio_event.h
new file mode 100644
index 000000000000..2613fc5e4a93
--- /dev/null
+++ b/include/linux/gpio_event.h
@@ -0,0 +1,170 @@
+/* include/linux/gpio_event.h
+ *
+ * Copyright (C) 2007 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_GPIO_EVENT_H
+#define _LINUX_GPIO_EVENT_H
+
+#include <linux/input.h>
+
+struct gpio_event_input_devs {
+ int count;
+ struct input_dev *dev[];
+};
+enum {
+ GPIO_EVENT_FUNC_UNINIT = 0x0,
+ GPIO_EVENT_FUNC_INIT = 0x1,
+ GPIO_EVENT_FUNC_SUSPEND = 0x2,
+ GPIO_EVENT_FUNC_RESUME = 0x3,
+};
+struct gpio_event_info {
+ int (*func)(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info,
+ void **data, int func);
+ int (*event)(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info,
+ void **data, unsigned int dev, unsigned int type,
+ unsigned int code, int value); /* out events */
+ bool no_suspend;
+};
+
+struct gpio_event_platform_data {
+ const char *name;
+ struct gpio_event_info **info;
+ size_t info_count;
+ int (*power)(const struct gpio_event_platform_data *pdata, bool on);
+ const char *names[]; /* If name is NULL, names contain a NULL */
+ /* terminated list of input devices to create */
+};
+
+#define GPIO_EVENT_DEV_NAME "gpio-event"
+
+/* Key matrix */
+
+enum gpio_event_matrix_flags {
+ /* unset: drive active output low, set: drive active output high */
+ GPIOKPF_ACTIVE_HIGH = 1U << 0,
+ GPIOKPF_DEBOUNCE = 1U << 1,
+ GPIOKPF_REMOVE_SOME_PHANTOM_KEYS = 1U << 2,
+ GPIOKPF_REMOVE_PHANTOM_KEYS = GPIOKPF_REMOVE_SOME_PHANTOM_KEYS |
+ GPIOKPF_DEBOUNCE,
+ GPIOKPF_DRIVE_INACTIVE = 1U << 3,
+ GPIOKPF_LEVEL_TRIGGERED_IRQ = 1U << 4,
+ GPIOKPF_PRINT_UNMAPPED_KEYS = 1U << 16,
+ GPIOKPF_PRINT_MAPPED_KEYS = 1U << 17,
+ GPIOKPF_PRINT_PHANTOM_KEYS = 1U << 18,
+};
+
+#define MATRIX_CODE_BITS (10)
+#define MATRIX_KEY_MASK ((1U << MATRIX_CODE_BITS) - 1)
+#define MATRIX_KEY(dev, code) \
+	(((dev) << MATRIX_CODE_BITS) | ((code) & MATRIX_KEY_MASK))
+
+extern int gpio_event_matrix_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func);
+struct gpio_event_matrix_info {
+ /* initialize to gpio_event_matrix_func */
+ struct gpio_event_info info;
+ /* size must be ninputs * noutputs */
+ const unsigned short *keymap;
+ unsigned int *input_gpios;
+ unsigned int *output_gpios;
+ unsigned int ninputs;
+ unsigned int noutputs;
+ /* time to wait before reading inputs after driving each output */
+ ktime_t settle_time;
+ /* time to wait before scanning the keypad a second time */
+ ktime_t debounce_delay;
+ ktime_t poll_time;
+ unsigned flags;
+};
+
+/* Directly connected inputs and outputs */
+
+enum gpio_event_direct_flags {
+ GPIOEDF_ACTIVE_HIGH = 1U << 0,
+/* GPIOEDF_USE_DOWN_IRQ = 1U << 1, */
+/* GPIOEDF_USE_IRQ = (1U << 2) | GPIOIDF_USE_DOWN_IRQ, */
+ GPIOEDF_PRINT_KEYS = 1U << 8,
+ GPIOEDF_PRINT_KEY_DEBOUNCE = 1U << 9,
+ GPIOEDF_PRINT_KEY_UNSTABLE = 1U << 10,
+};
+
+struct gpio_event_direct_entry {
+ uint32_t gpio:16;
+ uint32_t code:10;
+ uint32_t dev:6;
+};
+
+/* inputs */
+extern int gpio_event_input_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func);
+struct gpio_event_input_info {
+ /* initialize to gpio_event_input_func */
+ struct gpio_event_info info;
+ ktime_t debounce_time;
+ ktime_t poll_time;
+ uint16_t flags;
+ uint16_t type;
+ const struct gpio_event_direct_entry *keymap;
+ size_t keymap_size;
+};
+
+/* outputs */
+extern int gpio_event_output_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func);
+extern int gpio_event_output_event(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data,
+ unsigned int dev, unsigned int type,
+ unsigned int code, int value);
+struct gpio_event_output_info {
+ /* initialize to gpio_event_output_func and gpio_event_output_event */
+ struct gpio_event_info info;
+ uint16_t flags;
+ uint16_t type;
+ const struct gpio_event_direct_entry *keymap;
+ size_t keymap_size;
+};
+
+
+/* axes */
+
+enum gpio_event_axis_flags {
+ GPIOEAF_PRINT_UNKNOWN_DIRECTION = 1U << 16,
+ GPIOEAF_PRINT_RAW = 1U << 17,
+ GPIOEAF_PRINT_EVENT = 1U << 18,
+};
+
+extern int gpio_event_axis_func(struct gpio_event_input_devs *input_devs,
+ struct gpio_event_info *info, void **data, int func);
+struct gpio_event_axis_info {
+ /* initialize to gpio_event_axis_func */
+ struct gpio_event_info info;
+ uint8_t count; /* number of gpios for this axis */
+ uint8_t dev; /* device index when using multiple input devices */
+ uint8_t type; /* EV_REL or EV_ABS */
+ uint16_t code;
+ uint16_t decoded_size;
+ uint16_t (*map)(struct gpio_event_axis_info *info, uint16_t in);
+ uint32_t *gpio;
+ uint32_t flags;
+};
+#define gpio_axis_2bit_gray_map gpio_axis_4bit_gray_map
+#define gpio_axis_3bit_gray_map gpio_axis_4bit_gray_map
+uint16_t gpio_axis_4bit_gray_map(
+ struct gpio_event_axis_info *info, uint16_t in);
+uint16_t gpio_axis_5bit_singletrack_map(
+ struct gpio_event_axis_info *info, uint16_t in);
+
+#endif
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 4f8aa4733fb6..40e9c9d76d1e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -649,8 +649,8 @@ struct hid_driver {
int (*input_mapped)(struct hid_device *hdev,
struct hid_input *hidinput, struct hid_field *field,
struct hid_usage *usage, unsigned long **bit, int *max);
- void (*input_configured)(struct hid_device *hdev,
- struct hid_input *hidinput);
+ int (*input_configured)(struct hid_device *hdev,
+ struct hid_input *hidinput);
void (*feature_mapping)(struct hid_device *hdev,
struct hid_field *field,
struct hid_usage *usage);
diff --git a/include/linux/if_pppolac.h b/include/linux/if_pppolac.h
new file mode 100644
index 000000000000..e40aa1075a30
--- /dev/null
+++ b/include/linux/if_pppolac.h
@@ -0,0 +1,23 @@
+/* include/linux/if_pppolac.h
+ *
+ * Header for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_IF_PPPOLAC_H
+#define __LINUX_IF_PPPOLAC_H
+
+#include <uapi/linux/if_pppolac.h>
+
+#endif /* __LINUX_IF_PPPOLAC_H */
diff --git a/include/linux/if_pppopns.h b/include/linux/if_pppopns.h
new file mode 100644
index 000000000000..4ac621a9ce7c
--- /dev/null
+++ b/include/linux/if_pppopns.h
@@ -0,0 +1,23 @@
+/* include/linux/if_pppopns.h
+ *
+ * Header for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_IF_PPPOPNS_H
+#define __LINUX_IF_PPPOPNS_H
+
+#include <uapi/linux/if_pppopns.h>
+
+#endif /* __LINUX_IF_PPPOPNS_H */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index aff7ad8a4ea3..dccd621d6377 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -41,6 +41,25 @@ struct pptp_opt {
u32 seq_sent, seq_recv;
int ppp_flags;
};
+
+struct pppolac_opt {
+ __u32 local;
+ __u32 remote;
+ __u32 recv_sequence;
+ __u32 xmit_sequence;
+ atomic_t sequencing;
+ int (*backlog_rcv)(struct sock *sk_udp, struct sk_buff *skb);
+};
+
+struct pppopns_opt {
+ __u16 local;
+ __u16 remote;
+ __u32 recv_sequence;
+ __u32 xmit_sequence;
+ void (*data_ready)(struct sock *sk_raw, int length);
+ int (*backlog_rcv)(struct sock *sk_raw, struct sk_buff *skb);
+};
+
#include <net/sock.h>
struct pppox_sock {
@@ -51,6 +70,8 @@ struct pppox_sock {
union {
struct pppoe_opt pppoe;
struct pptp_opt pptp;
+ struct pppolac_opt lac;
+ struct pppopns_opt pns;
} proto;
__be16 num;
};
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b8b7dc755752..ebc6306661eb 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -36,10 +36,12 @@ struct ipv6_devconf {
__s32 accept_ra_rt_info_max_plen;
#endif
#endif
+ __s32 accept_ra_rt_table;
__s32 proxy_ndp;
__s32 accept_source_route;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
__s32 optimistic_dad;
+ __s32 use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
__s32 mc_forwarding;
diff --git a/include/linux/kds.h b/include/linux/kds.h
new file mode 100755
index 000000000000..0ba994db08e7
--- /dev/null
+++ b/include/linux/kds.h
@@ -0,0 +1,173 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KDS_H_
+#define _KDS_H_
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+
+#define KDS_WAIT_BLOCKING (ULONG_MAX)
+
+struct kds_resource_set;
+
+typedef void (*kds_callback_fn) (void *callback_parameter, void *callback_extra_parameter);
+
+struct kds_callback
+{
+ kds_callback_fn user_cb; /* real cb */
+ int direct; /* do direct or queued call? */
+ struct workqueue_struct *wq;
+};
+
+struct kds_link
+{
+ struct kds_resource_set *parent;
+ struct list_head link;
+ unsigned long state;
+};
+
+struct kds_resource
+{
+ struct kds_link waiters;
+};
+
+/* callback API */
+
+/* Initialize a callback object.
+ *
+ * Typically created per context or per hw resource.
+ *
+ * Callbacks can be performed directly if no nested locking can
+ * happen in the client.
+ *
+ * Nested locking can occur when a lock is held during the kds_async_waitall or
+ * kds_resource_set_release call. If the callback needs to take the same lock,
+ * nested locking will happen.
+ *
+ * If nested locking could happen, non-direct callbacks can be requested.
+ * Callbacks will then be called asynchronous to the triggering call.
+ */
+int kds_callback_init(struct kds_callback *cb, int direct, kds_callback_fn user_cb);
+
+/* Terminate the use of a callback object.
+ *
+ * If the callback object was set up as non-direct
+ * any pending callbacks will be flushed first.
+ * Note that, to avoid a deadlock, the locks the callbacks need
+ * can't be held when a callback object is terminated.
+ */
+void kds_callback_term(struct kds_callback *cb);
+
+
+/* resource object API */
+
+/* initialize a resource handle for a shared resource */
+void kds_resource_init(struct kds_resource * const resource);
+
+/*
+ * Will return 0 on success.
+ * If the resource is being used or waited -EBUSY is returned.
+ * The caller should NOT try to terminate a resource that could still have clients.
+ * After the function returns the resource is no longer known by kds.
+ */
+int kds_resource_term(struct kds_resource *resource);
+
+/* Asynchronous wait for a set of resources.
+ * Callback will be called when all resources are available.
+ * If all the resources were available the callback will be called before kds_async_waitall returns.
+ * So one must not hold any locks the callback code-flow can take when calling kds_async_waitall.
+ * The caller is considered to own/use the resources until \a kds_resource_set_release is called.
+ * exclusive_access_bitmap is a bitmap where a high bit means exclusive access while a low bit means shared access.
+ * Use the Linux __set_bit API, where the index of the buffer to control is used as the bit index.
+ *
+ * Standard Linux error return value.
+ */
+int kds_async_waitall(
+ struct kds_resource_set ** const pprset,
+ struct kds_callback *cb,
+ void *callback_parameter,
+ void *callback_extra_parameter,
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list);
+
+/* Synchronous wait for a set of resources.
+ * Function will return when one of these have happened:
+ * - all resources have been obtained
+ * - timeout lapsed while waiting
+ * - a signal was received while waiting
+ *
+ * To wait without a timeout, specify KDS_WAIT_BLOCKING for \a jifies_timeout, otherwise
+ * the timeout in jiffies. A zero timeout attempts to obtain all resources and returns
+ * immediately with a timeout if all resources could not be obtained.
+ *
+ * The caller is considered to own/use the resources when the function returns.
+ * The caller must release the resources using \a kds_resource_set_release.
+ *
+ * Calling this function while holding already-locked resources or other locking primitives is dangerous.
+ * If this is needed, one must decide on a lock order for the resources and/or the other locking primitives
+ * and always take the resources/locking primitives in that specific order.
+ *
+ * Use the ERR_PTR framework to decode the return value.
+ * NULL = time out
+ * If IS_ERR then PTR_ERR gives:
+ * ERESTARTSYS = signal received, retry call after signal
+ * all other values = internal error, lock failed
+ * Other values = successful wait, now the owner, must call kds_resource_set_release
+ */
+struct kds_resource_set *kds_waitall(
+ int number_resources,
+ unsigned long *exclusive_access_bitmap,
+ struct kds_resource **resource_list,
+ unsigned long jifies_timeout);
+
+/* Release resources after use.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your pointer to handle a race
+ * between a cancellation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ */
+void kds_resource_set_release(struct kds_resource_set **pprset);
+
+/* Release resources after use and wait callbacks to complete.
+ * Caller must handle that other async callbacks will trigger,
+ * so must avoid holding any locks a callback will take.
+ *
+ * The function takes a pointer to your pointer to handle a race
+ * between a cancellation and a completion.
+ *
+ * If the caller can't guarantee that a race can't occur then
+ * the passed in pointer must be the same in both call paths
+ * to allow kds to manage the potential race.
+ *
+ * This should be used to cancel waits which are pending on a kds
+ * resource.
+ *
+ * It is a bug to call this from atomic contexts and from within
+ * a kds callback that now owns the kds_resource.
+ */
+
+void kds_resource_set_release_sync(struct kds_resource_set **pprset);
+#endif /* _KDS_H_ */
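A sketch of synchronously claiming two kds resources (the first one exclusively) and releasing them; the helper below is illustrative, not code from the header:

#include <linux/kds.h>
#include <linux/err.h>
#include <linux/bitops.h>

static void demo_use_two_resources(struct kds_resource *a,
				   struct kds_resource *b)
{
	struct kds_resource *list[2] = { a, b };
	unsigned long exclusive[BITS_TO_LONGS(2)] = { 0 };
	struct kds_resource_set *rset;

	__set_bit(0, exclusive);	/* resource 'a' wanted exclusively */

	rset = kds_waitall(2, exclusive, list, KDS_WAIT_BLOCKING);
	if (!rset || IS_ERR(rset))
		return;			/* timeout, signal or internal error */

	/* ... the resources are owned here ... */

	kds_resource_set_release(&rset);
}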
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 341551c7b4c8..5ff503bb94f6 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -805,4 +805,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
#endif
+/* To identify board information in panic logs, set this */
+extern char *mach_panic_string;
+
#endif
diff --git a/include/linux/keychord.h b/include/linux/keychord.h
new file mode 100644
index 000000000000..08cf5402102c
--- /dev/null
+++ b/include/linux/keychord.h
@@ -0,0 +1,23 @@
+/*
+ * Key chord input driver
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#ifndef __LINUX_KEYCHORD_H_
+#define __LINUX_KEYCHORD_H_
+
+#include <uapi/linux/keychord.h>
+
+#endif /* __LINUX_KEYCHORD_H_ */
diff --git a/include/linux/keycombo.h b/include/linux/keycombo.h
new file mode 100644
index 000000000000..c6db2626b0d3
--- /dev/null
+++ b/include/linux/keycombo.h
@@ -0,0 +1,36 @@
+/*
+ * include/linux/keycombo.h - platform data structure for keycombo driver
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_KEYCOMBO_H
+#define _LINUX_KEYCOMBO_H
+
+#define KEYCOMBO_NAME "keycombo"
+
+/*
+ * if key_down_fn and key_up_fn are both present, you are guaranteed that
+ * key_down_fn will return before key_up_fn is called, and that key_up_fn
+ * is called iff key_down_fn is called.
+ */
+struct keycombo_platform_data {
+ void (*key_down_fn)(void *);
+ void (*key_up_fn)(void *);
+ void *priv;
+ int key_down_delay; /* Time in ms */
+ int *keys_up;
+ int keys_down[]; /* 0 terminated */
+};
+
+#endif /* _LINUX_KEYCOMBO_H */
diff --git a/include/linux/keyreset.h b/include/linux/keyreset.h
new file mode 100644
index 000000000000..2e34afab65e4
--- /dev/null
+++ b/include/linux/keyreset.h
@@ -0,0 +1,29 @@
+/*
+ * include/linux/keyreset.h - platform data structure for resetkeys driver
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_KEYRESET_H
+#define _LINUX_KEYRESET_H
+
+#define KEYRESET_NAME "keyreset"
+
+struct keyreset_platform_data {
+ int (*reset_fn)(void);
+ int key_down_delay;
+ int *keys_up;
+ int keys_down[]; /* 0 terminated */
+};
+
+#endif /* _LINUX_KEYRESET_H */
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index 1cc89e9df480..ffb9c9da4f39 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -40,6 +40,11 @@ struct lsm_network_audit {
} fam;
};
+struct lsm_ioctlop_audit {
+ struct path path;
+ u16 cmd;
+};
+
/* Auxiliary data to use in generating the audit record. */
struct common_audit_data {
char type;
@@ -53,6 +58,7 @@ struct common_audit_data {
#define LSM_AUDIT_DATA_KMOD 8
#define LSM_AUDIT_DATA_INODE 9
#define LSM_AUDIT_DATA_DENTRY 10
+#define LSM_AUDIT_DATA_IOCTL_OP 11
union {
struct path path;
struct dentry *dentry;
@@ -68,6 +74,7 @@ struct common_audit_data {
} key_struct;
#endif
char *kmod_name;
+ struct lsm_ioctlop_audit *op;
} u;
/* this union contains LSM specific data */
union {
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
index 307d9cab2026..1726ccbd8009 100644
--- a/include/linux/mailbox_client.h
+++ b/include/linux/mailbox_client.h
@@ -25,6 +25,8 @@ struct mbox_chan;
* if the client receives some ACK packet for transmission.
* Unused if the controller already has TX_Done/RTR IRQ.
* @rx_callback: Atomic callback to provide client the data received
+ * @tx_prepare: Atomic callback to ask client to prepare the payload
+ * before initiating the transmission if required.
* @tx_done: Atomic callback to tell client of data transmission
*/
struct mbox_client {
@@ -34,6 +36,7 @@ struct mbox_client {
bool knows_txdone;
void (*rx_callback)(struct mbox_client *cl, void *mssg);
+ void (*tx_prepare)(struct mbox_client *cl, void *mssg);
void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ea351df9e14..d06441c10e32 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -328,6 +328,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
}
#endif
+extern void kvfree(const void *addr);
+
static inline void compound_lock(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -926,6 +928,7 @@ extern void pagefault_out_of_memory(void);
extern void show_free_areas(unsigned int flags);
extern bool skip_free_areas_node(unsigned int flags, int nid);
+void shmem_set_file(struct vm_area_struct *vma, struct file *file);
int shmem_zero_setup(struct vm_area_struct *);
extern int can_do_mlock(void);
@@ -1182,6 +1185,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm)
mm->hiwater_vm = mm->total_vm;
}
+static inline void reset_mm_hiwater_rss(struct mm_struct *mm)
+{
+ mm->hiwater_rss = get_mm_rss(mm);
+}
+
static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
struct mm_struct *mm)
{
@@ -1504,7 +1512,7 @@ extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
extern struct vm_area_struct *vma_merge(struct mm_struct *,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *);
+ struct mempolicy *, const char __user *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int split_vma(struct mm_struct *,
struct vm_area_struct *, unsigned long addr, int new_below);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 10a9a17342fc..c14df44a9567 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -255,6 +255,10 @@ struct vm_area_struct {
* For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree, or
* linkage of vma in the address_space->i_mmap_nonlinear list.
+ *
+ * For private anonymous mappings, a pointer to a null terminated string
+ * in the user process containing the name given to the vma, or NULL
+ * if unnamed.
*/
union {
struct {
@@ -262,6 +266,7 @@ struct vm_area_struct {
unsigned long rb_subtree_last;
} linear;
struct list_head nonlinear;
+ const char __user *anon_name;
} shared;
/*
@@ -465,6 +470,14 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return mm->cpu_vm_mask_var;
}
+/* Return the name for an anonymous mapping or NULL for a file-backed mapping */
+static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_file)
+ return NULL;
+
+ return vma->shared.anon_name;
+}
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
/*
* Memory barriers to keep this state in sync are graciously provided by
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e326ae2882a0..66b465927602 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/fault-inject.h>
+#include <linux/wakelock.h>
#include <linux/mmc/core.h>
#include <linux/mmc/pm.h>
@@ -329,12 +330,17 @@ struct mmc_host {
int claim_cnt; /* "claim" nesting count */
struct delayed_work detect;
+ struct wake_lock detect_wake_lock;
int detect_change; /* card detect flag */
struct mmc_slot slot;
const struct mmc_bus_ops *bus_ops; /* current bus driver */
unsigned int bus_refs; /* reference counter */
+ unsigned int bus_resume_flags;
+#define MMC_BUSRESUME_MANUAL_RESUME (1 << 0)
+#define MMC_BUSRESUME_NEEDS_RESUME (1 << 1)
+
unsigned int sdio_irqs;
struct task_struct *sdio_irq_thread;
bool sdio_irq_pending;
@@ -362,6 +368,15 @@ struct mmc_host {
unsigned int slotno; /* used for sdio acpi binding */
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+ struct {
+ struct sdio_cis *cis;
+ struct sdio_cccr *cccr;
+ struct sdio_embedded_func *funcs;
+ int num_funcs;
+ } embedded_sdio_data;
+#endif
+
unsigned long private[0] ____cacheline_aligned;
};
@@ -371,6 +386,14 @@ void mmc_remove_host(struct mmc_host *);
void mmc_free_host(struct mmc_host *);
void mmc_of_parse(struct mmc_host *host);
+#ifdef CONFIG_MMC_EMBEDDED_SDIO
+extern void mmc_set_embedded_sdio_data(struct mmc_host *host,
+ struct sdio_cis *cis,
+ struct sdio_cccr *cccr,
+ struct sdio_embedded_func *funcs,
+ int num_funcs);
+#endif
+
static inline void *mmc_priv(struct mmc_host *host)
{
return (void *)host->private;
@@ -381,6 +404,18 @@ static inline void *mmc_priv(struct mmc_host *host)
#define mmc_dev(x) ((x)->parent)
#define mmc_classdev(x) (&(x)->class_dev)
#define mmc_hostname(x) (dev_name(&(x)->class_dev))
+#define mmc_bus_needs_resume(host) ((host)->bus_resume_flags & MMC_BUSRESUME_NEEDS_RESUME)
+#define mmc_bus_manual_resume(host) ((host)->bus_resume_flags & MMC_BUSRESUME_MANUAL_RESUME)
+
+static inline void mmc_set_bus_resume_policy(struct mmc_host *host, int manual)
+{
+ if (manual)
+ host->bus_resume_flags |= MMC_BUSRESUME_MANUAL_RESUME;
+ else
+ host->bus_resume_flags &= ~MMC_BUSRESUME_MANUAL_RESUME;
+}
+
+extern int mmc_resume_bus(struct mmc_host *host);
int mmc_suspend_host(struct mmc_host *);
int mmc_resume_host(struct mmc_host *);
diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h
index 4a139204c20c..6e2d6a135c7e 100644
--- a/include/linux/mmc/pm.h
+++ b/include/linux/mmc/pm.h
@@ -26,5 +26,6 @@ typedef unsigned int mmc_pm_flag_t;
#define MMC_PM_KEEP_POWER (1 << 0) /* preserve card power during suspend */
#define MMC_PM_WAKE_SDIO_IRQ (1 << 1) /* wake up host system on SDIO IRQ assertion */
+#define MMC_PM_IGNORE_PM_NOTIFY (1 << 2) /* ignore mmc pm notify */
#endif /* LINUX_MMC_PM_H */
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index 50f0bc952328..dc680c4b50d4 100644..100755
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -23,6 +23,14 @@ struct sdio_func;
typedef void (sdio_irq_handler_t)(struct sdio_func *);
/*
+ * Structure used to hold embedded SDIO device data from platform layer
+ */
+struct sdio_embedded_func {
+ uint8_t f_class;
+ uint32_t f_maxblksize;
+};
+
+/*
* SDIO function CIS tuple (unknown to the core)
*/
struct sdio_func_tuple {
@@ -130,6 +138,8 @@ extern int sdio_release_irq(struct sdio_func *func);
extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz);
extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret);
+extern u8 sdio_readb_ext(struct sdio_func *func, unsigned int addr, int *err_ret,
+ unsigned in);
extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret);
extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret);
diff --git a/include/linux/netfilter/xt_qtaguid.h b/include/linux/netfilter/xt_qtaguid.h
new file mode 100644
index 000000000000..ca60fbdec2f3
--- /dev/null
+++ b/include/linux/netfilter/xt_qtaguid.h
@@ -0,0 +1,13 @@
+#ifndef _XT_QTAGUID_MATCH_H
+#define _XT_QTAGUID_MATCH_H
+
+/* For now we just replace the xt_owner.
+ * FIXME: make iptables aware of qtaguid. */
+#include <linux/netfilter/xt_owner.h>
+
+#define XT_QTAGUID_UID XT_OWNER_UID
+#define XT_QTAGUID_GID XT_OWNER_GID
+#define XT_QTAGUID_SOCKET XT_OWNER_SOCKET
+#define xt_qtaguid_match_info xt_owner_match_info
+
+#endif /* _XT_QTAGUID_MATCH_H */
diff --git a/include/linux/netfilter/xt_quota2.h b/include/linux/netfilter/xt_quota2.h
new file mode 100644
index 000000000000..eadc6903314e
--- /dev/null
+++ b/include/linux/netfilter/xt_quota2.h
@@ -0,0 +1,25 @@
+#ifndef _XT_QUOTA_H
+#define _XT_QUOTA_H
+
+enum xt_quota_flags {
+ XT_QUOTA_INVERT = 1 << 0,
+ XT_QUOTA_GROW = 1 << 1,
+ XT_QUOTA_PACKET = 1 << 2,
+ XT_QUOTA_NO_CHANGE = 1 << 3,
+ XT_QUOTA_MASK = 0x0F,
+};
+
+struct xt_quota_counter;
+
+struct xt_quota_mtinfo2 {
+ char name[15];
+ u_int8_t flags;
+
+ /* Comparison-invariant */
+ aligned_u64 quota;
+
+ /* Used internally by the kernel */
+ struct xt_quota_counter *master __attribute__((aligned(8)));
+};
+
+#endif /* _XT_QUOTA_H */
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index db50840e6355..c8f8aa0383e5 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -14,8 +14,11 @@
* may be used to reset the timeout - for code which intentionally
* disables interrupts for a long time. This call is stateless.
*/
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR_NMI)
#include <asm/nmi.h>
+#endif
+
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
extern void touch_nmi_watchdog(void);
#else
static inline void touch_nmi_watchdog(void)
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index c9722fdf39c5..7c00ee271395 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -90,6 +90,27 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
extern int of_flat_dt_match(unsigned long node, const char *const *matches);
extern unsigned long of_get_flat_dt_root(void);
+/*
+ * early_init_dt_scan_chosen - scan the device tree for ramdisk and bootargs
+ *
+ * The boot arguments will be placed into the memory pointed to by @data.
+ * That memory should be COMMAND_LINE_SIZE big and initialized to be a valid
+ * (possibly empty) string. Logic for what will be in @data after this
+ * function finishes:
+ *
+ * - CONFIG_CMDLINE_FORCE=true
+ * CONFIG_CMDLINE
+ * - CONFIG_CMDLINE_EXTEND=true, @data is non-empty string
+ * @data + dt bootargs (even if dt bootargs are empty)
+ * - CONFIG_CMDLINE_EXTEND=true, @data is empty string
+ * CONFIG_CMDLINE + dt bootargs (even if dt bootargs are empty)
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=non-empty:
+ * dt bootargs
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is non-empty string
+ * @data is left unchanged
+ * - CMDLINE_FROM_BOOTLOADER=true, dt bootargs=empty, @data is empty string
+ * CONFIG_CMDLINE (or "" if that's not defined)
+ */
extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
int depth, void *data);
extern void early_init_dt_check_for_initrd(unsigned long node);
diff --git a/include/linux/platform_data/ds2482.h b/include/linux/platform_data/ds2482.h
new file mode 100644
index 000000000000..5a6879e2a09a
--- /dev/null
+++ b/include/linux/platform_data/ds2482.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __PLATFORM_DATA_DS2482__
+#define __PLATFORM_DATA_DS2482__
+
+struct ds2482_platform_data {
+ int slpz_gpio;
+};
+
+#endif /* __PLATFORM_DATA_DS2482__ */
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 3828cefb4f65..03d921feb094 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -15,6 +15,7 @@
#include <linux/workqueue.h>
#include <linux/leds.h>
+#include <linux/types.h>
struct device;
@@ -140,6 +141,12 @@ enum power_supply_property {
POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */
POWER_SUPPLY_PROP_SCOPE,
+ /* Local extensions */
+ POWER_SUPPLY_PROP_USB_HC,
+ POWER_SUPPLY_PROP_USB_OTG,
+ POWER_SUPPLY_PROP_CHARGE_ENABLED,
+ /* Local extensions of type int64_t */
+ POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT,
/* Properties of type `const char *' */
POWER_SUPPLY_PROP_MODEL_NAME,
POWER_SUPPLY_PROP_MANUFACTURER,
@@ -160,6 +167,7 @@ enum power_supply_type {
union power_supply_propval {
int intval;
const char *strval;
+ int64_t int64val;
};
struct power_supply {
@@ -194,6 +202,8 @@ struct power_supply {
/* private */
struct device *dev;
struct work_struct changed_work;
+ spinlock_t changed_lock;
+ bool changed;
#ifdef CONFIG_THERMAL
struct thermal_zone_device *tzd;
struct thermal_cooling_device *tcd;
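A hedged sketch of a battery driver returning the new 64-bit extension property through the usual get_property callback; the function name and the value are illustrative:

static int example_get_property(struct power_supply *psy,
                                enum power_supply_property psp,
                                union power_supply_propval *val)
{
        switch (psp) {
        case POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT:
                /* 64-bit values go through the new int64val member */
                val->int64val = 123456789LL;
                return 0;
        default:
                return -EINVAL;
        }
}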
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 75d01760c911..289884121d30 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -35,6 +35,7 @@ enum pstore_type_id {
PSTORE_TYPE_MCE = 1,
PSTORE_TYPE_CONSOLE = 2,
PSTORE_TYPE_FTRACE = 3,
+ PSTORE_TYPE_PMSG = 4, /* Backport: 7 in upstream 3.19.0-rc3 */
PSTORE_TYPE_UNKNOWN = 255
};
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 4af3fdc85b01..712757f320a4 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -68,6 +68,8 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz);
ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
char *str, size_t len);
+void ramoops_console_write_buf(const char *buf, size_t size);
+
/*
* Ramoops platform data
* @mem_size memory size for ramoops
@@ -81,6 +83,7 @@ struct ramoops_platform_data {
unsigned long record_size;
unsigned long console_size;
unsigned long ftrace_size;
+ unsigned long pmsg_size;
int dump_oops;
struct persistent_ram_ecc_info ecc_info;
};
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6ca9630fde2d..5229df9d7107 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1143,13 +1143,12 @@ struct task_struct {
* execve */
unsigned in_iowait:1;
- /* task may not gain privileges */
- unsigned no_new_privs:1;
-
/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
+ unsigned long atomic_flags; /* Flags needing atomic access. */
+
pid_t pid;
pid_t tgid;
@@ -1639,6 +1638,9 @@ static inline cputime_t task_gtime(struct task_struct *t)
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern int task_free_register(struct notifier_block *n);
+extern int task_free_unregister(struct notifier_block *n);
+
/*
* Per process flags
*/
@@ -1719,6 +1721,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}
+/* Per-process atomic flags. */
+#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */
+
+static inline bool task_no_new_privs(struct task_struct *p)
+{
+ return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
+static inline void task_set_no_new_privs(struct task_struct *p)
+{
+ set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
/*
* task->jobctl flags
*/
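A brief, hedged usage sketch of the new atomic-flag helpers (illustrative, not taken from this patch):

/* e.g. when installing a seccomp filter without privilege */
if (!task_no_new_privs(current))
        task_set_no_new_privs(current);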
diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
new file mode 100644
index 000000000000..a17ed16d14c3
--- /dev/null
+++ b/include/linux/scpi_protocol.h
@@ -0,0 +1,59 @@
+/*
+ * SCPI Message Protocol driver header
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/types.h>
+
+struct scpi_opp {
+ u32 freq;
+ u32 m_volt;
+} __packed;
+
+struct scpi_dvfs_info {
+ unsigned int count;
+ unsigned int latency; /* in nanoseconds */
+ struct scpi_opp *opps;
+};
+
+/**
+ * struct scpi_ops - represents the various operations provided
+ * by SCP through SCPI message protocol
+ * @get_version: returns the major and minor revision on the SCPI
+ * message protocol
+ * @clk_get_range: gets clock range limit (min - max in Hz)
+ * @clk_get_val: gets clock value (in Hz)
+ * @clk_set_val: sets the clock value, setting to 0 will disable the
+ * clock (if supported)
+ * @dvfs_get_idx: gets the Operating Point of the given power domain.
+ * OPP is an index into the list returned by @dvfs_get_info
+ * @dvfs_set_idx: sets the Operating Point of the given power domain.
+ * OPP is an index into the list returned by @dvfs_get_info
+ * @dvfs_get_info: returns the DVFS capabilities of the given power
+ * domain. It includes the OPP list and the latency information
+ */
+struct scpi_ops {
+ u32 (*get_version)(void);
+ int (*clk_get_range)(u16, unsigned long *, unsigned long *);
+ unsigned long (*clk_get_val)(u16);
+ int (*clk_set_val)(u16, unsigned long);
+ int (*dvfs_get_idx)(u8);
+ int (*dvfs_set_idx)(u8, u8);
+ struct scpi_dvfs_info *(*dvfs_get_info)(u8);
+ int (*sensor_get_id)(char *);
+ int (*sensor_get_value)(u16, u32 *);
+};
+
+struct scpi_ops *get_scpi_ops(void);
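A hedged consumer sketch of the scpi_ops interface declared above; the power-domain index 0 and the printout are illustrative:

struct scpi_ops *ops = get_scpi_ops();

if (ops) {
        struct scpi_dvfs_info *info = ops->dvfs_get_info(0);
        unsigned int i;

        for (i = 0; info && i < info->count; i++)
                pr_info("OPP %u: %u Hz at %u mV\n",
                        i, info->opps[i].freq, info->opps[i].m_volt);
}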
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd1840e..9687691799ff 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,6 +3,8 @@
#include <uapi/linux/seccomp.h>
+#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
+
#ifdef CONFIG_SECCOMP
#include <linux/thread_info.h>
@@ -14,11 +16,11 @@ struct seccomp_filter;
*
* @mode: indicates one of the valid values above for controlled
* system calls available to a process.
- * @filter: The metadata and ruleset for determining what system calls
- * are allowed for a task.
+ * @filter: must always point to a valid seccomp-filter or NULL as it is
+ * accessed without locking during system call entry.
*
* @filter must only be accessed from the context of current as there
- * is no locking.
+ * is no read locking.
*/
struct seccomp {
int mode;
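A minimal, hedged sketch of how such a mask is typically used to validate a flags argument (illustrative, not this patch's own code):

/* reject any flag bit that is not part of the supported set */
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
        return -EINVAL;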
diff --git a/include/linux/security.h b/include/linux/security.h
index 4686491852a7..17e1888ff505 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1402,6 +1402,11 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
struct security_operations {
char name[SECURITY_NAME_MAX + 1];
+ int (*binder_set_context_mgr) (struct task_struct *mgr);
+ int (*binder_transaction) (struct task_struct *from, struct task_struct *to);
+ int (*binder_transfer_binder) (struct task_struct *from, struct task_struct *to);
+ int (*binder_transfer_file) (struct task_struct *from, struct task_struct *to, struct file *file);
+
int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
int (*ptrace_traceme) (struct task_struct *parent);
int (*capget) (struct task_struct *target,
@@ -1690,6 +1695,10 @@ extern void __init security_fixup_ops(struct security_operations *ops);
/* Security operations */
+int security_binder_set_context_mgr(struct task_struct *mgr);
+int security_binder_transaction(struct task_struct *from, struct task_struct *to);
+int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to);
+int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file);
int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
int security_ptrace_traceme(struct task_struct *parent);
int security_capget(struct task_struct *target,
@@ -1869,6 +1878,26 @@ static inline int security_init(void)
return 0;
}
+static inline int security_binder_set_context_mgr(struct task_struct *mgr)
+{
+ return 0;
+}
+
+static inline int security_binder_transaction(struct task_struct *from, struct task_struct *to)
+{
+ return 0;
+}
+
+static inline int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to)
+{
+ return 0;
+}
+
+static inline int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file)
+{
+ return 0;
+}
+
static inline int security_ptrace_access_check(struct task_struct *child,
unsigned int mode)
{
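A hedged sketch of how an LSM might fill in one of the new binder hooks declared above; the function body and the CAP_SYS_ADMIN policy are purely illustrative, not how any real LSM implements it:

static int example_binder_transaction(struct task_struct *from,
                                      struct task_struct *to)
{
        /* illustrative policy: only privileged callers may transact */
        return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
}

/* wired up via .binder_transaction = example_binder_transaction in the
 * example LSM's struct security_operations instance */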
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index c2b355fd921a..21c074b2200e 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -60,6 +60,7 @@ struct uart_ops {
void (*pm)(struct uart_port *, unsigned int state,
unsigned int oldstate);
int (*set_wake)(struct uart_port *, unsigned int state);
+ void (*wake_peer)(struct uart_port *);
/*
* Return a string describing the type of the port
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index d4e3f16d5e89..a34821358ae5 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -363,7 +363,7 @@ extern bool pm_wakeup_pending(void);
extern bool pm_get_wakeup_count(unsigned int *count, bool block);
extern bool pm_save_wakeup_count(unsigned int count);
extern void pm_wakep_autosleep_enabled(bool set);
-
+extern void pm_get_active_wakeup_sources(char *pending_sources, size_t max);
static inline void lock_system_sleep(void)
{
current->flags |= PF_FREEZER_SKIP;
diff --git a/include/linux/switch.h b/include/linux/switch.h
new file mode 100644
index 000000000000..3e4c748e343a
--- /dev/null
+++ b/include/linux/switch.h
@@ -0,0 +1,53 @@
+/*
+ * Switch class driver
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#ifndef __LINUX_SWITCH_H__
+#define __LINUX_SWITCH_H__
+
+struct switch_dev {
+ const char *name;
+ struct device *dev;
+ int index;
+ int state;
+
+ ssize_t (*print_name)(struct switch_dev *sdev, char *buf);
+ ssize_t (*print_state)(struct switch_dev *sdev, char *buf);
+};
+
+struct gpio_switch_platform_data {
+ const char *name;
+ unsigned gpio;
+
+ /* if NULL, switch_dev.name will be printed */
+ const char *name_on;
+ const char *name_off;
+ /* if NULL, "0" or "1" will be printed */
+ const char *state_on;
+ const char *state_off;
+};
+
+extern int switch_dev_register(struct switch_dev *sdev);
+extern void switch_dev_unregister(struct switch_dev *sdev);
+
+static inline int switch_get_state(struct switch_dev *sdev)
+{
+ return sdev->state;
+}
+
+extern void switch_set_state(struct switch_dev *sdev, int state);
+
+#endif /* __LINUX_SWITCH_H__ */
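A hedged sketch of registering a switch device and reporting a state change with the API above; the device name and probe function are illustrative:

static struct switch_dev example_sdev = {
        .name = "example-headset",
};

static int example_probe(void)
{
        int ret = switch_dev_register(&example_sdev);

        if (ret)
                return ret;
        switch_set_state(&example_sdev, 1);     /* report "inserted" */
        return 0;
}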
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 84662ecc7b51..4e98d7174134 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -846,4 +846,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+ const char __user *uargs);
#endif
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 55fce47b0095..dc74e3750dc1 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -40,6 +40,9 @@
/* No upper/lower limit requirement */
#define THERMAL_NO_LIMIT THERMAL_CSTATE_INVALID
+/* Default weight of a bound cooling device */
+#define THERMAL_WEIGHT_DEFAULT 0
+
/* Unit conversion macros */
#define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \
((long)t-2732+5)/10 : ((long)t-2732-5)/10)
@@ -57,10 +60,13 @@
#define DEFAULT_THERMAL_GOVERNOR "fair_share"
#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE)
#define DEFAULT_THERMAL_GOVERNOR "user_space"
+#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR)
+#define DEFAULT_THERMAL_GOVERNOR "power_allocator"
#endif
struct thermal_zone_device;
struct thermal_cooling_device;
+struct thermal_instance;
enum thermal_device_mode {
THERMAL_DEVICE_DISABLED = 0,
@@ -138,6 +144,12 @@ struct thermal_cooling_device_ops {
int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);
int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);
int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);
+ int (*get_requested_power)(struct thermal_cooling_device *,
+ struct thermal_zone_device *, u32 *);
+ int (*state2power)(struct thermal_cooling_device *,
+ struct thermal_zone_device *, unsigned long, u32 *);
+ int (*power2state)(struct thermal_cooling_device *,
+ struct thermal_zone_device *, u32, unsigned long *);
};
struct thermal_cooling_device {
@@ -158,6 +170,41 @@ struct thermal_attr {
char name[THERMAL_NAME_LENGTH];
};
+/**
+ * struct thermal_zone_device - structure for a thermal zone
+ * @id: unique id number for each thermal zone
+ * @type: the thermal zone device type
+ * @device: &struct device for this thermal zone
+ * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature
+ * @trip_type_attrs: attributes for trip points for sysfs: trip type
+ * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis
+ * @devdata: private pointer for device private data
+ * @trips: number of trip points the thermal zone supports
+ * @passive_delay: number of milliseconds to wait between polls when
+ * performing passive cooling.
+ * @polling_delay: number of milliseconds to wait between polls when
+ * checking whether trip points have been crossed (0 for
+ * interrupt driven systems)
+ * @temperature: current temperature. This is only for core code,
+ * drivers should use thermal_zone_get_temp() to get the
+ * current temperature
+ * @last_temperature: previous temperature read
+ * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION
+ * @passive: 1 if you've crossed a passive trip point, 0 otherwise.
+ * @forced_passive: If > 0, temperature at which to switch on all ACPI
+ * processor cooling devices. Currently only used by the
+ * step-wise governor.
+ * @ops: operations this &thermal_zone_device supports
+ * @tzp: thermal zone parameters
+ * @governor: pointer to the governor for this thermal zone
+ * @governor_data: private pointer for governor data
+ * @thermal_instances: list of &struct thermal_instance of this thermal zone
+ * @idr: &struct idr to generate unique id for this zone's cooling
+ * devices
+ * @lock: lock to protect thermal_instances list
+ * @node: node in thermal_tz_list (in thermal_core.c)
+ * @poll_queue: delayed work for polling
+ */
struct thermal_zone_device {
int id;
char type[THERMAL_NAME_LENGTH];
@@ -175,18 +222,32 @@ struct thermal_zone_device {
int passive;
unsigned int forced_passive;
struct thermal_zone_device_ops *ops;
- const struct thermal_zone_params *tzp;
+ struct thermal_zone_params *tzp;
struct thermal_governor *governor;
+ void *governor_data;
struct list_head thermal_instances;
struct idr idr;
- struct mutex lock; /* protect thermal_instances list */
+ struct mutex lock;
struct list_head node;
struct delayed_work poll_queue;
};
-/* Structure that holds thermal governor information */
+/**
+ * struct thermal_governor - structure that holds thermal governor information
+ * @name: name of the governor
+ * @bind_to_tz: callback called when binding to a thermal zone. If it
+ * returns 0, the governor is bound to the thermal zone,
+ * otherwise it fails.
+ * @unbind_from_tz: callback called when a governor is unbound from a
+ * thermal zone.
+ * @throttle: callback called for every trip point even if temperature is
+ * below the trip point temperature
+ * @governor_list: node in thermal_governor_list (in thermal_core.c)
+ */
struct thermal_governor {
char name[THERMAL_NAME_LENGTH];
+ int (*bind_to_tz)(struct thermal_zone_device *tz);
+ void (*unbind_from_tz)(struct thermal_zone_device *tz);
int (*throttle)(struct thermal_zone_device *tz, int trip);
struct list_head governor_list;
};
@@ -197,9 +258,12 @@ struct thermal_bind_params {
/*
* This is a measure of 'how effectively these devices can
- * cool 'this' thermal zone. The shall be determined by platform
- * characterization. This is on a 'percentage' scale.
- * See Documentation/thermal/sysfs-api.txt for more information.
+ * cool 'this' thermal zone. It shall be determined by
+ * platform characterization. This value is relative to the
+ * rest of the weights so a cooling device whose weight is
+ * double that of another cooling device is twice as
+ * effective. See Documentation/thermal/sysfs-api.txt for more
+ * information.
*/
int weight;
@@ -226,6 +290,33 @@ struct thermal_zone_params {
int num_tbps; /* Number of tbp entries */
struct thermal_bind_params *tbp;
+
+ /*
+ * Sustainable power (heat) that this thermal zone can dissipate in
+ * mW
+ */
+ u32 sustainable_power;
+
+ /*
+ * Proportional parameter of the PID controller when
+ * overshooting (i.e., when temperature is below the target)
+ */
+ s32 k_po;
+
+ /*
+ * Proportional parameter of the PID controller when
+ * undershooting
+ */
+ s32 k_pu;
+
+ /* Integral parameter of the PID controller */
+ s32 k_i;
+
+ /* Derivative parameter of the PID controller */
+ s32 k_d;
+
+ /* threshold below which the error is no longer accumulated */
+ s32 integral_cutoff;
};
struct thermal_genl_event {
@@ -257,14 +348,26 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
}
#endif
+
+static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
+{
+ return cdev->ops->get_requested_power && cdev->ops->state2power &&
+ cdev->ops->power2state;
+}
+
+int power_actor_get_max_power(struct thermal_cooling_device *,
+ struct thermal_zone_device *tz, u32 *max_power);
+int power_actor_set_power(struct thermal_cooling_device *,
+ struct thermal_instance *, u32);
struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
void *, struct thermal_zone_device_ops *,
- const struct thermal_zone_params *, int, int);
+ struct thermal_zone_params *, int, int);
void thermal_zone_device_unregister(struct thermal_zone_device *);
int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
struct thermal_cooling_device *,
- unsigned long, unsigned long);
+ unsigned long, unsigned long,
+ unsigned int);
int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
struct thermal_cooling_device *);
void thermal_zone_device_update(struct thermal_zone_device *);
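A hedged governor-side sketch of the power-actor helpers above: a cooling device only takes part in power allocation when all three new callbacks are implemented. The tz and cdev variables are assumed to exist in the caller:

u32 max_power;

if (cdev_is_power_actor(cdev) &&
    !power_actor_get_max_power(cdev, tz, &max_power))
        dev_dbg(&cdev->device, "can dissipate up to %u mW\n", max_power);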
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index a32d86ec8bf2..902e071bef83 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -25,6 +25,21 @@ trace_seq_init(struct trace_seq *s)
s->full = 0;
}
+/**
+ * trace_seq_buffer_ptr - return pointer to next location in buffer
+ * @s: trace sequence descriptor
+ *
+ * Returns the pointer to the buffer where the next write to
+ * the buffer will happen. This is useful to save the location
+ * that is about to be written to and then return the result
+ * of that write.
+ */
+static inline unsigned char *
+trace_seq_buffer_ptr(struct trace_seq *s)
+{
+ return s->buffer + s->len;
+}
+
/*
* Currently only defined when tracing is enabled.
*/
@@ -46,6 +61,9 @@ extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
extern int trace_seq_path(struct trace_seq *s, const struct path *path);
+extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+ int nmaskbits);
+
#else /* CONFIG_TRACING */
static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
{
@@ -57,6 +75,13 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
return 0;
}
+static inline int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+ int nmaskbits)
+{
+ return 0;
+}
+
static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
return 0;
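A hedged usage sketch of trace_seq_buffer_ptr(), following the pattern its kernel-doc describes: remember where the next write lands so the freshly formatted text can be returned. The function name is illustrative:

static const char *example_print(struct trace_seq *s, int cpu)
{
        const char *ret = (const char *)trace_seq_buffer_ptr(s);

        trace_seq_printf(s, "cpu=%d ", cpu);
        return ret;     /* points at the text just written */
}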
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index ba605015c4d8..32335bec6ebd 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -190,6 +190,11 @@ static inline void tracepoint_synchronize_unregister(void)
static inline void \
check_trace_callback_type_##name(void (*cb)(data_proto)) \
{ \
+ } \
+ static inline bool \
+ trace_##name##_enabled(void) \
+ { \
+ return static_key_false(&__tracepoint_##name.key); \
}
/*
@@ -235,6 +240,11 @@ static inline void tracepoint_synchronize_unregister(void)
} \
static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
{ \
+ } \
+ static inline bool \
+ trace_##name##_enabled(void) \
+ { \
+ return false; \
}
#define DEFINE_TRACE_FN(name, reg, unreg)
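A hedged usage sketch of the generated trace_<name>_enabled() helper, assuming a tracepoint named foo and a costly helper, both hypothetical; the point is to skip expensive argument preparation while the tracepoint is off:

if (trace_foo_enabled()) {
        u64 cost = compute_expensive_argument();        /* hypothetical */

        trace_foo(cost);
}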
diff --git a/include/linux/uid_stat.h b/include/linux/uid_stat.h
new file mode 100644
index 000000000000..6bd6c4e52d17
--- /dev/null
+++ b/include/linux/uid_stat.h
@@ -0,0 +1,29 @@
+/* include/linux/uid_stat.h
+ *
+ * Copyright (C) 2008-2009 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __uid_stat_h
+#define __uid_stat_h
+
+/* Contains definitions for resource tracking per uid. */
+
+#ifdef CONFIG_UID_STAT
+int uid_stat_tcp_snd(uid_t uid, int size);
+int uid_stat_tcp_rcv(uid_t uid, int size);
+#else
+#define uid_stat_tcp_snd(uid, size) do {} while (0)
+#define uid_stat_tcp_rcv(uid, size) do {} while (0)
+#endif
+
+#endif /* __uid_stat_h */
diff --git a/include/linux/ump-common.h b/include/linux/ump-common.h
new file mode 100755
index 000000000000..1deefd84381f
--- /dev/null
+++ b/include/linux/ump-common.h
@@ -0,0 +1,252 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file ump-common.h
+ *
+ * This file contains some common enum values used in both the user and kernel space sides of UMP.
+ */
+
+#ifndef _UMP_COMMON_H_
+#define _UMP_COMMON_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define UMP_UINT32_MAX (4294967295U)
+#define UMP_UINT64_MAX (18446744073709551615ULL)
+
+#ifdef __GNUC__
+ #define CHECK_RESULT __attribute__((__warn_unused_result__))
+ #define INLINE __inline__
+#else
+ #define CHECK_RESULT
+ #define INLINE __inline
+#endif
+
+#ifndef STATIC
+ #define STATIC static
+#endif
+
+/**
+ * Values to identify major and minor version of UMP
+ */
+#define UMP_VERSION_MAJOR 2
+#define UMP_VERSION_MINOR 0
+
+/**
+ * Typedef for a secure ID, a system wide identifier for UMP memory buffers.
+ */
+typedef int32_t ump_secure_id;
+
+/**
+ * Value to indicate an invalid secure Id.
+ */
+#define UMP_INVALID_SECURE_ID ((ump_secure_id)0)
+
+/**
+ * UMP error codes.
+ */
+typedef enum
+{
+ UMP_OK = 0, /**< indicates success */
+ UMP_ERROR = 1 /**< indicates failure */
+} ump_result;
+
+/**
+ * Allocation flag bits.
+ *
+ * ump_allocate accepts zero or more flags to specify the type of memory to allocate and how to expose it to devices.
+ *
+ * For each supported device there are 4 flags to control access permissions and give usage characteristic hints to optimize the allocation/mapping.
+ * They are:
+ * @li @a UMP_PROT_<device>_RD read permission
+ * @li @a UMP_PROT_<device>_WR write permission
+ * @li @a UMP_HINT_<device>_RD read often
+ * @li @a UMP_HINT_<device>_WR written often
+ *
+ * 5 devices are currently supported, one of which is the CPU itself.
+ * The other 4 devices will be mapped to real devices per SoC design.
+ * They are just named W,X,Y,Z by UMP as it has no knowledge of their real names/identifiers.
+ * As an example device W could be a camera device while device Z could be an ARM GPU device, leaving X and Y unused.
+ *
+ * 2 additional flags control the allocation:
+ * @li @a UMP_CONSTRAINT_PHYSICALLY_LINEAR the allocation must be physically linear. Typical for devices without an MMU and with no IOMMU to help them.
+ * @li @a UMP_PROT_SHAREABLE the allocation can be shared with other processes on the system. Without this flag the returned allocation won't be resolvable in other processes.
+ *
+ * All UMP allocations are growable unless they are @a UMP_PROT_SHAREABLE.
+ * The hint bits should be used to indicate the access pattern so the driver can select the most optimal memory type and cache settings based on what the system supports.
+ */
+typedef enum
+{
+ /* Generic helpers */
+ UMP_PROT_DEVICE_RD = (1u << 0),
+ UMP_PROT_DEVICE_WR = (1u << 1),
+ UMP_HINT_DEVICE_RD = (1u << 2),
+ UMP_HINT_DEVICE_WR = (1u << 3),
+ UMP_DEVICE_MASK = 0xF,
+ UMP_DEVICE_CPU_SHIFT = 0,
+ UMP_DEVICE_W_SHIFT = 4,
+ UMP_DEVICE_X_SHIFT = 8,
+ UMP_DEVICE_Y_SHIFT = 12,
+ UMP_DEVICE_Z_SHIFT = 16,
+
+ /* CPU protection and hints. */
+ UMP_PROT_CPU_RD = (1u << 0),
+ UMP_PROT_CPU_WR = (1u << 1),
+ UMP_HINT_CPU_RD = (1u << 2),
+ UMP_HINT_CPU_WR = (1u << 3),
+
+ /* device W */
+ UMP_PROT_W_RD = (1u << 4),
+ UMP_PROT_W_WR = (1u << 5),
+ UMP_HINT_W_RD = (1u << 6),
+ UMP_HINT_W_WR = (1u << 7),
+
+ /* device X */
+ UMP_PROT_X_RD = (1u << 8),
+ UMP_PROT_X_WR = (1u << 9),
+ UMP_HINT_X_RD = (1u << 10),
+ UMP_HINT_X_WR = (1u << 11),
+
+ /* device Y */
+ UMP_PROT_Y_RD = (1u << 12),
+ UMP_PROT_Y_WR = (1u << 13),
+ UMP_HINT_Y_RD = (1u << 14),
+ UMP_HINT_Y_WR = (1u << 15),
+
+ /* device Z */
+ UMP_PROT_Z_RD = (1u << 16),
+ UMP_PROT_Z_WR = (1u << 17),
+ UMP_HINT_Z_RD = (1u << 18),
+ UMP_HINT_Z_WR = (1u << 19),
+
+ /* 20-26 reserved for future use */
+ UMPP_ALLOCBITS_UNUSED = (0x7Fu << 20),
+ /** Allocations marked as @a UMP_CONSTRAINT_UNCACHED won't be mapped as cached by the CPU */
+ UMP_CONSTRAINT_UNCACHED = (1u << 27),
+ /** Require 32-bit physically addressable memory */
+ UMP_CONSTRAINT_32BIT_ADDRESSABLE = (1u << 28),
+ /** For devices without an MMU and with no IOMMU assistance. */
+ UMP_CONSTRAINT_PHYSICALLY_LINEAR = (1u << 29),
+ /** Shareable must be set to allow the allocation to be used by other processes; the default is non-shared */
+ UMP_PROT_SHAREABLE = (1u << 30)
+ /* (1u << 31) should not be used to ensure compiler portability */
+} ump_allocation_bits;
+
+/**
+ * ump_allocation_bits combination argument type.
+ *
+ * Type used to pass zero or more bits from the @ref ump_allocation_bits enum
+ */
+typedef uint32_t ump_alloc_flags;
+
+
+/**
+ * Default allocation flags for UMP v1 compatible allocations.
+ */
+#define UMP_V1_API_DEFAULT_ALLOCATION_FLAGS (UMP_PROT_CPU_RD | UMP_PROT_CPU_WR | \
+                                             UMP_PROT_W_RD | UMP_PROT_W_WR | \
+                                             UMP_PROT_X_RD | UMP_PROT_X_WR | \
+                                             UMP_PROT_Y_RD | UMP_PROT_Y_WR | \
+                                             UMP_PROT_Z_RD | UMP_PROT_Z_WR | \
+                                             UMP_PROT_SHAREABLE | \
+                                             UMP_CONSTRAINT_32BIT_ADDRESSABLE)
+
+/**
+ * CPU cache sync operations.
+ *
+ * Cache synchronization operations to pass to @ref ump_cpu_msync_now
+ */
+enum
+{
+ /**
+ * Cleans any dirty cache lines to main memory, but the data will still be available in the cache.
+ * After a clean the contents of memory are considered to be "owned" by the device.
+ * */
+ UMP_MSYNC_CLEAN = 1,
+
+ /** Cleans any dirty cache lines to main memory and ejects all lines from the cache.
+ * After a clean & invalidate the contents of memory are considered to be "owned" by the CPU.
+ * Any subsequent access will fetch data from main memory.
+ *
+ * @note Due to CPUs doing speculative prefetching a UMP_MSYNC_CLEAN_AND_INVALIDATE must be done before and after interacting with hardware.
+ * */
+ UMP_MSYNC_CLEAN_AND_INVALIDATE
+
+};
+
+typedef uint32_t ump_cpu_msync_op;
+
+/**
+ * Memory import types supported.
+ * If new import types are added they will appear here.
+ * They must be added before UMPP_EXTERNAL_MEM_COUNT and
+ * must be assigned an explicit sequential number.
+ *
+ * @li UMP_EXTERNAL_MEM_TYPE_ION - Import an ION allocation
+ * Takes an int * (pointer to a file descriptor)
+ * Another ION reference is taken which is released on the final ump_release
+ */
+enum ump_external_memory_type
+{
+ UMPP_EXTERNAL_MEM_TYPE_UNUSED = 0, /* reserve type 0 */
+ UMP_EXTERNAL_MEM_TYPE_ION = 1,
+ UMPP_EXTERNAL_MEM_COUNT
+};
+
+/** @name UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Allocation constraints.
+ *
+ * Allocation flags to pass @ref ump_ref_drv_allocate
+ *
+ * UMP v1 API only.
+ */
+typedef enum
+{
+ /** the allocation is mapped as noncached. */
+ UMP_REF_DRV_CONSTRAINT_NONE = 0,
+ /** not supported. */
+ UMP_REF_DRV_CONSTRAINT_PHYSICALLY_LINEAR = 1,
+ /** the allocation is mapped as cached by the cpu. */
+ UMP_REF_DRV_CONSTRAINT_USE_CACHE = 4
+} ump_alloc_constraints;
+
+/* @} */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _UMP_COMMON_H_ */
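A hedged sketch of composing the allocation bits described above for a buffer the CPU writes and device Z reads, shared across processes; the choice of device Z is arbitrary:

ump_alloc_flags flags = UMP_PROT_CPU_RD | UMP_PROT_CPU_WR |
                        UMP_HINT_CPU_WR |
                        UMP_PROT_Z_RD | UMP_HINT_Z_RD |
                        UMP_PROT_SHAREABLE;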
diff --git a/include/linux/ump-import.h b/include/linux/ump-import.h
new file mode 100755
index 000000000000..0641111afb6e
--- /dev/null
+++ b/include/linux/ump-import.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IMPORT_H_
+#define _UMP_IMPORT_H_
+
+#include <linux/ump.h>
+#include <linux/module.h>
+
+/**
+ * UMP import module info.
+ * Contains information about the Linux module providing the import module,
+ * used to block unloading of the Linux module while imported memory exists.
+ * Lists the functions implementing the UMP import functions.
+ */
+struct ump_import_handler
+{
+ /**
+ * Linux module of the import handler
+ */
+ struct module * linux_module;
+
+ /**
+ * UMP session usage begin.
+ *
+ * Called when a UMP session first is bound to the handler.
+ * Typically used to set up any import module specific per-session data.
+ * The function returns a pointer to this data in the output pointer custom_session_data
+ * which will be passed to \a session_end and \a import.
+ *
+ * Note: custom_session_data must be set to non-NULL if successful.
+ * If no pointer is needed, set it to a magic value that can be validated instead.
+ *
+ * @param[out] custom_session_data Pointer to a generic pointer where any data can be stored
+ * @return 0 on success, error code if the session could not be initiated.
+ */
+ int (*session_begin)(void ** custom_session_data);
+
+ /**
+ * UMP session usage end.
+ *
+ * Called when a UMP session is no longer using the handler.
+ * Only called if @a session_begin returned OK.
+ *
+ * @param[in] custom_session_data The value set by the session_begin handler
+ */
+ void (*session_end)(void * custom_session_data);
+
+ /**
+ * Import request.
+ *
+ * Called when a client has asked to import a resource of the type the import module was installed for.
+ * Only called if @a session_begin returned OK.
+ *
+ * The requested flags must be verified to be valid to apply to the imported memory.
+ * If not valid return UMP_DD_INVALID_MEMORY_HANDLE.
+ * If the flags are found to be valid call \a ump_dd_create_from_phys_blocks_64 to create a handle.
+ *
+ * @param[in] custom_session_data The value set by the session_begin handler
+ * @param[in] phandle Pointer to the handle to import
+ * @param flags The requested UMPv2 flags to assign to the imported handle
+ * @return UMP_DD_INVALID_MEMORY_HANDLE if the import failed, a valid ump handle on success
+ */
+ ump_dd_handle (*import)(void * custom_session_data, void * phandle, ump_alloc_flags flags);
+};
+
+/**
+ * Import module registration.
+ * Registers a ump_import_handler structure for a memory type.
+ * @param type Type of the memory to register a handler for
+ * @param[in] handler Handler structure to install
+ * @return 0 on success, a Linux error code on failure
+ */
+int ump_import_module_register(enum ump_external_memory_type type, struct ump_import_handler * handler);
+
+/**
+ * Import module deregistration.
+ * Uninstalls the handler for the given memory type.
+ * @param type Type of the memory to unregister the handler for
+ */
+void ump_import_module_unregister(enum ump_external_memory_type type);
+
+#endif /* _UMP_IMPORT_H_ */
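A hedged sketch of registering an import handler for ION memory with the API above; the three callbacks are assumed to be implemented elsewhere in the module:

static struct ump_import_handler example_ion_handler = {
        .linux_module  = THIS_MODULE,
        .session_begin = example_session_begin,
        .session_end   = example_session_end,
        .import        = example_import,
};

static int __init example_init(void)
{
        return ump_import_module_register(UMP_EXTERNAL_MEM_TYPE_ION,
                                          &example_ion_handler);
}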
diff --git a/include/linux/ump-ioctl.h b/include/linux/ump-ioctl.h
new file mode 100755
index 000000000000..a6219566ebb4
--- /dev/null
+++ b/include/linux/ump-ioctl.h
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _UMP_IOCTL_H_
+#define _UMP_IOCTL_H_
+
+#include <linux/ump-common.h>
+
+/*
+ * The order and size of the members of these have been chosen so the structures look the same in 32-bit and 64-bit builds.
+ * If any changes are done build the ump_struct_size_checker test for 32-bit and 64-bit targets. Both must compile successfully to commit.
+ */
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union ump_pointer
+{
+ void * value; /**< client should store their pointers here */
+ uint32_t compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+ uint64_t sizer; /**< Force 64-bit storage for all clients regardless */
+} ump_pointer;
+
+/**
+ * UMP allocation request.
+ * Used when performing ump_allocate
+ */
+typedef struct ump_k_allocate
+{
+ uint64_t size; /**< [in] Size in bytes to allocate */
+ ump_secure_id secure_id; /**< [out] Secure ID of allocation on success */
+ ump_alloc_flags alloc_flags; /**< [in] Flags to use when allocating */
+} ump_k_allocate;
+
+/**
+ * UMP size query request.
+ * Used when performing ump_size_get
+ */
+typedef struct ump_k_sizequery
+{
+ uint64_t size; /**< [out] Size of allocation */
+ ump_secure_id secure_id; /**< [in] ID of allocation to query the size of */
+ uint32_t padding; /* don't remove */
+} ump_k_sizequery;
+
+/**
+ * UMP cache synchronization request.
+ * Used when performing ump_cpu_msync_now
+ */
+typedef struct ump_k_msync
+{
+ ump_pointer mapped_ptr; /**< [in] CPU VA to perform cache operation on */
+ ump_secure_id secure_id; /**< [in] ID of allocation to perform cache operation on */
+ ump_cpu_msync_op cache_operation; /**< [in] Cache operation to perform */
+ uint64_t size; /**< [in] Size in bytes of the range to synchronize */
+} ump_k_msync;
+
+/**
+ * UMP memory retain request.
+ * Used when performing ump_retain
+ */
+typedef struct ump_k_retain
+{
+ ump_secure_id secure_id; /**< [in] ID of allocation to retain a reference to */
+ uint32_t padding; /* don't remove */
+} ump_k_retain;
+
+/**
+ * UMP memory release request.
+ * Used when performing ump_release
+ */
+typedef struct ump_k_release
+{
+ ump_secure_id secure_id; /**< [in] ID of allocation to release a reference to */
+ uint32_t padding; /* don't remove */
+} ump_k_release;
+
+typedef struct ump_k_import
+{
+ ump_pointer phandle; /**< [in] Pointer to handle to import */
+ uint32_t type; /**< [in] Type of handle to import */
+ ump_alloc_flags alloc_flags; /**< [in] Flags to assign to the imported memory */
+ ump_secure_id secure_id; /**< [out] UMP ID representing the imported memory */
+ uint32_t padding; /* don't remove */
+} ump_k_import;
+
+/**
+ * UMP allocation flags request.
+ * Used when performing umpp_get_allocation_flags
+ *
+ * used only by v1 API
+ */
+typedef struct ump_k_allocation_flags
+{
+ ump_secure_id secure_id; /**< [in] ID of the allocation to query */
+ ump_alloc_flags alloc_flags; /**< [out] Flags the allocation was created with */
+} ump_k_allocation_flags;
+
+#define UMP_CALL_MAX_SIZE 512
+/*
+ * Ioctl definitions
+ */
+
+/* Use '~' as magic number */
+
+#define UMP_IOC_MAGIC '~'
+
+#define UMP_FUNC_ALLOCATE _IOWR(UMP_IOC_MAGIC, 1, ump_k_allocate)
+#define UMP_FUNC_SIZEQUERY _IOWR(UMP_IOC_MAGIC, 2, ump_k_sizequery)
+#define UMP_FUNC_MSYNC _IOWR(UMP_IOC_MAGIC, 3, ump_k_msync)
+#define UMP_FUNC_RETAIN _IOW(UMP_IOC_MAGIC, 4, ump_k_retain)
+#define UMP_FUNC_RELEASE _IOW(UMP_IOC_MAGIC, 5, ump_k_release)
+#define UMP_FUNC_ALLOCATION_FLAGS_GET _IOWR(UMP_IOC_MAGIC, 6, ump_k_allocation_flags)
+#define UMP_FUNC_IMPORT _IOWR(UMP_IOC_MAGIC, 7, ump_k_import)
+
+/*max ioctl sequential number*/
+#define UMP_IOC_MAXNR 7
+
+/* 15 bits for the UMP ID (allowing 32768 IDs) */
+#define UMP_LINUX_ID_BITS 15
+#define UMP_LINUX_ID_MASK ((1ULL << UMP_LINUX_ID_BITS) - 1ULL)
+
+/* 64-bit (really 52 bits) encoding: 15 bits for the ID, 37 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_64 37
+#define UMP_LINUX_OFFSET_MASK_64 ((1ULL << UMP_LINUX_OFFSET_BITS_64)-1)
+/* 32-bit encoding: 15 bits for the ID, 17 bits for the offset */
+#define UMP_LINUX_OFFSET_BITS_32 17
+#define UMP_LINUX_OFFSET_MASK_32 ((1ULL << UMP_LINUX_OFFSET_BITS_32)-1)
+
+#if __SIZEOF_LONG__ == 8
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_64
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_64
+#else
+#define UMP_LINUX_OFFSET_BITS UMP_LINUX_OFFSET_BITS_32
+#define UMP_LINUX_OFFSET_MASK UMP_LINUX_OFFSET_MASK_32
+#endif
+
+#endif /* _UMP_IOCTL_H_ */
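As a hedged illustration of the bit budget defined above, one plausible way to pack a secure ID and a page offset into a single 64-bit mmap cookie could be the following; the real layout is chosen by the UMP driver, not by this header:

uint64_t cookie = ((uint64_t)page_offset << UMP_LINUX_ID_BITS) |
                  ((uint64_t)secure_id & UMP_LINUX_ID_MASK);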
diff --git a/include/linux/ump.h b/include/linux/ump.h
new file mode 100755
index 000000000000..423ba852a841
--- /dev/null
+++ b/include/linux/ump.h
@@ -0,0 +1,481 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ *
+ * This file contains the kernel space part of the UMP API.
+ *
+ */
+
+#ifndef _UMP_KERNEL_INTERFACE_H_
+#define _UMP_KERNEL_INTERFACE_H_
+
+/**
+ * @addtogroup ump_api
+ * @{
+ */
+
+/** @defgroup ump_kernel_space_api UMP Kernel Space API
+ * @{ */
+
+/**
+ * External representation of a UMP handle in kernel space.
+ */
+typedef void * ump_dd_handle;
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif
+
+#include <linux/ump-common.h>
+
+#define UMP_KERNEL_API_EXPORT
+
+#if defined(__KERNEL__)
+#include <linux/ump-import.h>
+#else
+#include <ump/src/library/common/ump_user.h>
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_INVALID_MEMORY_HANDLE ((ump_dd_handle)0)
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block_64
+{
+ uint64_t addr; /**< The physical address of the block */
+ uint64_t size; /**< The length of the block, in bytes, typically page aligned */
+} ump_dd_physical_block_64;
+
+/**
+ * Security filter hook.
+ *
+ * Each allocation can have a security filter attached to it.@n
+ * The hook receives:
+ * @li the secure ID
+ * @li a handle to the allocation
+ * @li the callback_data argument provided to @ref ump_dd_allocate_64 or @ref ump_dd_create_from_phys_blocks_64
+ *
+ * The hook must return @a MALI_TRUE to permit access to the handle or
+ * @a MALI_FALSE to deny access.@n
+ * This hook is guaranteed to be called in the context of the requesting process/address space.
+ */
+typedef bool (*ump_dd_security_filter)(ump_secure_id, ump_dd_handle, void *);
+
+/**
+ * Final release notify hook.
+ *
+ * Allocations can have a hook attached to them which is called when the last reference to the allocation is released.
+ * No reference count manipulation is allowed on the provided handle, just property querying (ID get, size get, phys block get).
+ * This is similar to finalizers in OO languages.
+ *
+ * The arguments provided to the hook are:
+ * * handle to the allocation
+ * * the callback_data set when registering the hook
+ */
+typedef void (*ump_dd_final_release_callback)(const ump_dd_handle, void *);
+
+/**
+ * Allocate a buffer.
+ * The lifetime of the allocation is controlled by a reference count.
+ * The reference count of the returned buffer is set to 1.
+ * The memory will be freed once the reference count reaches 0.
+ * Use @ref ump_dd_retain and @ref ump_dd_release to control the reference count.
+ * @param size Number of bytes to allocate. Will be padded up to a multiple of the page size.
+ * @param flags Bit-wise OR of zero or more of the allocation flag bits.
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a
+ * secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed,
+ * just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the new allocation, or @a UMP_DD_INVALID_MEMORY_HANDLE on allocation failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_allocate_64(uint64_t size, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/**
+ * Allocation bits getter.
+ * Retrieves the allocation flags used when instantiating the given handle.
+ * Just a copy of the flag given to @ref ump_dd_allocate_64 and @ref ump_dd_create_from_phys_blocks_64
+ * @param mem The handle retrieve the bits for
+ * @return The allocation bits used to instantiate the allocation
+ */
+UMP_KERNEL_API_EXPORT ump_alloc_flags ump_dd_allocation_flags_get(const ump_dd_handle mem);
+
+
+/**
+ * Retrieves the secure ID for the specified UMP memory.
+ *
+ * This identifier is unique across the entire system, and uniquely identifies
+ * the specified UMP memory allocation. This identifier can later be used through the
+ * @ref ump_dd_from_secure_id or
+ * @ref ump_from_secure_id
+ * functions in order to access this UMP memory, for instance from another process (if shared of course).
+ * Unless the allocation was marked as shared the returned ID will only be resolvable in the same process as did the allocation.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE will result in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_secure_id_get
+ *
+ * @see ump_dd_from_secure_id
+ * @see ump_from_secure_id
+ * @see ump_secure_id_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the secure ID for the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT ump_secure_id ump_dd_secure_id_get(const ump_dd_handle mem);
+
+#ifdef CONFIG_KDS
+/**
+ * Retrieve the KDS resource for the specified UMP memory.
+ *
+ * The KDS resource should be used to synchronize access to the UMP allocation.
+ * See the KDS API for how to do that.
+ *
+ * @param mem Handle to the UMP memory to query.
+ * @return Pointer to the KDS resource controlling access to the UMP memory.
+ */
+UMP_KERNEL_API_EXPORT struct kds_resource * ump_dd_kds_resource_get(const ump_dd_handle mem);
+#endif
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_release when there is no longer any
+ * use for the retrieved handle.
+ *
+ * If called on a non-shared allocation from a different process, @a UMP_DD_INVALID_MEMORY_HANDLE will be returned.
+ *
+ * Calling on an @a UMP_INVALID_SECURE_ID will return @a UMP_DD_INVALID_MEMORY_HANDLE
+ *
+ * @note There is a user space equivalent function called @ref ump_from_secure_id
+ *
+ * @see ump_dd_release
+ * @see ump_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get function.
+ *
+ * @return @a UMP_DD_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_from_secure_id(ump_secure_id secure_id);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ * This function will return a pointer to an array of @ref ump_dd_physical_block_64 in @a pArray and the number of array elements in @a pCount
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @param mem Handle to UMP memory.
+ * @param[out] pCount Pointer to where to store the number of items in the returned array
+ * @param[out] pArray Pointer to where to store a pointer to the physical blocks array
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_phys_blocks_get_64(const ump_dd_handle mem, uint64_t * const pCount, const ump_dd_physical_block_64 ** const pArray);
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT uint64_t ump_dd_size_get_64(const ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory allocation.
+ *
+ * The function @ref ump_dd_release must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * Calling on an @a UMP_DD_INVALID_MEMORY_HANDLE results in undefined behavior.
+ * Debug builds will assert on this.
+ *
+ * @note You are not required to call @ref ump_dd_retain
+ * for UMP handles returned from
+ * @ref ump_dd_from_secure_id,
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_retain
+ *
+ * @see ump_retain
+ *
+ * @param mem Handle to UMP memory.
+ * @return 0 indicates success, any other value indicates failure.
+ */
+UMP_KERNEL_API_EXPORT int ump_dd_retain(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function must be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * One can only call ump_dd_release when matched with a successful ump_dd_retain, ump_dd_allocate_64 or ump_dd_from_secure_id.
+ * If called on an @a UMP_DD_INVALID_MEMORY_HANDLE the function returns early without doing anything.
+ *
+ * @note There is a user space equivalent function called @ref ump_release
+ *
+ * @see ump_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_release(ump_dd_handle mem);
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ * Once wrapped the memory acts just like a normal allocation coming from @ref ump_dd_allocate_64.
+ * The only exception is that the freed physical memory is not put into the pool of free memory, but instead considered returned to the caller once @a final_release_func returns.
+ * The blocks array will be copied, so no need to hold on to it after this function returns.
+ * @param[in] blocks Array of @ref ump_dd_physical_block_64
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ * @param flags Allocation flags to mark the handle with
+ * @param[in] filter_func Pointer to a function which will be called when an allocation is required from a secure id before the allocation itself is returned to user-space.
+ * NULL permitted if no need for a callback.
+ * @param[in] final_release_func Pointer to a function which will be called when the last reference is removed, just before the allocation is freed. NULL permitted if no need for a callback.
+ * @param[in] callback_data An opaque pointer which will be provided to @a filter_func and @a final_release_func
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_create_from_phys_blocks_64(const ump_dd_physical_block_64 * blocks, uint64_t num_blocks, ump_alloc_flags flags, ump_dd_security_filter filter_func, ump_dd_final_release_callback final_release_func, void* callback_data);
+
+
+/** @name UMP v1 API
+ * Functions provided to support compatibility with UMP v1 API
+ *
+ *@{
+ */
+
+/**
+ * Value to indicate an invalid UMP memory handle.
+ */
+#define UMP_DD_HANDLE_INVALID UMP_DD_INVALID_MEMORY_HANDLE
+
+/**
+ * UMP error codes for kernel space.
+ */
+typedef enum
+{
+ UMP_DD_SUCCESS, /**< indicates success */
+ UMP_DD_INVALID /**< indicates failure */
+} ump_dd_status_code;
+
+
+/**
+ * Struct used to describe a physical block used by UMP memory
+ */
+typedef struct ump_dd_physical_block
+{
+ unsigned long addr; /**< The physical address of the block */
+ unsigned long size; /**< The length of the block, typically page aligned */
+} ump_dd_physical_block;
+
+
+/**
+ * Retrieves a handle to allocated UMP memory.
+ *
+ * The usage of UMP memory is reference counted, so this will increment the reference
+ * count by one for the specified UMP memory.
+ * Use @ref ump_dd_reference_release "ump_dd_reference_release" when there is no longer any
+ * use for the retrieved handle.
+ *
+ * @note There is a user space equivalent function called @ref ump_handle_create_from_secure_id "ump_handle_create_from_secure_id"
+ *
+ * @see ump_dd_reference_release
+ * @see ump_handle_create_from_secure_id
+ *
+ * @param secure_id The secure ID of the UMP memory to open, that can be retrieved using the @ref ump_secure_id_get "ump_secure_id_get " function.
+ *
+ * @return UMP_INVALID_MEMORY_HANDLE indicates failure, otherwise a valid handle is returned.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_secure_id(ump_secure_id secure_id);
+
+
+
+/**
+ * Create an ump allocation handle based on externally managed memory.
+ * Used to wrap an existing allocation as an UMP memory handle.
+ *
+ * @param[in] blocks Array of @ref ump_dd_physical_block
+ * @param num_blocks Number of elements in the array pointed to by @a blocks
+ *
+ * @return Handle to the UMP allocation handle created, or @a UMP_DD_INVALID_MEMORY_HANDLE if no such handle could be created.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_handle ump_dd_handle_create_from_phys_blocks(ump_dd_physical_block * blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the number of physical blocks used by the specified UMP memory.
+ *
+ * This function retrieves the number of @ref ump_dd_physical_block "ump_dd_physical_block" structs needed
+ * to describe the physical memory layout of the given UMP memory. This can later be used when calling
+ * the functions @ref ump_dd_phys_blocks_get "ump_dd_phys_blocks_get" and
+ * @ref ump_dd_phys_block_get "ump_dd_phys_block_get".
+ *
+ * @see ump_dd_phys_blocks_get
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return The number of ump_dd_physical_block structs required to describe the physical memory layout of the specified UMP memory.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_phys_block_count_get(ump_dd_handle mem);
+
+
+/**
+ * Retrieves all physical memory block information for specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will fail if the num_blocks parameter is either too large or too small.
+ *
+ * @see ump_dd_phys_block_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param blocks An array of @ref ump_dd_physical_block "ump_dd_physical_block" structs that will receive the physical description.
+ * @param num_blocks The number of blocks to return in the blocks array. Use the function
+ * @ref ump_dd_phys_block_count_get "ump_dd_phys_block_count_get" first to determine the number of blocks required.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_blocks_get(ump_dd_handle mem, ump_dd_physical_block * const blocks, unsigned long num_blocks);
+
+
+/**
+ * Retrieves the physical memory block information for specified block for the specified UMP memory.
+ *
+ * This function can be used by other device drivers in order to create MMU tables.
+ *
+ * @note This function will return UMP_DD_INVALID if the specified index is out of range.
+ *
+ * @see ump_dd_phys_blocks_get
+ *
+ * @param mem Handle to UMP memory.
+ * @param index Which physical info block to retrieve.
+ * @param block Pointer to a @ref ump_dd_physical_block "ump_dd_physical_block" struct which will receive the requested information.
+ *
+ * @return UMP_DD_SUCCESS indicates success, UMP_DD_INVALID indicates failure.
+ */
+UMP_KERNEL_API_EXPORT ump_dd_status_code ump_dd_phys_block_get(ump_dd_handle mem, unsigned long index, ump_dd_physical_block * const block);
+
+
+/**
+ * Retrieves the actual size of the specified UMP memory.
+ *
+ * The size is reported in bytes, and is typically page aligned.
+ *
+ * @note There is a user space equivalent function called @ref ump_size_get "ump_size_get"
+ *
+ * @see ump_size_get
+ *
+ * @param mem Handle to UMP memory.
+ *
+ * @return Returns the allocated size of the specified UMP memory, in bytes.
+ */
+UMP_KERNEL_API_EXPORT unsigned long ump_dd_size_get(ump_dd_handle mem);
+
+
+/**
+ * Adds an extra reference to the specified UMP memory.
+ *
+ * This function adds an extra reference to the specified UMP memory. This function should
+ * be used every time a UMP memory handle is duplicated, that is, assigned to another ump_dd_handle
+ * variable. The function @ref ump_dd_reference_release "ump_dd_reference_release" must then be used
+ * to release each copy of the UMP memory handle.
+ *
+ * @note You are not required to call @ref ump_dd_reference_add "ump_dd_reference_add"
+ * for UMP handles returned from
+ * @ref ump_dd_handle_create_from_secure_id "ump_dd_handle_create_from_secure_id",
+ * because these handles are already reference counted by this function.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_add "ump_reference_add"
+ *
+ * @see ump_reference_add
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_add(ump_dd_handle mem);
+
+
+/**
+ * Releases a reference from the specified UMP memory.
+ *
+ * This function should be called once for every reference to the UMP memory handle.
+ * When the last reference is released, all resources associated with this UMP memory
+ * handle are freed.
+ *
+ * @note There is a user space equivalent function called @ref ump_reference_release "ump_reference_release"
+ *
+ * @see ump_reference_release
+ *
+ * @param mem Handle to UMP memory.
+ */
+UMP_KERNEL_API_EXPORT void ump_dd_reference_release(ump_dd_handle mem);
+
+/* @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/** @} */ /* end group ump_kernel_space_api */
+
+/** @} */ /* end group ump_api */
+
+#endif /* _UMP_KERNEL_INTERFACE_H_ */
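A hedged lifetime sketch using the v2 kernel API declared above: allocate, query, then drop the single reference; the size and flags are illustrative:

ump_dd_handle h = ump_dd_allocate_64(SZ_1M,
                                     UMP_PROT_CPU_RD | UMP_PROT_CPU_WR |
                                     UMP_PROT_SHAREABLE,
                                     NULL, NULL, NULL);

if (h != UMP_DD_INVALID_MEMORY_HANDLE) {
        pr_info("ump id %d, %llu bytes\n",
                ump_dd_secure_id_get(h),
                (unsigned long long)ump_dd_size_get_64(h));
        ump_dd_release(h);
}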
diff --git a/include/linux/usb/f_accessory.h b/include/linux/usb/f_accessory.h
new file mode 100644
index 000000000000..ebe3c4d59309
--- /dev/null
+++ b/include/linux/usb/f_accessory.h
@@ -0,0 +1,23 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_F_ACCESSORY_H
+#define __LINUX_USB_F_ACCESSORY_H
+
+#include <uapi/linux/usb/f_accessory.h>
+
+#endif /* __LINUX_USB_F_ACCESSORY_H */
diff --git a/include/linux/usb/f_mtp.h b/include/linux/usb/f_mtp.h
new file mode 100644
index 000000000000..4e8417791bea
--- /dev/null
+++ b/include/linux/usb/f_mtp.h
@@ -0,0 +1,23 @@
+/*
+ * Gadget Function Driver for MTP
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_F_MTP_H
+#define __LINUX_USB_F_MTP_H
+
+#include <uapi/linux/usb/f_mtp.h>
+
+#endif /* __LINUX_USB_F_MTP_H */
diff --git a/include/linux/wakelock.h b/include/linux/wakelock.h
new file mode 100644
index 000000000000..f4a698a22880
--- /dev/null
+++ b/include/linux/wakelock.h
@@ -0,0 +1,67 @@
+/* include/linux/wakelock.h
+ *
+ * Copyright (C) 2007-2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_WAKELOCK_H
+#define _LINUX_WAKELOCK_H
+
+#include <linux/ktime.h>
+#include <linux/device.h>
+
+/* A wake_lock prevents the system from entering suspend or other low power
+ * states when active. If the type is set to WAKE_LOCK_SUSPEND, the wake_lock
+ * prevents a full system suspend.
+ */
+
+enum {
+ WAKE_LOCK_SUSPEND, /* Prevent suspend */
+ WAKE_LOCK_TYPE_COUNT
+};
+
+struct wake_lock {
+ struct wakeup_source ws;
+};
+
+static inline void wake_lock_init(struct wake_lock *lock, int type,
+ const char *name)
+{
+ wakeup_source_init(&lock->ws, name);
+}
+
+static inline void wake_lock_destroy(struct wake_lock *lock)
+{
+ wakeup_source_trash(&lock->ws);
+}
+
+static inline void wake_lock(struct wake_lock *lock)
+{
+ __pm_stay_awake(&lock->ws);
+}
+
+static inline void wake_lock_timeout(struct wake_lock *lock, long timeout)
+{
+ __pm_wakeup_event(&lock->ws, jiffies_to_msecs(timeout));
+}
+
+static inline void wake_unlock(struct wake_lock *lock)
+{
+ __pm_relax(&lock->ws);
+}
+
+static inline int wake_lock_active(struct wake_lock *lock)
+{
+ return lock->ws.active;
+}
+
+#endif
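
A minimal sketch of the intended call pattern for the inline wrappers above; the example_* names are placeholders, not part of this patch.

static struct wake_lock example_lock;

static int example_probe(void)
{
	wake_lock_init(&example_lock, WAKE_LOCK_SUSPEND, "example");
	return 0;
}

static void example_handle_event(void)
{
	wake_lock(&example_lock);	/* block suspend while working */
	/* ... process the event ... */
	wake_unlock(&example_lock);	/* allow suspend again */
}

static void example_remove(void)
{
	if (wake_lock_active(&example_lock))
		wake_unlock(&example_lock);
	wake_lock_destroy(&example_lock);
}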
diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h
new file mode 100644
index 000000000000..ad8b76936c7f
--- /dev/null
+++ b/include/linux/wakeup_reason.h
@@ -0,0 +1,27 @@
+/*
+ * include/linux/wakeup_reason.h
+ *
+ * Logs the reason that caused the kernel to resume
+ * from suspend.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_WAKEUP_REASON_H
+#define _LINUX_WAKEUP_REASON_H
+
+#define MAX_SUSPEND_ABORT_LEN 256
+
+void log_wakeup_reason(int irq);
+void log_suspend_abort_reason(const char *fmt, ...);
+int check_wakeup_reason(int irq);
+
+#endif /* _LINUX_WAKEUP_REASON_H */
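
A hedged sketch of how platform code might use these hooks; example_device_busy() is a made-up predicate and the suspend-callback shape is only illustrative.

/* in the platform's wakeup-IRQ bookkeeping: record which IRQ woke us up */
static void example_note_wakeup_irq(int irq)
{
	log_wakeup_reason(irq);
}

/* in a driver suspend callback: abort suspend and record why */
static int example_suspend(struct device *dev)
{
	if (example_device_busy(dev)) {
		log_suspend_abort_reason("example: %s still busy",
					 dev_name(dev));
		return -EBUSY;
	}
	return 0;
}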
diff --git a/include/linux/wifi_tiwlan.h b/include/linux/wifi_tiwlan.h
new file mode 100644
index 000000000000..f07e0679fb82
--- /dev/null
+++ b/include/linux/wifi_tiwlan.h
@@ -0,0 +1,27 @@
+/* include/linux/wifi_tiwlan.h
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _LINUX_WIFI_TIWLAN_H_
+#define _LINUX_WIFI_TIWLAN_H_
+
+#include <linux/wlan_plat.h>
+
+#define WMPA_NUMBER_OF_SECTIONS 3
+#define WMPA_NUMBER_OF_BUFFERS 160
+#define WMPA_SECTION_HEADER 24
+#define WMPA_SECTION_SIZE_0 (WMPA_NUMBER_OF_BUFFERS * 64)
+#define WMPA_SECTION_SIZE_1 (WMPA_NUMBER_OF_BUFFERS * 256)
+#define WMPA_SECTION_SIZE_2 (WMPA_NUMBER_OF_BUFFERS * 2048)
+
+#endif
diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h
new file mode 100644
index 000000000000..8e8b06f1ba4a
--- /dev/null
+++ b/include/linux/wlan_plat.h
@@ -0,0 +1,30 @@
+/* include/linux/wlan_plat.h
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _LINUX_WLAN_PLAT_H_
+#define _LINUX_WLAN_PLAT_H_
+
+#define WLAN_PLAT_NODFS_FLAG 0x01
+
+struct wifi_platform_data {
+ int (*set_power)(int val);
+ int (*set_reset)(int val);
+ int (*set_carddetect)(int val);
+ void *(*mem_prealloc)(int section, unsigned long size);
+ int (*get_mac_addr)(unsigned char *buf);
+ int (*get_wake_irq)(void);
+ void *(*get_country_code)(char *ccode, u32 flags);
+};
+
+#endif
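
A board file would typically provide this callback table; the sketch below is illustrative only, and every example_* symbol is assumed rather than defined by this patch.

static int example_wifi_set_power(int on)
{
	/* toggle the WLAN regulator / enable GPIO here */
	return 0;
}

static int example_wifi_set_carddetect(int present)
{
	/* tell the SDIO host controller the card (dis)appeared */
	return 0;
}

static struct wifi_platform_data example_wifi_control = {
	.set_power	= example_wifi_set_power,
	.set_carddetect	= example_wifi_set_carddetect,
	/* optional hooks (.set_reset, .mem_prealloc, .get_mac_addr,
	 * .get_wake_irq, .get_country_code) may be left NULL
	 */
};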
diff --git a/include/net/activity_stats.h b/include/net/activity_stats.h
new file mode 100644
index 000000000000..10e4c1506eeb
--- /dev/null
+++ b/include/net/activity_stats.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ */
+
+#ifndef __activity_stats_h
+#define __activity_stats_h
+
+#ifdef CONFIG_NET_ACTIVITY_STATS
+void activity_stats_update(void);
+#else
+#define activity_stats_update(void) {}
+#endif
+
+#endif /* _NET_ACTIVITY_STATS_H */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 25100687babb..2cbf0baa6226 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -190,6 +190,8 @@ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset)
extern void addrconf_prefix_rcv(struct net_device *dev,
u8 *opt, int len, bool sllao);
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table);
+
/*
* anycast prototypes (anycast.c)
*/
@@ -201,6 +203,7 @@ extern int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr);
extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr);
extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
+extern void ipv6_ac_destroy_dev(struct inet6_dev *idev);
/* Device notifier */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0ef00066dae8..db43501b7599 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -199,8 +199,10 @@ enum {
#define ESCO_2EV5 0x0100
#define ESCO_3EV5 0x0200
-#define SCO_ESCO_MASK (ESCO_HV1 | ESCO_HV2 | ESCO_HV3)
-#define EDR_ESCO_MASK (ESCO_2EV3 | ESCO_3EV3 | ESCO_2EV5 | ESCO_3EV5)
+#define SCO_ESCO_MASK (ESCO_HV1 | ESCO_HV2 | ESCO_HV3)
+#define EDR_ESCO_MASK (ESCO_2EV3 | ESCO_3EV3 | ESCO_2EV5 | ESCO_3EV5)
+#define ALL_ESCO_MASK (SCO_ESCO_MASK | ESCO_EV3 | ESCO_EV4 | ESCO_EV5 | \
+ EDR_ESCO_MASK)
/* ACL flags */
#define ACL_START_NO_FLUSH 0x00
@@ -1629,6 +1631,9 @@ struct hci_conn_info {
__u8 out;
__u16 state;
__u32 link_mode;
+ __u32 mtu;
+ __u32 cnt;
+ __u32 pkts;
};
struct hci_dev_req {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 7cb6d360d147..57123eeb21aa 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -581,7 +581,8 @@ void hci_disconnect(struct hci_conn *conn, __u8 reason);
void hci_setup_sync(struct hci_conn *conn, __u16 handle);
void hci_sco_setup(struct hci_conn *conn, __u8 status);
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst);
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type,
+ __u16 pkt_type, bdaddr_t *dst);
int hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
void hci_conn_check_pending(struct hci_dev *hdev);
@@ -591,7 +592,8 @@ void hci_chan_del(struct hci_chan *chan);
void hci_chan_list_flush(struct hci_conn *conn);
struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle);
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type,
+ __u16 pkt_type, bdaddr_t *dst,
__u8 dst_type, __u8 sec_level, __u8 auth_type);
int hci_conn_check_link_mode(struct hci_conn *conn);
int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level);
@@ -654,7 +656,7 @@ static inline void hci_conn_drop(struct hci_conn *conn)
if (conn->state == BT_CONNECTED) {
timeo = conn->disc_timeout;
if (!conn->out)
- timeo *= 2;
+ timeo *= 20;
} else {
timeo = msecs_to_jiffies(10);
}
diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h
index 1e35c43657c8..6d1857ab8e5f 100644
--- a/include/net/bluetooth/sco.h
+++ b/include/net/bluetooth/sco.h
@@ -37,6 +37,7 @@
struct sockaddr_sco {
sa_family_t sco_family;
bdaddr_t sco_bdaddr;
+ __u16 sco_pkt_type;
};
/* SCO socket options */
@@ -72,7 +73,8 @@ struct sco_conn {
struct sco_pinfo {
struct bt_sock bt;
- __u32 flags;
+ __u16 pkt_type;
+
struct sco_conn *conn;
};
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 26b5b692c22b..d9681a288ce6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2254,22 +2254,28 @@ struct cfg80211_ops {
* enum wiphy_flags - wiphy capability flags
*
* @WIPHY_FLAG_CUSTOM_REGULATORY: tells us the driver for this device
- * has its own custom regulatory domain and cannot identify the
- * ISO / IEC 3166 alpha2 it belongs to. When this is enabled
- * we will disregard the first regulatory hint (when the
- * initiator is %REGDOM_SET_BY_CORE).
- * @WIPHY_FLAG_STRICT_REGULATORY: tells us the driver for this device will
- * ignore regulatory domain settings until it gets its own regulatory
- * domain via its regulatory_hint() unless the regulatory hint is
- * from a country IE. After its gets its own regulatory domain it will
- * only allow further regulatory domain settings to further enhance
- * compliance. For example if channel 13 and 14 are disabled by this
- * regulatory domain no user regulatory domain can enable these channels
- * at a later time. This can be used for devices which do not have
- * calibration information guaranteed for frequencies or settings
- * outside of its regulatory domain. If used in combination with
- * WIPHY_FLAG_CUSTOM_REGULATORY the inspected country IE power settings
- * will be followed.
+ * has its own custom regulatory domain and cannot identify the
+ * ISO / IEC 3166 alpha2 it belongs to. When this is enabled,
+ * we will disregard the first regulatory hint (when the
+ * initiator is %REGDOM_SET_BY_CORE). wiphys can set the custom
+ * regulatory domain using wiphy_apply_custom_regulatory()
+ * prior to wiphy registration.
+ * @WIPHY_FLAG_STRICT_REGULATORY: tells us that the wiphy for this device
+ * has a regulatory domain that it wishes to be considered as the
+ * superset for regulatory rules. After this device gets its regulatory
+ * domain programmed, further regulatory hints shall only be considered
+ * for this device to enhance regulatory compliance, forcing the
+ * device to only possibly use subsets of the original regulatory
+ * rules. For example, if channels 13 and 14 are disabled by this
+ * device's regulatory domain, no user-specified regulatory hint which
+ * has these channels enabled would enable them for this wiphy;
+ * the device's original regulatory domain will be trusted as the
+ * base. You can program the superset of regulatory rules for this
+ * wiphy with regulatory_hint() for cards programmed with an
+ * ISO3166-alpha2 country code. wiphys that use regulatory_hint()
+ * will have their wiphy->regd programmed once the regulatory
+ * domain is set, and all other regulatory hints will be ignored
+ * until their own regulatory domain gets programmed.
* @WIPHY_FLAG_DISABLE_BEACON_HINTS: enable this if your driver needs to ensure
* that passive scan flags and beaconing flags may not be lifted by
* cfg80211 due to regulatory beacon hints. For more information on beacon
@@ -2467,6 +2473,34 @@ struct wiphy_wowlan_support {
};
/**
+ * enum wiphy_vendor_command_flags - validation flags for vendor commands
+ * @WIPHY_VENDOR_CMD_NEED_WDEV: vendor command requires wdev
+ * @WIPHY_VENDOR_CMD_NEED_NETDEV: vendor command requires netdev
+ * @WIPHY_VENDOR_CMD_NEED_RUNNING: interface/wdev must be up & running
+ * (must be combined with %_WDEV or %_NETDEV)
+ */
+enum wiphy_vendor_command_flags {
+ WIPHY_VENDOR_CMD_NEED_WDEV = BIT(0),
+ WIPHY_VENDOR_CMD_NEED_NETDEV = BIT(1),
+ WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2),
+};
+
+/**
+ * struct wiphy_vendor_command - vendor command definition
+ * @info: vendor command identifying information, as used in nl80211
+ * @flags: flags, see &enum wiphy_vendor_command_flags
+ * @doit: callback for the operation, note that wdev is %NULL if the
+ * flags didn't ask for a wdev and non-%NULL otherwise; the data
+ * pointer may be %NULL if userspace provided no data at all
+ */
+struct wiphy_vendor_command {
+ struct nl80211_vendor_cmd_info info;
+ u32 flags;
+ int (*doit)(struct wiphy *wiphy, struct wireless_dev *wdev,
+ const void *data, int data_len);
+};
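
As a sketch of the registration side (the vendor OUI, sub-command number and handler name are invented), a driver could describe its commands like this and point wiphy->vendor_commands and n_vendor_commands at the array before wiphy_register():

#define EXAMPLE_OUI			0x001122	/* hypothetical vendor OUI */
#define EXAMPLE_SUBCMD_GET_STATS	1

static int example_vendor_get_stats(struct wiphy *wiphy,
				    struct wireless_dev *wdev,
				    const void *data, int data_len);

static const struct wiphy_vendor_command example_vendor_cmds[] = {
	{
		.info = {
			.vendor_id = EXAMPLE_OUI,
			.subcmd = EXAMPLE_SUBCMD_GET_STATS,
		},
		.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
			 WIPHY_VENDOR_CMD_NEED_RUNNING,
		.doit = example_vendor_get_stats,
	},
};

/* before wiphy_register():
 *	wiphy->vendor_commands = example_vendor_cmds;
 *	wiphy->n_vendor_commands = ARRAY_SIZE(example_vendor_cmds);
 */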
+
+/**
* struct wiphy - wireless hardware description
* @reg_notifier: the driver's regulatory notification callback,
* note that if your driver uses wiphy_apply_custom_regulatory()
@@ -2573,6 +2607,12 @@ struct wiphy_wowlan_support {
* 802.11-2012 8.4.2.29 for the defined fields.
* @extended_capabilities_mask: mask of the valid values
* @extended_capabilities_len: length of the extended capabilities
+ * @country_ie_pref: country IE processing preferences specified
+ * by enum nl80211_country_ie_pref
+ * @vendor_commands: array of vendor commands supported by the hardware
+ * @n_vendor_commands: number of vendor commands
+ * @vendor_events: array of vendor events supported by the hardware
+ * @n_vendor_events: number of vendor events
*/
struct wiphy {
/* assign these fields before you register the wiphy */
@@ -2642,6 +2682,8 @@ struct wiphy {
const u8 *extended_capabilities, *extended_capabilities_mask;
u8 extended_capabilities_len;
+ u8 country_ie_pref;
+
/* If multiple wiphys are registered and you're handed e.g.
* a regular netdev with assigned ieee80211_ptr, you won't
* know whether it points to a wiphy your driver has registered
@@ -2681,6 +2723,10 @@ struct wiphy {
const struct iw_handler_def *wext;
#endif
+ const struct wiphy_vendor_command *vendor_commands;
+ const struct nl80211_vendor_cmd_info *vendor_events;
+ int n_vendor_commands, n_vendor_events;
+
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -3591,6 +3637,121 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy);
*/
void wiphy_rfkill_stop_polling(struct wiphy *wiphy);
+/**
+ * DOC: Vendor commands
+ *
+ * Occasionally, there are special protocol or firmware features that
+ * can't be implemented very openly. For this and similar cases, the
+ * vendor command functionality allows implementing the features with
+ * (typically closed-source) userspace and firmware, using nl80211 as
+ * the configuration mechanism.
+ *
+ * A driver supporting vendor commands must register them as an array
+ * in struct wiphy, with handlers for each one; each command has an
+ * OUI and sub-command ID to identify it.
+ *
+ * Note that this feature should not be (ab)used to implement protocol
+ * features that could openly be shared across drivers. In particular,
+ * it must never be required to use vendor commands to implement any
+ * "normal" functionality that higher-level userspace like connection
+ * managers etc. need.
+ */
+
+struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
+ enum nl80211_commands cmd,
+ enum nl80211_attrs attr,
+ int approxlen);
+
+struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
+ enum nl80211_commands cmd,
+ enum nl80211_attrs attr,
+ int vendor_event_idx,
+ int approxlen, gfp_t gfp);
+
+void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp);
+
+/**
+ * cfg80211_vendor_cmd_alloc_reply_skb - allocate vendor command reply
+ * @wiphy: the wiphy
+ * @approxlen: an upper bound of the length of the data that will
+ * be put into the skb
+ *
+ * This function allocates and pre-fills an skb for a reply to
+ * a vendor command. Since it is intended for a reply, calling
+ * it outside of a vendor command's doit() operation is invalid.
+ *
+ * The returned skb is pre-filled with some identifying data in
+ * a way that any data that is put into the skb (with skb_put(),
+ * nla_put() or similar) will end up being within the
+ * %NL80211_ATTR_VENDOR_DATA attribute, so all that needs to be done
+ * with the skb is adding data for the corresponding userspace tool,
+ * which can then read that data out of the vendor data attribute. You
+ * must not modify the skb in any other way.
+ *
+ * When done, call cfg80211_vendor_cmd_reply() with the skb and return
+ * its error code as the result of the doit() operation.
+ *
+ * Return: An allocated and pre-filled skb. %NULL if any errors happen.
+ */
+static inline struct sk_buff *
+cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen)
+{
+ return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR,
+ NL80211_ATTR_VENDOR_DATA, approxlen);
+}
+
+/**
+ * cfg80211_vendor_cmd_reply - send the reply skb
+ * @skb: The skb, must have been allocated with
+ * cfg80211_vendor_cmd_alloc_reply_skb()
+ *
+ * Since calling this function will usually be the last thing
+ * before returning from the vendor command doit() you should
+ * return the error code. Note that this function consumes the
+ * skb regardless of the return value.
+ *
+ * Return: An error code or 0 on success.
+ */
+int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
+
+/**
+ * cfg80211_vendor_event_alloc - allocate vendor-specific event skb
+ * @wiphy: the wiphy
+ * @event_idx: index of the vendor event in the wiphy's vendor_events
+ * @approxlen: an upper bound of the length of the data that will
+ * be put into the skb
+ * @gfp: allocation flags
+ *
+ * This function allocates and pre-fills an skb for an event on the
+ * vendor-specific multicast group.
+ *
+ * When done filling the skb, call cfg80211_vendor_event() with the
+ * skb to send the event.
+ *
+ * Return: An allocated and pre-filled skb. %NULL if any errors happen.
+ */
+static inline struct sk_buff *
+cfg80211_vendor_event_alloc(struct wiphy *wiphy, int approxlen,
+ int event_idx, gfp_t gfp)
+{
+ return __cfg80211_alloc_event_skb(wiphy, NL80211_CMD_VENDOR,
+ NL80211_ATTR_VENDOR_DATA,
+ event_idx, approxlen, gfp);
+}
+
+/**
+ * cfg80211_vendor_event - send the event
+ * @skb: The skb, must have been allocated with cfg80211_vendor_event_alloc()
+ * @gfp: allocation flags
+ *
+ * This function sends the given @skb, which must have been allocated
+ * by cfg80211_vendor_event_alloc(), as an event. It always consumes it.
+ */
+static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp)
+{
+ __cfg80211_send_event_skb(skb, gfp);
+}
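
Continuing the registration sketch above, a doit() handler might build its reply as below, and an asynchronous notification might use the event path; the attribute numbering inside the vendor data is driver-defined and purely illustrative here.

static int example_vendor_get_stats(struct wiphy *wiphy,
				    struct wireless_dev *wdev,
				    const void *data, int data_len)
{
	struct sk_buff *reply;
	u32 stat = 42;	/* placeholder value */

	reply = cfg80211_vendor_cmd_alloc_reply_skb(wiphy, sizeof(stat));
	if (!reply)
		return -ENOMEM;

	/* attribute type 1 is a driver-defined example, not an nl80211 one */
	if (nla_put_u32(reply, 1, stat)) {
		kfree_skb(reply);
		return -ENOBUFS;
	}

	return cfg80211_vendor_cmd_reply(reply);
}

static void example_send_vendor_event(struct wiphy *wiphy)
{
	/* event_idx 0 refers to the first entry in wiphy->vendor_events */
	struct sk_buff *ev = cfg80211_vendor_event_alloc(wiphy, sizeof(u32),
							 0, GFP_KERNEL);
	if (!ev)
		return;
	if (nla_put_u32(ev, 1, 0xdead)) {
		kfree_skb(ev);
		return;
	}
	cfg80211_vendor_event(ev, GFP_KERNEL);
}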
+
#ifdef CONFIG_NL80211_TESTMODE
/**
* DOC: Test mode
@@ -3626,8 +3787,12 @@ void wiphy_rfkill_stop_polling(struct wiphy *wiphy);
*
* Return: An allocated and pre-filled skb. %NULL if any errors happen.
*/
-struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy,
- int approxlen);
+static inline struct sk_buff *
+cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, int approxlen)
+{
+ return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_TESTMODE,
+ NL80211_ATTR_TESTDATA, approxlen);
+}
/**
* cfg80211_testmode_reply - send the reply skb
@@ -3641,7 +3806,10 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy,
*
* Return: An error code or 0 on success.
*/
-int cfg80211_testmode_reply(struct sk_buff *skb);
+static inline int cfg80211_testmode_reply(struct sk_buff *skb)
+{
+ return cfg80211_vendor_cmd_reply(skb);
+}
/**
* cfg80211_testmode_alloc_event_skb - allocate testmode event
@@ -3664,8 +3832,13 @@ int cfg80211_testmode_reply(struct sk_buff *skb);
*
* Return: An allocated and pre-filled skb. %NULL if any errors happen.
*/
-struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy,
- int approxlen, gfp_t gfp);
+static inline struct sk_buff *
+cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp)
+{
+ return __cfg80211_alloc_event_skb(wiphy, NL80211_CMD_TESTMODE,
+ NL80211_ATTR_TESTDATA, -1,
+ approxlen, gfp);
+}
/**
* cfg80211_testmode_event - send the event
@@ -3677,7 +3850,10 @@ struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy,
* by cfg80211_testmode_alloc_event_skb(), as an event. It always
* consumes it.
*/
-void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp);
+static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
+{
+ __cfg80211_send_event_skb(skb, gfp);
+}
#define CFG80211_TESTMODE_CMD(cmd) .testmode_cmd = (cmd),
#define CFG80211_TESTMODE_DUMP(cmd) .testmode_dump = (cmd),
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e361f4882426..4ac12e14c6d9 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -23,6 +23,8 @@ struct fib_rule {
struct fib_rule __rcu *ctarget;
char iifname[IFNAMSIZ];
char oifname[IFNAMSIZ];
+ kuid_t uid_start;
+ kuid_t uid_end;
struct rcu_head rcu;
struct net * fr_net;
};
@@ -80,7 +82,9 @@ struct fib_rules_ops {
[FRA_FWMARK] = { .type = NLA_U32 }, \
[FRA_FWMASK] = { .type = NLA_U32 }, \
[FRA_TABLE] = { .type = NLA_U32 }, \
- [FRA_GOTO] = { .type = NLA_U32 }
+ [FRA_GOTO] = { .type = NLA_U32 }, \
+ [FRA_UID_START] = { .type = NLA_U32 }, \
+ [FRA_UID_END] = { .type = NLA_U32 }
static inline void fib_rule_get(struct fib_rule *rule)
{
diff --git a/include/net/flow.h b/include/net/flow.h
index 628e11b98c58..1426681f7cf3 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -10,6 +10,15 @@
#include <linux/socket.h>
#include <linux/in6.h>
#include <linux/atomic.h>
+#include <linux/uidgid.h>
+
+/*
+ * ifindex generation is per-net namespace, and loopback is
+ * always the 1st device in ns (see net_dev_init), thus any
+ * loopback device should get ifindex 1
+ */
+
+#define LOOPBACK_IFINDEX 1
struct flowi_common {
int flowic_oif;
@@ -23,6 +32,7 @@ struct flowi_common {
#define FLOWI_FLAG_CAN_SLEEP 0x02
#define FLOWI_FLAG_KNOWN_NH 0x04
__u32 flowic_secid;
+ kuid_t flowic_uid;
};
union flowi_uli {
@@ -59,6 +69,7 @@ struct flowi4 {
#define flowi4_proto __fl_common.flowic_proto
#define flowi4_flags __fl_common.flowic_flags
#define flowi4_secid __fl_common.flowic_secid
+#define flowi4_uid __fl_common.flowic_uid
/* (saddr,daddr) must be grouped, same order as in IP header */
__be32 saddr;
@@ -78,16 +89,18 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
__u32 mark, __u8 tos, __u8 scope,
__u8 proto, __u8 flags,
__be32 daddr, __be32 saddr,
- __be16 dport, __be16 sport)
+ __be16 dport, __be16 sport,
+ kuid_t uid)
{
fl4->flowi4_oif = oif;
- fl4->flowi4_iif = 0;
+ fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_mark = mark;
fl4->flowi4_tos = tos;
fl4->flowi4_scope = scope;
fl4->flowi4_proto = proto;
fl4->flowi4_flags = flags;
fl4->flowi4_secid = 0;
+ fl4->flowi4_uid = uid;
fl4->daddr = daddr;
fl4->saddr = saddr;
fl4->fl4_dport = dport;
@@ -115,6 +128,7 @@ struct flowi6 {
#define flowi6_proto __fl_common.flowic_proto
#define flowi6_flags __fl_common.flowic_flags
#define flowi6_secid __fl_common.flowic_secid
+#define flowi6_uid __fl_common.flowic_uid
struct in6_addr daddr;
struct in6_addr saddr;
__be32 flowlabel;
@@ -158,6 +172,7 @@ struct flowi {
#define flowi_proto u.__fl_common.flowic_proto
#define flowi_flags u.__fl_common.flowic_flags
#define flowi_secid u.__fl_common.flowic_secid
+#define flowi_uid u.__fl_common.flowic_uid
} __attribute__((__aligned__(BITS_PER_LONG/8)));
static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 7235ae73a1e8..9528e10fa0b4 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -88,6 +88,7 @@ struct inet_request_sock {
acked : 1,
no_srccheck: 1;
kmemcheck_bitfield_end(flags);
+ u32 ir_mark;
struct ip_options_rcu *opt;
};
@@ -96,6 +97,14 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
return (struct inet_request_sock *)sk;
}
+static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+{
+ if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ return skb->mark;
+
+ return sk->sk_mark;
+}
+
struct inet_cork {
unsigned int flags;
__be32 addr;
diff --git a/include/net/ip.h b/include/net/ip.h
index 0a62365149e2..fc62ae0a47d2 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -154,6 +154,7 @@ struct ip_reply_arg {
/* -1 if not needed */
int bound_dev_if;
u8 tos;
+ kuid_t uid;
};
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -226,6 +227,9 @@ extern void ipfrag_init(void);
extern void ip_static_sysctl_init(void);
+#define IP4_REPLY_MARK(net, mark) \
+ ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+
static inline bool ip_is_fragment(const struct iphdr *iph)
{
return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 8d977b343647..6be6debb5361 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -136,7 +136,7 @@ extern int rt6_route_rcv(struct net_device *dev,
const struct in6_addr *gwaddr);
extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
- int oif, u32 mark);
+ int oif, u32 mark, kuid_t uid);
extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
__be32 mtu);
extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 087370ff05f1..ac1d532965a2 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -111,6 +111,9 @@ struct frag_hdr {
#define IP6_MF 0x0001
+#define IP6_REPLY_MARK(net, mark) \
+ ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
+
#include <net/sock.h>
/* sysctls */
@@ -260,6 +263,12 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len);
+
+struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+ struct sock *sk, struct flowi6 *fl6);
+
extern int ip6_ra_control(struct sock *sk, int sel);
extern int ipv6_parse_hopopts(struct sk_buff *skb);
@@ -796,8 +805,7 @@ extern int compat_ipv6_getsockopt(struct sock *sk,
extern int ip6_datagram_connect(struct sock *sk,
struct sockaddr *addr, int addr_len);
-extern int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
- int *addr_len);
+extern int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len);
extern int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
int *addr_len);
extern void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index b17697827482..b064d6dd14fb 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/sysctl.h>
+#include <net/flow.h>
#include <net/netns/core.h>
#include <net/netns/mib.h>
#include <net/netns/unix.h>
@@ -120,14 +121,6 @@ struct net {
atomic_t rt_genid;
};
-/*
- * ifindex generation is per-net namespace, and loopback is
- * always the 1st device in ns (see net_dev_init), thus any
- * loopback device should get ifindex 1
- */
-
-#define LOOPBACK_IFINDEX 1
-
#include <linux/seq_file_net.h>
/* Init's network namespace */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 03e6378d5353..4f6c72095cf6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -65,6 +65,9 @@ struct netns_ipv4 {
int sysctl_tcp_ecn;
+ int sysctl_fwmark_reflect;
+ int sysctl_tcp_fwmark_accept;
+
kgid_t sysctl_ping_group_range[2];
long sysctl_tcp_mem[3];
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 005e2c2e39a9..4b9f99e3a91c 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
int ip6_rt_mtu_expires;
int ip6_rt_min_advmss;
int icmpv6_time;
+ int fwmark_reflect;
};
struct netns_ipv6 {
diff --git a/include/net/ping.h b/include/net/ping.h
index 682b5ae9af51..2db4860e5848 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -13,6 +13,7 @@
#ifndef _PING_H
#define _PING_H
+#include <net/icmp.h>
#include <net/netns/hash.h>
/* PING_HTABLE_SIZE must be power of 2 */
@@ -28,6 +29,18 @@
*/
#define GID_T_MAX (((gid_t)~0U) >> 1)
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+struct pingv6_ops {
+ int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len);
+ int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb);
+ int (*icmpv6_err_convert)(u8 type, u8 code, int *err);
+ void (*ipv6_icmp_error)(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload);
+ int (*ipv6_chk_addr)(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict);
+};
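
A minimal sketch of how the shared ping code might dispatch through this table; the surrounding function is hypothetical, and ip_recv_error() is assumed to have the three-argument form used in this tree.

static int example_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	if (sk->sk_family == AF_INET6) {
#if IS_ENABLED(CONFIG_IPV6)
		/* resolved at runtime so IPv6 can be a module */
		return pingv6_ops.ipv6_recv_error(sk, msg, len);
#else
		return -EAFNOSUPPORT;
#endif
	}
	return ip_recv_error(sk, msg, len);
}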
+
struct ping_table {
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
rwlock_t lock;
@@ -39,10 +52,39 @@ struct ping_iter_state {
};
extern struct proto ping_prot;
+extern struct ping_table ping_table;
+#if IS_ENABLED(CONFIG_IPV6)
+extern struct pingv6_ops pingv6_ops;
+#endif
+struct pingfakehdr {
+ struct icmphdr icmph;
+ struct iovec *iov;
+ sa_family_t family;
+ __wsum wcheck;
+};
-extern void ping_rcv(struct sk_buff *);
-extern void ping_err(struct sk_buff *, u32 info);
+int ping_get_port(struct sock *sk, unsigned short ident);
+void ping_hash(struct sock *sk);
+void ping_unhash(struct sock *sk);
+
+int ping_init_sock(struct sock *sk);
+void ping_close(struct sock *sk, long timeout);
+int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+void ping_err(struct sk_buff *skb, int offset, u32 info);
+int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd,
+ struct sk_buff *);
+
+int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len);
+int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
+ void *user_icmph, size_t icmph_len);
+int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len);
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len);
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+void ping_rcv(struct sk_buff *skb);
#ifdef CONFIG_PROC_FS
extern int __init ping_proc_init(void);
@@ -50,6 +92,7 @@ extern void ping_proc_exit(void);
#endif
void __init ping_init(void);
-
+int __init pingv6_init(void);
+void pingv6_exit(void);
#endif /* _PING_H */
diff --git a/include/net/route.h b/include/net/route.h
index 2ea40c1b5e00..647bb2adbffd 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -142,7 +142,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
- daddr, saddr, dport, sport);
+ daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0);
if (sk)
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
return ip_route_output_flow(net, fl4, sk);
@@ -253,7 +253,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
flow_flags |= FLOWI_FLAG_CAN_SLEEP;
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
- protocol, flow_flags, dst, src, dport, sport);
+ protocol, flow_flags, dst, src, dport, sport,
+ sock_i_uid(sk));
}
static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 29a1a63cd303..e0fc21357582 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -288,6 +288,7 @@ extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern int sysctl_tcp_min_tso_segs;
+extern int sysctl_tcp_default_init_rwnd;
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
@@ -1553,6 +1554,8 @@ extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
extern int tcp_gro_complete(struct sk_buff *skb);
extern int tcp4_gro_complete(struct sk_buff *skb);
+extern int tcp_nuke_addr(struct net *net, struct sockaddr *addr);
+
#ifdef CONFIG_PROC_FS
extern int tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 938b7fd11204..eb40e71ff2ee 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -11,6 +11,7 @@ extern struct proto rawv6_prot;
extern struct proto udpv6_prot;
extern struct proto udplitev6_prot;
extern struct proto tcpv6_prot;
+extern struct proto pingv6_prot;
struct flowi6;
@@ -21,6 +22,8 @@ extern int ipv6_frag_init(void);
extern void ipv6_frag_exit(void);
/* transport protocols */
+extern int pingv6_init(void);
+extern void pingv6_exit(void);
extern int rawv6_init(void);
extern void rawv6_exit(void);
extern int udpv6_init(void);
diff --git a/include/trace/events/cpufreq_interactive.h b/include/trace/events/cpufreq_interactive.h
new file mode 100644
index 000000000000..951e6ca12da8
--- /dev/null
+++ b/include/trace/events/cpufreq_interactive.h
@@ -0,0 +1,112 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpufreq_interactive
+
+#if !defined(_TRACE_CPUFREQ_INTERACTIVE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUFREQ_INTERACTIVE_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(set,
+ TP_PROTO(u32 cpu_id, unsigned long targfreq,
+ unsigned long actualfreq),
+ TP_ARGS(cpu_id, targfreq, actualfreq),
+
+ TP_STRUCT__entry(
+ __field( u32, cpu_id )
+ __field(unsigned long, targfreq )
+ __field(unsigned long, actualfreq )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_id = (u32) cpu_id;
+ __entry->targfreq = targfreq;
+ __entry->actualfreq = actualfreq;
+ ),
+
+ TP_printk("cpu=%u targ=%lu actual=%lu",
+ __entry->cpu_id, __entry->targfreq,
+ __entry->actualfreq)
+);
+
+DEFINE_EVENT(set, cpufreq_interactive_setspeed,
+ TP_PROTO(u32 cpu_id, unsigned long targfreq,
+ unsigned long actualfreq),
+ TP_ARGS(cpu_id, targfreq, actualfreq)
+);
+
+DECLARE_EVENT_CLASS(loadeval,
+ TP_PROTO(unsigned long cpu_id, unsigned long load,
+ unsigned long curtarg, unsigned long curactual,
+ unsigned long newtarg),
+ TP_ARGS(cpu_id, load, curtarg, curactual, newtarg),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, cpu_id )
+ __field(unsigned long, load )
+ __field(unsigned long, curtarg )
+ __field(unsigned long, curactual )
+ __field(unsigned long, newtarg )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_id = cpu_id;
+ __entry->load = load;
+ __entry->curtarg = curtarg;
+ __entry->curactual = curactual;
+ __entry->newtarg = newtarg;
+ ),
+
+ TP_printk("cpu=%lu load=%lu cur=%lu actual=%lu targ=%lu",
+ __entry->cpu_id, __entry->load, __entry->curtarg,
+ __entry->curactual, __entry->newtarg)
+);
+
+DEFINE_EVENT(loadeval, cpufreq_interactive_target,
+ TP_PROTO(unsigned long cpu_id, unsigned long load,
+ unsigned long curtarg, unsigned long curactual,
+ unsigned long newtarg),
+ TP_ARGS(cpu_id, load, curtarg, curactual, newtarg)
+);
+
+DEFINE_EVENT(loadeval, cpufreq_interactive_already,
+ TP_PROTO(unsigned long cpu_id, unsigned long load,
+ unsigned long curtarg, unsigned long curactual,
+ unsigned long newtarg),
+ TP_ARGS(cpu_id, load, curtarg, curactual, newtarg)
+);
+
+DEFINE_EVENT(loadeval, cpufreq_interactive_notyet,
+ TP_PROTO(unsigned long cpu_id, unsigned long load,
+ unsigned long curtarg, unsigned long curactual,
+ unsigned long newtarg),
+ TP_ARGS(cpu_id, load, curtarg, curactual, newtarg)
+);
+
+TRACE_EVENT(cpufreq_interactive_boost,
+ TP_PROTO(const char *s),
+ TP_ARGS(s),
+ TP_STRUCT__entry(
+ __string(s, s)
+ ),
+ TP_fast_assign(
+ __assign_str(s, s);
+ ),
+ TP_printk("%s", __get_str(s))
+);
+
+TRACE_EVENT(cpufreq_interactive_unboost,
+ TP_PROTO(const char *s),
+ TP_ARGS(s),
+ TP_STRUCT__entry(
+ __string(s, s)
+ ),
+ TP_fast_assign(
+ __assign_str(s, s);
+ ),
+ TP_printk("%s", __get_str(s))
+);
+
+#endif /* _TRACE_CPUFREQ_INTERACTIVE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/gpu.h b/include/trace/events/gpu.h
new file mode 100644
index 000000000000..7e15cdfafe5a
--- /dev/null
+++ b/include/trace/events/gpu.h
@@ -0,0 +1,143 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu
+
+#if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GPU_H
+
+#include <linux/tracepoint.h>
+#include <linux/time.h>
+
+#define show_secs_from_ns(ns) \
+ ({ \
+ u64 t = ns + (NSEC_PER_USEC / 2); \
+ do_div(t, NSEC_PER_SEC); \
+ t; \
+ })
+
+#define show_usecs_from_ns(ns) \
+ ({ \
+ u64 t = ns + (NSEC_PER_USEC / 2) ; \
+ u32 rem; \
+ do_div(t, NSEC_PER_USEC); \
+ rem = do_div(t, USEC_PER_SEC); \
+ })
+
+/*
+ * The gpu_sched_switch event indicates that a switch from one GPU context to
+ * another occurred on one of the GPU hardware blocks.
+ *
+ * The gpu_name argument identifies the GPU hardware block. Each independently
+ * scheduled GPU hardware block should have a different name. This may be used
+ * in different ways for different GPUs. For example, if a GPU includes
+ * multiple processing cores it may use names "GPU 0", "GPU 1", etc. If a GPU
+ * includes a separately scheduled 2D and 3D hardware block, it might use the
+ * names "2D" and "3D".
+ *
+ * The timestamp argument is the timestamp at which the switch occurred on the
+ * GPU. These timestamps are in units of nanoseconds and must use
+ * approximately the same time base as sched_clock, though they need not come
+ * any CPU clock. The timestamps for a single hardware block must be
+ * monotonically nondecreasing. This means that if a variable compensation
+ * offset is used to translate from some other clock to the sched_clock, then
+ * care must be taken when increasing that offset, and doing so may result in
+ * multiple events with the same timestamp.
+ *
+ * The next_ctx_id argument identifies the next context that was running on
+ * the GPU hardware block. A value of 0 indicates that the hardware block
+ * will be idle.
+ *
+ * The next_prio argument indicates the priority of the next context at the
+ * time of the event. The exact numeric values may mean different things for
+ * different GPUs, but they should follow the rule that lower values indicate a
+ * higher priority.
+ *
+ * The next_job_id argument identifies the batch of work that the GPU will be
+ * working on. This should correspond to a job_id that was previously traced
+ * as a gpu_job_enqueue event when the batch of work was created.
+ */
+TRACE_EVENT(gpu_sched_switch,
+
+ TP_PROTO(const char *gpu_name, u64 timestamp,
+ u32 next_ctx_id, s32 next_prio, u32 next_job_id),
+
+ TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id),
+
+ TP_STRUCT__entry(
+ __string( gpu_name, gpu_name )
+ __field( u64, timestamp )
+ __field( u32, next_ctx_id )
+ __field( s32, next_prio )
+ __field( u32, next_job_id )
+ ),
+
+ TP_fast_assign(
+ __assign_str(gpu_name, gpu_name);
+ __entry->timestamp = timestamp;
+ __entry->next_ctx_id = next_ctx_id;
+ __entry->next_prio = next_prio;
+ __entry->next_job_id = next_job_id;
+ ),
+
+ TP_printk("gpu_name=%s ts=%llu.%06lu next_ctx_id=%lu next_prio=%ld "
+ "next_job_id=%lu",
+ __get_str(gpu_name),
+ (unsigned long long)show_secs_from_ns(__entry->timestamp),
+ (unsigned long)show_usecs_from_ns(__entry->timestamp),
+ (unsigned long)__entry->next_ctx_id,
+ (long)__entry->next_prio,
+ (unsigned long)__entry->next_job_id)
+);
+
+/*
+ * The gpu_job_enqueue event indicates that a batch of work has been queued up
+ * to be processed by the GPU. This event is not intended to indicate that
+ * the batch of work has been submitted to the GPU hardware, but rather that
+ * it has been submitted to the GPU kernel driver.
+ *
+ * This event should be traced on the thread that initiated the work being
+ * queued. For example, if a batch of work is submitted to the kernel by a
+ * userland thread, the event should be traced on that thread.
+ *
+ * The ctx_id field identifies the GPU context in which the batch of work
+ * being queued is to be run.
+ *
+ * The job_id field identifies the batch of work being queued within the given
+ * GPU context. The first batch of work submitted for a given GPU context
+ * should have a job_id of 0, and each subsequent batch of work should
+ * increment the job_id by 1.
+ *
+ * The type field identifies the type of the job being enqueued. The job
+ * types may be different for different GPU hardware. For example, a GPU may
+ * differentiate between "2D", "3D", and "compute" jobs.
+ */
+TRACE_EVENT(gpu_job_enqueue,
+
+ TP_PROTO(u32 ctx_id, u32 job_id, const char *type),
+
+ TP_ARGS(ctx_id, job_id, type),
+
+ TP_STRUCT__entry(
+ __field( u32, ctx_id )
+ __field( u32, job_id )
+ __string( type, type )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx_id = ctx_id;
+ __entry->job_id = job_id;
+ __assign_str(type, type);
+ ),
+
+ TP_printk("ctx_id=%lu job_id=%lu type=%s",
+ (unsigned long)__entry->ctx_id,
+ (unsigned long)__entry->job_id,
+ __get_str(type))
+);
+
+#undef show_secs_from_ns
+#undef show_usecs_from_ns
+
+#endif /* _TRACE_GPU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
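
A hedged sketch of a driver emitting both events as the comments above describe; all example_* names are invented, and the timestamp is assumed to already be in sched_clock-compatible nanoseconds.

#define CREATE_TRACE_POINTS
#include <trace/events/gpu.h>

/* userspace thread submits a batch of work to the kernel driver */
static void example_submit_job(u32 ctx_id, u32 job_id)
{
	trace_gpu_job_enqueue(ctx_id, job_id, "3D");
	/* ... hand the job to the hardware queue ... */
}

/* scheduler/interrupt path: context switch on the "3D" hardware block */
static void example_on_gpu_switch(u64 gpu_timestamp_ns,
				  u32 next_ctx_id, u32 next_job_id)
{
	/* next_ctx_id == 0 would mean the block is going idle */
	trace_gpu_sched_switch("3D", gpu_timestamp_ns,
			       next_ctx_id, 0 /* priority */, next_job_id);
}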
diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h
new file mode 100644
index 000000000000..82b368dbcefc
--- /dev/null
+++ b/include/trace/events/mmc.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmc
+
+#if !defined(_TRACE_MMC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMC_H
+
+#include <linux/tracepoint.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/core.h>
+
+/*
+ * Unconditional logging of mmc block erase operations,
+ * including cmd, address, size
+ */
+DECLARE_EVENT_CLASS(mmc_blk_erase_class,
+ TP_PROTO(unsigned int cmd, unsigned int addr, unsigned int size),
+ TP_ARGS(cmd, addr, size),
+ TP_STRUCT__entry(
+ __field(unsigned int, cmd)
+ __field(unsigned int, addr)
+ __field(unsigned int, size)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->addr = addr;
+ __entry->size = size;
+ ),
+ TP_printk("cmd=%u,addr=0x%08x,size=0x%08x",
+ __entry->cmd, __entry->addr, __entry->size)
+);
+
+DEFINE_EVENT(mmc_blk_erase_class, mmc_blk_erase_start,
+ TP_PROTO(unsigned int cmd, unsigned int addr, unsigned int size),
+ TP_ARGS(cmd, addr, size));
+
+DEFINE_EVENT(mmc_blk_erase_class, mmc_blk_erase_end,
+ TP_PROTO(unsigned int cmd, unsigned int addr, unsigned int size),
+ TP_ARGS(cmd, addr, size));
+
+/*
+ * Logging of start of read or write mmc block operation,
+ * including cmd, address, size
+ */
+DECLARE_EVENT_CLASS(mmc_blk_rw_class,
+ TP_PROTO(unsigned int cmd, unsigned int addr, struct mmc_data *data),
+ TP_ARGS(cmd, addr, data),
+ TP_STRUCT__entry(
+ __field(unsigned int, cmd)
+ __field(unsigned int, addr)
+ __field(unsigned int, size)
+ ),
+ TP_fast_assign(
+ __entry->cmd = cmd;
+ __entry->addr = addr;
+ __entry->size = data->blocks;
+ ),
+ TP_printk("cmd=%u,addr=0x%08x,size=0x%08x",
+ __entry->cmd, __entry->addr, __entry->size)
+);
+
+DEFINE_EVENT_CONDITION(mmc_blk_rw_class, mmc_blk_rw_start,
+ TP_PROTO(unsigned int cmd, unsigned int addr, struct mmc_data *data),
+ TP_ARGS(cmd, addr, data),
+ TP_CONDITION(((cmd == MMC_READ_MULTIPLE_BLOCK) ||
+ (cmd == MMC_WRITE_MULTIPLE_BLOCK)) &&
+ data));
+
+DEFINE_EVENT_CONDITION(mmc_blk_rw_class, mmc_blk_rw_end,
+ TP_PROTO(unsigned int cmd, unsigned int addr, struct mmc_data *data),
+ TP_ARGS(cmd, addr, data),
+ TP_CONDITION(((cmd == MMC_READ_MULTIPLE_BLOCK) ||
+ (cmd == MMC_WRITE_MULTIPLE_BLOCK)) &&
+ data));
+#endif /* _TRACE_MMC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
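
The conditions above fire only for multi-block reads and writes with data attached; a hedged sketch of the call sites in a block/host driver, where the mrq field layout follows struct mmc_request and struct mmc_command as assumed here.

static void example_issue_rw(struct mmc_host *host, struct mmc_request *mrq)
{
	trace_mmc_blk_rw_start(mrq->cmd->opcode, mrq->cmd->arg, mrq->data);
	/* ... start the request and wait for completion ... */
	trace_mmc_blk_rw_end(mrq->cmd->opcode, mrq->cmd->arg, mrq->data);
}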
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 427acab5d69a..503fc2b870b9 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -146,6 +146,25 @@ DEFINE_EVENT(clock, clock_set_rate,
TP_ARGS(name, state, cpu_id)
);
+TRACE_EVENT(clock_set_parent,
+
+ TP_PROTO(const char *name, const char *parent_name),
+
+ TP_ARGS(name, parent_name),
+
+ TP_STRUCT__entry(
+ __string( name, name )
+ __string( parent_name, parent_name )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, name);
+ __assign_str(parent_name, parent_name);
+ ),
+
+ TP_printk("%s parent=%s", __get_str(name), __get_str(parent_name))
+);
+
/*
* The power domain events are used for power domains transitions
*/
diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
new file mode 100644
index 000000000000..ffa870c0d2d2
--- /dev/null
+++ b/include/trace/events/thermal.h
@@ -0,0 +1,192 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM thermal
+
+#if !defined(_TRACE_THERMAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_THERMAL_H
+
+#include <linux/thermal.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(thermal_temperature,
+
+ TP_PROTO(struct thermal_zone_device *tz),
+
+ TP_ARGS(tz),
+
+ TP_STRUCT__entry(
+ __string(thermal_zone, tz->type)
+ __field(int, id)
+ __field(int, temp_prev)
+ __field(int, temp)
+ ),
+
+ TP_fast_assign(
+ __assign_str(thermal_zone, tz->type);
+ __entry->id = tz->id;
+ __entry->temp_prev = tz->last_temperature;
+ __entry->temp = tz->temperature;
+ ),
+
+ TP_printk("thermal_zone=%s id=%d temp_prev=%d temp=%d",
+ __get_str(thermal_zone), __entry->id, __entry->temp_prev,
+ __entry->temp)
+);
+
+TRACE_EVENT(cdev_update,
+
+ TP_PROTO(struct thermal_cooling_device *cdev, unsigned long target),
+
+ TP_ARGS(cdev, target),
+
+ TP_STRUCT__entry(
+ __string(type, cdev->type)
+ __field(unsigned long, target)
+ ),
+
+ TP_fast_assign(
+ __assign_str(type, cdev->type);
+ __entry->target = target;
+ ),
+
+ TP_printk("type=%s target=%lu", __get_str(type), __entry->target)
+);
+
+TRACE_EVENT(thermal_zone_trip,
+
+ TP_PROTO(struct thermal_zone_device *tz, int trip,
+ enum thermal_trip_type trip_type),
+
+ TP_ARGS(tz, trip, trip_type),
+
+ TP_STRUCT__entry(
+ __string(thermal_zone, tz->type)
+ __field(int, id)
+ __field(int, trip)
+ __field(enum thermal_trip_type, trip_type)
+ ),
+
+ TP_fast_assign(
+ __assign_str(thermal_zone, tz->type);
+ __entry->id = tz->id;
+ __entry->trip = trip;
+ __entry->trip_type = trip_type;
+ ),
+
+ TP_printk("thermal_zone=%s id=%d trip=%d trip_type=%d",
+ __get_str(thermal_zone), __entry->id, __entry->trip,
+ __entry->trip_type)
+);
+
+TRACE_EVENT(thermal_power_cpu_get_power,
+ TP_PROTO(const struct cpumask *cpus, unsigned long freq, u32 *load,
+ size_t load_len, u32 dynamic_power, u32 static_power),
+
+ TP_ARGS(cpus, freq, load, load_len, dynamic_power, static_power),
+
+ TP_STRUCT__entry(
+ __bitmask(cpumask, num_possible_cpus())
+ __field(unsigned long, freq )
+ __dynamic_array(u32, load, load_len)
+ __field(size_t, load_len )
+ __field(u32, dynamic_power )
+ __field(u32, static_power )
+ ),
+
+ TP_fast_assign(
+ __assign_bitmask(cpumask, cpumask_bits(cpus),
+ num_possible_cpus());
+ __entry->freq = freq;
+ memcpy(__get_dynamic_array(load), load,
+ load_len * sizeof(*load));
+ __entry->load_len = load_len;
+ __entry->dynamic_power = dynamic_power;
+ __entry->static_power = static_power;
+ ),
+
+ TP_printk("cpus=%s freq=%lu load={%s} dynamic_power=%d static_power=%d",
+ __get_bitmask(cpumask), __entry->freq,
+ __print_array(__get_dynamic_array(load), __entry->load_len, 4),
+ __entry->dynamic_power, __entry->static_power)
+);
+
+TRACE_EVENT(thermal_power_cpu_limit,
+ TP_PROTO(const struct cpumask *cpus, unsigned int freq,
+ unsigned long cdev_state, u32 power),
+
+ TP_ARGS(cpus, freq, cdev_state, power),
+
+ TP_STRUCT__entry(
+ __bitmask(cpumask, num_possible_cpus())
+ __field(unsigned int, freq )
+ __field(unsigned long, cdev_state)
+ __field(u32, power )
+ ),
+
+ TP_fast_assign(
+ __assign_bitmask(cpumask, cpumask_bits(cpus),
+ num_possible_cpus());
+ __entry->freq = freq;
+ __entry->cdev_state = cdev_state;
+ __entry->power = power;
+ ),
+
+ TP_printk("cpus=%s freq=%u cdev_state=%lu power=%u",
+ __get_bitmask(cpumask), __entry->freq, __entry->cdev_state,
+ __entry->power)
+);
+
+TRACE_EVENT(thermal_power_devfreq_get_power,
+ TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
+ u32 load, u32 dynamic_power, u32 static_power),
+
+ TP_ARGS(cdev, freq, load, dynamic_power, static_power),
+
+ TP_STRUCT__entry(
+ __string(type, cdev->type )
+ __field(unsigned long, freq )
+ __field(u32, load )
+ __field(u32, dynamic_power )
+ __field(u32, static_power )
+ ),
+
+ TP_fast_assign(
+ __assign_str(type, cdev->type);
+ __entry->freq = freq;
+ __entry->load = load;
+ __entry->dynamic_power = dynamic_power;
+ __entry->static_power = static_power;
+ ),
+
+ TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u",
+ __get_str(type), __entry->freq,
+ __entry->load, __entry->dynamic_power, __entry->static_power)
+);
+
+TRACE_EVENT(thermal_power_devfreq_limit,
+ TP_PROTO(struct thermal_cooling_device *cdev, unsigned long freq,
+ unsigned long cdev_state, u32 power),
+
+ TP_ARGS(cdev, freq, cdev_state, power),
+
+ TP_STRUCT__entry(
+ __string(type, cdev->type)
+ __field(unsigned int, freq )
+ __field(unsigned long, cdev_state)
+ __field(u32, power )
+ ),
+
+ TP_fast_assign(
+ __assign_str(type, cdev->type);
+ __entry->freq = freq;
+ __entry->cdev_state = cdev_state;
+ __entry->power = power;
+ ),
+
+ TP_printk("type=%s freq=%u cdev_state=%lu power=%u",
+ __get_str(type), __entry->freq, __entry->cdev_state,
+ __entry->power)
+);
+#endif /* _TRACE_THERMAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h
new file mode 100644
index 000000000000..12e1321c4e0c
--- /dev/null
+++ b/include/trace/events/thermal_power_allocator.h
@@ -0,0 +1,87 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM thermal_power_allocator
+
+#if !defined(_TRACE_THERMAL_POWER_ALLOCATOR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_THERMAL_POWER_ALLOCATOR_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(thermal_power_allocator,
+ TP_PROTO(struct thermal_zone_device *tz, u32 *req_power,
+ u32 total_req_power, u32 *granted_power,
+ u32 total_granted_power, size_t num_actors,
+ u32 power_range, u32 max_allocatable_power,
+ unsigned long current_temp, s32 delta_temp),
+ TP_ARGS(tz, req_power, total_req_power, granted_power,
+ total_granted_power, num_actors, power_range,
+ max_allocatable_power, current_temp, delta_temp),
+ TP_STRUCT__entry(
+ __field(int, tz_id )
+ __dynamic_array(u32, req_power, num_actors )
+ __field(u32, total_req_power )
+ __dynamic_array(u32, granted_power, num_actors)
+ __field(u32, total_granted_power )
+ __field(size_t, num_actors )
+ __field(u32, power_range )
+ __field(u32, max_allocatable_power )
+ __field(unsigned long, current_temp )
+ __field(s32, delta_temp )
+ ),
+ TP_fast_assign(
+ __entry->tz_id = tz->id;
+ memcpy(__get_dynamic_array(req_power), req_power,
+ num_actors * sizeof(*req_power));
+ __entry->total_req_power = total_req_power;
+ memcpy(__get_dynamic_array(granted_power), granted_power,
+ num_actors * sizeof(*granted_power));
+ __entry->total_granted_power = total_granted_power;
+ __entry->num_actors = num_actors;
+ __entry->power_range = power_range;
+ __entry->max_allocatable_power = max_allocatable_power;
+ __entry->current_temp = current_temp;
+ __entry->delta_temp = delta_temp;
+ ),
+
+ TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%lu delta_temperature=%d",
+ __entry->tz_id,
+ __print_array(__get_dynamic_array(req_power),
+ __entry->num_actors, 4),
+ __entry->total_req_power,
+ __print_array(__get_dynamic_array(granted_power),
+ __entry->num_actors, 4),
+ __entry->total_granted_power, __entry->power_range,
+ __entry->max_allocatable_power, __entry->current_temp,
+ __entry->delta_temp)
+);
+
+TRACE_EVENT(thermal_power_allocator_pid,
+ TP_PROTO(struct thermal_zone_device *tz, s32 err, s32 err_integral,
+ s64 p, s64 i, s64 d, s32 output),
+ TP_ARGS(tz, err, err_integral, p, i, d, output),
+ TP_STRUCT__entry(
+ __field(int, tz_id )
+ __field(s32, err )
+ __field(s32, err_integral)
+ __field(s64, p )
+ __field(s64, i )
+ __field(s64, d )
+ __field(s32, output )
+ ),
+ TP_fast_assign(
+ __entry->tz_id = tz->id;
+ __entry->err = err;
+ __entry->err_integral = err_integral;
+ __entry->p = p;
+ __entry->i = i;
+ __entry->d = d;
+ __entry->output = output;
+ ),
+
+ TP_printk("thermal_zone_id=%d err=%d err_integral=%d p=%lld i=%lld d=%lld output=%d",
+ __entry->tz_id, __entry->err, __entry->err_integral,
+ __entry->p, __entry->i, __entry->d, __entry->output)
+);
+#endif /* _TRACE_THERMAL_POWER_ALLOCATOR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index dbb47418df81..02cbafd5df0e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -53,6 +53,9 @@
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+
#undef TP_STRUCT__entry
#define TP_STRUCT__entry(args...) args
@@ -120,6 +123,9 @@
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
struct ftrace_data_offsets_##call { \
@@ -189,6 +195,15 @@
#undef __get_str
#define __get_str(field) (char *)__get_dynamic_array(field)
+#undef __get_bitmask
+#define __get_bitmask(field) \
+ ({ \
+ void *__bitmask = __get_dynamic_array(field); \
+ unsigned int __bitmask_size; \
+ __bitmask_size = (__entry->__data_loc_##field >> 16) & 0xffff; \
+ ftrace_print_bitmask_seq(p, __bitmask, __bitmask_size); \
+ })
+
#undef __print_flags
#define __print_flags(flag, delim, flag_array...) \
({ \
@@ -221,6 +236,14 @@
#undef __print_hex
#define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len)
+#undef __print_array
+#define __print_array(array, count, el_size) \
+ ({ \
+ BUILD_BUG_ON(el_size != 1 && el_size != 2 && \
+ el_size != 4 && el_size != 8); \
+ ftrace_print_array_seq(p, array, count, el_size); \
+ })
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace enum print_line_t \
@@ -319,6 +342,9 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
static int notrace __init \
@@ -368,6 +394,29 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
#define __string(item, src) __dynamic_array(char, item, \
strlen((src) ? (const char *)(src) : "(null)") + 1)
+/*
+ * __bitmask_size_in_bytes_raw is the number of bytes needed to hold
+ * nr_bits bits (for example, num_possible_cpus() bits for a cpumask).
+ */
+#define __bitmask_size_in_bytes_raw(nr_bits) \
+ (((nr_bits) + 7) / 8)
+
+#define __bitmask_size_in_longs(nr_bits) \
+ ((__bitmask_size_in_bytes_raw(nr_bits) + \
+ ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8))
+
+/*
+ * __bitmask_size_in_bytes is the number of bytes needed to hold
+ * nr_bits bits, padded out to the nearest long. This is what
+ * is saved in the buffer, just to be consistent.
+ */
+#define __bitmask_size_in_bytes(nr_bits) \
+ (__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8))
+
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \
+ __bitmask_size_in_longs(nr_bits))
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static inline notrace int ftrace_get_offsets_##call( \
@@ -492,12 +541,22 @@ static inline notrace int ftrace_get_offsets_##call( \
__entry->__data_loc_##item = __data_offsets.item;
#undef __string
-#define __string(item, src) __dynamic_array(char, item, -1) \
+#define __string(item, src) __dynamic_array(char, item, -1)
#undef __assign_str
#define __assign_str(dst, src) \
strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __assign_bitmask
+#define __assign_bitmask(dst, src, nr_bits) \
+ memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
#undef TP_fast_assign
#define TP_fast_assign(args...) args
@@ -570,6 +629,8 @@ static inline void ftrace_test_probe_##call(void) \
#undef __print_hex
#undef __get_dynamic_array
#undef __get_str
+#undef __get_bitmask
+#undef __print_array
#undef TP_printk
#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args)
@@ -629,6 +690,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
#undef __get_str
#define __get_str(field) (char *)__get_dynamic_array(field)
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
#undef __perf_addr
#define __perf_addr(a) __addr = (a)
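
As a sketch of intended use (hypothetical event, not part of this patch): the __bitmask()/__assign_bitmask()/__get_bitmask() triplet added above is used from an events header the same way __string()/__assign_str()/__get_str() are, typically to record a cpumask. nr_cpumask_bits and cpumask_bits() below are the standard cpumask accessors.

/* Hypothetical tracepoint definition illustrating the new __bitmask helpers. */
TRACE_EVENT(example_cpumask,

	TP_PROTO(const struct cpumask *mask),

	TP_ARGS(mask),

	TP_STRUCT__entry(
		__bitmask(cpus, nr_cpumask_bits)
	),

	TP_fast_assign(
		__assign_bitmask(cpus, cpumask_bits(mask), nr_cpumask_bits);
	),

	TP_printk("cpus=%s", __get_bitmask(cpus))
);
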
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 0cc74c4403e4..b422ad5d238b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
__SYSCALL(__NR_finit_module, sys_finit_module)
+/* Backporting seccomp, skip a few ...
+ * #define __NR_sched_setattr 274
+__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
+ * #define __NR_sched_getattr 275
+__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
+ * #define __NR_renameat2 276
+__SYSCALL(__NR_renameat2, sys_renameat2)
+ */
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
#undef __NR_syscalls
-#define __NR_syscalls 274
+#define __NR_syscalls 278
/*
* All syscalls below here should go away really,
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 405887bec8b3..c5268778832e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
+header-y += android/
header-y += byteorder/
header-y += can/
header-y += caif/
diff --git a/include/uapi/linux/android/Kbuild b/include/uapi/linux/android/Kbuild
new file mode 100644
index 000000000000..ca011eec252a
--- /dev/null
+++ b/include/uapi/linux/android/Kbuild
@@ -0,0 +1,2 @@
+# UAPI Header export list
+header-y += binder.h
diff --git a/drivers/staging/android/binder.h b/include/uapi/linux/android/binder.h
index dbe81ceca1bd..c653800b3a1a 100644
--- a/drivers/staging/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -17,8 +17,8 @@
*
*/
-#ifndef _LINUX_BINDER_H
-#define _LINUX_BINDER_H
+#ifndef _UAPI_LINUX_BINDER_H
+#define _UAPI_LINUX_BINDER_H
#include <linux/ioctl.h>
@@ -39,6 +39,14 @@ enum {
FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100,
};
+#ifdef BINDER_IPC_32BIT
+typedef __u32 binder_size_t;
+typedef __u32 binder_uintptr_t;
+#else
+typedef __u64 binder_size_t;
+typedef __u64 binder_uintptr_t;
+#endif
+
/*
* This is the flattened representation of a Binder object for transfer
* between processes. The 'offsets' supplied as part of a binder transaction
@@ -48,17 +56,17 @@ enum {
*/
struct flat_binder_object {
/* 8 bytes for large_flat_header. */
- unsigned long type;
- unsigned long flags;
+ __u32 type;
+ __u32 flags;
/* 8 bytes of data. */
union {
- void __user *binder; /* local object */
- signed long handle; /* remote object */
+ binder_uintptr_t binder; /* local object */
+ __u32 handle; /* remote object */
};
/* extra data associated with local object */
- void __user *cookie;
+ binder_uintptr_t cookie;
};
/*
@@ -67,26 +75,30 @@ struct flat_binder_object {
*/
struct binder_write_read {
- signed long write_size; /* bytes to write */
- signed long write_consumed; /* bytes consumed by driver */
- unsigned long write_buffer;
- signed long read_size; /* bytes to read */
- signed long read_consumed; /* bytes consumed by driver */
- unsigned long read_buffer;
+ binder_size_t write_size; /* bytes to write */
+ binder_size_t write_consumed; /* bytes consumed by driver */
+ binder_uintptr_t write_buffer;
+ binder_size_t read_size; /* bytes to read */
+ binder_size_t read_consumed; /* bytes consumed by driver */
+ binder_uintptr_t read_buffer;
};
/* Use with BINDER_VERSION, driver fills in fields. */
struct binder_version {
/* driver protocol version -- increment with incompatible change */
- signed long protocol_version;
+ __s32 protocol_version;
};
/* This is the current protocol version. */
+#ifdef BINDER_IPC_32BIT
#define BINDER_CURRENT_PROTOCOL_VERSION 7
+#else
+#define BINDER_CURRENT_PROTOCOL_VERSION 8
+#endif
#define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read)
#define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64)
-#define BINDER_SET_MAX_THREADS _IOW('b', 5, size_t)
+#define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32)
#define BINDER_SET_IDLE_PRIORITY _IOW('b', 6, __s32)
#define BINDER_SET_CONTEXT_MGR _IOW('b', 7, __s32)
#define BINDER_THREAD_EXIT _IOW('b', 8, __s32)
@@ -119,18 +131,20 @@ struct binder_transaction_data {
* identifying the target and contents of the transaction.
*/
union {
- size_t handle; /* target descriptor of command transaction */
- void *ptr; /* target descriptor of return transaction */
+ /* target descriptor of command transaction */
+ __u32 handle;
+ /* target descriptor of return transaction */
+ binder_uintptr_t ptr;
} target;
- void *cookie; /* target object cookie */
- unsigned int code; /* transaction command */
+ binder_uintptr_t cookie; /* target object cookie */
+ __u32 code; /* transaction command */
/* General information about the transaction. */
- unsigned int flags;
+ __u32 flags;
pid_t sender_pid;
uid_t sender_euid;
- size_t data_size; /* number of bytes of data */
- size_t offsets_size; /* number of bytes of offsets */
+ binder_size_t data_size; /* number of bytes of data */
+ binder_size_t offsets_size; /* number of bytes of offsets */
/* If this transaction is inline, the data immediately
* follows here; otherwise, it ends with a pointer to
@@ -139,32 +153,37 @@ struct binder_transaction_data {
union {
struct {
/* transaction data */
- const void __user *buffer;
+ binder_uintptr_t buffer;
/* offsets from buffer to flat_binder_object structs */
- const void __user *offsets;
+ binder_uintptr_t offsets;
} ptr;
- uint8_t buf[8];
+ __u8 buf[8];
} data;
};
struct binder_ptr_cookie {
- void *ptr;
- void *cookie;
+ binder_uintptr_t ptr;
+ binder_uintptr_t cookie;
};
+struct binder_handle_cookie {
+ __u32 handle;
+ binder_uintptr_t cookie;
+} __packed;
+
struct binder_pri_desc {
- int priority;
- int desc;
+ __s32 priority;
+ __u32 desc;
};
struct binder_pri_ptr_cookie {
- int priority;
- void *ptr;
- void *cookie;
+ __s32 priority;
+ binder_uintptr_t ptr;
+ binder_uintptr_t cookie;
};
enum binder_driver_return_protocol {
- BR_ERROR = _IOR('r', 0, int),
+ BR_ERROR = _IOR('r', 0, __s32),
/*
* int: error code
*/
@@ -178,7 +197,7 @@ enum binder_driver_return_protocol {
* binder_transaction_data: the received command.
*/
- BR_ACQUIRE_RESULT = _IOR('r', 4, int),
+ BR_ACQUIRE_RESULT = _IOR('r', 4, __s32),
/*
* not currently supported
* int: 0 if the last bcATTEMPT_ACQUIRE was not successful.
@@ -235,11 +254,11 @@ enum binder_driver_return_protocol {
* stop threadpool thread
*/
- BR_DEAD_BINDER = _IOR('r', 15, void *),
+ BR_DEAD_BINDER = _IOR('r', 15, binder_uintptr_t),
/*
* void *: cookie
*/
- BR_CLEAR_DEATH_NOTIFICATION_DONE = _IOR('r', 16, void *),
+ BR_CLEAR_DEATH_NOTIFICATION_DONE = _IOR('r', 16, binder_uintptr_t),
/*
* void *: cookie
*/
@@ -258,22 +277,22 @@ enum binder_driver_command_protocol {
* binder_transaction_data: the sent command.
*/
- BC_ACQUIRE_RESULT = _IOW('c', 2, int),
+ BC_ACQUIRE_RESULT = _IOW('c', 2, __s32),
/*
* not currently supported
* int: 0 if the last BR_ATTEMPT_ACQUIRE was not successful.
* Else you have acquired a primary reference on the object.
*/
- BC_FREE_BUFFER = _IOW('c', 3, int),
+ BC_FREE_BUFFER = _IOW('c', 3, binder_uintptr_t),
/*
* void *: ptr to transaction data received on a read
*/
- BC_INCREFS = _IOW('c', 4, int),
- BC_ACQUIRE = _IOW('c', 5, int),
- BC_RELEASE = _IOW('c', 6, int),
- BC_DECREFS = _IOW('c', 7, int),
+ BC_INCREFS = _IOW('c', 4, __u32),
+ BC_ACQUIRE = _IOW('c', 5, __u32),
+ BC_RELEASE = _IOW('c', 6, __u32),
+ BC_DECREFS = _IOW('c', 7, __u32),
/*
* int: descriptor
*/
@@ -308,23 +327,23 @@ enum binder_driver_command_protocol {
* of looping threads it has available.
*/
- BC_REQUEST_DEATH_NOTIFICATION = _IOW('c', 14, struct binder_ptr_cookie),
+ BC_REQUEST_DEATH_NOTIFICATION = _IOW('c', 14, struct binder_handle_cookie),
/*
- * void *: ptr to binder
+ * int: handle
* void *: cookie
*/
- BC_CLEAR_DEATH_NOTIFICATION = _IOW('c', 15, struct binder_ptr_cookie),
+ BC_CLEAR_DEATH_NOTIFICATION = _IOW('c', 15, struct binder_handle_cookie),
/*
- * void *: ptr to binder
+ * int: handle
* void *: cookie
*/
- BC_DEAD_BINDER_DONE = _IOW('c', 16, void *),
+ BC_DEAD_BINDER_DONE = _IOW('c', 16, binder_uintptr_t),
/*
* void *: cookie
*/
};
-#endif /* _LINUX_BINDER_H */
+#endif /* _UAPI_LINUX_BINDER_H */
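
With the fields above switched to binder_size_t/binder_uintptr_t, the ioctl layout no longer depends on the userspace word size. A minimal, hypothetical sketch of a caller filling binder_write_read with the fixed-width types (error handling omitted, not part of the patch):

/* Hypothetical userspace snippet: enter the binder looper using the
 * relocated uapi header. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/android/binder.h>

static int binder_enter_looper(int binder_fd)
{
	uint32_t cmd = BC_ENTER_LOOPER;
	struct binder_write_read bwr;

	memset(&bwr, 0, sizeof(bwr));
	bwr.write_buffer = (binder_uintptr_t)(uintptr_t)&cmd;
	bwr.write_size = sizeof(cmd);		/* binder_size_t, not size_t */

	return ioctl(binder_fd, BINDER_WRITE_READ, &bwr);
}
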
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 75cef3fd97ad..ce8750f8788a 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -324,6 +324,8 @@ enum {
/* distinguish syscall tables */
#define __AUDIT_ARCH_64BIT 0x80000000
#define __AUDIT_ARCH_LE 0x40000000
+
+#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_ALPHA (EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE)
#define AUDIT_ARCH_ARMEB (EM_ARM)
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 2c267bcbb85c..bc81fb2e1f0e 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -61,5 +61,16 @@ struct epoll_event {
__u64 data;
} EPOLL_PACKED;
-
+#ifdef CONFIG_PM_SLEEP
+static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
+{
+ if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
+ epev->events &= ~EPOLLWAKEUP;
+}
+#else
+static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
+{
+ epev->events &= ~EPOLLWAKEUP;
+}
+#endif
#endif /* _UAPI_LINUX_EVENTPOLL_H */
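
ep_take_care_of_epollwakeup() is called on the kernel's epoll_ctl() path so that EPOLLWAKEUP is silently stripped when the caller lacks CAP_BLOCK_SUSPEND (or when PM_SLEEP is disabled). From userspace the flag is simply requested; a minimal sketch follows, with a fallback define matching the kernel value in case older libc headers lack it.

/* Hypothetical userspace snippet: arm an epoll watch that holds a wakeup
 * source while its events are being processed. */
#include <sys/epoll.h>

#ifndef EPOLLWAKEUP
#define EPOLLWAKEUP (1u << 29)	/* matches the kernel definition */
#endif

static int watch_fd_with_wakeup(int epfd, int fd)
{
	struct epoll_event ev = {
		.events = EPOLLIN | EPOLLWAKEUP,
		.data.fd = fd,
	};

	/* If CAP_BLOCK_SUSPEND is missing, the kernel clears EPOLLWAKEUP. */
	return epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
}
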
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 51da65b68b85..9dcdb6251cb8 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -49,6 +49,8 @@ enum {
FRA_TABLE, /* Extended table id */
FRA_FWMASK, /* mask for netfilter mark */
FRA_OIFNAME,
+ FRA_UID_START, /* UID range */
+ FRA_UID_END,
__FRA_MAX
};
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a4ed56cf0eac..5014a5c472ed 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -154,6 +154,8 @@ struct inodes_stat_t {
#define FITHAW _IOWR('X', 120, int) /* Thaw */
#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
+#define FIDTRIM _IOWR('f', 128, struct fstrim_range) /* Deep discard trim */
+
#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
#define FS_IOC_GETVERSION _IOR('v', 1, long)
diff --git a/include/uapi/linux/if_pppolac.h b/include/uapi/linux/if_pppolac.h
new file mode 100644
index 000000000000..b7eb8153ef66
--- /dev/null
+++ b/include/uapi/linux/if_pppolac.h
@@ -0,0 +1,33 @@
+/* include/uapi/linux/if_pppolac.h
+ *
+ * Header for PPP on L2TP Access Concentrator / PPPoLAC Socket (RFC 2661)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_LINUX_IF_PPPOLAC_H
+#define _UAPI_LINUX_IF_PPPOLAC_H
+
+#include <linux/socket.h>
+#include <linux/types.h>
+
+struct sockaddr_pppolac {
+ sa_family_t sa_family; /* AF_PPPOX */
+ unsigned int sa_protocol; /* PX_PROTO_OLAC */
+ int udp_socket;
+ struct __attribute__((packed)) {
+ __u16 tunnel, session;
+ } local, remote;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_IF_PPPOLAC_H */
diff --git a/include/uapi/linux/if_pppopns.h b/include/uapi/linux/if_pppopns.h
new file mode 100644
index 000000000000..a392b52ea6ec
--- /dev/null
+++ b/include/uapi/linux/if_pppopns.h
@@ -0,0 +1,32 @@
+/* include/uapi/linux/if_pppopns.h
+ *
+ * Header for PPP on PPTP Network Server / PPPoPNS Socket (RFC 2637)
+ *
+ * Copyright (C) 2009 Google, Inc.
+ * Author: Chia-chi Yeh <chiachi@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_LINUX_IF_PPPOPNS_H
+#define _UAPI_LINUX_IF_PPPOPNS_H
+
+#include <linux/socket.h>
+#include <linux/types.h>
+
+struct sockaddr_pppopns {
+ sa_family_t sa_family; /* AF_PPPOX */
+ unsigned int sa_protocol; /* PX_PROTO_OPNS */
+ int tcp_socket;
+ __u16 local;
+ __u16 remote;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_IF_PPPOPNS_H */
diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h
index e36a4aecd311..87f478b0ca6f 100644
--- a/include/uapi/linux/if_pppox.h
+++ b/include/uapi/linux/if_pppox.h
@@ -23,6 +23,8 @@
#include <linux/socket.h>
#include <linux/if_ether.h>
#include <linux/if_pppol2tp.h>
+#include <linux/if_pppolac.h>
+#include <linux/if_pppopns.h>
/* For user-space programs to pick up these definitions
* which they wouldn't get otherwise without defining __KERNEL__
@@ -56,7 +58,9 @@ struct pptp_addr {
#define PX_PROTO_OE 0 /* Currently just PPPoE */
#define PX_PROTO_OL2TP 1 /* Now L2TP also */
#define PX_PROTO_PPTP 2
-#define PX_MAX_PROTO 3
+#define PX_PROTO_OLAC 3
+#define PX_PROTO_OPNS 4
+#define PX_MAX_PROTO 5
struct sockaddr_pppox {
__kernel_sa_family_t sa_family; /* address family, AF_PPPOX */
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 4649ee35b605..a86f5301c609 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -153,6 +153,9 @@ struct input_keymap_entry {
#define EVIOCGRAB _IOW('E', 0x90, int) /* Grab/Release device */
+#define EVIOCGSUSPENDBLOCK _IOR('E', 0x91, int) /* get suspend block enable */
+#define EVIOCSSUSPENDBLOCK _IOW('E', 0x91, int) /* set suspend block enable */
+
#define EVIOCSCLOCKID _IOW('E', 0xa0, int) /* Set clockid to be used for timestamps */
/*
@@ -458,10 +461,14 @@ struct input_keymap_entry {
#define KEY_VIDEO_NEXT 241 /* drive next video source */
#define KEY_VIDEO_PREV 242 /* drive previous video source */
#define KEY_BRIGHTNESS_CYCLE 243 /* brightness up, after max is min */
-#define KEY_BRIGHTNESS_ZERO 244 /* brightness off, use ambient */
+#define KEY_BRIGHTNESS_AUTO 244 /* Set Auto Brightness: manual
+ brightness control is off,
+ rely on ambient */
+#define KEY_BRIGHTNESS_ZERO KEY_BRIGHTNESS_AUTO
#define KEY_DISPLAY_OFF 245 /* display device to off state */
-#define KEY_WIMAX 246
+#define KEY_WWAN 246 /* Wireless WAN (LTE, UMTS, GSM, etc.) */
+#define KEY_WIMAX KEY_WWAN
#define KEY_RFKILL 247 /* Key that controls all radios */
#define KEY_MICMUTE 248 /* Mute / unmute the microphone */
@@ -506,11 +513,15 @@ struct input_keymap_entry {
#define BTN_DEAD 0x12f
#define BTN_GAMEPAD 0x130
-#define BTN_A 0x130
-#define BTN_B 0x131
+#define BTN_SOUTH 0x130
+#define BTN_A BTN_SOUTH
+#define BTN_EAST 0x131
+#define BTN_B BTN_EAST
#define BTN_C 0x132
-#define BTN_X 0x133
-#define BTN_Y 0x134
+#define BTN_NORTH 0x133
+#define BTN_X BTN_NORTH
+#define BTN_WEST 0x134
+#define BTN_Y BTN_WEST
#define BTN_Z 0x135
#define BTN_TL 0x136
#define BTN_TR 0x137
@@ -623,6 +634,7 @@ struct input_keymap_entry {
#define KEY_ADDRESSBOOK 0x1ad /* AL Contacts/Address Book */
#define KEY_MESSENGER 0x1ae /* AL Instant Messaging */
#define KEY_DISPLAYTOGGLE 0x1af /* Turn display (LCD) on and off */
+#define KEY_BRIGHTNESS_TOGGLE KEY_DISPLAYTOGGLE
#define KEY_SPELLCHECK 0x1b0 /* AL Spell Check */
#define KEY_LOGOFF 0x1b1 /* AL Logoff */
@@ -707,6 +719,24 @@ struct input_keymap_entry {
#define KEY_ATTENDANT_TOGGLE 0x21d /* Attendant call on or off */
#define KEY_LIGHTS_TOGGLE 0x21e /* Reading light on or off */
+#define BTN_DPAD_UP 0x220
+#define BTN_DPAD_DOWN 0x221
+#define BTN_DPAD_LEFT 0x222
+#define BTN_DPAD_RIGHT 0x223
+
+#define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */
+
+#define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */
+#define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */
+#define KEY_JOURNAL 0x242 /* AL Log/Journal/Timecard */
+#define KEY_CONTROLPANEL 0x243 /* AL Control Panel */
+#define KEY_APPSELECT 0x244 /* AL Select Task/Application */
+#define KEY_SCREENSAVER 0x245 /* AL Screen Saver */
+#define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */
+
+#define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */
+#define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */
+
#define BTN_TRIGGER_HAPPY 0x2c0
#define BTN_TRIGGER_HAPPY1 0x2c0
#define BTN_TRIGGER_HAPPY2 0x2c1
@@ -844,6 +874,7 @@ struct input_keymap_entry {
#define SW_FRONT_PROXIMITY 0x0b /* set = front proximity sensor active */
#define SW_ROTATE_LOCK 0x0c /* set = rotate locked/disabled */
#define SW_LINEIN_INSERT 0x0d /* set = inserted */
+#define SW_MUTE_DEVICE 0x0e /* set = device disabled */
#define SW_MAX 0x0f
#define SW_CNT (SW_MAX+1)
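
Among the additions above, EVIOCGSUSPENDBLOCK/EVIOCSSUSPENDBLOCK let an evdev client control whether pending input events block suspend. A minimal, hypothetical sketch (the fallback define mirrors the value added above; the device path is supplied by the caller):

/* Hypothetical userspace snippet: enable suspend blocking on an input node. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/input.h>

#ifndef EVIOCSSUSPENDBLOCK
#define EVIOCSSUSPENDBLOCK _IOW('E', 0x91, int)	/* as defined above */
#endif

static int set_suspend_block(const char *dev, int enable)
{
	int fd = open(dev, O_RDWR);
	int ret;

	if (fd < 0)
		return -1;
	ret = ioctl(fd, EVIOCSSUSPENDBLOCK, enable);
	close(fd);
	return ret;
}
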
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 4bda4cf5b0f5..e9d0f7efde3b 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -160,6 +160,8 @@ enum {
DEVCONF_ACCEPT_DAD,
DEVCONF_FORCE_TLLAO,
DEVCONF_NDISC_NOTIFY,
+ DEVCONF_ACCEPT_RA_RT_TABLE,
+ DEVCONF_USE_OPTIMISTIC,
DEVCONF_MAX
};
diff --git a/include/uapi/linux/keychord.h b/include/uapi/linux/keychord.h
new file mode 100644
index 000000000000..ea7cf4d27bbd
--- /dev/null
+++ b/include/uapi/linux/keychord.h
@@ -0,0 +1,52 @@
+/*
+ * Key chord input driver
+ *
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+*/
+
+#ifndef _UAPI_LINUX_KEYCHORD_H_
+#define _UAPI_LINUX_KEYCHORD_H_
+
+#include <linux/input.h>
+
+#define KEYCHORD_VERSION 1
+
+/*
+ * One or more input_keychord structs are written to /dev/keychord
+ * at once to specify the list of keychords to monitor.
+ * Reading /dev/keychord returns the id of a keychord when the
+ * keychord combination is pressed. A keychord is signalled when
+ * all of the keys in the keycode list are in the pressed state.
+ * The order in which the keys are pressed does not matter.
+ * The keychord will not be signalled if keys not in the keycode
+ * list are pressed.
+ * Keychords will not be signalled on key release events.
+ */
+struct input_keychord {
+ /* should be KEYCHORD_VERSION */
+ __u16 version;
+ /*
+ * client specified ID, returned from read()
+ * when this keychord is pressed.
+ */
+ __u16 id;
+
+ /* number of keycodes in this keychord */
+ __u16 count;
+
+ /* variable length array of keycodes */
+ __u16 keycodes[];
+};
+
+#endif /* _UAPI_LINUX_KEYCHORD_H_ */
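
The header comment above describes the whole protocol: write one or more input_keychord records to /dev/keychord, then read() back the id of whichever chord fires. A minimal, hypothetical consumer for a single volume-down plus power chord might look like this:

/* Hypothetical userspace snippet: register one keychord and wait for it. */
#include <fcntl.h>
#include <unistd.h>
#include <linux/input.h>
#include <linux/keychord.h>

static int wait_for_chord(void)
{
	struct input_keychord *kc;
	__u16 buf[sizeof(*kc) / sizeof(__u16) + 2];	/* header + 2 keycodes */
	__u16 id;
	int fd = open("/dev/keychord", O_RDWR);

	if (fd < 0)
		return -1;

	kc = (struct input_keychord *)buf;
	kc->version = KEYCHORD_VERSION;
	kc->id = 1;
	kc->count = 2;
	kc->keycodes[0] = KEY_VOLUMEDOWN;
	kc->keycodes[1] = KEY_POWER;

	if (write(fd, buf, sizeof(buf)) < 0)
		return -1;
	if (read(fd, &id, sizeof(id)) < 0)	/* blocks until the chord is pressed */
		return -1;
	close(fd);
	return id;
}
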
diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h
index f055e58b3147..db4ae0cd16c7 100644
--- a/include/uapi/linux/msdos_fs.h
+++ b/include/uapi/linux/msdos_fs.h
@@ -104,6 +104,7 @@ struct __fat_dirent {
/* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
#define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32)
#define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32)
+#define VFAT_IOCTL_GET_VOLUME_ID _IOR('r', 0x12, __u32)
struct fat_boot_sector {
__u8 ignored[3]; /* Boot strap short or near jump */
@@ -161,6 +162,17 @@ struct fat_boot_fsinfo {
__le32 reserved2[4];
};
+struct fat_boot_bsx {
+ __u8 drive; /* drive number */
+ __u8 reserved1;
+ __u8 signature; /* extended boot signature */
+ __u8 vol_id[4]; /* volume ID */
+ __u8 vol_label[11]; /* volume label */
+ __u8 type[8]; /* file system type */
+};
+#define FAT16_BSX_OFFSET 36 /* offset of fat_boot_bsx in FAT12 and FAT16 */
+#define FAT32_BSX_OFFSET 64 /* offset of fat_boot_bsx in FAT32 */
+
struct msdos_dir_entry {
__u8 name[MSDOS_NAME];/* name and extension */
__u8 attr; /* attribute bits */
diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h
index 208ae9387331..faaa28b3d061 100644
--- a/include/uapi/linux/netfilter/xt_IDLETIMER.h
+++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h
@@ -4,6 +4,7 @@
* Header file for Xtables timer target module.
*
* Copyright (C) 2004, 2010 Nokia Corporation
+ *
* Written by Timo Teras <ext-timo.teras@nokia.com>
*
* Converted to x_tables and forward-ported to 2.6.34
@@ -32,12 +33,19 @@
#include <linux/types.h>
#define MAX_IDLETIMER_LABEL_SIZE 28
+#define NLMSG_MAX_SIZE 64
+
+#define NL_EVENT_TYPE_INACTIVE 0
+#define NL_EVENT_TYPE_ACTIVE 1
struct idletimer_tg_info {
__u32 timeout;
char label[MAX_IDLETIMER_LABEL_SIZE];
+ /* Use netlink messages for notification in addition to sysfs */
+ __u8 send_nl_msg;
+
/* for kernel module internal use only */
struct idletimer_tg *timer __attribute__((aligned(8)));
};
diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h
index 26d7217bd4f1..63594564831c 100644
--- a/include/uapi/linux/netfilter/xt_socket.h
+++ b/include/uapi/linux/netfilter/xt_socket.h
@@ -11,4 +11,10 @@ struct xt_socket_mtinfo1 {
__u8 flags;
};
+void xt_socket_put_sk(struct sock *sk);
+struct sock *xt_socket_get4_sk(const struct sk_buff *skb,
+ struct xt_action_param *par);
+struct sock *xt_socket_get6_sk(const struct sk_buff *skb,
+ struct xt_action_param *par);
+
#endif /* _XT_SOCKET_H */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d1e48b5e348f..11c8c1707d23 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -646,6 +646,28 @@
* @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can
* return back to normal.
*
+ * @NL80211_CMD_GET_COALESCE: Get currently supported coalesce rules.
+ * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules.
+ *
+ * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the
+ * the new channel information (Channel Switch Announcement - CSA)
+ * new channel information (Channel Switch Announcement - CSA)
+ * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the
+ * new channel. Userspace provides the new channel information (using
+ * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel
+ * width). %NL80211_ATTR_CH_SWITCH_BLOCK_TX may be supplied to inform
+ * other stations that transmission must be blocked until the channel
+ * switch is complete.
+ *
+ * @NL80211_CMD_VENDOR: Vendor-specified command/event. The command is specified
+ * by the %NL80211_ATTR_VENDOR_ID attribute and a sub-command in
+ * %NL80211_ATTR_VENDOR_SUBCMD. Parameter(s) can be transported in
+ * %NL80211_ATTR_VENDOR_DATA.
+ * For feature advertisement, the %NL80211_ATTR_VENDOR_DATA attribute is
+ * used in the wiphy data as a nested attribute containing descriptions
+ * (&struct nl80211_vendor_cmd_info) of the supported vendor commands.
+ * This may also be sent as an event with the same attributes.
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
@@ -808,6 +830,13 @@ enum nl80211_commands {
NL80211_CMD_CRIT_PROTOCOL_START,
NL80211_CMD_CRIT_PROTOCOL_STOP,
+ NL80211_CMD_GET_COALESCE,
+ NL80211_CMD_SET_COALESCE,
+
+ NL80211_CMD_CHANNEL_SWITCH,
+
+ NL80211_CMD_VENDOR,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -1429,6 +1458,48 @@ enum nl80211_commands {
* @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which
* the connection should have increased reliability (u16).
*
+ * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16).
+ * This is similar to @NL80211_ATTR_STA_AID but with a difference of being
+ * allowed to be used with the first @NL80211_CMD_SET_STATION command to
+ * update a TDLS peer STA entry.
+ *
+ * @NL80211_ATTR_COALESCE_RULE: Coalesce rule information.
+ *
+ * @NL80211_ATTR_CH_SWITCH_COUNT: u32 attribute specifying the number of TBTT's
+ * until the channel switch event.
+ * @NL80211_ATTR_CH_SWITCH_BLOCK_TX: flag attribute specifying that transmission
+ * must be blocked on the current channel (before the channel switch
+ * operation).
+ * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
+ * for the time while performing a channel switch.
+ * @NL80211_ATTR_CSA_C_OFF_BEACON: Offset of the channel switch counter
+ * field in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CSA_C_OFF_PRESP: Offset of the channel switch counter
+ * field in the probe response (%NL80211_ATTR_PROBE_RESP).
+ *
+ * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
+ * As specified in the &enum nl80211_rxmgmt_flags.
+ *
+ * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels.
+ *
+ * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported
+ * operating classes.
+ *
+ * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space
+ * controls DFS operation in IBSS mode. If the flag is included in
+ * %NL80211_CMD_JOIN_IBSS request, the driver will allow use of DFS
+ * channels and reports radar events to userspace. Userspace is required
+ * to react to radar events, e.g. initiate a channel switch or leave the
+ * IBSS network.
+ *
+ * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if
+ * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet)
+ * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command
+ * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this
+ * attribute is also used for vendor command feature advertisement
+ * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy
+ * info, containing a nested array of possible events
+ *
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
*/
@@ -1727,6 +1798,35 @@ enum nl80211_attrs {
NL80211_ATTR_CRIT_PROT_ID,
NL80211_ATTR_MAX_CRIT_PROT_DURATION,
+ NL80211_ATTR_PEER_AID,
+
+ NL80211_ATTR_COALESCE_RULE,
+
+ NL80211_ATTR_CH_SWITCH_COUNT,
+ NL80211_ATTR_CH_SWITCH_BLOCK_TX,
+ NL80211_ATTR_CSA_IES,
+ NL80211_ATTR_CSA_C_OFF_BEACON,
+ NL80211_ATTR_CSA_C_OFF_PRESP,
+
+ NL80211_ATTR_RXMGMT_FLAGS,
+
+ NL80211_ATTR_STA_SUPPORTED_CHANNELS,
+
+ NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES,
+
+ NL80211_ATTR_HANDLE_DFS,
+
+ NL80211_ATTR_SUPPORT_5_MHZ,
+ NL80211_ATTR_SUPPORT_10_MHZ,
+
+ NL80211_ATTR_OPMODE_NOTIF,
+
+ NL80211_ATTR_VENDOR_ID,
+ NL80211_ATTR_VENDOR_SUBCMD,
+ NL80211_ATTR_VENDOR_DATA,
+
+ NL80211_ATTR_VENDOR_EVENTS,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -2278,9 +2378,15 @@ enum nl80211_reg_rule_attr {
* enum nl80211_sched_scan_match_attr - scheduled scan match attributes
* @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved
* @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching,
- * only report BSS with matching SSID.
+ * only report BSS with matching SSID.
* @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a
- * BSS in scan results. Filtering is turned off if not specified.
+ * BSS in scan results. Filtering is turned off if not specified. Note that
+ * if this attribute is in a match set of its own, then it is treated as
+ * the default value for all matchsets with an SSID, rather than being a
+ * matchset of its own without an RSSI filter. This is due to problems with
+ * how this API was implemented in the past. Also, due to the same problem,
+ * the only way to create a matchset with only an RSSI filter (with this
+ * attribute) is if there's only a single matchset with the RSSI attribute.
* @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter
* attribute number currently defined
* @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use
@@ -3721,4 +3827,24 @@ enum nl80211_crit_proto_id {
/* maximum duration for critical protocol measures */
#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */
+/*
+ * If this flag is unset, the lower 24 bits are an OUI, if set
+ * a Linux nl80211 vendor ID is used (no such IDs are allocated
+ * yet, so that's not valid so far)
+ */
+#define NL80211_VENDOR_ID_IS_LINUX 0x80000000
+
+/**
+ * struct nl80211_vendor_cmd_info - vendor command data
+ * @vendor_id: If the %NL80211_VENDOR_ID_IS_LINUX flag is clear, then the
+ * value is a 24-bit OUI; if it is set then a separately allocated ID
+ * may be used, but no such IDs are allocated yet. New IDs should be
+ * added to this file when needed.
+ * @subcmd: sub-command ID for the command
+ */
+struct nl80211_vendor_cmd_info {
+ __u32 vendor_id;
+ __u32 subcmd;
+};
+
#endif /* __LINUX_NL80211_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 289760f424aa..28bb0b3a08bf 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -149,4 +149,13 @@
#define PR_GET_TID_ADDRESS 40
+/* Sets the timerslack for arbitrary threads
+ * arg2 slack value, 0 means "use default"
+ * arg3 pid of the thread whose timer slack needs to be set
+ */
+#define PR_SET_TIMERSLACK_PID 41
+
+#define PR_SET_VMA 0x53564d41
+# define PR_SET_VMA_ANON_NAME 0
+
#endif /* _LINUX_PRCTL_H */
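
PR_SET_VMA with PR_SET_VMA_ANON_NAME lets a process attach a label to an anonymous mapping so it can be told apart in /proc/<pid>/maps. A minimal sketch follows; the fallback defines mirror the values above, and since this implementation keeps a pointer to the userspace string, the name must remain valid for the life of the mapping (an assumption worth checking against the mm side of the patch).

/* Hypothetical userspace snippet: name an anonymous mapping. */
#include <stddef.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41
#define PR_SET_VMA_ANON_NAME	0
#endif

static void *alloc_named(size_t len, const char *name)
{
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p != MAP_FAILED)
		prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		      (unsigned long)p, len, (unsigned long)name);
	return p;
}
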
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 7a2144e1afae..07c1146c1f51 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -297,6 +297,7 @@ enum rtattr_type_t {
RTA_TABLE,
RTA_MARK,
RTA_MFC_STATS,
+ RTA_UID,
__RTA_MAX
};
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..0f238a43ff1e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,13 @@
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT 0
+#define SECCOMP_SET_MODE_FILTER 1
+
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+
/*
* All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data.
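
SECCOMP_SET_MODE_FILTER together with SECCOMP_FILTER_FLAG_TSYNC is what lets one thread install a filter across its whole thread group via the new seccomp(2) syscall (number 277 in the asm-generic table above). A minimal, hypothetical sketch with a trivial allow-everything filter; a real policy would inspect seccomp_data.

/* Hypothetical userspace snippet: install an allow-all filter on all threads. */
#include <stddef.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#endif
#ifndef __NR_seccomp
#define __NR_seccomp 277	/* asm-generic value introduced by this patch */
#endif

static int install_allow_all_filter(void)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;
	return syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		       SECCOMP_FILTER_FLAG_TSYNC, &prog);
}
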
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index 7997a506ad41..f7ffe36db03c 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -65,6 +65,7 @@
#define SIOCDIFADDR 0x8936 /* delete PA address */
#define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */
#define SIOCGIFCOUNT 0x8938 /* get number of devices */
+#define SIOCKILLADDR 0x8939 /* kill sockets with this local addr */
#define SIOCGIFBR 0x8940 /* Bridging support */
#define SIOCSIFBR 0x8941 /* Set bridging options */
diff --git a/include/uapi/linux/usb/f_accessory.h b/include/uapi/linux/usb/f_accessory.h
new file mode 100644
index 000000000000..0baeb7d0d74c
--- /dev/null
+++ b/include/uapi/linux/usb/f_accessory.h
@@ -0,0 +1,146 @@
+/*
+ * Gadget Function Driver for Android USB accessories
+ *
+ * Copyright (C) 2011 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_USB_F_ACCESSORY_H
+#define _UAPI_LINUX_USB_F_ACCESSORY_H
+
+/* Use Google Vendor ID when in accessory mode */
+#define USB_ACCESSORY_VENDOR_ID 0x18D1
+
+
+/* Product ID to use when in accessory mode */
+#define USB_ACCESSORY_PRODUCT_ID 0x2D00
+
+/* Product ID to use when in accessory mode and adb is enabled */
+#define USB_ACCESSORY_ADB_PRODUCT_ID 0x2D01
+
+/* Indexes for strings sent by the host via ACCESSORY_SEND_STRING */
+#define ACCESSORY_STRING_MANUFACTURER 0
+#define ACCESSORY_STRING_MODEL 1
+#define ACCESSORY_STRING_DESCRIPTION 2
+#define ACCESSORY_STRING_VERSION 3
+#define ACCESSORY_STRING_URI 4
+#define ACCESSORY_STRING_SERIAL 5
+
+/* Control request for retrieving device's protocol version
+ *
+ * requestType: USB_DIR_IN | USB_TYPE_VENDOR
+ * request: ACCESSORY_GET_PROTOCOL
+ * value: 0
+ * index: 0
+ * data version number (16 bits little endian)
+ * 1 for original accessory support
+ * 2 adds HID and device to host audio support
+ */
+#define ACCESSORY_GET_PROTOCOL 51
+
+/* Control request for host to send a string to the device
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_SEND_STRING
+ * value: 0
+ * index: string ID
+ * data zero terminated UTF8 string
+ *
+ * The device can later retrieve these strings via the
+ * ACCESSORY_GET_STRING_* ioctls
+ */
+#define ACCESSORY_SEND_STRING 52
+
+/* Control request for starting device in accessory mode.
+ * The host sends this after setting all its strings to the device.
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_START
+ * value: 0
+ * index: 0
+ * data none
+ */
+#define ACCESSORY_START 53
+
+/* Control request for registering a HID device.
+ * Upon registering, a unique ID is sent by the accessory in the
+ * value parameter. This ID will be used for future commands for
+ * the device
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_REGISTER_HID
+ * value: Accessory assigned ID for the HID device
+ * index: total length of the HID report descriptor
+ * data none
+ */
+#define ACCESSORY_REGISTER_HID 54
+
+/* Control request for unregistering a HID device.
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_UNREGISTER_HID
+ * value: Accessory assigned ID for the HID device
+ * index: 0
+ * data none
+ */
+#define ACCESSORY_UNREGISTER_HID 55
+
+/* Control request for sending the HID report descriptor.
+ * If the HID descriptor is longer than the endpoint zero max packet size,
+ * the descriptor will be sent in multiple ACCESSORY_SET_HID_REPORT_DESC
+ * commands. The data for the descriptor must be sent sequentially
+ * if multiple packets are needed.
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_SET_HID_REPORT_DESC
+ * value: Accessory assigned ID for the HID device
+ * index: offset of data in descriptor
+ * (needed when HID descriptor is too big for one packet)
+ * data the HID report descriptor
+ */
+#define ACCESSORY_SET_HID_REPORT_DESC 56
+
+/* Control request for sending HID events.
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_SEND_HID_EVENT
+ * value: Accessory assigned ID for the HID device
+ * index: 0
+ * data the HID report for the event
+ */
+#define ACCESSORY_SEND_HID_EVENT 57
+
+/* Control request for setting the audio mode.
+ *
+ * requestType: USB_DIR_OUT | USB_TYPE_VENDOR
+ * request: ACCESSORY_SET_AUDIO_MODE
+ * value: 0 - no audio
+ * 1 - device to host, 44100 16-bit stereo PCM
+ * index: 0
+ * data none
+ */
+#define ACCESSORY_SET_AUDIO_MODE 58
+
+/* ioctls for retrieving strings set by the host */
+#define ACCESSORY_GET_STRING_MANUFACTURER _IOW('M', 1, char[256])
+#define ACCESSORY_GET_STRING_MODEL _IOW('M', 2, char[256])
+#define ACCESSORY_GET_STRING_DESCRIPTION _IOW('M', 3, char[256])
+#define ACCESSORY_GET_STRING_VERSION _IOW('M', 4, char[256])
+#define ACCESSORY_GET_STRING_URI _IOW('M', 5, char[256])
+#define ACCESSORY_GET_STRING_SERIAL _IOW('M', 6, char[256])
+/* returns 1 if there is a start request pending */
+#define ACCESSORY_IS_START_REQUESTED _IO('M', 7)
+/* returns audio mode (set via the ACCESSORY_SET_AUDIO_MODE control request) */
+#define ACCESSORY_GET_AUDIO_MODE _IO('M', 8)
+
+#endif /* _UAPI_LINUX_USB_F_ACCESSORY_H */
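
On the gadget side, the ACCESSORY_GET_STRING_* ioctls retrieve whatever the host pushed earlier with ACCESSORY_SEND_STRING. A minimal, hypothetical sketch; the /dev/usb_accessory node name is the conventional one for this function driver and is an assumption here, and the string ioctls are treated as returning a positive length on success.

/* Hypothetical userspace snippet: dump the strings received from the host. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usb/f_accessory.h>

static int dump_accessory_strings(void)
{
	char buf[256];
	int fd = open("/dev/usb_accessory", O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, ACCESSORY_GET_STRING_MANUFACTURER, buf) > 0)
		printf("manufacturer: %s\n", buf);
	if (ioctl(fd, ACCESSORY_GET_STRING_MODEL, buf) > 0)
		printf("model: %s\n", buf);
	printf("start requested: %d\n",
	       ioctl(fd, ACCESSORY_IS_START_REQUESTED));
	close(fd);
	return 0;
}
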
diff --git a/include/uapi/linux/usb/f_mtp.h b/include/uapi/linux/usb/f_mtp.h
new file mode 100644
index 000000000000..503291855abd
--- /dev/null
+++ b/include/uapi/linux/usb/f_mtp.h
@@ -0,0 +1,61 @@
+/*
+ * Gadget Function Driver for MTP
+ *
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_LINUX_USB_F_MTP_H
+#define _UAPI_LINUX_USB_F_MTP_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+struct mtp_file_range {
+ /* file descriptor for file to transfer */
+ int fd;
+ /* offset in file for start of transfer */
+ loff_t offset;
+ /* number of bytes to transfer */
+ int64_t length;
+ /* MTP command ID for data header,
+ * used only for MTP_SEND_FILE_WITH_HEADER
+ */
+ uint16_t command;
+ /* MTP transaction ID for data header,
+ * used only for MTP_SEND_FILE_WITH_HEADER
+ */
+ uint32_t transaction_id;
+};
+
+struct mtp_event {
+ /* size of the event */
+ size_t length;
+ /* event data to send */
+ void *data;
+};
+
+/* Sends the specified file range to the host */
+#define MTP_SEND_FILE _IOW('M', 0, struct mtp_file_range)
+/* Receives data from the host and writes it to a file.
+ * The file is created if it does not exist.
+ */
+#define MTP_RECEIVE_FILE _IOW('M', 1, struct mtp_file_range)
+/* Sends an event to the host via the interrupt endpoint */
+#define MTP_SEND_EVENT _IOW('M', 3, struct mtp_event)
+/* Sends the specified file range to the host,
+ * with a 12 byte MTP data packet header at the beginning.
+ */
+#define MTP_SEND_FILE_WITH_HEADER _IOW('M', 4, struct mtp_file_range)
+
+#endif /* _UAPI_LINUX_USB_F_MTP_H */
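
MTP_SEND_FILE hands the function driver an ordinary file descriptor plus a byte range and lets the kernel stream it to the host. A minimal, hypothetical sketch; mtp_fd is an open descriptor on the MTP function's device node (commonly /dev/mtp_usb on Android devices, an assumption here).

/* Hypothetical userspace snippet: send a whole file to the host over MTP. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usb/f_mtp.h>

static int mtp_send_whole_file(int mtp_fd, const char *path)
{
	struct mtp_file_range mfr;
	int ret;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;

	mfr.fd = fd;
	mfr.offset = 0;
	mfr.length = lseek(fd, 0, SEEK_END);
	mfr.command = 0;		/* only used by MTP_SEND_FILE_WITH_HEADER */
	mfr.transaction_id = 0;

	ret = ioctl(mtp_fd, MTP_SEND_FILE, &mfr);
	close(fd);
	return ret;
}
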
diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index 602dc6c45d1a..165e7059de75 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -57,6 +57,7 @@
#define MAX_NUM_CODECS 32
#define MAX_NUM_CODEC_DESCRIPTORS 32
#define MAX_NUM_BITRATES 32
+#define MAX_NUM_SAMPLE_RATES 32
/* Codecs are listed linearly to allow for extensibility */
#define SND_AUDIOCODEC_PCM ((__u32) 0x00000001)
@@ -324,7 +325,8 @@ union snd_codec_options {
/** struct snd_codec_desc - description of codec capabilities
* @max_ch: Maximum number of audio channels
- * @sample_rates: Sampling rates in Hz, use SNDRV_PCM_RATE_xxx for this
+ * @sample_rates: Sampling rates in Hz, use values like 48000 for this
+ * @num_sample_rates: Number of valid values in sample_rates array
* @bit_rate: Indexed array containing supported bit rates
* @num_bitrates: Number of valid values in bit_rate array
* @rate_control: value is specified by SND_RATECONTROLMODE defines.
@@ -346,7 +348,8 @@ union snd_codec_options {
struct snd_codec_desc {
__u32 max_ch;
- __u32 sample_rates;
+ __u32 sample_rates[MAX_NUM_SAMPLE_RATES];
+ __u32 num_sample_rates;
__u32 bit_rate[MAX_NUM_BITRATES];
__u32 num_bitrates;
__u32 rate_control;
@@ -364,7 +367,8 @@ struct snd_codec_desc {
* @ch_out: Number of output channels. In case of contradiction between
* this field and the channelMode field, the channelMode field
* overrides.
- * @sample_rate: Audio sample rate of input data
+ * @sample_rate: Audio sample rate of input data in Hz, use values like 48000
+ * for this.
* @bit_rate: Bitrate of encoded data. May be ignored by decoders
* @rate_control: Encoding rate control. See SND_RATECONTROLMODE defines.
* Encoders may rely on profiles for quality levels.
diff --git a/include/uapi/video/adf.h b/include/uapi/video/adf.h
new file mode 100644
index 000000000000..c5d2e62cdb9b
--- /dev/null
+++ b/include/uapi/video/adf.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UAPI_VIDEO_ADF_H_
+#define _UAPI_VIDEO_ADF_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_mode.h>
+
+#define ADF_NAME_LEN 32
+#define ADF_MAX_CUSTOM_DATA_SIZE 4096
+
+enum adf_interface_type {
+ ADF_INTF_DSI = 0,
+ ADF_INTF_eDP = 1,
+ ADF_INTF_DPI = 2,
+ ADF_INTF_VGA = 3,
+ ADF_INTF_DVI = 4,
+ ADF_INTF_HDMI = 5,
+ ADF_INTF_MEMORY = 6,
+ ADF_INTF_TYPE_DEVICE_CUSTOM = 128,
+ ADF_INTF_TYPE_MAX = (~(__u32)0),
+};
+
+#define ADF_INTF_FLAG_PRIMARY (1 << 0)
+#define ADF_INTF_FLAG_EXTERNAL (1 << 1)
+
+enum adf_event_type {
+ ADF_EVENT_VSYNC = 0,
+ ADF_EVENT_HOTPLUG = 1,
+ ADF_EVENT_DEVICE_CUSTOM = 128,
+ ADF_EVENT_TYPE_MAX = 255,
+};
+
+/**
+ * struct adf_set_event - start or stop subscribing to ADF events
+ *
+ * @type: the type of event to (un)subscribe
+ * @enabled: subscribe or unsubscribe
+ *
+ * After subscribing to an event, userspace may poll() the ADF object's fd
+ * to wait for events or read() to consume the event's data.
+ *
+ * ADF reserves event types 0 to %ADF_EVENT_DEVICE_CUSTOM-1 for its own events.
+ * Devices may use event types %ADF_EVENT_DEVICE_CUSTOM to %ADF_EVENT_TYPE_MAX-1
+ * for driver-private events.
+ */
+struct adf_set_event {
+ __u8 type;
+ __u8 enabled;
+};
+
+/**
+ * struct adf_event - common header for ADF event data
+ *
+ * @type: event type
+ * @length: total size of event data, header inclusive
+ */
+struct adf_event {
+ __u8 type;
+ __u32 length;
+};
+
+/**
+ * struct adf_vsync_event - ADF vsync event
+ *
+ * @base: event header (see &struct adf_event)
+ * @timestamp: time of vsync event, in nanoseconds
+ */
+struct adf_vsync_event {
+ struct adf_event base;
+ __aligned_u64 timestamp;
+};
+
+/**
+ * struct adf_hotplug_event - ADF display hotplug event
+ *
+ * @base: event header (see &struct adf_event)
+ * @connected: whether a display is now connected to the interface
+ */
+struct adf_hotplug_event {
+ struct adf_event base;
+ __u8 connected;
+};
+
+#define ADF_MAX_PLANES 4
+/**
+ * struct adf_buffer_config - description of buffer displayed by adf_post_config
+ *
+ * @overlay_engine: id of the target overlay engine
+ * @w: width of display region in pixels
+ * @h: height of display region in pixels
+ * @format: DRM-style fourcc, see drm_fourcc.h for standard formats
+ * @fd: dma_buf fd for each plane
+ * @offset: location of first pixel to scan out, in bytes
+ * @pitch: stride (i.e. length of a scanline including padding) in bytes
+ * @n_planes: number of planes in buffer
+ * @acquire_fence: sync_fence fd which will clear when the buffer is
+ * ready for display, or <0 if the buffer is already ready
+ */
+struct adf_buffer_config {
+ __u32 overlay_engine;
+
+ __u32 w;
+ __u32 h;
+ __u32 format;
+
+ __s32 fd[ADF_MAX_PLANES];
+ __u32 offset[ADF_MAX_PLANES];
+ __u32 pitch[ADF_MAX_PLANES];
+ __u8 n_planes;
+
+ __s32 acquire_fence;
+};
+#define ADF_MAX_BUFFERS (4096 / sizeof(struct adf_buffer_config))
+
+/**
+ * struct adf_post_config - request to flip to a new set of buffers
+ *
+ * @n_interfaces: number of interfaces targeted by the flip (input)
+ * @interfaces: ids of interfaces targeted by the flip (input)
+ * @n_bufs: number of buffers displayed (input)
+ * @bufs: description of buffers displayed (input)
+ * @custom_data_size: size of driver-private data (input)
+ * @custom_data: driver-private data (input)
+ * @complete_fence: sync_fence fd which will clear when this
+ * configuration has left the screen (output)
+ */
+struct adf_post_config {
+ size_t n_interfaces;
+ __u32 __user *interfaces;
+
+ size_t n_bufs;
+ struct adf_buffer_config __user *bufs;
+
+ size_t custom_data_size;
+ void __user *custom_data;
+
+ __s32 complete_fence;
+};
+#define ADF_MAX_INTERFACES (4096 / sizeof(__u32))
+
+/**
+ * struct adf_simple_buffer_allocate - request to allocate a "simple" buffer
+ *
+ * @w: width of buffer in pixels (input)
+ * @h: height of buffer in pixels (input)
+ * @format: DRM-style fourcc (input)
+ *
+ * @fd: dma_buf fd (output)
+ * @offset: location of first pixel, in bytes (output)
+ * @pitch: length of a scanline including padding, in bytes (output)
+ *
+ * Simple buffers are analogous to DRM's "dumb" buffers. They have a single
+ * plane of linear RGB data which can be allocated and scanned out without
+ * any driver-private ioctls or data.
+ *
+ * @format must be a standard RGB format defined in drm_fourcc.h.
+ *
+ * ADF clients must NOT assume that an interface can scan out a simple buffer
+ * allocated by a different ADF interface, even if the two interfaces belong to
+ * the same ADF device.
+ */
+struct adf_simple_buffer_alloc {
+ __u16 w;
+ __u16 h;
+ __u32 format;
+
+ __s32 fd;
+ __u32 offset;
+ __u32 pitch;
+};
+
+/**
+ * struct adf_simple_post_config - request to flip to a single buffer without
+ * driver-private data
+ *
+ * @buf: description of buffer displayed (input)
+ * @complete_fence: sync_fence fd which will clear when this buffer has left the
+ * screen (output)
+ */
+struct adf_simple_post_config {
+ struct adf_buffer_config buf;
+ __s32 complete_fence;
+};
+
+/**
+ * struct adf_attachment_config - description of attachment between an overlay
+ * engine and an interface
+ *
+ * @overlay_engine: id of the overlay engine
+ * @interface: id of the interface
+ */
+struct adf_attachment_config {
+ __u32 overlay_engine;
+ __u32 interface;
+};
+
+/**
+ * struct adf_device_data - describes a display device
+ *
+ * @name: display device's name
+ * @n_attachments: the number of current attachments
+ * @attachments: list of current attachments
+ * @n_allowed_attachments: the number of allowed attachments
+ * @allowed_attachments: list of allowed attachments
+ * @custom_data_size: size of driver-private data
+ * @custom_data: driver-private data
+ */
+struct adf_device_data {
+ char name[ADF_NAME_LEN];
+
+ size_t n_attachments;
+ struct adf_attachment_config __user *attachments;
+
+ size_t n_allowed_attachments;
+ struct adf_attachment_config __user *allowed_attachments;
+
+ size_t custom_data_size;
+ void __user *custom_data;
+};
+#define ADF_MAX_ATTACHMENTS (4096 / sizeof(struct adf_attachment_config))
+
+/**
+ * struct adf_interface_data - describes a display interface
+ *
+ * @name: display interface's name
+ * @type: interface type (see enum @adf_interface_type)
+ * @id: which interface of type @type;
+ * e.g. interface DSI.1 -> @type=@ADF_INTF_DSI, @id=1
+ * @flags: informational flags (bitmask of %ADF_INTF_FLAG_* values)
+ * @dpms_state: DPMS state (one of @DRM_MODE_DPMS_* defined in drm_mode.h)
+ * @hotplug_detect: whether a display is plugged in
+ * @width_mm: screen width in millimeters, or 0 if unknown
+ * @height_mm: screen height in millimeters, or 0 if unknown
+ * @current_mode: current display mode
+ * @n_available_modes: the number of hardware display modes
+ * @available_modes: list of hardware display modes
+ * @custom_data_size: size of driver-private data
+ * @custom_data: driver-private data
+ */
+struct adf_interface_data {
+ char name[ADF_NAME_LEN];
+
+ __u32 type;
+ __u32 id;
+ /* e.g. type=ADF_INTF_DSI, id=1 => DSI.1 */
+ __u32 flags;
+
+ __u8 dpms_state;
+ __u8 hotplug_detect;
+ __u16 width_mm;
+ __u16 height_mm;
+
+ struct drm_mode_modeinfo current_mode;
+ size_t n_available_modes;
+ struct drm_mode_modeinfo __user *available_modes;
+
+ size_t custom_data_size;
+ void __user *custom_data;
+};
+#define ADF_MAX_MODES (4096 / sizeof(struct drm_mode_modeinfo))
+
+/**
+ * struct adf_overlay_engine_data - describes an overlay engine
+ *
+ * @name: overlay engine's name
+ * @n_supported_formats: number of supported formats
+ * @supported_formats: list of supported formats
+ * @custom_data_size: size of driver-private data
+ * @custom_data: driver-private data
+ */
+struct adf_overlay_engine_data {
+ char name[ADF_NAME_LEN];
+
+ size_t n_supported_formats;
+ __u32 __user *supported_formats;
+
+ size_t custom_data_size;
+ void __user *custom_data;
+};
+#define ADF_MAX_SUPPORTED_FORMATS (4096 / sizeof(__u32))
+
+#define ADF_IOCTL_TYPE 'D'
+#define ADF_IOCTL_NR_CUSTOM 128
+
+#define ADF_SET_EVENT _IOW(ADF_IOCTL_TYPE, 0, struct adf_set_event)
+#define ADF_BLANK _IOW(ADF_IOCTL_TYPE, 1, __u8)
+#define ADF_POST_CONFIG _IOW(ADF_IOCTL_TYPE, 2, struct adf_post_config)
+#define ADF_SET_MODE _IOW(ADF_IOCTL_TYPE, 3, \
+ struct drm_mode_modeinfo)
+#define ADF_GET_DEVICE_DATA _IOR(ADF_IOCTL_TYPE, 4, struct adf_device_data)
+#define ADF_GET_INTERFACE_DATA _IOR(ADF_IOCTL_TYPE, 5, \
+ struct adf_interface_data)
+#define ADF_GET_OVERLAY_ENGINE_DATA \
+ _IOR(ADF_IOCTL_TYPE, 6, \
+ struct adf_overlay_engine_data)
+#define ADF_SIMPLE_POST_CONFIG _IOW(ADF_IOCTL_TYPE, 7, \
+ struct adf_simple_post_config)
+#define ADF_SIMPLE_BUFFER_ALLOC _IOW(ADF_IOCTL_TYPE, 8, \
+ struct adf_simple_buffer_alloc)
+#define ADF_ATTACH _IOW(ADF_IOCTL_TYPE, 9, \
+ struct adf_attachment_config)
+#define ADF_DETACH _IOW(ADF_IOCTL_TYPE, 10, \
+ struct adf_attachment_config)
+
+#endif /* _UAPI_VIDEO_ADF_H_ */
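
The event plumbing above is consumed by subscribing with ADF_SET_EVENT and then read()ing adf_event records from the same fd. A minimal, hypothetical vsync listener; the interface device node path is platform-specific and passed in by the caller, and reading exactly one adf_vsync_event per read() is an assumption of this sketch.

/* Hypothetical userspace snippet: wait for one vsync on an ADF interface. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <video/adf.h>

static int wait_one_vsync(const char *node)
{
	struct adf_set_event req = { .type = ADF_EVENT_VSYNC, .enabled = 1 };
	struct adf_vsync_event ev;
	int fd = open(node, O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, ADF_SET_EVENT, &req) < 0)
		return -1;
	if (read(fd, &ev, sizeof(ev)) < 0)	/* blocks until the next vsync */
		return -1;
	printf("vsync at %llu ns\n", (unsigned long long)ev.timestamp);
	close(fd);
	return 0;
}
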
diff --git a/include/video/adf.h b/include/video/adf.h
new file mode 100644
index 000000000000..34f10e538f9e
--- /dev/null
+++ b/include/video/adf.h
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _VIDEO_ADF_H
+#define _VIDEO_ADF_H
+
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+#include <linux/idr.h>
+#include <linux/kref.h>
+#include <linux/kthread.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <uapi/video/adf.h>
+#include "sync.h"
+
+struct adf_obj;
+struct adf_obj_ops;
+struct adf_device;
+struct adf_device_ops;
+struct adf_interface;
+struct adf_interface_ops;
+struct adf_overlay_engine;
+struct adf_overlay_engine_ops;
+
+/**
+ * struct adf_buffer - buffer displayed by adf_post
+ *
+ * @overlay_engine: target overlay engine
+ * @w: width of display region in pixels
+ * @h: height of display region in pixels
+ * @format: DRM-style fourcc, see drm_fourcc.h for standard formats
+ * @dma_bufs: dma_buf for each plane
+ * @offset: location of first pixel to scan out, in bytes
+ * @pitch: length of a scanline including padding, in bytes
+ * @n_planes: number of planes in buffer
+ * @acquire_fence: sync_fence which will clear when the buffer is
+ * ready for display
+ *
+ * &struct adf_buffer is the in-kernel counterpart to the userspace-facing
+ * &struct adf_buffer_config.
+ */
+struct adf_buffer {
+ struct adf_overlay_engine *overlay_engine;
+
+ u32 w;
+ u32 h;
+ u32 format;
+
+ struct dma_buf *dma_bufs[ADF_MAX_PLANES];
+ u32 offset[ADF_MAX_PLANES];
+ u32 pitch[ADF_MAX_PLANES];
+ u8 n_planes;
+
+ struct sync_fence *acquire_fence;
+};
+
+/**
+ * struct adf_buffer_mapping - state for mapping a &struct adf_buffer into the
+ * display device
+ *
+ * @attachments: dma-buf attachment for each plane
+ * @sg_tables: SG tables for each plane
+ */
+struct adf_buffer_mapping {
+ struct dma_buf_attachment *attachments[ADF_MAX_PLANES];
+ struct sg_table *sg_tables[ADF_MAX_PLANES];
+};
+
+/**
+ * struct adf_post - request to flip to a new set of buffers
+ *
+ * @n_bufs: number of buffers displayed
+ * @bufs: buffers displayed
+ * @mappings: in-device mapping state for each buffer
+ * @custom_data_size: size of driver-private data
+ * @custom_data: driver-private data
+ *
+ * &struct adf_post is the in-kernel counterpart to the userspace-facing
+ * &struct adf_post_config.
+ */
+struct adf_post {
+ size_t n_bufs;
+ struct adf_buffer *bufs;
+ struct adf_buffer_mapping *mappings;
+
+ size_t custom_data_size;
+ void *custom_data;
+};
+
+/**
+ * struct adf_attachment - description of attachment between an overlay engine
+ * and an interface
+ *
+ * @overlay_engine: the overlay engine
+ * @interface: the interface
+ *
+ * &struct adf_attachment is the in-kernel counterpart to the userspace-facing
+ * &struct adf_attachment_config.
+ */
+struct adf_attachment {
+ struct adf_overlay_engine *overlay_engine;
+ struct adf_interface *interface;
+};
+
+struct adf_pending_post {
+ struct list_head head;
+ struct adf_post config;
+ void *state;
+};
+
+enum adf_obj_type {
+ ADF_OBJ_OVERLAY_ENGINE = 0,
+ ADF_OBJ_INTERFACE = 1,
+ ADF_OBJ_DEVICE = 2,
+};
+
+/**
+ * struct adf_obj_ops - common ADF object implementation ops
+ *
+ * @open: handle opening the object's device node
+ * @release: handle releasing an open file
+ * @ioctl: handle custom ioctls
+ *
+ * @supports_event: return whether the object supports generating events of type
+ * @type
+ * @set_event: enable or disable events of type @type
+ * @event_type_str: return a string representation of custom event @type
+ * (@type >= %ADF_EVENT_DEVICE_CUSTOM).
+ *
+ * @custom_data: copy up to %ADF_MAX_CUSTOM_DATA_SIZE bytes of driver-private
+ * data into @data (allocated by ADF) and return the number of copied bytes
+ * in @size. Return 0 on success or an error code (<0) on failure.
+ */
+struct adf_obj_ops {
+ /* optional */
+ int (*open)(struct adf_obj *obj, struct inode *inode,
+ struct file *file);
+ /* optional */
+ void (*release)(struct adf_obj *obj, struct inode *inode,
+ struct file *file);
+ /* optional */
+ long (*ioctl)(struct adf_obj *obj, unsigned int cmd, unsigned long arg);
+
+ /* optional */
+ bool (*supports_event)(struct adf_obj *obj, enum adf_event_type type);
+ /* required if supports_event is implemented */
+ void (*set_event)(struct adf_obj *obj, enum adf_event_type type,
+ bool enabled);
+ /* optional */
+ const char *(*event_type_str)(struct adf_obj *obj,
+ enum adf_event_type type);
+
+ /* optional */
+ int (*custom_data)(struct adf_obj *obj, void *data, size_t *size);
+};
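+
+/*
+ * Illustrative sketch (not part of this patch): a supports_event hook for an
+ * object that only generates vsync and hotplug events. The function name is
+ * hypothetical; the event types are assumed to come from <uapi/video/adf.h>.
+ *
+ * static bool example_supports_event(struct adf_obj *obj,
+ * enum adf_event_type type)
+ * {
+ * return type == ADF_EVENT_VSYNC || type == ADF_EVENT_HOTPLUG;
+ * }
+ */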
+
+struct adf_obj {
+ enum adf_obj_type type;
+ char name[ADF_NAME_LEN];
+ struct adf_device *parent;
+
+ const struct adf_obj_ops *ops;
+
+ struct device dev;
+
+ struct spinlock file_lock;
+ struct list_head file_list;
+
+ struct mutex event_lock;
+ struct rb_root event_refcount;
+
+ int id;
+ int minor;
+};
+
+/**
+ * struct adf_device_quirks - common display device quirks
+ *
+ * @buffer_padding: whether the last scanline of a buffer extends to the
+ * buffer's pitch (@ADF_BUFFER_PADDED_TO_PITCH) or just to the visible
+ * width (@ADF_BUFFER_UNPADDED)
+ */
+struct adf_device_quirks {
+ /* optional, defaults to ADF_BUFFER_PADDED_TO_PITCH */
+ enum {
+ ADF_BUFFER_PADDED_TO_PITCH = 0,
+ ADF_BUFFER_UNPADDED = 1,
+ } buffer_padding;
+};
+
+/**
+ * struct adf_device_ops - display device implementation ops
+ *
+ * @owner: device's module
+ * @base: common operations (see &struct adf_obj_ops)
+ * @quirks: device's quirks (see &struct adf_device_quirks)
+ *
+ * @attach: attach overlay engine @eng to interface @intf. Return 0 on success
+ * or error code (<0) on failure.
+ * @detach: detach overlay engine @eng from interface @intf. Return 0 on
+ * success or error code (<0) on failure.
+ *
+ * @validate_custom_format: validate the number and size of planes
+ * in buffers with a custom format (i.e., not one of the @DRM_FORMAT_*
+ * types defined in drm/drm_fourcc.h). Return 0 if the buffer is valid or
+ * an error code (<0) otherwise.
+ *
+ * @validate: validate that the proposed configuration @cfg is legal. The
+ * driver may optionally allocate and return some driver-private state in
+ * @driver_state, which will be passed to the corresponding post(). The
+ * driver may NOT commit any changes to hardware. Return 0 if @cfg is
+ * valid or an error code (<0) otherwise.
+ * @complete_fence: create a hardware-backed sync fence to be signaled when
+ * @cfg is removed from the screen. If unimplemented, ADF automatically
+ * creates an sw_sync fence. Return the sync fence on success or a
+ * PTR_ERR() on failure.
+ * @post: flip @cfg onto the screen. Wait for the display to begin scanning out
+ * @cfg before returning.
+ * @advance_timeline: signal the sync fence for the last configuration to leave
+ * the display. If unimplemented, ADF automatically advances an sw_sync
+ * timeline.
+ * @state_free: free driver-private state allocated during validate()
+ */
+struct adf_device_ops {
+ /* required */
+ struct module *owner;
+ const struct adf_obj_ops base;
+ /* optional */
+ const struct adf_device_quirks quirks;
+
+ /* optional */
+ int (*attach)(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf);
+ /* optional */
+ int (*detach)(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf);
+
+ /* required if any of the device's overlay engines supports at least one
+ custom format */
+ int (*validate_custom_format)(struct adf_device *dev,
+ struct adf_buffer *buf);
+
+ /* required */
+ int (*validate)(struct adf_device *dev, struct adf_post *cfg,
+ void **driver_state);
+ /* optional */
+ struct sync_fence *(*complete_fence)(struct adf_device *dev,
+ struct adf_post *cfg, void *driver_state);
+ /* required */
+ void (*post)(struct adf_device *dev, struct adf_post *cfg,
+ void *driver_state);
+ /* required if complete_fence is implemented */
+ void (*advance_timeline)(struct adf_device *dev,
+ struct adf_post *cfg, void *driver_state);
+ /* required if validate allocates driver state */
+ void (*state_free)(struct adf_device *dev, void *driver_state);
+};
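+
+/*
+ * Illustrative sketch (not part of this patch): the minimal hooks a
+ * hypothetical driver must provide. Only owner, validate() and post() are
+ * filled in; every "example_*" identifier is an assumption.
+ *
+ * static int example_validate(struct adf_device *dev, struct adf_post *cfg,
+ * void **driver_state)
+ * {
+ * return cfg->n_bufs <= 1 ? 0 : -EINVAL;
+ * }
+ *
+ * static void example_post(struct adf_device *dev, struct adf_post *cfg,
+ * void *driver_state)
+ * {
+ * }
+ *
+ * static const struct adf_device_ops example_dev_ops = {
+ * .owner = THIS_MODULE,
+ * .validate = example_validate,
+ * .post = example_post,
+ * };
+ */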
+
+struct adf_attachment_list {
+ struct adf_attachment attachment;
+ struct list_head head;
+};
+
+struct adf_device {
+ struct adf_obj base;
+ struct device *dev;
+
+ const struct adf_device_ops *ops;
+
+ struct mutex client_lock;
+
+ struct idr interfaces;
+ size_t n_interfaces;
+ struct idr overlay_engines;
+
+ struct list_head post_list;
+ struct mutex post_lock;
+ struct kthread_worker post_worker;
+ struct task_struct *post_thread;
+ struct kthread_work post_work;
+
+ struct list_head attached;
+ size_t n_attached;
+ struct list_head attach_allowed;
+ size_t n_attach_allowed;
+
+ struct adf_pending_post *onscreen;
+
+ struct sw_sync_timeline *timeline;
+ int timeline_max;
+};
+
+/**
+ * struct adf_interface_ops - display interface implementation ops
+ *
+ * @base: common operations (see &struct adf_obj_ops)
+ *
+ * @blank: change the display's DPMS state. Return 0 on success or error
+ * code (<0) on failure.
+ *
+ * @alloc_simple_buffer: allocate a buffer with the specified @w, @h, and
+ * @format. @format will be a standard RGB format (i.e.,
+ * adf_format_is_rgb(@format) == true). Return 0 on success or error code
+ * (<0) on failure. On success, return the buffer, offset, and pitch in
+ * @dma_buf, @offset, and @pitch respectively.
+ * @describe_simple_post: provide driver-private data needed to post a single
+ * buffer @fb. Copy up to ADF_MAX_CUSTOM_DATA_SIZE bytes into @data
+ * (allocated by ADF) and return the number of bytes in @size. Return 0 on
+ * success or error code (<0) on failure.
+ *
+ * @modeset: change the interface's mode. @mode is not necessarily part of the
+ * modelist passed to adf_hotplug_notify_connected(); the driver may
+ * accept or reject custom modes at its discretion. Return 0 on success or
+ * error code (<0) if the mode could not be set.
+ *
+ * @screen_size: copy the screen dimensions in millimeters into @width_mm
+ * and @height_mm. Return 0 on success or error code (<0) if the display
+ * dimensions are unknown.
+ *
+ * @type_str: return a string representation of custom @intf->type
+ * (@intf->type >= @ADF_INTF_TYPE_DEVICE_CUSTOM).
+ */
+struct adf_interface_ops {
+ const struct adf_obj_ops base;
+
+ /* optional */
+ int (*blank)(struct adf_interface *intf, u8 state);
+
+ /* optional */
+ int (*alloc_simple_buffer)(struct adf_interface *intf,
+ u16 w, u16 h, u32 format,
+ struct dma_buf **dma_buf, u32 *offset, u32 *pitch);
+ /* optional */
+ int (*describe_simple_post)(struct adf_interface *intf,
+ struct adf_buffer *fb, void *data, size_t *size);
+
+ /* optional */
+ int (*modeset)(struct adf_interface *intf,
+ struct drm_mode_modeinfo *mode);
+
+ /* optional */
+ int (*screen_size)(struct adf_interface *intf, u16 *width_mm,
+ u16 *height_mm);
+
+ /* optional */
+ const char *(*type_str)(struct adf_interface *intf);
+};
+
+struct adf_interface {
+ struct adf_obj base;
+ const struct adf_interface_ops *ops;
+
+ struct drm_mode_modeinfo current_mode;
+
+ enum adf_interface_type type;
+ u32 idx;
+ u32 flags;
+
+ wait_queue_head_t vsync_wait;
+ ktime_t vsync_timestamp;
+ rwlock_t vsync_lock;
+
+ u8 dpms_state;
+
+ bool hotplug_detect;
+ struct drm_mode_modeinfo *modelist;
+ size_t n_modes;
+ rwlock_t hotplug_modelist_lock;
+};
+
+/**
+ * struct adf_overlay_engine_ops - overlay engine implementation ops
+ *
+ * @base: common operations (see &struct adf_obj_ops)
+ *
+ * @supported_formats: list of fourccs the overlay engine can scan out
+ * @n_supported_formats: length of supported_formats, up to
+ * ADF_MAX_SUPPORTED_FORMATS
+ */
+struct adf_overlay_engine_ops {
+ const struct adf_obj_ops base;
+
+ /* required */
+ const u32 *supported_formats;
+ /* required */
+ const size_t n_supported_formats;
+};
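+
+/*
+ * Illustrative sketch (not part of this patch): an engine that scans out a
+ * single RGB format. The names are hypothetical; the fourcc comes from
+ * drm/drm_fourcc.h.
+ *
+ * static const u32 example_formats[] = { DRM_FORMAT_XRGB8888 };
+ *
+ * static const struct adf_overlay_engine_ops example_eng_ops = {
+ * .supported_formats = example_formats,
+ * .n_supported_formats = ARRAY_SIZE(example_formats),
+ * };
+ */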
+
+struct adf_overlay_engine {
+ struct adf_obj base;
+
+ const struct adf_overlay_engine_ops *ops;
+};
+
+#define adf_obj_to_device(ptr) \
+ container_of((ptr), struct adf_device, base)
+
+#define adf_obj_to_interface(ptr) \
+ container_of((ptr), struct adf_interface, base)
+
+#define adf_obj_to_overlay_engine(ptr) \
+ container_of((ptr), struct adf_overlay_engine, base)
+
+int __printf(4, 5) adf_device_init(struct adf_device *dev,
+ struct device *parent, const struct adf_device_ops *ops,
+ const char *fmt, ...);
+void adf_device_destroy(struct adf_device *dev);
+int __printf(7, 8) adf_interface_init(struct adf_interface *intf,
+ struct adf_device *dev, enum adf_interface_type type, u32 idx,
+ u32 flags, const struct adf_interface_ops *ops, const char *fmt,
+ ...);
+void adf_interface_destroy(struct adf_interface *intf);
+static inline struct adf_device *adf_interface_parent(
+ struct adf_interface *intf)
+{
+ return intf->base.parent;
+}
+int __printf(4, 5) adf_overlay_engine_init(struct adf_overlay_engine *eng,
+ struct adf_device *dev,
+ const struct adf_overlay_engine_ops *ops, const char *fmt, ...);
+void adf_overlay_engine_destroy(struct adf_overlay_engine *eng);
+static inline struct adf_device *adf_overlay_engine_parent(
+ struct adf_overlay_engine *eng)
+{
+ return eng->base.parent;
+}
+
+int adf_attachment_allow(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf);
+
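+/*
+ * Illustrative registration sequence (not part of this patch), roughly as a
+ * driver's probe() might perform it. Error handling is omitted; every
+ * "example_*"/"mydrv" identifier is hypothetical, and ADF_INTF_DSI and
+ * ADF_INTF_FLAG_PRIMARY are assumed to be defined in <uapi/video/adf.h>.
+ *
+ * err = adf_device_init(&mydrv->dev, &pdev->dev, &example_dev_ops, "example");
+ * err = adf_interface_init(&mydrv->intf, &mydrv->dev, ADF_INTF_DSI, 0,
+ * ADF_INTF_FLAG_PRIMARY, &example_intf_ops, "example-dsi");
+ * err = adf_overlay_engine_init(&mydrv->eng, &mydrv->dev, &example_eng_ops,
+ * "example-eng");
+ * err = adf_attachment_allow(&mydrv->dev, &mydrv->eng, &mydrv->intf);
+ */
+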
+const char *adf_obj_type_str(enum adf_obj_type type);
+const char *adf_interface_type_str(struct adf_interface *intf);
+const char *adf_event_type_str(struct adf_obj *obj, enum adf_event_type type);
+
+#define ADF_FORMAT_STR_SIZE 5
+void adf_format_str(u32 format, char buf[ADF_FORMAT_STR_SIZE]);
+int adf_format_validate_yuv(struct adf_device *dev, struct adf_buffer *buf,
+ u8 num_planes, u8 hsub, u8 vsub, u8 cpp[]);
+/**
+ * adf_format_validate_rgb - validate the number and size of planes in buffers
+ * with a custom RGB format.
+ *
+ * @dev: ADF device performing the validation
+ * @buf: buffer to validate
+ * @cpp: expected bytes per pixel
+ *
+ * adf_format_validate_rgb() is intended to be called as a helper from @dev's
+ * validate_custom_format() op. @buf must have a single RGB plane.
+ *
+ * Returns 0 if @buf has a single plane with sufficient size, or -EINVAL
+ * otherwise.
+ */
+static inline int adf_format_validate_rgb(struct adf_device *dev,
+ struct adf_buffer *buf, u8 cpp)
+{
+ return adf_format_validate_yuv(dev, buf, 1, 1, 1, &cpp);
+}
+
+int adf_event_get(struct adf_obj *obj, enum adf_event_type type);
+int adf_event_put(struct adf_obj *obj, enum adf_event_type type);
+int adf_event_notify(struct adf_obj *obj, struct adf_event *event);
+
+static inline void adf_vsync_get(struct adf_interface *intf)
+{
+ adf_event_get(&intf->base, ADF_EVENT_VSYNC);
+}
+
+static inline void adf_vsync_put(struct adf_interface *intf)
+{
+ adf_event_put(&intf->base, ADF_EVENT_VSYNC);
+}
+
+int adf_vsync_wait(struct adf_interface *intf, long timeout);
+void adf_vsync_notify(struct adf_interface *intf, ktime_t timestamp);
+
+int adf_hotplug_notify_connected(struct adf_interface *intf,
+ struct drm_mode_modeinfo *modelist, size_t n_modes);
+void adf_hotplug_notify_disconnected(struct adf_interface *intf);
+
+void adf_modeinfo_set_name(struct drm_mode_modeinfo *mode);
+void adf_modeinfo_set_vrefresh(struct drm_mode_modeinfo *mode);
+
+#endif /* _VIDEO_ADF_H */
diff --git a/include/video/adf_client.h b/include/video/adf_client.h
new file mode 100644
index 000000000000..983f2b6a5890
--- /dev/null
+++ b/include/video/adf_client.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _VIDEO_ADF_CLIENT_H_
+#define _VIDEO_ADF_CLIENT_H_
+
+#include <video/adf.h>
+
+int adf_interface_blank(struct adf_interface *intf, u8 state);
+u8 adf_interface_dpms_state(struct adf_interface *intf);
+
+void adf_interface_current_mode(struct adf_interface *intf,
+ struct drm_mode_modeinfo *mode);
+size_t adf_interface_modelist(struct adf_interface *intf,
+ struct drm_mode_modeinfo *modelist, size_t n_modes);
+int adf_interface_set_mode(struct adf_interface *intf,
+ struct drm_mode_modeinfo *mode);
+int adf_interface_get_screen_size(struct adf_interface *intf, u16 *width,
+ u16 *height);
+int adf_interface_simple_buffer_alloc(struct adf_interface *intf, u16 w, u16 h,
+ u32 format, struct dma_buf **dma_buf, u32 *offset, u32 *pitch);
+struct sync_fence *adf_interface_simple_post(struct adf_interface *intf,
+ struct adf_buffer *buf);
+
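+/*
+ * Illustrative client flow (not part of this patch), assuming "intf" is a
+ * connected interface; all other names are hypothetical:
+ *
+ * struct dma_buf *buf;
+ * u32 offset, pitch;
+ * struct sync_fence *fence;
+ *
+ * err = adf_interface_simple_buffer_alloc(intf, 1280, 720,
+ * DRM_FORMAT_XRGB8888, &buf, &offset, &pitch);
+ * (draw into buf, then wrap it in a struct adf_buffer "fb")
+ * fence = adf_interface_simple_post(intf, &fb);
+ * (drop our reference once the fence is no longer needed)
+ * sync_fence_put(fence);
+ */
+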
+bool adf_overlay_engine_supports_format(struct adf_overlay_engine *eng,
+ u32 format);
+
+size_t adf_device_attachments(struct adf_device *dev,
+ struct adf_attachment *attachments, size_t n_attachments);
+size_t adf_device_attachments_allowed(struct adf_device *dev,
+ struct adf_attachment *attachments, size_t n_attachments);
+bool adf_device_attached(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf);
+bool adf_device_attach_allowed(struct adf_device *dev,
+ struct adf_overlay_engine *eng, struct adf_interface *intf);
+int adf_device_attach(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf);
+int adf_device_detach(struct adf_device *dev, struct adf_overlay_engine *eng,
+ struct adf_interface *intf);
+
+struct sync_fence *adf_device_post(struct adf_device *dev,
+ struct adf_interface **intfs, size_t n_intfs,
+ struct adf_buffer *bufs, size_t n_bufs, void *custom_data,
+ size_t custom_data_size);
+struct sync_fence *adf_device_post_nocopy(struct adf_device *dev,
+ struct adf_interface **intfs, size_t n_intfs,
+ struct adf_buffer *bufs, size_t n_bufs, void *custom_data,
+ size_t custom_data_size);
+
+#endif /* _VIDEO_ADF_CLIENT_H_ */
diff --git a/include/video/adf_fbdev.h b/include/video/adf_fbdev.h
new file mode 100644
index 000000000000..b722c6b3ab02
--- /dev/null
+++ b/include/video/adf_fbdev.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _VIDEO_ADF_FBDEV_H_
+#define _VIDEO_ADF_FBDEV_H_
+
+#include <linux/fb.h>
+#include <linux/mutex.h>
+#include <video/adf.h>
+
+struct adf_fbdev {
+ struct adf_interface *intf;
+ struct adf_overlay_engine *eng;
+ struct fb_info *info;
+ u32 pseudo_palette[16];
+
+ unsigned int refcount;
+ struct mutex refcount_lock;
+
+ struct dma_buf *dma_buf;
+ u32 offset;
+ u32 pitch;
+ void *vaddr;
+ u32 format;
+
+ u16 default_xres_virtual;
+ u16 default_yres_virtual;
+ u32 default_format;
+};
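+
+/*
+ * Illustrative sketch (not part of this patch): wiring the helpers declared
+ * below into an fb_ops table for a hypothetical driver.
+ *
+ * static struct fb_ops example_adf_fb_ops = {
+ * .owner = THIS_MODULE,
+ * .fb_open = adf_fbdev_open,
+ * .fb_release = adf_fbdev_release,
+ * .fb_check_var = adf_fbdev_check_var,
+ * .fb_set_par = adf_fbdev_set_par,
+ * .fb_blank = adf_fbdev_blank,
+ * .fb_pan_display = adf_fbdev_pan_display,
+ * .fb_mmap = adf_fbdev_mmap,
+ * };
+ */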
+
+#if IS_ENABLED(CONFIG_ADF_FBDEV)
+void adf_modeinfo_to_fb_videomode(const struct drm_mode_modeinfo *mode,
+ struct fb_videomode *vmode);
+void adf_modeinfo_from_fb_videomode(const struct fb_videomode *vmode,
+ struct drm_mode_modeinfo *mode);
+
+int adf_fbdev_init(struct adf_fbdev *fbdev, struct adf_interface *interface,
+ struct adf_overlay_engine *eng,
+ u16 xres_virtual, u16 yres_virtual, u32 format,
+ struct fb_ops *fbops, const char *fmt, ...);
+void adf_fbdev_destroy(struct adf_fbdev *fbdev);
+
+int adf_fbdev_open(struct fb_info *info, int user);
+int adf_fbdev_release(struct fb_info *info, int user);
+int adf_fbdev_check_var(struct fb_var_screeninfo *var, struct fb_info *info);
+int adf_fbdev_set_par(struct fb_info *info);
+int adf_fbdev_blank(int blank, struct fb_info *info);
+int adf_fbdev_pan_display(struct fb_var_screeninfo *var, struct fb_info *info);
+int adf_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma);
+#else
+static inline void adf_modeinfo_to_fb_videomode(const struct drm_mode_modeinfo *mode,
+ struct fb_videomode *vmode)
+{
+ WARN_ONCE(1, "%s: CONFIG_ADF_FBDEV is disabled\n", __func__);
+}
+
+static inline void adf_modeinfo_from_fb_videomode(const struct fb_videomode *vmode,
+ struct drm_mode_modeinfo *mode)
+{
+ WARN_ONCE(1, "%s: CONFIG_ADF_FBDEV is disabled\n", __func__);
+}
+
+static inline int adf_fbdev_init(struct adf_fbdev *fbdev,
+ struct adf_interface *interface,
+ struct adf_overlay_engine *eng,
+ u16 xres_virtual, u16 yres_virtual, u32 format,
+ struct fb_ops *fbops, const char *fmt, ...)
+{
+ return -ENODEV;
+}
+
+static inline void adf_fbdev_destroy(struct adf_fbdev *fbdev) { }
+
+static inline int adf_fbdev_open(struct fb_info *info, int user)
+{
+ return -ENODEV;
+}
+
+static inline int adf_fbdev_release(struct fb_info *info, int user)
+{
+ return -ENODEV;
+}
+
+static inline int adf_fbdev_check_var(struct fb_var_screeninfo *var,
+ struct fb_info *info)
+{
+ return -ENODEV;
+}
+
+static inline int adf_fbdev_set_par(struct fb_info *info)
+{
+ return -ENODEV;
+}
+
+static inline int adf_fbdev_blank(int blank, struct fb_info *info)
+{
+ return -ENODEV;
+}
+
+static inline int adf_fbdev_pan_display(struct fb_var_screeninfo *var,
+ struct fb_info *info)
+{
+ return -ENODEV;
+}
+
+static inline int adf_fbdev_mmap(struct fb_info *info,
+ struct vm_area_struct *vma)
+{
+ return -ENODEV;
+}
+#endif
+
+#endif /* _VIDEO_ADF_FBDEV_H_ */
diff --git a/include/video/adf_format.h b/include/video/adf_format.h
new file mode 100644
index 000000000000..e03182cdcb06
--- /dev/null
+++ b/include/video/adf_format.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _VIDEO_ADF_FORMAT_H
+#define _VIDEO_ADF_FORMAT_H
+
+bool adf_format_is_standard(u32 format);
+bool adf_format_is_rgb(u32 format);
+u8 adf_format_num_planes(u32 format);
+u8 adf_format_bpp(u32 format);
+u8 adf_format_plane_cpp(u32 format, int plane);
+u8 adf_format_horz_chroma_subsampling(u32 format);
+u8 adf_format_vert_chroma_subsampling(u32 format);
+
+#endif /* _VIDEO_ADF_FORMAT_H */
diff --git a/include/video/adf_memblock.h b/include/video/adf_memblock.h
new file mode 100644
index 000000000000..6256e0eebcc9
--- /dev/null
+++ b/include/video/adf_memblock.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _VIDEO_ADF_MEMBLOCK_H_
+#define _VIDEO_ADF_MEMBLOCK_H_
+
+struct dma_buf *adf_memblock_export(phys_addr_t base, size_t size, int flags);
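+
+/*
+ * Illustrative use (not part of this patch): wrapping a physically
+ * contiguous, memblock-reserved framebuffer so it can be attached as an
+ * adf_buffer plane. "fb_base" and "fb_size" are hypothetical.
+ *
+ * struct dma_buf *buf = adf_memblock_export(fb_base, fb_size, 0);
+ */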
+
+#endif /* _VIDEO_ADF_MEMBLOCK_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index 8fa4f758821a..79b356f892f9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1251,6 +1251,12 @@ config HOTPLUG
config HAVE_PCSPKR_PLATFORM
bool
+config PANIC_TIMEOUT
+ int "Default panic timeout"
+ default 0
+ help
+ Set default panic timeout.
+
menuconfig EXPERT
bool "Configure standard kernel features (expert users)"
# Unhide debug options, to make the on-by-default options visible
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ef130605ac43..fcf0aeff45b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2106,6 +2106,43 @@ out_free_group_list:
return retval;
}
+static int cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys *ss;
+ int ret;
+
+ for_each_subsys(cgrp->root, ss) {
+ if (ss->allow_attach) {
+ ret = ss->allow_attach(cgrp, tset);
+ if (ret)
+ return ret;
+ } else {
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
+int subsys_cgroup_allow_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+{
+ const struct cred *cred = current_cred(), *tcred;
+ struct task_struct *task;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ cgroup_taskset_for_each(task, cgrp, tset) {
+ tcred = __task_cred(task);
+
+ if (current != task && !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid))
+ return -EACCES;
+ }
+
+ return 0;
+}
+
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
@@ -2137,9 +2174,18 @@ retry_find_task:
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
- rcu_read_unlock();
- ret = -EACCES;
- goto out_unlock_cgroup;
+ /*
+ * if the default permission check fails, give each
+ * cgroup a chance to extend the permission check
+ */
+ struct cgroup_taskset tset = { };
+ tset.single.task = tsk;
+ tset.single.cgrp = cgrp;
+ ret = cgroup_allow_attach(cgrp, &tset);
+ if (ret) {
+ rcu_read_unlock();
+ goto out_unlock_cgroup;
+ }
}
} else
tsk = current;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5fbcbdd31fb3..f1cfa7367a92 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -745,3 +745,23 @@ void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_online_bits), src);
}
+
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+
+void idle_notifier_call_chain(unsigned long val)
+{
+ atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_call_chain);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0506d447aed2..2d4438b14b42 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -86,6 +86,10 @@ static int kgdb_use_con;
bool dbg_is_early = true;
/* Next cpu to become the master debug core */
int dbg_switch_cpu;
+/* Flag for entering kdb when a panic occurs */
+static bool break_on_panic = true;
+/* Flag for entering kdb when an exception occurs */
+static bool break_on_exception = true;
/* Use kdb or gdbserver mode */
int dbg_kdb_mode = 1;
@@ -100,6 +104,8 @@ early_param("kgdbcon", opt_kgdb_con);
module_param(kgdb_use_con, int, 0644);
module_param(kgdbreboot, int, 0644);
+module_param(break_on_panic, bool, 0644);
+module_param(break_on_exception, bool, 0644);
/*
* Holds information about breakpoints in a kernel. These breakpoints are
@@ -678,6 +684,9 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
if (arch_kgdb_ops.enable_nmi)
arch_kgdb_ops.enable_nmi(0);
+ if (unlikely(signo != SIGTRAP && !break_on_exception))
+ return 1;
+
ks->cpu = raw_smp_processor_id();
ks->ex_vector = evector;
ks->signo = signo;
@@ -784,6 +793,9 @@ static int kgdb_panic_event(struct notifier_block *self,
unsigned long val,
void *data)
{
+ if (!break_on_panic)
+ return NOTIFY_DONE;
+
if (dbg_kdb_mode)
kdb_printf("PANIC: %s\n", (char *)data);
kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 14ff4849262c..4b0fb2fb779c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize)
int i;
int diag, dtab_count;
int key;
-
+ static int last_crlf;
diag = kdbgetintenv("DTABCOUNT", &dtab_count);
if (diag)
@@ -237,6 +237,9 @@ poll_again:
return buffer;
if (key != 9)
tab = 0;
+ if (key != 10 && key != 13)
+ last_crlf = 0;
+
switch (key) {
case 8: /* backspace */
if (cp > buffer) {
@@ -254,7 +257,12 @@ poll_again:
*cp = tmp;
}
break;
- case 13: /* enter */
+ case 10: /* new line */
+ case 13: /* carriage return */
+ /* handle \n after \r */
+ if (last_crlf && last_crlf != key)
+ break;
+ last_crlf = key;
*lastchar++ = '\n';
*lastchar++ = '\0';
if (!KDB_STATE(KGDB_TRANS)) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 717efbd7cb72..33fde71b83d0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -841,7 +841,7 @@ void do_exit(long code)
/*
* Make sure we are holding no locks:
*/
- debug_check_no_locks_held(tsk);
+ debug_check_no_locks_held();
/*
* We can do this unlocked here. The futex code uses this flag
* just to verify whether the pi state cleanup has been done
diff --git a/kernel/fork.c b/kernel/fork.c
index 514dbc40f98f..b0663950634b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -198,6 +198,9 @@ struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
+/* Notifier list called when a task struct is freed */
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+
static void account_kernel_stack(struct thread_info *ti, int account)
{
struct zone *zone = page_zone(virt_to_page(ti));
@@ -231,6 +234,18 @@ static inline void put_signal_struct(struct signal_struct *sig)
free_signal_struct(sig);
}
+int task_free_register(struct notifier_block *n)
+{
+ return atomic_notifier_chain_register(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_register);
+
+int task_free_unregister(struct notifier_block *n)
+{
+ return atomic_notifier_chain_unregister(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_unregister);
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -242,6 +257,7 @@ void __put_task_struct(struct task_struct *tsk)
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
+ atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
if (!profile_handoff_task(tsk))
free_task(tsk);
}
@@ -311,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto free_ti;
tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+ /*
+ * We must handle setting up seccomp filters once we're under
+ * the sighand lock in case orig has changed between now and
+ * then. Until then, filter must be NULL to avoid messing up
+ * the usage counts on the error path calling free_task.
+ */
+ tsk->seccomp.filter = NULL;
+#endif
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
@@ -697,7 +722,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
mm = get_task_mm(task);
if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
+ !ptrace_may_access(task, mode) &&
+ !capable(CAP_SYS_RESOURCE)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
@@ -1091,6 +1117,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
p->flags = new_flags;
}
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+ /*
+ * Must be called with sighand->lock held, which is common to
+ * all threads in the group. Holding cred_guard_mutex is not
+ * needed because this new task is not yet running and cannot
+ * be racing exec.
+ */
+ assert_spin_locked(&current->sighand->siglock);
+
+ /* Ref-count the new filter user, and assign it. */
+ get_seccomp_filter(current);
+ p->seccomp = current->seccomp;
+
+ /*
+ * Explicitly enable no_new_privs here in case it got set
+ * between the task_struct being duplicated and holding the
+ * sighand lock. The seccomp state and nnp must be in sync.
+ */
+ if (task_no_new_privs(current))
+ task_set_no_new_privs(p);
+
+ /*
+ * If the parent gained a seccomp mode after copying thread
+ * flags and before we held the sighand lock, we have
+ * to manually enable the seccomp thread flag here.
+ */
+ if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+ set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
current->clear_child_tid = tidptr;
@@ -1195,7 +1254,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto fork_out;
ftrace_graph_init_task(p);
- get_seccomp_filter(p);
rt_mutex_init_task(p);
@@ -1438,6 +1496,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_lock(&current->sighand->siglock);
/*
+ * Copy seccomp details explicitly here, in case they were changed
+ * before holding sighand lock.
+ */
+ copy_seccomp(p);
+
+ /*
* Process group and session signals need to be delivered to just the
* parent before the fork or both the parent and the child after the
* fork. Restart if a signal comes in before we add the new process to
diff --git a/kernel/freezer.c b/kernel/freezer.c
index bd733f6e610d..4ada72f5f55a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -119,6 +119,18 @@ bool freeze_task(struct task_struct *p)
{
unsigned long flags;
+ /*
+ * This check can race with freezer_do_not_count, but worst case that
+ * will result in an extra wakeup being sent to the task. It does not
+ * race with freezer_count(); the barriers in freezer_count() and
+ * freezer_should_skip() ensure that either freezer_count() sees
+ * freezing == true in try_to_freeze() and freezes, or
+ * freezer_should_skip() sees !PF_FREEZER_SKIP and freezes the task
+ * normally.
+ */
+ if (freezer_should_skip(p))
+ return false;
+
spin_lock_irqsave(&freezer_lock, flags);
if (!freezing(p) || frozen(p)) {
spin_unlock_irqrestore(&freezer_lock, flags);
diff --git a/kernel/futex.c b/kernel/futex.c
index 625a4e659e7a..ad971d0f0be0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -62,6 +62,7 @@
#include <linux/ptrace.h>
#include <linux/sched/rt.h>
#include <linux/hugetlb.h>
+#include <linux/freezer.h>
#include <asm/futex.h>
@@ -1935,7 +1936,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
* is no timeout, or if it has yet to expire.
*/
if (!timeout || timeout->task)
- schedule();
+ freezable_schedule();
}
__set_current_state(TASK_RUNNING);
}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index aadf4b7a607c..1e84be107436 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -47,6 +47,7 @@
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/timer.h>
+#include <linux/freezer.h>
#include <asm/uaccess.h>
@@ -1565,7 +1566,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
t->task = NULL;
if (likely(t->task))
- schedule();
+ freezable_schedule();
hrtimer_cancel(&t->timer);
mode = HRTIMER_MODE_ABS;
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index abcd6ca86cb7..da5f2ffa7106 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -10,7 +10,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/syscore_ops.h>
-
+#include <linux/wakeup_reason.h>
#include "internals.h"
/**
@@ -103,14 +103,18 @@ int check_wakeup_irqs(void)
int irq;
for_each_irq_desc(irq, desc) {
- /*
- * Only interrupts which are marked as wakeup source
- * and have not been disabled before the suspend check
- * can abort suspend.
- */
if (irqd_is_wakeup_set(&desc->irq_data)) {
- if (desc->depth == 1 && desc->istate & IRQS_PENDING)
+ if (desc->istate & IRQS_PENDING) {
+ log_suspend_abort_reason("Wakeup IRQ %d %s pending",
+ irq,
+ desc->action && desc->action->name ?
+ desc->action->name : "");
+ pr_info("Wakeup IRQ %d %s pending, suspend aborted\n",
+ irq,
+ desc->action && desc->action->name ?
+ desc->action->name : "");
return -EBUSY;
+ }
continue;
}
/*
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 1f3186b37fd5..e16c45b9ee77 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4090,7 +4090,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
}
EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
-static void print_held_locks_bug(struct task_struct *curr)
+static void print_held_locks_bug(void)
{
if (!debug_locks_off())
return;
@@ -4099,22 +4099,21 @@ static void print_held_locks_bug(struct task_struct *curr)
printk("\n");
printk("=====================================\n");
- printk("[ BUG: lock held at task exit time! ]\n");
+ printk("[ BUG: %s/%d still has locks held! ]\n",
+ current->comm, task_pid_nr(current));
print_kernel_ident();
printk("-------------------------------------\n");
- printk("%s/%d is exiting with locks still held!\n",
- curr->comm, task_pid_nr(curr));
- lockdep_print_held_locks(curr);
-
+ lockdep_print_held_locks(current);
printk("\nstack backtrace:\n");
dump_stack();
}
-void debug_check_no_locks_held(struct task_struct *task)
+void debug_check_no_locks_held(void)
{
- if (unlikely(task->lockdep_depth > 0))
- print_held_locks_bug(task);
+ if (unlikely(current->lockdep_depth > 0))
+ print_held_locks_bug();
}
+EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
void debug_show_all_locks(void)
{
diff --git a/kernel/panic.c b/kernel/panic.c
index 167ec097ce8b..126b2ef2eb61 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -26,13 +26,19 @@
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
+/* Machine specific panic information string */
+char *mach_panic_string;
+
int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
-int panic_timeout;
+#ifndef CONFIG_PANIC_TIMEOUT
+#define CONFIG_PANIC_TIMEOUT 0
+#endif
+int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -375,6 +381,11 @@ late_initcall(init_oops_id);
void print_oops_end_marker(void)
{
init_oops_id();
+
+ if (mach_panic_string)
+ printk(KERN_WARNING "Board Information: %s\n",
+ mach_panic_string);
+
printk(KERN_WARNING "---[ end trace %016llx ]---\n",
(unsigned long long)oops_id);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 52423b5d7ae9..e97c63994f6b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,6 +18,14 @@ config SUSPEND_FREEZER
Turning OFF this setting is NOT recommended! If in doubt, say Y.
+config HAS_WAKELOCK
+ bool
+ default y
+
+config WAKELOCK
+ bool
+ default y
+
config HIBERNATE_CALLBACKS
bool
@@ -298,3 +306,10 @@ config PM_GENERIC_DOMAINS_OF
config CPU_PM
bool
depends on SUSPEND || CPU_IDLE
+
+config SUSPEND_TIME
+ bool "Log time spent in suspend"
+ ---help---
+ Prints the time spent in suspend in the kernel log, and
+ keeps statistics on the time spent in suspend in
+ /sys/kernel/debug/suspend_time
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bff11ef..74c713ba61b0 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -11,5 +11,8 @@ obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
block_io.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
+obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
+
+obj-$(CONFIG_SUSPEND) += wakeup_reason.o
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0695319b5fde..d1ccce3cfee8 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -17,7 +17,7 @@
#include <linux/delay.h>
#include <linux/workqueue.h>
#include <linux/kmod.h>
-
+#include <linux/wakeup_reason.h>
/*
* Timeout for stopping processes
*/
@@ -30,9 +30,13 @@ static int try_to_freeze_tasks(bool user_only)
unsigned int todo;
bool wq_busy = false;
struct timeval start, end;
- u64 elapsed_csecs64;
- unsigned int elapsed_csecs;
+ u64 elapsed_msecs64;
+ unsigned int elapsed_msecs;
bool wakeup = false;
+ int sleep_usecs = USEC_PER_MSEC;
+#ifdef CONFIG_PM_SLEEP
+ char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+#endif
do_gettimeofday(&start);
@@ -62,42 +66,51 @@ static int try_to_freeze_tasks(bool user_only)
break;
if (pm_wakeup_pending()) {
+#ifdef CONFIG_PM_SLEEP
+ pm_get_active_wakeup_sources(suspend_abort,
+ MAX_SUSPEND_ABORT_LEN);
+ log_suspend_abort_reason(suspend_abort);
+#endif
wakeup = true;
break;
}
/*
* We need to retry, but first give the freezing tasks some
- * time to enter the refrigerator.
+ * time to enter the refrigerator. Start with an initial
+ * 1 ms sleep followed by exponential backoff until 8 ms.
*/
- msleep(10);
+ usleep_range(sleep_usecs / 2, sleep_usecs);
+ if (sleep_usecs < 8 * USEC_PER_MSEC)
+ sleep_usecs *= 2;
}
do_gettimeofday(&end);
- elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
- do_div(elapsed_csecs64, NSEC_PER_SEC / 100);
- elapsed_csecs = elapsed_csecs64;
+ elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
+ do_div(elapsed_msecs64, NSEC_PER_MSEC);
+ elapsed_msecs = elapsed_msecs64;
- if (todo) {
+ if (wakeup) {
+ printk("\n");
+ printk(KERN_ERR "Freezing of tasks aborted after %d.%03d seconds",
+ elapsed_msecs / 1000, elapsed_msecs % 1000);
+ } else if (todo) {
printk("\n");
- printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
- "(%d tasks refusing to freeze, wq_busy=%d):\n",
- wakeup ? "aborted" : "failed",
- elapsed_csecs / 100, elapsed_csecs % 100,
+ printk(KERN_ERR "Freezing of tasks failed after %d.%03d seconds"
+ " (%d tasks refusing to freeze, wq_busy=%d):\n",
+ elapsed_msecs / 1000, elapsed_msecs % 1000,
todo - wq_busy, wq_busy);
- if (!wakeup) {
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- if (p != current && !freezer_should_skip(p)
- && freezing(p) && !frozen(p))
- sched_show_task(p);
- } while_each_thread(g, p);
- read_unlock(&tasklist_lock);
- }
+ read_lock(&tasklist_lock);
+ do_each_thread(g, p) {
+ if (p != current && !freezer_should_skip(p)
+ && freezing(p) && !frozen(p))
+ sched_show_task(p);
+ } while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
} else {
- printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100,
- elapsed_csecs % 100);
+ printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
+ elapsed_msecs % 1000);
}
return todo ? -EBUSY : 0;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 903c517b14da..82450c20875c 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -25,7 +25,9 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/ftrace.h>
+#include <linux/rtc.h>
#include <trace/events/power.h>
+#include <linux/wakeup_reason.h>
#include "power.h"
@@ -138,7 +140,7 @@ static int suspend_prepare(suspend_state_t state)
error = suspend_freeze_processes();
if (!error)
return 0;
-
+ log_suspend_abort_reason("One or more tasks refusing to freeze");
suspend_stats.failed_freeze++;
dpm_save_failed_step(SUSPEND_FREEZE);
Finish:
@@ -168,7 +170,8 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
*/
static int suspend_enter(suspend_state_t state, bool *wakeup)
{
- int error;
+ char suspend_abort[MAX_SUSPEND_ABORT_LEN];
+ int error, last_dev;
if (need_suspend_ops(state) && suspend_ops->prepare) {
error = suspend_ops->prepare();
@@ -178,7 +181,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
error = dpm_suspend_end(PMSG_SUSPEND);
if (error) {
+ last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+ last_dev %= REC_FAILED_NUM;
printk(KERN_ERR "PM: Some devices failed to power down\n");
+ log_suspend_abort_reason("%s device failed to power down",
+ suspend_stats.failed_devs[last_dev]);
goto Platform_finish;
}
@@ -203,8 +210,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
}
error = disable_nonboot_cpus();
- if (error || suspend_test(TEST_CPUS))
+ if (error || suspend_test(TEST_CPUS)) {
+ log_suspend_abort_reason("Disabling non-boot cpus failed");
goto Enable_cpus;
+ }
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
@@ -215,6 +224,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
if (!(suspend_test(TEST_CORE) || *wakeup)) {
error = suspend_ops->enter(state);
events_check_enabled = false;
+ } else {
+ pm_get_active_wakeup_sources(suspend_abort,
+ MAX_SUSPEND_ABORT_LEN);
+ log_suspend_abort_reason(suspend_abort);
}
syscore_resume();
}
@@ -262,6 +275,7 @@ int suspend_devices_and_enter(suspend_state_t state)
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to suspend\n");
+ log_suspend_abort_reason("Some devices failed to suspend");
goto Recover_platform;
}
suspend_test_finish("suspend devices");
@@ -358,6 +372,18 @@ static int enter_state(suspend_state_t state)
return error;
}
+static void pm_suspend_marker(char *annotation)
+{
+ struct timespec ts;
+ struct rtc_time tm;
+
+ getnstimeofday(&ts);
+ rtc_time_to_tm(ts.tv_sec, &tm);
+ pr_info("PM: suspend %s %d-%02d-%02d %02d:%02d:%02d.%09lu UTC\n",
+ annotation, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
+}
+
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
@@ -372,6 +398,7 @@ int pm_suspend(suspend_state_t state)
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;
+ pm_suspend_marker("entry");
error = enter_state(state);
if (error) {
suspend_stats.fail++;
@@ -379,6 +406,7 @@ int pm_suspend(suspend_state_t state)
} else {
suspend_stats.success++;
}
+ pm_suspend_marker("exit");
return error;
}
EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c
new file mode 100644
index 000000000000..d2a65da9f22c
--- /dev/null
+++ b/kernel/power/suspend_time.c
@@ -0,0 +1,111 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/syscore_ops.h>
+#include <linux/time.h>
+
+static struct timespec suspend_time_before;
+static unsigned int time_in_suspend_bins[32];
+
+#ifdef CONFIG_DEBUG_FS
+static int suspend_time_debug_show(struct seq_file *s, void *data)
+{
+ int bin;
+ seq_printf(s, "time (secs) count\n");
+ seq_printf(s, "------------------\n");
+ for (bin = 0; bin < 32; bin++) {
+ if (time_in_suspend_bins[bin] == 0)
+ continue;
+ seq_printf(s, "%4d - %4d %4u\n",
+ bin ? 1 << (bin - 1) : 0, 1 << bin,
+ time_in_suspend_bins[bin]);
+ }
+ return 0;
+}
+
+static int suspend_time_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, suspend_time_debug_show, NULL);
+}
+
+static const struct file_operations suspend_time_debug_fops = {
+ .open = suspend_time_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init suspend_time_debug_init(void)
+{
+ struct dentry *d;
+
+ d = debugfs_create_file("suspend_time", 0755, NULL, NULL,
+ &suspend_time_debug_fops);
+ if (!d) {
+ pr_err("Failed to create suspend_time debug file\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+late_initcall(suspend_time_debug_init);
+#endif
+
+static int suspend_time_syscore_suspend(void)
+{
+ read_persistent_clock(&suspend_time_before);
+
+ return 0;
+}
+
+static void suspend_time_syscore_resume(void)
+{
+ struct timespec after;
+
+ read_persistent_clock(&after);
+
+ after = timespec_sub(after, suspend_time_before);
+
+ time_in_suspend_bins[fls(after.tv_sec)]++;
+
+ pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec,
+ after.tv_nsec / NSEC_PER_MSEC);
+}
+
+static struct syscore_ops suspend_time_syscore_ops = {
+ .suspend = suspend_time_syscore_suspend,
+ .resume = suspend_time_syscore_resume,
+};
+
+static int suspend_time_syscore_init(void)
+{
+ register_syscore_ops(&suspend_time_syscore_ops);
+
+ return 0;
+}
+
+static void suspend_time_syscore_exit(void)
+{
+ unregister_syscore_ops(&suspend_time_syscore_ops);
+}
+module_init(suspend_time_syscore_init);
+module_exit(suspend_time_syscore_exit);
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index 8f50de394d22..c8fba3380076 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -9,7 +9,6 @@
* manipulate wakelocks on Android.
*/
-#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -189,9 +188,6 @@ int pm_wake_lock(const char *buf)
size_t len;
int ret = 0;
- if (!capable(CAP_BLOCK_SUSPEND))
- return -EPERM;
-
while (*str && !isspace(*str))
str++;
@@ -235,9 +231,6 @@ int pm_wake_unlock(const char *buf)
size_t len;
int ret = 0;
- if (!capable(CAP_BLOCK_SUSPEND))
- return -EPERM;
-
len = strlen(buf);
if (!len)
return -EINVAL;
diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c
new file mode 100644
index 000000000000..76b53400cb16
--- /dev/null
+++ b/kernel/power/wakeup_reason.c
@@ -0,0 +1,216 @@
+/*
+ * kernel/power/wakeup_reason.c
+ *
+ * Logs the reasons which caused the kernel to resume from
+ * the suspend mode.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/wakeup_reason.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
+
+
+#define MAX_WAKEUP_REASON_IRQS 32
+static int irq_list[MAX_WAKEUP_REASON_IRQS];
+static int irqcount;
+static bool suspend_abort;
+static char abort_reason[MAX_SUSPEND_ABORT_LEN];
+static struct kobject *wakeup_reason;
+static DEFINE_SPINLOCK(resume_reason_lock);
+
+static struct timespec last_xtime; /* wall time before last suspend */
+static struct timespec curr_xtime; /* wall time after last suspend */
+static struct timespec last_stime; /* total_sleep_time before last suspend */
+static struct timespec curr_stime; /* total_sleep_time after last suspend */
+
+static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ int irq_no, buf_offset = 0;
+ struct irq_desc *desc;
+ spin_lock(&resume_reason_lock);
+ if (suspend_abort) {
+ buf_offset = sprintf(buf, "Abort: %s", abort_reason);
+ } else {
+ for (irq_no = 0; irq_no < irqcount; irq_no++) {
+ desc = irq_to_desc(irq_list[irq_no]);
+ if (desc && desc->action && desc->action->name)
+ buf_offset += sprintf(buf + buf_offset, "%d %s\n",
+ irq_list[irq_no], desc->action->name);
+ else
+ buf_offset += sprintf(buf + buf_offset, "%d\n",
+ irq_list[irq_no]);
+ }
+ }
+ spin_unlock(&resume_reason_lock);
+ return buf_offset;
+}
+
+static ssize_t last_suspend_time_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct timespec sleep_time;
+ struct timespec total_time;
+ struct timespec suspend_resume_time;
+
+ sleep_time = timespec_sub(curr_stime, last_stime);
+ total_time = timespec_sub(curr_xtime, last_xtime);
+ suspend_resume_time = timespec_sub(total_time, sleep_time);
+
+ /*
+ * suspend_resume_time is calculated from sleep_time. Userspace would
+ * always need both, so export them as a pair here.
+ */
+ return sprintf(buf, "%lu.%09lu %lu.%09lu\n",
+ suspend_resume_time.tv_sec, suspend_resume_time.tv_nsec,
+ sleep_time.tv_sec, sleep_time.tv_nsec);
+}
+
+static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason);
+static struct kobj_attribute suspend_time = __ATTR_RO(last_suspend_time);
+
+static struct attribute *attrs[] = {
+ &resume_reason.attr,
+ &suspend_time.attr,
+ NULL,
+};
+static struct attribute_group attr_group = {
+ .attrs = attrs,
+};
+
+/*
+ * Logs the wakeup IRQ to the kernel log and stores it so it can be
+ * exposed to userspace via sysfs.
+ */
+void log_wakeup_reason(int irq)
+{
+ struct irq_desc *desc;
+ desc = irq_to_desc(irq);
+ if (desc && desc->action && desc->action->name)
+ printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq,
+ desc->action->name);
+ else
+ printk(KERN_INFO "Resume caused by IRQ %d\n", irq);
+
+ spin_lock(&resume_reason_lock);
+ if (irqcount == MAX_WAKEUP_REASON_IRQS) {
+ spin_unlock(&resume_reason_lock);
+ printk(KERN_WARNING "Resume caused by more than %d IRQs\n",
+ MAX_WAKEUP_REASON_IRQS);
+ return;
+ }
+
+ irq_list[irqcount++] = irq;
+ spin_unlock(&resume_reason_lock);
+}
+
+int check_wakeup_reason(int irq)
+{
+ int irq_no;
+ int ret = false;
+
+ spin_lock(&resume_reason_lock);
+ for (irq_no = 0; irq_no < irqcount; irq_no++)
+ if (irq_list[irq_no] == irq) {
+ ret = true;
+ break;
+ }
+ spin_unlock(&resume_reason_lock);
+ return ret;
+}
+
+void log_suspend_abort_reason(const char *fmt, ...)
+{
+ va_list args;
+
+ spin_lock(&resume_reason_lock);
+
+ /* Keep only the first abort reason that was logged. */
+ if (suspend_abort) {
+ spin_unlock(&resume_reason_lock);
+ return;
+ }
+
+ suspend_abort = true;
+ va_start(args, fmt);
+ vsnprintf(abort_reason, MAX_SUSPEND_ABORT_LEN, fmt, args);
+ va_end(args);
+ spin_unlock(&resume_reason_lock);
+}
+
+/* Detects a suspend and clears all the previous wakeup reasons */
+static int wakeup_reason_pm_event(struct notifier_block *notifier,
+ unsigned long pm_event, void *unused)
+{
+ struct timespec xtom; /* wall_to_monotonic, ignored */
+
+ switch (pm_event) {
+ case PM_SUSPEND_PREPARE:
+ spin_lock(&resume_reason_lock);
+ irqcount = 0;
+ suspend_abort = false;
+ spin_unlock(&resume_reason_lock);
+
+ get_xtime_and_monotonic_and_sleep_offset(&last_xtime, &xtom,
+ &last_stime);
+ break;
+ case PM_POST_SUSPEND:
+ get_xtime_and_monotonic_and_sleep_offset(&curr_xtime, &xtom,
+ &curr_stime);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block wakeup_reason_pm_notifier_block = {
+ .notifier_call = wakeup_reason_pm_event,
+};
+
+/*
+ * Initializes the sysfs entries and registers the pm_event notifier.
+ */
+int __init wakeup_reason_init(void)
+{
+ int retval;
+
+ retval = register_pm_notifier(&wakeup_reason_pm_notifier_block);
+ if (retval)
+ printk(KERN_WARNING "[%s] failed to register PM notifier %d\n",
+ __func__, retval);
+
+ wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj);
+ if (!wakeup_reason) {
+ printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n",
+ __func__);
+ return 1;
+ }
+ retval = sysfs_create_group(wakeup_reason, &attr_group);
+ if (retval) {
+ kobject_put(wakeup_reason);
+ printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n",
+ __func__, retval);
+ }
+ return 0;
+}
+
+late_initcall(wakeup_reason_init);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aa08f6419beb..ea4e780697b4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7138,13 +7138,24 @@ static inline int preempt_count_equals(int preempt_offset)
return (nested == preempt_offset);
}
+static int __might_sleep_init_called;
+int __init __might_sleep_init(void)
+{
+ __might_sleep_init_called = 1;
+ return 0;
+}
+early_initcall(__might_sleep_init);
+
void __might_sleep(const char *file, int line, int preempt_offset)
{
static unsigned long prev_jiffy; /* ratelimiting */
rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
- system_state != SYSTEM_RUNNING || oops_in_progress)
+ oops_in_progress)
+ return;
+ if (system_state != SYSTEM_RUNNING &&
+ (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
return;
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
@@ -8116,6 +8127,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.css_offline = cpu_cgroup_css_offline,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
+ .allow_attach = subsys_cgroup_allow_attach,
.exit = cpu_cgroup_exit,
.subsys_id = cpu_cgroup_subsys_id,
.base_cftypes = cpu_files,
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b7a10048a32c..1fbb1a2bc459 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,15 +18,17 @@
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/seccomp.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
/* #define SECCOMP_DEBUG 1 */
#ifdef CONFIG_SECCOMP_FILTER
#include <asm/syscall.h>
#include <linux/filter.h>
+#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/security.h>
-#include <linux/slab.h>
#include <linux/tracehook.h>
#include <linux/uaccess.h>
@@ -95,7 +97,7 @@ u32 seccomp_bpf_load(int off)
if (off == BPF_DATA(nr))
return syscall_get_nr(current, regs);
if (off == BPF_DATA(arch))
- return syscall_get_arch(current, regs);
+ return syscall_get_arch();
if (off >= BPF_DATA(args[0]) && off < BPF_DATA(args[6])) {
unsigned long value;
int arg = (off - BPF_DATA(args[0])) / sizeof(u64);
@@ -201,32 +203,170 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
*/
static u32 seccomp_run_filters(int syscall)
{
- struct seccomp_filter *f;
+ struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
u32 ret = SECCOMP_RET_ALLOW;
/* Ensure unexpected behavior doesn't result in failing open. */
- if (WARN_ON(current->seccomp.filter == NULL))
+ if (unlikely(WARN_ON(f == NULL)))
return SECCOMP_RET_KILL;
+ /* Make sure cross-thread synced filter points somewhere sane. */
+ smp_read_barrier_depends();
+
/*
* All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA).
*/
- for (f = current->seccomp.filter; f; f = f->prev) {
+ for (; f; f = f->prev) {
u32 cur_ret = sk_run_filter(NULL, f->insns);
+
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
ret = cur_ret;
}
return ret;
}
+#endif /* CONFIG_SECCOMP_FILTER */
+
+static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+{
+ assert_spin_locked(&current->sighand->siglock);
+
+ if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
+ return false;
+
+ return true;
+}
+
+static inline void seccomp_assign_mode(struct task_struct *task,
+ unsigned long seccomp_mode)
+{
+ assert_spin_locked(&task->sighand->siglock);
+
+ task->seccomp.mode = seccomp_mode;
+ /*
+ * Make sure TIF_SECCOMP cannot be set before the mode (and
+ * filter) is set.
+ */
+ smp_mb();
+ set_tsk_thread_flag(task, TIF_SECCOMP);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+/* Returns 1 if the parent is an ancestor of the child. */
+static int is_ancestor(struct seccomp_filter *parent,
+ struct seccomp_filter *child)
+{
+ /* NULL is the root ancestor. */
+ if (parent == NULL)
+ return 1;
+ for (; child; child = child->prev)
+ if (child == parent)
+ return 1;
+ return 0;
+}
/**
- * seccomp_attach_filter: Attaches a seccomp filter to current.
+ * seccomp_can_sync_threads: checks if all threads can be synchronized
+ *
+ * Expects sighand and cred_guard_mutex locks to be held.
+ *
+ * Returns 0 on success, -ve on error, or the pid of a thread which was
+ * either not in the correct seccomp mode or did not have an ancestral
+ * seccomp filter.
+ */
+static inline pid_t seccomp_can_sync_threads(void)
+{
+ struct task_struct *thread, *caller;
+
+ BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+ assert_spin_locked(&current->sighand->siglock);
+
+ /* Validate all threads being eligible for synchronization. */
+ caller = current;
+ for_each_thread(caller, thread) {
+ pid_t failed;
+
+ /* Skip current, since it is initiating the sync. */
+ if (thread == caller)
+ continue;
+
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
+ (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
+ is_ancestor(thread->seccomp.filter,
+ caller->seccomp.filter)))
+ continue;
+
+ /* Return the first thread that cannot be synchronized. */
+ failed = task_pid_vnr(thread);
+ /* If the pid cannot be resolved, then return -ESRCH */
+ if (unlikely(WARN_ON(failed == 0)))
+ failed = -ESRCH;
+ return failed;
+ }
+
+ return 0;
+}
+
+/**
+ * seccomp_sync_threads: sets all threads to use current's filter
+ *
+ * Expects sighand and cred_guard_mutex locks to be held, and for
+ * seccomp_can_sync_threads() to have returned success already
+ * without dropping the locks.
+ *
+ */
+static inline void seccomp_sync_threads(void)
+{
+ struct task_struct *thread, *caller;
+
+ BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+ assert_spin_locked(&current->sighand->siglock);
+
+ /* Synchronize all threads. */
+ caller = current;
+ for_each_thread(caller, thread) {
+ /* Skip current, since it needs no changes. */
+ if (thread == caller)
+ continue;
+
+ /* Get a task reference for the new leaf node. */
+ get_seccomp_filter(caller);
+ /*
+ * Drop the task reference to the shared ancestor since
+ * current's path will hold a reference. (This also
+ * allows a put before the assignment.)
+ */
+ put_seccomp_filter(thread);
+ smp_store_release(&thread->seccomp.filter,
+ caller->seccomp.filter);
+ /*
+ * Opt the other thread into seccomp if needed.
+ * As threads are considered to be trust-realm
+ * equivalent (see ptrace_may_access), it is safe to
+ * allow one thread to transition the other.
+ */
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
+ /*
+ * Don't let an unprivileged task work around
+ * the no_new_privs restriction by creating
+ * a thread that sets it up, enters seccomp,
+ * then dies.
+ */
+ if (task_no_new_privs(caller))
+ task_set_no_new_privs(thread);
+
+ seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+ }
+ }
+}
+
+/**
+ * seccomp_prepare_filter: Prepares a seccomp filter for use.
* @fprog: BPF program to install
*
- * Returns 0 on success or an errno on failure.
+ * Returns filter on success or an ERR_PTR on failure.
*/
-static long seccomp_attach_filter(struct sock_fprog *fprog)
+static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
struct seccomp_filter *filter;
unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
@@ -234,12 +374,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
long ret;
if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
for (filter = current->seccomp.filter; filter; filter = filter->prev)
total_insns += filter->len + 4; /* include a 4 instr penalty */
if (total_insns > MAX_INSNS_PER_PATH)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
/*
* Installing a seccomp filter requires that the task have
@@ -247,16 +388,16 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
* This avoids scenarios where unprivileged tasks can affect the
* behavior of privileged children.
*/
- if (!current->no_new_privs &&
+ if (!task_no_new_privs(current) &&
security_capable_noaudit(current_cred(), current_user_ns(),
CAP_SYS_ADMIN) != 0)
- return -EACCES;
+ return ERR_PTR(-EACCES);
/* Allocate a new seccomp_filter */
filter = kzalloc(sizeof(struct seccomp_filter) + fp_size,
GFP_KERNEL|__GFP_NOWARN);
if (!filter)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
atomic_set(&filter->usage, 1);
filter->len = fprog->len;
@@ -275,28 +416,24 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
if (ret)
goto fail;
- /*
- * If there is an existing filter, make it the prev and don't drop its
- * task reference.
- */
- filter->prev = current->seccomp.filter;
- current->seccomp.filter = filter;
- return 0;
+ return filter;
+
fail:
kfree(filter);
- return ret;
+ return ERR_PTR(ret);
}
/**
- * seccomp_attach_user_filter - attaches a user-supplied sock_fprog
+ * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
* @user_filter: pointer to the user data containing a sock_fprog.
*
* Returns 0 on success and non-zero otherwise.
*/
-long seccomp_attach_user_filter(char __user *user_filter)
+static struct seccomp_filter *
+seccomp_prepare_user_filter(const char __user *user_filter)
{
struct sock_fprog fprog;
- long ret = -EFAULT;
+ struct seccomp_filter *filter = ERR_PTR(-EFAULT);
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
@@ -309,9 +446,56 @@ long seccomp_attach_user_filter(char __user *user_filter)
#endif
if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
goto out;
- ret = seccomp_attach_filter(&fprog);
+ filter = seccomp_prepare_filter(&fprog);
out:
- return ret;
+ return filter;
+}
+
+/**
+ * seccomp_attach_filter: validate and attach filter
+ * @flags: flags to change filter behavior
+ * @filter: seccomp filter to add to the current process
+ *
+ * Caller must be holding current->sighand->siglock lock.
+ *
+ * Returns 0 on success, -ve on error.
+ */
+static long seccomp_attach_filter(unsigned int flags,
+ struct seccomp_filter *filter)
+{
+ unsigned long total_insns;
+ struct seccomp_filter *walker;
+
+ assert_spin_locked(&current->sighand->siglock);
+
+ /* Validate resulting filter length. */
+ total_insns = filter->len;
+ for (walker = current->seccomp.filter; walker; walker = walker->prev)
+ total_insns += walker->len + 4; /* 4 instr penalty */
+ if (total_insns > MAX_INSNS_PER_PATH)
+ return -ENOMEM;
+
+ /* If thread sync has been requested, check that it is possible. */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
+ int ret;
+
+ ret = seccomp_can_sync_threads();
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * If there is an existing filter, make it the prev and don't drop its
+ * task reference.
+ */
+ filter->prev = current->seccomp.filter;
+ current->seccomp.filter = filter;
+
+ /* Now that the new filter is in place, synchronize to all threads. */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+ seccomp_sync_threads();
+
+ return 0;
}
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ -324,6 +508,13 @@ void get_seccomp_filter(struct task_struct *tsk)
atomic_inc(&orig->usage);
}
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+ if (filter) {
+ kfree(filter);
+ }
+}
+
/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
void put_seccomp_filter(struct task_struct *tsk)
{
@@ -332,7 +523,7 @@ void put_seccomp_filter(struct task_struct *tsk)
while (orig && atomic_dec_and_test(&orig->usage)) {
struct seccomp_filter *freeme = orig;
orig = orig->prev;
- kfree(freeme);
+ seccomp_filter_free(freeme);
}
}
@@ -351,7 +542,7 @@ static void seccomp_send_sigsys(int syscall, int reason)
info.si_code = SYS_SECCOMP;
info.si_call_addr = (void __user *)KSTK_EIP(current);
info.si_errno = reason;
- info.si_arch = syscall_get_arch(current, task_pt_regs(current));
+ info.si_arch = syscall_get_arch();
info.si_syscall = syscall;
force_sig_info(SIGSYS, &info, current);
}
@@ -376,12 +567,17 @@ static int mode1_syscalls_32[] = {
int __secure_computing(int this_syscall)
{
- int mode = current->seccomp.mode;
int exit_sig = 0;
int *syscall;
u32 ret;
- switch (mode) {
+ /*
+ * Make sure that any changes to mode from another thread have
+ * been seen after TIF_SECCOMP was seen.
+ */
+ rmb();
+
+ switch (current->seccomp.mode) {
case SECCOMP_MODE_STRICT:
syscall = mode1_syscalls;
#ifdef CONFIG_COMPAT
@@ -467,47 +663,152 @@ long prctl_get_seccomp(void)
}
/**
- * prctl_set_seccomp: configures current->seccomp.mode
- * @seccomp_mode: requested mode to use
- * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ * seccomp_set_mode_strict: internal function for setting strict seccomp
+ *
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+static long seccomp_set_mode_strict(void)
+{
+ const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
+ long ret = -EINVAL;
+
+ spin_lock_irq(&current->sighand->siglock);
+
+ if (!seccomp_may_assign_mode(seccomp_mode))
+ goto out;
+
+#ifdef TIF_NOTSC
+ disable_TSC();
+#endif
+ seccomp_assign_mode(current, seccomp_mode);
+ ret = 0;
+
+out:
+ spin_unlock_irq(&current->sighand->siglock);
+
+ return ret;
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+/**
+ * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags: flags to change filter behavior
+ * @filter: struct sock_fprog containing filter
*
- * This function may be called repeatedly with a @seccomp_mode of
- * SECCOMP_MODE_FILTER to install additional filters. Every filter
- * successfully installed will be evaluated (in reverse order) for each system
- * call the task makes.
+ * This function may be called repeatedly to install additional filters.
+ * Every filter successfully installed will be evaluated (in reverse order)
+ * for each system call the task makes.
*
* Once current->seccomp.mode is non-zero, it may not be changed.
*
* Returns 0 on success or -EINVAL on failure.
*/
-long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+ const char __user *filter)
{
+ const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+ struct seccomp_filter *prepared = NULL;
long ret = -EINVAL;
- if (current->seccomp.mode &&
- current->seccomp.mode != seccomp_mode)
+ /* Validate flags. */
+ if (flags & ~SECCOMP_FILTER_FLAG_MASK)
+ return -EINVAL;
+
+ /* Prepare the new filter before holding any locks. */
+ prepared = seccomp_prepare_user_filter(filter);
+ if (IS_ERR(prepared))
+ return PTR_ERR(prepared);
+
+ /*
+ * Make sure we cannot change seccomp or nnp state via TSYNC
+ * while another thread is in the middle of calling exec.
+ */
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+ mutex_lock_killable(&current->signal->cred_guard_mutex))
+ goto out_free;
+
+ spin_lock_irq(&current->sighand->siglock);
+
+ if (!seccomp_may_assign_mode(seccomp_mode))
+ goto out;
+
+ ret = seccomp_attach_filter(flags, prepared);
+ if (ret)
goto out;
+ /* Do not free the successfully attached filter. */
+ prepared = NULL;
+
+ seccomp_assign_mode(current, seccomp_mode);
+out:
+ spin_unlock_irq(&current->sighand->siglock);
+ if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+ mutex_unlock(&current->signal->cred_guard_mutex);
+out_free:
+ seccomp_filter_free(prepared);
+ return ret;
+}
+#else
+static inline long seccomp_set_mode_filter(unsigned int flags,
+ const char __user *filter)
+{
+ return -EINVAL;
+}
+#endif
+
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+ const char __user *uargs)
+{
+ switch (op) {
+ case SECCOMP_SET_MODE_STRICT:
+ if (flags != 0 || uargs != NULL)
+ return -EINVAL;
+ return seccomp_set_mode_strict();
+ case SECCOMP_SET_MODE_FILTER:
+ return seccomp_set_mode_filter(flags, uargs);
+ default:
+ return -EINVAL;
+ }
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+ const char __user *, uargs)
+{
+ return do_seccomp(op, flags, uargs);
+}
+
+/**
+ * prctl_set_seccomp: configures current->seccomp.mode
+ * @seccomp_mode: requested mode to use
+ * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+{
+ unsigned int op;
+ char __user *uargs;
switch (seccomp_mode) {
case SECCOMP_MODE_STRICT:
- ret = 0;
-#ifdef TIF_NOTSC
- disable_TSC();
-#endif
+ op = SECCOMP_SET_MODE_STRICT;
+ /*
+ * Setting strict mode through prctl always ignored filter,
+ * so make sure it is always NULL here to pass the internal
+ * check in do_seccomp().
+ */
+ uargs = NULL;
break;
-#ifdef CONFIG_SECCOMP_FILTER
case SECCOMP_MODE_FILTER:
- ret = seccomp_attach_user_filter(filter);
- if (ret)
- goto out;
+ op = SECCOMP_SET_MODE_FILTER;
+ uargs = filter;
break;
-#endif
default:
- goto out;
+ return -EINVAL;
}
- current->seccomp.mode = seccomp_mode;
- set_thread_flag(TIF_SECCOMP);
-out:
- return ret;
+ /* prctl interface doesn't have flags, so they are always zero. */
+ return do_seccomp(op, 0, uargs);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 113411bfe8b1..50e41075ac77 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2848,7 +2848,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
recalc_sigpending();
spin_unlock_irq(&tsk->sighand->siglock);
- timeout = schedule_timeout_interruptible(timeout);
+ timeout = freezable_schedule_timeout_interruptible(timeout);
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, &tsk->real_blocked);
diff --git a/kernel/sys.c b/kernel/sys.c
index 2bbd9a73b54c..a3bef5bd452b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -42,6 +42,9 @@
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/mempolicy.h>
+#include <linux/sched.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -2099,10 +2102,158 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
}
#endif
+#ifdef CONFIG_MMU
+static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ const char __user *name_addr)
+{
+ struct mm_struct * mm = vma->vm_mm;
+ int error = 0;
+ pgoff_t pgoff;
+
+ if (name_addr == vma_get_anon_name(vma)) {
+ *prev = vma;
+ goto out;
+ }
+
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma),
+ name_addr);
+ if (*prev) {
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;
+
+ if (start != vma->vm_start) {
+ error = split_vma(mm, vma, start, 1);
+ if (error)
+ goto out;
+ }
+
+ if (end != vma->vm_end) {
+ error = split_vma(mm, vma, end, 0);
+ if (error)
+ goto out;
+ }
+
+success:
+ if (!vma->vm_file)
+ vma->shared.anon_name = name_addr;
+
+out:
+ if (error == -ENOMEM)
+ error = -EAGAIN;
+ return error;
+}
+
+static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
+ unsigned long arg)
+{
+ unsigned long tmp;
+ struct vm_area_struct * vma, *prev;
+ int unmapped_error = 0;
+ int error = -EINVAL;
+
+ /*
+ * If the interval [start,end) covers some unmapped address
+ * ranges, just ignore them, but return -ENOMEM at the end.
+ * - this matches the handling in madvise.
+ */
+ vma = find_vma_prev(current->mm, start, &prev);
+ if (vma && start > vma->vm_start)
+ prev = vma;
+
+ for (;;) {
+ /* Still start < end. */
+ error = -ENOMEM;
+ if (!vma)
+ return error;
+
+ /* Here start < (end|vma->vm_end). */
+ if (start < vma->vm_start) {
+ unmapped_error = -ENOMEM;
+ start = vma->vm_start;
+ if (start >= end)
+ return error;
+ }
+
+ /* Here vma->vm_start <= start < (end|vma->vm_end) */
+ tmp = vma->vm_end;
+ if (end < tmp)
+ tmp = end;
+
+ /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
+ error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
+ (const char __user *)arg);
+ if (error)
+ return error;
+ start = tmp;
+ if (prev && start < prev->vm_end)
+ start = prev->vm_end;
+ error = unmapped_error;
+ if (start >= end)
+ return error;
+ if (prev)
+ vma = prev->vm_next;
+ else /* madvise_remove dropped mmap_sem */
+ vma = find_vma(current->mm, start);
+ }
+}
+
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ struct mm_struct *mm = current->mm;
+ int error;
+ unsigned long len;
+ unsigned long end;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ len = (len_in + ~PAGE_MASK) & PAGE_MASK;
+
+ /* Check to see whether len was rounded up from small -ve to zero */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ switch (opt) {
+ case PR_SET_VMA_ANON_NAME:
+ error = prctl_set_vma_anon_name(start, end, arg);
+ break;
+ default:
+ error = -EINVAL;
+ }
+
+ up_write(&mm->mmap_sem);
+
+ return error;
+}
+#else /* CONFIG_MMU */
+static int prctl_set_vma(unsigned long opt, unsigned long start,
+ unsigned long len_in, unsigned long arg)
+{
+ return -EINVAL;
+}
+#endif
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
struct task_struct *me = current;
+ struct task_struct *tsk;
unsigned char comm[sizeof(me->comm)];
long error;
@@ -2245,6 +2396,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_TID_ADDRESS:
error = prctl_get_tid_address(me, (int __user **)arg2);
break;
+ case PR_SET_TIMERSLACK_PID:
+ if (task_pid_vnr(current) != (pid_t)arg3 &&
+ !capable(CAP_SYS_NICE))
+ return -EPERM;
+ rcu_read_lock();
+ tsk = find_task_by_vpid((pid_t)arg3);
+ if (tsk == NULL) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (arg2 <= 0)
+ tsk->timer_slack_ns =
+ tsk->default_timer_slack_ns;
+ else
+ tsk->timer_slack_ns = arg2;
+ put_task_struct(tsk);
+ error = 0;
+ break;
case PR_SET_CHILD_SUBREAPER:
me->signal->is_child_subreaper = !!arg2;
break;
@@ -2256,12 +2427,15 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 != 1 || arg3 || arg4 || arg5)
return -EINVAL;
- current->no_new_privs = 1;
+ task_set_no_new_privs(current);
break;
case PR_GET_NO_NEW_PRIVS:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
- return current->no_new_privs ? 1 : 0;
+ return task_no_new_privs(current) ? 1 : 0;
+ case PR_SET_VMA:
+ error = prctl_set_vma(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
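Note: the kernel/sys.c hunks above add two prctl extensions: PR_SET_VMA with the PR_SET_VMA_ANON_NAME sub-option for labelling anonymous mappings, and PR_SET_TIMERSLACK_PID for adjusting a task's timer slack. A hedged userspace sketch follows; the option names come from the code above, but the numeric values in the fallback defines are assumptions of this example.

#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA		0x53564d41	/* assumed value */
#define PR_SET_VMA_ANON_NAME	0		/* assumed value */
#endif
#ifndef PR_SET_TIMERSLACK_PID
#define PR_SET_TIMERSLACK_PID	41		/* assumed value */
#endif

int main(void)
{
	size_t len = 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	/* Label the anonymous mapping; the kernel stores the user pointer
	 * (see prctl_update_vma_anon_name()), so the string must stay
	 * valid for the lifetime of the mapping. */
	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
		  (unsigned long)p, len, (unsigned long)"demo-heap"))
		perror("PR_SET_VMA");

	/* Set the caller's own timer slack to 50us; targeting another pid
	 * needs CAP_SYS_NICE, per the PR_SET_TIMERSLACK_PID hunk above. */
	if (prctl(PR_SET_TIMERSLACK_PID, 50000, getpid(), 0, 0))
		perror("PR_SET_TIMERSLACK_PID");
	return 0;
}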
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7078052284fd..7e7fc0a082c4 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */
cond_syscall(sys_kcmp);
+
+/* operate on Secure Computing state */
+cond_syscall(sys_seccomp);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9469f4c61a30..34b8f77b4c02 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,6 +105,8 @@ extern char core_pattern[];
extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
+extern int extra_free_kbytes;
+extern int min_free_order_shift;
extern int pid_max_min, pid_max_max;
extern int percpu_pagelist_fraction;
extern int compat_log;
@@ -1282,6 +1284,21 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
},
{
+ .procname = "extra_free_kbytes",
+ .data = &extra_free_kbytes,
+ .maxlen = sizeof(extra_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "min_free_order_shift",
+ .data = &min_free_order_shift,
+ .maxlen = sizeof(min_free_order_shift),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
.procname = "percpu_pagelist_fraction",
.data = &percpu_pagelist_fraction,
.maxlen = sizeof(percpu_pagelist_fraction),
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7d19fca0617e..bb08d0961f80 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -199,6 +199,12 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
}
+ktime_t alarm_expires_remaining(const struct alarm *alarm)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ return ktime_sub(alarm->node.expires, base->gettime());
+}
+
#ifdef CONFIG_RTC_CLASS
/**
* alarmtimer_suspend - Suspend time callback
@@ -305,7 +311,7 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
}
/**
- * alarm_start - Sets an alarm to fire
+ * alarm_start - Sets an absolute alarm to fire
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
@@ -325,6 +331,31 @@ int alarm_start(struct alarm *alarm, ktime_t start)
}
/**
+ * alarm_start_relative - Sets a relative alarm to fire
+ * @alarm: ptr to alarm to set
+ * @start: time relative to now to run the alarm
+ */
+int alarm_start_relative(struct alarm *alarm, ktime_t start)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+
+ start = ktime_add(start, base->gettime());
+ return alarm_start(alarm, start);
+}
+
+void alarm_restart(struct alarm *alarm)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+ unsigned long flags;
+
+ spin_lock_irqsave(&base->lock, flags);
+ hrtimer_set_expires(&alarm->timer, alarm->node.expires);
+ hrtimer_restart(&alarm->timer);
+ alarmtimer_enqueue(base, alarm);
+ spin_unlock_irqrestore(&base->lock, flags);
+}
+
+/**
* alarm_try_to_cancel - Tries to cancel an alarm timer
* @alarm: ptr to alarm to be canceled
*
@@ -394,6 +425,12 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
return overrun;
}
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
+{
+ struct alarm_base *base = &alarm_bases[alarm->type];
+
+ return alarm_forward(alarm, base->gettime(), interval);
+}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 015f85aaca08..e24c188cbbcc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,9 @@ config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
bool
+config GPU_TRACEPOINTS
+ bool
+
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d7e2068e4b71..45012122fbb6 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -60,5 +60,6 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/gpu-traces.c b/kernel/trace/gpu-traces.c
new file mode 100644
index 000000000000..a4b3f00faee3
--- /dev/null
+++ b/kernel/trace/gpu-traces.c
@@ -0,0 +1,23 @@
+/*
+ * GPU tracepoints
+ *
+ * Copyright (C) 2013 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gpu.h>
+
+EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch);
+EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 640e4c44b170..249e32b5aa0d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -730,6 +730,7 @@ static const char *trace_options[] = {
"irq-info",
"markers",
"function-trace",
+ "print-tgid",
NULL
};
@@ -1242,6 +1243,7 @@ void tracing_reset_all_online_cpus(void)
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
+static unsigned saved_tgids[SAVED_CMDLINES];
static int cmdline_idx;
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1443,6 +1445,7 @@ static int trace_save_cmdline(struct task_struct *tsk)
}
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ saved_tgids[idx] = tsk->tgid;
arch_spin_unlock(&trace_cmdline_lock);
@@ -1480,6 +1483,25 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
+int trace_find_tgid(int pid)
+{
+ unsigned map;
+ int tgid;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+ map = map_pid_to_cmdline[pid];
+ if (map != NO_CMDLINE_MAP)
+ tgid = saved_tgids[map];
+ else
+ tgid = -1;
+
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+
+ return tgid;
+}
+
void tracing_record_cmdline(struct task_struct *tsk)
{
if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
@@ -2435,6 +2457,13 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
seq_puts(m, "# | | | | |\n");
}
+static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+ print_event_info(buf, m);
+ seq_puts(m, "# TASK-PID TGID CPU# TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | | |\n");
+}
+
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
print_event_info(buf, m);
@@ -2447,6 +2476,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
seq_puts(m, "# | | | |||| | |\n");
}
+static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
+{
+ print_event_info(buf, m);
+ seq_puts(m, "# _-----=> irqs-off\n");
+ seq_puts(m, "# / _----=> need-resched\n");
+ seq_puts(m, "# | / _---=> hardirq/softirq\n");
+ seq_puts(m, "# || / _--=> preempt-depth\n");
+ seq_puts(m, "# ||| / delay\n");
+ seq_puts(m, "# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION\n");
+ seq_puts(m, "# | | | | |||| | |\n");
+}
+
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
@@ -2747,9 +2788,15 @@ void trace_default_header(struct seq_file *m)
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
- print_func_help_header_irq(iter->trace_buffer, m);
+ if (trace_flags & TRACE_ITER_TGID)
+ print_func_help_header_irq_tgid(iter->trace_buffer, m);
+ else
+ print_func_help_header_irq(iter->trace_buffer, m);
else
- print_func_help_header(iter->trace_buffer, m);
+ if (trace_flags & TRACE_ITER_TGID)
+ print_func_help_header_tgid(iter->trace_buffer, m);
+ else
+ print_func_help_header(iter->trace_buffer, m);
}
}
}
@@ -3601,9 +3648,53 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
}
static const struct file_operations tracing_saved_cmdlines_fops = {
- .open = tracing_open_generic,
- .read = tracing_saved_cmdlines_read,
- .llseek = generic_file_llseek,
+ .open = tracing_open_generic,
+ .read = tracing_saved_cmdlines_read,
+ .llseek = generic_file_llseek,
+};
+
+static ssize_t
+tracing_saved_tgids_read(struct file *file, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char *file_buf;
+ char *buf;
+ int len = 0;
+ int pid;
+ int i;
+
+ file_buf = kmalloc(SAVED_CMDLINES*(16+1+16), GFP_KERNEL);
+ if (!file_buf)
+ return -ENOMEM;
+
+ buf = file_buf;
+
+ for (i = 0; i < SAVED_CMDLINES; i++) {
+ int tgid;
+ int r;
+
+ pid = map_cmdline_to_pid[i];
+ if (pid == -1 || pid == NO_CMDLINE_MAP)
+ continue;
+
+ tgid = trace_find_tgid(pid);
+ r = sprintf(buf, "%d %d\n", pid, tgid);
+ buf += r;
+ len += r;
+ }
+
+ len = simple_read_from_buffer(ubuf, cnt, ppos,
+ file_buf, len);
+
+ kfree(file_buf);
+
+ return len;
+}
+
+static const struct file_operations tracing_saved_tgids_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_saved_tgids_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -6157,6 +6248,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_marker", 0220, d_tracer,
tr, &tracing_mark_fops);
+ trace_create_file("saved_tgids", 0444, d_tracer,
+ tr, &tracing_saved_tgids_fops);
+
trace_create_file("trace_clock", 0644, d_tracer, tr,
&trace_clock_fops);
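Note: the trace.c hunks above record the TGID alongside each saved comm, add a print-tgid trace option, and expose the pid-to-tgid table through a new saved_tgids file. A hedged userspace sketch that enables the option and dumps the table; the debugfs mount point is an assumption of this example.

#include <stdio.h>

#define TRACEDIR "/sys/kernel/debug/tracing"	/* assumed mount point */

int main(void)
{
	char line[64];
	FILE *f = fopen(TRACEDIR "/options/print-tgid", "w");

	if (f) {
		fputs("1\n", f);	/* adds a TGID column to trace output */
		fclose(f);
	}

	f = fopen(TRACEDIR "/saved_tgids", "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* one "<pid> <tgid>" pair per line */
	fclose(f);
	return 0;
}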
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index aa0e736b72ac..79462443658d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -653,6 +653,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern cycle_t ftrace_now(int cpu);
extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
@@ -866,6 +867,7 @@ enum trace_iterator_flags {
TRACE_ITER_IRQ_INFO = 0x800000,
TRACE_ITER_MARKERS = 0x1000000,
TRACE_ITER_FUNCTION = 0x2000000,
+ TRACE_ITER_TGID = 0x4000000,
};
/*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8388bc99f2ee..28dd40c2c423 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -46,6 +46,8 @@ struct fgraph_data {
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
#define TRACE_GRAPH_PRINT_IRQS 0x40
+#define TRACE_GRAPH_PRINT_FLAT 0x80
+
static unsigned int max_depth;
@@ -64,6 +66,8 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+ /* Use standard trace formatting rather than hierarchical */
+ { TRACER_OPT(funcgraph-flat, TRACE_GRAPH_PRINT_FLAT) },
{ } /* Empty entry */
};
@@ -1234,6 +1238,9 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
int cpu = iter->cpu;
int ret;
+ if (flags & TRACE_GRAPH_PRINT_FLAT)
+ return TRACE_TYPE_UNHANDLED;
+
if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
return TRACE_TYPE_HANDLED;
@@ -1291,13 +1298,6 @@ print_graph_function(struct trace_iterator *iter)
return print_graph_function_flags(iter, tracer_flags.val);
}
-static enum print_line_t
-print_graph_function_event(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- return print_graph_function(iter);
-}
-
static void print_lat_header(struct seq_file *s, u32 flags)
{
static const char spaces[] = " " /* 16 spaces */
@@ -1364,6 +1364,11 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
{
struct trace_iterator *iter = s->private;
+ if (flags & TRACE_GRAPH_PRINT_FLAT) {
+ trace_default_header(s);
+ return;
+ }
+
if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
return;
@@ -1434,20 +1439,6 @@ static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
return 0;
}
-static struct trace_event_functions graph_functions = {
- .trace = print_graph_function_event,
-};
-
-static struct trace_event graph_trace_entry_event = {
- .type = TRACE_GRAPH_ENT,
- .funcs = &graph_functions,
-};
-
-static struct trace_event graph_trace_ret_event = {
- .type = TRACE_GRAPH_RET,
- .funcs = &graph_functions
-};
-
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
.open = graph_trace_open,
@@ -1523,16 +1514,6 @@ static __init int init_graph_trace(void)
{
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
- if (!register_ftrace_event(&graph_trace_entry_event)) {
- pr_warning("Warning: could not register graph trace events\n");
- return 1;
- }
-
- if (!register_ftrace_event(&graph_trace_ret_event)) {
- pr_warning("Warning: could not register graph trace events\n");
- return 1;
- }
-
return register_tracer(&graph_trace);
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bb922d9ee51b..d9be9f074712 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -126,6 +126,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
EXPORT_SYMBOL_GPL(trace_seq_printf);
/**
+ * trace_seq_bitmask - put a list of longs as a bitmask print output
+ * @s: trace sequence descriptor
+ * @maskp: points to an array of unsigned longs that represent a bitmask
+ * @nmaskbits: The number of bits that are valid in @maskp
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
+ * Writes an ASCII representation of the bitmask into @s.
+ */
+int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+ int nmaskbits)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ int ret;
+
+ if (s->full || !len)
+ return 0;
+
+ ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits);
+ s->len += ret;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(trace_seq_bitmask);
+
+/**
* trace_seq_vprintf - sequence printing of trace information
* @s: trace sequence descriptor
* @fmt: printf format string
@@ -315,7 +343,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
{
unsigned long mask;
const char *str;
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
int i, first = 1;
for (i = 0; flag_array[i].name && flags; i++) {
@@ -351,7 +379,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
const struct trace_print_flags *symbol_array)
{
int i;
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
for (i = 0; symbol_array[i].name; i++) {
@@ -362,7 +390,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
break;
}
- if (ret == (const char *)(p->buffer + p->len))
+ if (ret == (const char *)(trace_seq_buffer_ptr(p)))
trace_seq_printf(p, "0x%lx", val);
trace_seq_putc(p, 0);
@@ -377,7 +405,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
const struct trace_print_flags_u64 *symbol_array)
{
int i;
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
for (i = 0; symbol_array[i].name; i++) {
@@ -388,7 +416,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
break;
}
- if (ret == (const char *)(p->buffer + p->len))
+ if (ret == (const char *)(trace_seq_buffer_ptr(p)))
trace_seq_printf(p, "0x%llx", val);
trace_seq_putc(p, 0);
@@ -399,10 +427,23 @@ EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
#endif
const char *
+ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+ unsigned int bitmask_size)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
+
+const char *
ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
{
int i;
- const char *ret = p->buffer + p->len;
+ const char *ret = trace_seq_buffer_ptr(p);
for (i = 0; i < buf_len; i++)
trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
@@ -413,6 +454,50 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
}
EXPORT_SYMBOL(ftrace_print_hex_seq);
+const char *
+ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
+ size_t el_size)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ const char *prefix = "";
+ void *ptr = (void *)buf;
+
+ trace_seq_putc(p, '{');
+
+ while (ptr < buf + buf_len) {
+ switch (el_size) {
+ case 1:
+ trace_seq_printf(p, "%s0x%x", prefix,
+ *(u8 *)ptr);
+ break;
+ case 2:
+ trace_seq_printf(p, "%s0x%x", prefix,
+ *(u16 *)ptr);
+ break;
+ case 4:
+ trace_seq_printf(p, "%s0x%x", prefix,
+ *(u32 *)ptr);
+ break;
+ case 8:
+ trace_seq_printf(p, "%s0x%llx", prefix,
+ *(u64 *)ptr);
+ break;
+ default:
+ trace_seq_printf(p, "BAD SIZE:%zu 0x%x", el_size,
+ *(u8 *)ptr);
+ el_size = 1;
+ }
+ prefix = ",";
+ ptr += el_size;
+ }
+
+ trace_seq_putc(p, '}');
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL(ftrace_print_array_seq);
+
int ftrace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *trace_event)
{
@@ -702,11 +787,25 @@ int trace_print_context(struct trace_iterator *iter)
unsigned long secs, usec_rem;
char comm[TASK_COMM_LEN];
int ret;
+ int tgid;
trace_find_cmdline(entry->pid, comm);
- ret = trace_seq_printf(s, "%16s-%-5d [%03d] ",
- comm, entry->pid, iter->cpu);
+ ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+ if (!ret)
+ return 0;
+
+ if (trace_flags & TRACE_ITER_TGID) {
+ tgid = trace_find_tgid(entry->pid);
+ if (tgid < 0)
+ ret = trace_seq_puts(s, "(-----) ");
+ else
+ ret = trace_seq_printf(s, "(%5d) ", tgid);
+ if (!ret)
+ return 0;
+ }
+
+ ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
if (!ret)
return 0;
@@ -1035,6 +1134,168 @@ static struct trace_event trace_fn_event = {
.funcs = &trace_fn_funcs,
};
+/* TRACE_GRAPH_ENT */
+static enum print_line_t trace_graph_ent_trace(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_seq *s = &iter->seq;
+ struct ftrace_graph_ent_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_puts(s, "graph_ent: func="))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!seq_print_ip_sym(s, field->graph_ent.func, flags))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_puts(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_raw(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "%lx %d\n",
+ field->graph_ent.func,
+ field->graph_ent.depth))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_hex(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.func);
+ SEQ_PUT_HEX_FIELD_RET(s, field->graph_ent.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ent_bin(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ent_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->graph_ent.func);
+ SEQ_PUT_FIELD_RET(s, field->graph_ent.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ent_funcs = {
+ .trace = trace_graph_ent_trace,
+ .raw = trace_graph_ent_raw,
+ .hex = trace_graph_ent_hex,
+ .binary = trace_graph_ent_bin,
+};
+
+static struct trace_event trace_graph_ent_event = {
+ .type = TRACE_GRAPH_ENT,
+ .funcs = &trace_graph_ent_funcs,
+};
+
+/* TRACE_GRAPH_RET */
+static enum print_line_t trace_graph_ret_trace(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct ftrace_graph_ret_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!trace_seq_puts(s, "graph_ret: func="))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!seq_print_ip_sym(s, field->ret.func, flags))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_puts(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_raw(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "%lx %lld %lld %ld %d\n",
+ field->ret.func,
+ field->ret.calltime,
+ field->ret.rettime,
+ field->ret.overrun,
+ field->ret.depth))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_hex(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.func);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.calltime);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.rettime);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.overrun);
+ SEQ_PUT_HEX_FIELD_RET(s, field->ret.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_graph_ret_bin(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct ftrace_graph_ret_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->ret.func);
+ SEQ_PUT_FIELD_RET(s, field->ret.calltime);
+ SEQ_PUT_FIELD_RET(s, field->ret.rettime);
+ SEQ_PUT_FIELD_RET(s, field->ret.overrun);
+ SEQ_PUT_FIELD_RET(s, field->ret.depth);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event_functions trace_graph_ret_funcs = {
+ .trace = trace_graph_ret_trace,
+ .raw = trace_graph_ret_raw,
+ .hex = trace_graph_ret_hex,
+ .binary = trace_graph_ret_bin,
+};
+
+static struct trace_event trace_graph_ret_event = {
+ .type = TRACE_GRAPH_RET,
+ .funcs = &trace_graph_ret_funcs,
+};
+
/* TRACE_CTX an TRACE_WAKE */
static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
char *delim)
@@ -1425,6 +1686,8 @@ static struct trace_event trace_print_event = {
static struct trace_event *events[] __initdata = {
&trace_fn_event,
+ &trace_graph_ent_event,
+ &trace_graph_ret_event,
&trace_ctx_event,
&trace_wake_event,
&trace_stack_event,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 05039e348f07..e092e5a6cdd7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -45,6 +45,11 @@ static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static cpumask_t __read_mostly watchdog_cpus;
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
@@ -178,7 +183,7 @@ void touch_softlockup_watchdog_sync(void)
__raw_get_cpu_var(watchdog_touch_ts) = 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/* watchdog detector functions */
static int is_hardlockup(void)
{
@@ -192,6 +197,76 @@ static int is_hardlockup(void)
}
#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{
+ cpumask_t cpus = watchdog_cpus;
+ unsigned int next_cpu;
+
+ next_cpu = cpumask_next(cpu, &cpus);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(&cpus);
+
+ if (next_cpu == cpu)
+ return nr_cpu_ids;
+
+ return next_cpu;
+}
+
+static int is_hardlockup_other_cpu(unsigned int cpu)
+{
+ unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+ if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+ return 1;
+
+ per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+ return 0;
+}
+
+static void watchdog_check_hardlockup_other_cpu(void)
+{
+ unsigned int next_cpu;
+
+ /*
+ * Test for hardlockups every 3 samples. The sample period is
+ * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
+ * watchdog_thresh (over by 20%).
+ */
+ if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
+ return;
+
+ /* check for a hardlockup on the next cpu */
+ next_cpu = watchdog_next_cpu(smp_processor_id());
+ if (next_cpu >= nr_cpu_ids)
+ return;
+
+ smp_rmb();
+
+ if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
+ per_cpu(watchdog_nmi_touch, next_cpu) = false;
+ return;
+ }
+
+ if (is_hardlockup_other_cpu(next_cpu)) {
+ /* only warn once */
+ if (per_cpu(hard_watchdog_warn, next_cpu) == true)
+ return;
+
+ if (hardlockup_panic)
+ panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+ else
+ WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+
+ per_cpu(hard_watchdog_warn, next_cpu) = true;
+ } else {
+ per_cpu(hard_watchdog_warn, next_cpu) = false;
+ }
+}
+#else
+static inline void watchdog_check_hardlockup_other_cpu(void) { return; }
+#endif
+
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
@@ -203,7 +278,7 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,
@@ -251,7 +326,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
__this_cpu_write(hard_watchdog_warn, false);
return;
}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
static void watchdog_interrupt_count(void)
{
@@ -271,6 +346,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
/* kick the hardlockup detector */
watchdog_interrupt_count();
+ /* test for hardlockups on the next cpu */
+ watchdog_check_hardlockup_other_cpu();
+
/* kick the softlockup detector */
wake_up_process(__this_cpu_read(softlockup_watchdog));
@@ -395,7 +473,7 @@ static void watchdog(unsigned int cpu)
__touch_watchdog();
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/*
* People like the simple clean cpu node info on boot.
* Reduce the watchdog noise by only printing messages
@@ -471,9 +549,44 @@ static void watchdog_nmi_disable(unsigned int cpu)
return;
}
#else
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static int watchdog_nmi_enable(unsigned int cpu)
+{
+ /*
+ * The new cpu will be marked online before the first hrtimer interrupt
+ * runs on it. If another cpu tests for a hardlockup on the new cpu
+ * before it has run its first hrtimer, it will get a false positive.
+ * Touch the watchdog on the new cpu to delay the first check for at
+ * least 3 sampling periods to guarantee one hrtimer has run on the new
+ * cpu.
+ */
+ per_cpu(watchdog_nmi_touch, cpu) = true;
+ smp_wmb();
+ cpumask_set_cpu(cpu, &watchdog_cpus);
+ return 0;
+}
+
+static void watchdog_nmi_disable(unsigned int cpu)
+{
+ unsigned int next_cpu = watchdog_next_cpu(cpu);
+
+ /*
+ * Offlining this cpu will cause the cpu before this one to start
+ * checking the one after this one. If this cpu just finished checking
+ * the next cpu and updating hrtimer_interrupts_saved, and then the
+ * previous cpu checks it within one sample period, it will trigger a
+ * false positive. Touch the watchdog on the next cpu to prevent it.
+ */
+ if (next_cpu < nr_cpu_ids)
+ per_cpu(watchdog_nmi_touch, next_cpu) = true;
+ smp_wmb();
+ cpumask_clear_cpu(cpu, &watchdog_cpus);
+}
+#else
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
/* prepare/enable/disable routines */
/* sysctl functions */
diff --git a/lib/Kconfig b/lib/Kconfig
index 06d94d885877..7d4fc839209d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -417,3 +417,5 @@ config UCS2_STRING
tristate
endmenu
+
+source "drivers/base/kds/Kconfig"
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 74fdc5cf4adc..d317c1ad62ab 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -191,15 +191,27 @@ config LOCKUP_DETECTOR
The overhead should be minimal. A periodic hrtimer runs to
generate interrupts and kick the watchdog task every 4 seconds.
An NMI is generated every 10 seconds or so to check for hardlockups.
+ If NMIs are not available on the platform, every 12 seconds the
+ hrtimer interrupt on one cpu will be used to check for hardlockups
+ on the next cpu.
The frequency of hrtimer and NMI events and the soft and hard lockup
thresholds can be controlled through the sysctl watchdog_thresh.
-config HARDLOCKUP_DETECTOR
+config HARDLOCKUP_DETECTOR_NMI
def_bool y
depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+config HARDLOCKUP_DETECTOR_OTHER_CPU
+ def_bool y
+ depends on LOCKUP_DETECTOR && SMP
+ depends on !HARDLOCKUP_DETECTOR_NMI && !HAVE_NMI_WATCHDOG
+
+config HARDLOCKUP_DETECTOR
+ def_bool y
+ depends on HARDLOCKUP_DETECTOR_NMI || HARDLOCKUP_DETECTOR_OTHER_CPU
+
config BOOTPARAM_HARDLOCKUP_PANIC
bool "Panic (Reboot) On Hard Lockups"
depends on HARDLOCKUP_DETECTOR
@@ -669,8 +681,9 @@ config DEBUG_LOCKING_API_SELFTESTS
mutexes and rwsems.
config STACKTRACE
- bool
+ bool "Stacktrace"
depends on STACKTRACE_SUPPORT
+ default y
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
diff --git a/linaro/configs/android.conf b/linaro/configs/android.conf
index e4fd1ad19028..6332285ca285 100644
--- a/linaro/configs/android.conf
+++ b/linaro/configs/android.conf
@@ -40,3 +40,154 @@ CONFIG_ADF_FBDEV=y
CONFIG_ADF_MEMBLOCK=y
CONFIG_DMA_SHARED_BUFFER=y
CONFIG_TUN=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_DCCP=y
+CONFIG_NF_CT_PROTO_GRE=y
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_BROADCAST=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_NAT=y
+CONFIG_NF_NAT_NEEDED=y
+CONFIG_NF_NAT_PROTO_DCCP=y
+CONFIG_NF_NAT_PROTO_UDPLITE=y
+CONFIG_NF_NAT_PROTO_SCTP=y
+CONFIG_NF_NAT_AMANDA=y
+CONFIG_NF_NAT_FTP=y
+CONFIG_NF_NAT_IRC=y
+CONFIG_NF_NAT_TFTP=y
+CONFIG_NETFILTER_TPROXY=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_MARK=y
+CONFIG_NETFILTER_XT_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NETMAP=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_ECN=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_HL=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QTAGUID=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_CONNTRACK_PROC_COMPAT=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_REJECT_SKERR=y
+CONFIG_NF_NAT_IPV4=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_NF_NAT_PROTO_GRE=y
+CONFIG_NF_NAT_PPTP=y
+CONFIG_NF_NAT_H323=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_TARGET_REJECT_SKERR=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=y
+CONFIG_TEXTSEARCH_BM=y
+CONFIG_TEXTSEARCH_FSM=y
+CONFIG_SWP_EMULATE=y
+CONFIG_ARMV7_COMPAT=y
+CONFIG_ARMV7_COMPAT_CPUINFO=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_PREEMPT=y
+CONFIG_PM_AUTOSLEEP=y
+CONFIG_PM_WAKELOCKS=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_INET_ESP=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_CLS_U32=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=y
+CONFIG_PPP_DEFLATE=y
+CONFIG_PPP_MPPE=y
+CONFIG_PPPOLAC=y
+CONFIG_PPPOPNS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_PM_WAKELOCKS_LIMIT=0
+# CONFIG_PM_WAKELOCKS_GC is not set
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_SUSPEND_TIME=y
+CONFIG_UID_STAT=y
+CONFIG_DM_UEVENT=y
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_MEDIA_SUPPORT=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SCHED_TRACER=y
diff --git a/linaro/configs/distribution.conf b/linaro/configs/distribution.conf
index 729b9b8979e4..4c7ae47698a4 100644
--- a/linaro/configs/distribution.conf
+++ b/linaro/configs/distribution.conf
@@ -1,4 +1,3 @@
-# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_CGROUPS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768
diff --git a/linaro/configs/selinux.conf b/linaro/configs/selinux.conf
new file mode 100644
index 000000000000..6a38da1fb2f7
--- /dev/null
+++ b/linaro/configs/selinux.conf
@@ -0,0 +1,12 @@
+CONFIG_AUDIT=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY=y
+
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
+CONFIG_SECURITY_SELINUX_DISABLE=y
+
+CONFIG_IKCONFIG_PROC=n
diff --git a/linaro/configs/vexpress64.conf b/linaro/configs/vexpress64.conf
index 43860850f0cd..d110bcc2b426 100644
--- a/linaro/configs/vexpress64.conf
+++ b/linaro/configs/vexpress64.conf
@@ -11,7 +11,6 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
# CONFIG_SERIO_I8042 is not set
CONFIG_FB=y
CONFIG_FB_ARMCLCD=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
@@ -45,7 +44,6 @@ CONFIG_USB=y
CONFIG_USB_ULPI=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_SYNOPSYS=y
-CONFIG_USB_OHCI_HCD=y
CONFIG_USB_PHY=y
CONFIG_PM_OPP=y
CONFIG_GENERIC_CPUFREQ_CPU0=y
@@ -53,5 +51,36 @@ CONFIG_ARM_BIG_LITTLE_CPUFREQ=y
CONFIG_ARM_DT_BL_CPUFREQ=y
CONFIG_ARM64_CPUIDLE=y
CONFIG_ARM64_CRYPTO=y
+CONFIG_SENSORS_V2M_JUNO=y
+CONFIG_SRAM=y
+CONFIG_MAILBOX=y
+CONFIG_ARM_SCPI_CPUFREQ=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_INTERACTIVE=y
+CONFIG_THERMAL=y
+CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR=y
+CONFIG_THERMAL_GOV_USER_SPACE=y
+CONFIG_CPU_THERMAL=y
+CONFIG_PM_DEVFREQ=y
+CONFIG_DEVFREQ_THERMAL=y
+CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
+CONFIG_DEVFREQ_GOV_PERFORMANCE=y
+CONFIG_SCPI_THERMAL=y
+CONFIG_MALI_DEVFREQ=y
+CONFIG_THERMAL_WRITABLE_TRIPS=y
+CONFIG_DRM=y
+CONFIG_DRM_ARM=y
+CONFIG_DRM_HDLCD=y
+CONFIG_DRM_VIRTUAL_HDLCD=y
+CONFIG_CMA_SIZE_MBYTES=32
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_DRM_I2C_NXP_TDA998X=y
+CONFIG_MALI_MIDGARD=y
+CONFIG_MALI_PLATFORM_THIRDPARTY=y
+CONFIG_MALI_EXPERT=y
+CONFIG_MALI_PLATFORM_THIRDPARTY_NAME="juno_soc"
+CONFIG_MALI_PLATFORM_FAKE=y
CONFIG_DMADEVICES=y
CONFIG_PL330_DMA=y
diff --git a/mm/madvise.c b/mm/madvise.c
index 7055883e6e25..85d1a5427c27 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -102,7 +102,8 @@ static long madvise_behavior(struct vm_area_struct * vma,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
+ vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index eaa3accb01e7..81bf16fd2266 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6795,6 +6795,12 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
return ret;
}
+static int mem_cgroup_allow_attach(struct cgroup *cgroup,
+ struct cgroup_taskset *tset)
+{
+ return subsys_cgroup_allow_attach(cgroup, tset);
+}
+
static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
struct cgroup_taskset *tset)
{
@@ -6963,6 +6969,11 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
{
return 0;
}
+static int mem_cgroup_allow_attach(struct cgroup *cgroup,
+ struct cgroup_taskset *tset)
+{
+ return 0;
+}
static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
struct cgroup_taskset *tset)
{
@@ -6998,6 +7009,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task,
+ .allow_attach = mem_cgroup_allow_attach,
.bind = mem_cgroup_bind,
.base_cftypes = mem_cgroup_files,
.early_init = 0,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b2061bb5af73..420e4b97ffab 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -725,7 +725,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
((vmstart - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff,
- new_pol);
+ new_pol, vma_get_anon_name(vma));
if (prev) {
vma = prev;
next = vma->vm_next;
diff --git a/mm/mlock.c b/mm/mlock.c
index 713e462c0776..b680dfd7dbf2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -23,10 +23,10 @@
int can_do_mlock(void)
{
- if (capable(CAP_IPC_LOCK))
- return 1;
if (rlimit(RLIMIT_MEMLOCK) != 0)
return 1;
+ if (capable(CAP_IPC_LOCK))
+ return 1;
return 0;
}
EXPORT_SYMBOL(can_do_mlock);
@@ -289,7 +289,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
+ vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*prev) {
vma = *prev;
goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index 70ff9b41c970..84a1c44242a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -893,7 +893,8 @@ again: remove_next = 1 + (end > next->vm_end);
* per-vma resources, so we don't attempt to merge those.
*/
static inline int is_mergeable_vma(struct vm_area_struct *vma,
- struct file *file, unsigned long vm_flags)
+ struct file *file, unsigned long vm_flags,
+ const char __user *anon_name)
{
if (vma->vm_flags ^ vm_flags)
return 0;
@@ -901,6 +902,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
return 0;
if (vma->vm_ops && vma->vm_ops->close)
return 0;
+ if (vma_get_anon_name(vma) != anon_name)
+ return 0;
return 1;
}
@@ -931,9 +934,10 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
*/
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+ struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+ const char __user *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags) &&
+ if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
if (vma->vm_pgoff == vm_pgoff)
return 1;
@@ -950,9 +954,10 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
*/
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
- struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+ struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff,
+ const char __user *anon_name)
{
- if (is_mergeable_vma(vma, file, vm_flags) &&
+ if (is_mergeable_vma(vma, file, vm_flags, anon_name) &&
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
pgoff_t vm_pglen;
vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
@@ -963,9 +968,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
}
/*
- * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
- * whether that can be merged with its predecessor or its successor.
- * Or both (it neatly fills a hole).
+ * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
+ * figure out whether that can be merged with its predecessor or its
+ * successor. Or both (it neatly fills a hole).
*
* In most cases - when called for mmap, brk or mremap - [addr,end) is
* certain not to be mapped by the time vma_merge is called; but when
@@ -995,7 +1000,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct vm_area_struct *prev, unsigned long addr,
unsigned long end, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
- pgoff_t pgoff, struct mempolicy *policy)
+ pgoff_t pgoff, struct mempolicy *policy,
+ const char __user *anon_name)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
@@ -1021,15 +1027,15 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
if (prev && prev->vm_end == addr &&
mpol_equal(vma_policy(prev), policy) &&
- can_vma_merge_after(prev, vm_flags,
- anon_vma, file, pgoff)) {
+ can_vma_merge_after(prev, vm_flags, anon_vma,
+ file, pgoff, anon_name)) {
/*
* OK, it can. Can we now merge in the successor as well?
*/
if (next && end == next->vm_start &&
mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen) &&
+ can_vma_merge_before(next, vm_flags, anon_vma,
+ file, pgoff+pglen, anon_name) &&
is_mergeable_anon_vma(prev->anon_vma,
next->anon_vma, NULL)) {
/* cases 1, 6 */
@@ -1049,8 +1055,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
if (next && end == next->vm_start &&
mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen)) {
+ can_vma_merge_before(next, vm_flags, anon_vma,
+ file, pgoff+pglen, anon_name)) {
if (prev && addr < prev->vm_end) /* case 4 */
err = vma_adjust(prev, prev->vm_start,
addr, prev->vm_pgoff, NULL);
@@ -1519,7 +1525,8 @@ munmap_back:
/*
* Can we just expand an old mapping?
*/
- vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+ vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
+ NULL, NULL);
if (vma)
goto out;
@@ -2666,7 +2673,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
/* Can we just expand an old private anonymous mapping? */
vma = vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL);
+ NULL, NULL, pgoff, NULL, NULL);
if (vma)
goto out;
@@ -2824,7 +2831,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
return NULL; /* should never get here */
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (new_vma) {
/*
* Source vma may have been merged into new_vma
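For illustration only, a small standalone sketch of the extra merge condition the
mm/mmap.c hunks above introduce: two otherwise-compatible VMAs merge only when
their anon names compare equal as raw pointers, matching the new
vma_get_anon_name(vma) != anon_name check in is_mergeable_vma(). The struct, the
helper and the "dalvik-heap" name below are hypothetical, not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct fake_vma {
	unsigned long vm_flags;
	const char *anon_name;	/* stands in for vma_get_anon_name(vma) */
};

static bool mergeable(const struct fake_vma *a, unsigned long vm_flags,
		      const char *anon_name)
{
	if (a->vm_flags ^ vm_flags)
		return false;
	if (a->anon_name != anon_name)	/* pointer compare, as in the patch */
		return false;
	return true;
}

int main(void)
{
	static const char heap_name[] = "dalvik-heap";
	struct fake_vma vma = { 0x73, heap_name };

	printf("%d\n", mergeable(&vma, 0x73, heap_name));	/* 1: same name */
	printf("%d\n", mergeable(&vma, 0x73, NULL));		/* 0: named vs unnamed */
	return 0;
}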
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e9f65aaa3182..3c44c971f9f3 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -278,7 +278,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
*/
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
*pprev = vma_merge(mm, *pprev, start, end, newflags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+ vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+ vma_get_anon_name(vma));
if (*pprev) {
vma = *pprev;
goto success;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 494a081ec5e4..025ebfc6d5a7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -196,7 +196,20 @@ static char * const zone_names[MAX_NR_ZONES] = {
"Movable",
};
+/*
+ * Try to keep at least this much lowmem free. Do not allow normal
+ * allocations below this point, only high priority ones. Automatically
+ * tuned according to the amount of memory in the system.
+ */
int min_free_kbytes = 1024;
+int min_free_order_shift = 1;
+
+/*
+ * Extra memory for the system to try freeing. Used to temporarily
+ * free memory, to make space for new workloads. Anyone can allocate
+ * down to the min watermarks controlled by min_free_kbytes above.
+ */
+int extra_free_kbytes = 0;
static unsigned long __meminitdata nr_kernel_pages;
static unsigned long __meminitdata nr_all_pages;
@@ -1650,7 +1663,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
free_pages -= z->free_area[o].nr_free << o;
/* Require fewer higher order pages to be free */
- min >>= 1;
+ min >>= min_free_order_shift;
if (free_pages <= min)
return false;
@@ -5330,6 +5343,7 @@ static void setup_per_zone_lowmem_reserve(void)
static void __setup_per_zone_wmarks(void)
{
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
+ unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10);
unsigned long lowmem_pages = 0;
struct zone *zone;
unsigned long flags;
@@ -5341,11 +5355,14 @@ static void __setup_per_zone_wmarks(void)
}
for_each_zone(zone) {
- u64 tmp;
+ u64 min, low;
spin_lock_irqsave(&zone->lock, flags);
- tmp = (u64)pages_min * zone->managed_pages;
- do_div(tmp, lowmem_pages);
+ min = (u64)pages_min * zone->managed_pages;
+ do_div(min, lowmem_pages);
+ low = (u64)pages_low * zone->managed_pages;
+ do_div(low, vm_total_pages);
+
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -5366,11 +5383,13 @@ static void __setup_per_zone_wmarks(void)
* If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
- zone->watermark[WMARK_MIN] = tmp;
+ zone->watermark[WMARK_MIN] = min;
}
- zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
- zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
+ zone->watermark[WMARK_LOW] = min_wmark_pages(zone) +
+ low + (min >> 2);
+ zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) +
+ low + (min >> 1);
setup_zone_migrate_reserve(zone);
spin_unlock_irqrestore(&zone->lock, flags);
@@ -5483,7 +5502,7 @@ module_init(init_per_zone_wmark_min)
/*
* min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
* that we can call two helper functions whenever min_free_kbytes
- * changes.
+ * or extra_free_kbytes changes.
*/
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
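For reference, a userspace-style sketch of the per-zone watermark arithmetic the
mm/page_alloc.c hunks above introduce: min scales min_free_kbytes by the zone's
share of lowmem, low scales extra_free_kbytes by the zone's share of all pages,
and WMARK_LOW/WMARK_HIGH sit at min + low + min/4 and min + low + min/2. All
page counts below are made-up example values.

#include <stdio.h>

int main(void)
{
	/* min_free_kbytes = 1024, extra_free_kbytes = 8192, 4K pages */
	unsigned long pages_min = 1024 >> (12 - 10);
	unsigned long pages_low = 8192 >> (12 - 10);
	unsigned long lowmem_pages = 262144;	/* example total lowmem pages */
	unsigned long vm_total_pages = 262144;	/* example total pages */
	unsigned long zone_managed = 131072;	/* example zone->managed_pages */

	unsigned long min = (unsigned long long)pages_min * zone_managed / lowmem_pages;
	unsigned long low = (unsigned long long)pages_low * zone_managed / vm_total_pages;

	printf("WMARK_MIN  = %lu pages\n", min);
	printf("WMARK_LOW  = %lu pages\n", min + low + (min >> 2));
	printf("WMARK_HIGH = %lu pages\n", min + low + (min >> 1));
	return 0;
}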
diff --git a/mm/page_io.c b/mm/page_io.c
index a8a3ef45fed7..ba05b64e5d8d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -21,6 +21,7 @@
#include <linux/writeback.h>
#include <linux/frontswap.h>
#include <linux/aio.h>
+#include <linux/blkdev.h>
#include <asm/pgtable.h>
static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -80,9 +81,54 @@ void end_swap_bio_read(struct bio *bio, int err)
imajor(bio->bi_bdev->bd_inode),
iminor(bio->bi_bdev->bd_inode),
(unsigned long long)bio->bi_sector);
- } else {
- SetPageUptodate(page);
+ goto out;
}
+
+ SetPageUptodate(page);
+
+ /*
+ * There is no guarantee that the page is in swap cache - the software
+ * suspend code (at least) uses end_swap_bio_read() against a non-
+ * swapcache page. So we must check PG_swapcache before proceeding with
+ * this optimization.
+ */
+ if (likely(PageSwapCache(page))) {
+ struct swap_info_struct *sis;
+
+ sis = page_swap_info(page);
+ if (sis->flags & SWP_BLKDEV) {
+ /*
+ * The swap subsystem performs lazy swap slot freeing,
+ * expecting that the page will be swapped out again.
+ * So we can avoid an unnecessary write if the page
+ * isn't redirtied.
+ * This is good for real swap storage because we can
+ * reduce unnecessary I/O and enhance wear-leveling
+			 * if an SSD is used as the swap device,
+ * But if in-memory swap device (eg zram) is used,
+ * this causes a duplicated copy between uncompressed
+ * data in VM-owned memory and compressed data in
+ * zram-owned memory. So let's free zram-owned memory
+ * and make the VM-owned decompressed page *dirty*,
+ * so the page should be swapped out somewhere again if
+ * we again wish to reclaim it.
+ */
+ struct gendisk *disk = sis->bdev->bd_disk;
+ if (disk->fops->swap_slot_free_notify) {
+ swp_entry_t entry;
+ unsigned long offset;
+
+ entry.val = page_private(page);
+ offset = swp_offset(entry);
+
+ SetPageDirty(page);
+ disk->fops->swap_slot_free_notify(sis->bdev,
+ offset);
+ }
+ }
+ }
+
+out:
unlock_page(page);
bio_put(bio);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 4e4a7349c5cd..5373c7fffd99 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3026,6 +3026,14 @@ put_memory:
}
EXPORT_SYMBOL_GPL(shmem_file_setup);
+void shmem_set_file(struct vm_area_struct *vma, struct file *file)
+{
+ if (vma->vm_file)
+ fput(vma->vm_file);
+ vma->vm_file = file;
+ vma->vm_ops = &shmem_vm_ops;
+}
+
/**
* shmem_zero_setup - setup a shared anonymous mapping
* @vma: the vma to be mmapped is prepared by do_mmap_pgoff
@@ -3039,10 +3047,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
if (IS_ERR(file))
return PTR_ERR(file);
- if (vma->vm_file)
- fput(vma->vm_file);
- vma->vm_file = file;
- vma->vm_ops = &shmem_vm_ops;
+ shmem_set_file(vma, file);
return 0;
}
diff --git a/mm/util.c b/mm/util.c
index 0b1725254ff1..f68b2db27e2b 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -7,6 +7,7 @@
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -381,6 +382,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(vm_mmap);
+void kvfree(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
struct address_space *page_mapping(struct page *page)
{
struct address_space *mapping = page->mapping;
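A typical in-kernel pairing for the kvfree() helper added above, sketched here
under the assumption of a caller that falls back from kmalloc() to vmalloc()
for large buffers; the example_alloc()/example_free() names are hypothetical
and not part of the patch.

#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Try a physically contiguous allocation first, fall back to vmalloc. */
static void *example_alloc(size_t len)
{
	void *p = kmalloc(len, GFP_KERNEL | __GFP_NOWARN);

	return p ? p : vmalloc(len);
}

/* kvfree() picks the right release path for either allocation above. */
static void example_free(void *p)
{
	kvfree(p);
}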
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a2fd7e759cb7..13a4a39a1f4b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -43,6 +43,7 @@
#include <linux/sysctl.h>
#include <linux/oom.h>
#include <linux/prefetch.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -155,6 +156,39 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
}
+struct dentry *debug_file;
+
+static int debug_shrinker_show(struct seq_file *s, void *unused)
+{
+ struct shrinker *shrinker;
+ struct shrink_control sc;
+
+ sc.gfp_mask = -1;
+ sc.nr_to_scan = 0;
+
+ down_read(&shrinker_rwsem);
+ list_for_each_entry(shrinker, &shrinker_list, list) {
+ int num_objs;
+
+ num_objs = shrinker->shrink(shrinker, &sc);
+ seq_printf(s, "%pf %d\n", shrinker->shrink, num_objs);
+ }
+ up_read(&shrinker_rwsem);
+ return 0;
+}
+
+static int debug_shrinker_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, debug_shrinker_show, inode->i_private);
+}
+
+static const struct file_operations debug_shrinker_fops = {
+ .open = debug_shrinker_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
/*
* Add a shrinker callback to be called from the vm
*/
@@ -167,6 +201,15 @@ void register_shrinker(struct shrinker *shrinker)
}
EXPORT_SYMBOL(register_shrinker);
+static int __init add_shrinker_debug(void)
+{
+ debugfs_create_file("shrinker", 0644, NULL, NULL,
+ &debug_shrinker_fops);
+ return 0;
+}
+
+late_initcall(add_shrinker_debug);
+
/*
* Remove one
*/
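A minimal userspace sketch of reading the debugfs node registered above
(assuming debugfs is mounted at /sys/kernel/debug): the file prints one
"<shrink function> <object count>" line per registered shrinker.

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/debug/shrinker", "r");

	if (!f) {
		perror("open /sys/kernel/debug/shrinker");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}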
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868e..2a680dadfd6c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -81,6 +81,20 @@ source "net/netlabel/Kconfig"
endif # if INET
+config ANDROID_PARANOID_NETWORK
+ bool "Only allow certain groups to create sockets"
+ default ANDROID
+ help
+	  Restricts socket creation for certain address families (e.g.
+	  AF_INET and Bluetooth) to processes in specific Android groups
+	  or with the matching capability (e.g. CAP_NET_RAW).
+
+config NET_ACTIVITY_STATS
+ bool "Network activity statistics tracking"
+ default ANDROID
+ help
+ Network activity statistics are useful for tracking wireless
+	  modem activity on 2G, 3G and 4G wireless networks. It counts the
+	  number of transmissions and groups them into time buckets.
+
config NETWORK_SECMARK
bool "Security Marking"
help
@@ -220,7 +234,7 @@ source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
config RPS
- boolean
+ boolean "RPS"
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
diff --git a/net/Makefile b/net/Makefile
index 091e7b04f301..67d460aa1c48 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/
obj-$(CONFIG_NFC) += nfc/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
+obj-$(CONFIG_NET_ACTIVITY_STATS) += activity_stats.o
diff --git a/net/activity_stats.c b/net/activity_stats.c
new file mode 100644
index 000000000000..4609ce2043eb
--- /dev/null
+++ b/net/activity_stats.c
@@ -0,0 +1,119 @@
+/* net/activity_stats.c
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Mike Chan (mike@android.com)
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/suspend.h>
+#include <net/net_namespace.h>
+
+/*
+ * Track transmission rates in buckets (power of 2).
+ * 1,2,4,8...512 seconds.
+ *
+ * Buckets represent the count of network transmissions at least
+ * N seconds apart, where N is 1 << bucket index.
+ */
+#define BUCKET_MAX 10
+
+/* Track network activity frequency */
+static unsigned long activity_stats[BUCKET_MAX];
+static ktime_t last_transmit;
+static ktime_t suspend_time;
+static DEFINE_SPINLOCK(activity_lock);
+
+void activity_stats_update(void)
+{
+ int i;
+ unsigned long flags;
+ ktime_t now;
+ s64 delta;
+
+ spin_lock_irqsave(&activity_lock, flags);
+ now = ktime_get();
+ delta = ktime_to_ns(ktime_sub(now, last_transmit));
+
+ for (i = BUCKET_MAX - 1; i >= 0; i--) {
+ /*
+		 * Skip buckets whose minimum interval has not elapsed since
+		 * the last transmission; the largest bucket reached is counted.
+ */
+ if (delta < (1000000000ULL << i))
+ continue;
+
+ activity_stats[i]++;
+ last_transmit = now;
+ break;
+ }
+ spin_unlock_irqrestore(&activity_lock, flags);
+}
+
+static int activity_stats_show(struct seq_file *m, void *v)
+{
+ int i;
+ int ret;
+
+ seq_printf(m, "Min Bucket(sec) Count\n");
+
+ for (i = 0; i < BUCKET_MAX; i++) {
+ ret = seq_printf(m, "%15d %lu\n", 1 << i, activity_stats[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int activity_stats_notifier(struct notifier_block *nb,
+ unsigned long event, void *dummy)
+{
+ switch (event) {
+ case PM_SUSPEND_PREPARE:
+ suspend_time = ktime_get_real();
+ break;
+
+ case PM_POST_SUSPEND:
+ suspend_time = ktime_sub(ktime_get_real(), suspend_time);
+ last_transmit = ktime_sub(last_transmit, suspend_time);
+ }
+
+ return 0;
+}
+
+static int activity_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, activity_stats_show, PDE_DATA(inode));
+}
+
+static const struct file_operations activity_stats_fops = {
+ .open = activity_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct notifier_block activity_stats_notifier_block = {
+ .notifier_call = activity_stats_notifier,
+};
+
+static int __init activity_stats_init(void)
+{
+ proc_create("activity", S_IRUGO,
+ init_net.proc_net_stat, &activity_stats_fops);
+ return register_pm_notifier(&activity_stats_notifier_block);
+}
+
+subsys_initcall(activity_stats_init);
+
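To make the bucket selection above concrete, a standalone userspace sketch (not
kernel code) that reproduces the same loop: a transmission gap of delta seconds
lands in the highest bucket i with 2^i <= delta, and gaps shorter than one
second are not counted at all.

#include <stdio.h>

static int pick_bucket(unsigned long long delta_ns)
{
	int i;

	for (i = 9; i >= 0; i--)	/* BUCKET_MAX - 1 .. 0 */
		if (delta_ns >= (1000000000ULL << i))
			return i;
	return -1;			/* below the 1-second bucket: ignored */
}

int main(void)
{
	unsigned long long gaps_sec[] = { 0, 1, 3, 17, 600 };
	int i;

	for (i = 0; i < 5; i++)
		printf("gap %llus -> bucket %d\n", gaps_sec[i],
		       pick_bucket(gaps_sec[i] * 1000000000ULL));
	return 0;
}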
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 6629cdc134dc..f7c36826f3f4 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -30,6 +30,11 @@
#include <net/bluetooth/bluetooth.h>
#include <linux/proc_fs.h>
+#ifndef CONFIG_BT_SOCK_DEBUG
+#undef BT_DBG
+#define BT_DBG(D...)
+#endif
+
#define VERSION "2.16"
/* Bluetooth sockets */
@@ -103,11 +108,40 @@ void bt_sock_unregister(int proto)
}
EXPORT_SYMBOL(bt_sock_unregister);
+#ifdef CONFIG_PARANOID_NETWORK
+static inline int current_has_bt_admin(void)
+{
+ return !current_euid();
+}
+
+static inline int current_has_bt(void)
+{
+ return current_has_bt_admin();
+}
+# else
+static inline int current_has_bt_admin(void)
+{
+ return 1;
+}
+
+static inline int current_has_bt(void)
+{
+ return 1;
+}
+#endif
+
static int bt_sock_create(struct net *net, struct socket *sock, int proto,
int kern)
{
int err;
+ if (proto == BTPROTO_RFCOMM || proto == BTPROTO_SCO ||
+ proto == BTPROTO_L2CAP) {
+ if (!current_has_bt())
+ return -EPERM;
+ } else if (!current_has_bt_admin())
+ return -EPERM;
+
if (net != &init_net)
return -EAFNOSUPPORT;
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index d459ed43c779..a3f3380c2095 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -113,7 +113,7 @@ struct hci_conn *phylink_add(struct hci_dev *hdev, struct amp_mgr *mgr,
bdaddr_t *dst = mgr->l2cap_conn->dst;
struct hci_conn *hcon;
- hcon = hci_conn_add(hdev, AMP_LINK, dst);
+ hcon = hci_conn_add(hdev, AMP_LINK, 0, dst);
if (!hcon)
return NULL;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 8e7290aea8f8..c9b2d5011fe4 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -354,7 +354,8 @@ static void hci_conn_auto_accept(unsigned long arg)
&conn->dst);
}
-struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
+struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type,
+ __u16 pkt_type, bdaddr_t *dst)
{
struct hci_conn *conn;
@@ -382,14 +383,22 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK;
break;
case SCO_LINK:
- if (lmp_esco_capable(hdev))
- conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
- (hdev->esco_type & EDR_ESCO_MASK);
- else
- conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
- break;
+ if (!pkt_type)
+ pkt_type = SCO_ESCO_MASK;
case ESCO_LINK:
- conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
+ if (!pkt_type)
+ pkt_type = ALL_ESCO_MASK;
+ if (lmp_esco_capable(hdev)) {
+ /* HCI Setup Synchronous Connection Command uses
+ reverse logic on the EDR_ESCO_MASK bits */
+ conn->pkt_type = (pkt_type ^ EDR_ESCO_MASK) &
+ hdev->esco_type;
+ } else {
+ /* Legacy HCI Add Sco Connection Command uses a
+ shifted bitmask */
+ conn->pkt_type = (pkt_type << 5) & hdev->pkt_type &
+ SCO_PTYPE_MASK;
+ }
break;
}
@@ -520,7 +529,7 @@ static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
if (le)
return ERR_PTR(-EBUSY);
- le = hci_conn_add(hdev, LE_LINK, dst);
+ le = hci_conn_add(hdev, LE_LINK, 0, dst);
if (!le)
return ERR_PTR(-ENOMEM);
@@ -543,7 +552,7 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
- acl = hci_conn_add(hdev, ACL_LINK, dst);
+ acl = hci_conn_add(hdev, ACL_LINK, 0, dst);
if (!acl)
return ERR_PTR(-ENOMEM);
}
@@ -561,7 +570,8 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
}
static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
- bdaddr_t *dst, u8 sec_level, u8 auth_type)
+ __u16 pkt_type, bdaddr_t *dst,
+ u8 sec_level, u8 auth_type)
{
struct hci_conn *acl;
struct hci_conn *sco;
@@ -572,7 +582,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
if (!sco) {
- sco = hci_conn_add(hdev, type, dst);
+ sco = hci_conn_add(hdev, type, pkt_type, dst);
if (!sco) {
hci_conn_drop(acl);
return ERR_PTR(-ENOMEM);
@@ -602,7 +612,8 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
}
/* Create SCO, ACL or LE connection. */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type,
+ __u16 pkt_type, bdaddr_t *dst,
__u8 dst_type, __u8 sec_level, __u8 auth_type)
{
BT_DBG("%s dst %pMR type 0x%x", hdev->name, dst, type);
@@ -614,7 +625,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
return hci_connect_acl(hdev, dst, sec_level, auth_type);
case SCO_LINK:
case ESCO_LINK:
- return hci_connect_sco(hdev, type, dst, sec_level, auth_type);
+ return hci_connect_sco(hdev, type, pkt_type, dst, sec_level, auth_type);
}
return ERR_PTR(-EINVAL);
@@ -883,6 +894,15 @@ int hci_get_conn_list(void __user *arg)
(ci + n)->out = c->out;
(ci + n)->state = c->state;
(ci + n)->link_mode = c->link_mode;
+ if (c->type == SCO_LINK) {
+ (ci + n)->mtu = hdev->sco_mtu;
+ (ci + n)->cnt = hdev->sco_cnt;
+ (ci + n)->pkts = hdev->sco_pkts;
+ } else {
+ (ci + n)->mtu = hdev->acl_mtu;
+ (ci + n)->cnt = hdev->acl_cnt;
+ (ci + n)->pkts = hdev->acl_pkts;
+ }
if (++n >= req.conn_num)
break;
}
@@ -919,6 +939,15 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg)
ci.out = conn->out;
ci.state = conn->state;
ci.link_mode = conn->link_mode;
+ if (req.type == SCO_LINK) {
+ ci.mtu = hdev->sco_mtu;
+ ci.cnt = hdev->sco_cnt;
+ ci.pkts = hdev->sco_pkts;
+ } else {
+ ci.mtu = hdev->acl_mtu;
+ ci.cnt = hdev->acl_cnt;
+ ci.pkts = hdev->acl_pkts;
+ }
}
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 5daf7ab26710..1526fb232b3f 100644..100755
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1119,7 +1119,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
}
} else {
if (!conn) {
- conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr);
+ conn = hci_conn_add(hdev, ACL_LINK, 0, &cp->bdaddr);
if (conn) {
conn->out = true;
conn->link_mode |= HCI_LM_MASTER;
@@ -1748,6 +1748,15 @@ unlock:
hci_conn_check_pending(hdev);
}
+static inline bool is_sco_active(struct hci_dev *hdev)
+{
+ if (hci_conn_hash_lookup_state(hdev, SCO_LINK, BT_CONNECTED) ||
+ (hci_conn_hash_lookup_state(hdev, ESCO_LINK,
+ BT_CONNECTED)))
+ return true;
+ return false;
+}
+
static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_conn_request *ev = (void *) skb->data;
@@ -1775,7 +1784,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type,
&ev->bdaddr);
if (!conn) {
- conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr);
+ /* pkt_type not yet used for incoming connections */
+ conn = hci_conn_add(hdev, ev->link_type, 0, &ev->bdaddr);
if (!conn) {
BT_ERR("No memory for new connection");
hci_dev_unlock(hdev);
@@ -1794,7 +1804,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
bacpy(&cp.bdaddr, &ev->bdaddr);
- if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
+ if (lmp_rswitch_capable(hdev) && ((mask & HCI_LM_MASTER)
+ || is_sco_active(hdev)))
cp.role = 0x00; /* Become master */
else
cp.role = 0x01; /* Remain slave */
@@ -2963,6 +2974,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
hci_conn_add_sysfs(conn);
break;
+ case 0x10: /* Connection Accept Timeout */
case 0x11: /* Unsupported Feature or Parameter Value */
case 0x1c: /* SCO interval rejected */
case 0x1a: /* Unsupported Remote Feature */
@@ -3540,7 +3552,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
+ conn = hci_conn_add(hdev, LE_LINK, 0, &ev->bdaddr);
if (!conn) {
BT_ERR("No memory for new connection");
goto unlock;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 68843a28a7af..c11a28bae844 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1793,10 +1793,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
auth_type = l2cap_get_auth_type(chan);
if (chan->dcid == L2CAP_CID_LE_DATA)
- hcon = hci_connect(hdev, LE_LINK, dst, dst_type,
+ hcon = hci_connect(hdev, LE_LINK, 0, dst, dst_type,
chan->sec_level, auth_type);
else
- hcon = hci_connect(hdev, ACL_LINK, dst, dst_type,
+ hcon = hci_connect(hdev, ACL_LINK, 0, dst, dst_type,
chan->sec_level, auth_type);
if (IS_ERR(hcon)) {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 8208a13a9837..3e574540b2c2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2205,10 +2205,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
auth_type = HCI_AT_DEDICATED_BONDING_MITM;
if (cp->addr.type == BDADDR_BREDR)
- conn = hci_connect(hdev, ACL_LINK, &cp->addr.bdaddr,
+ conn = hci_connect(hdev, ACL_LINK, 0, &cp->addr.bdaddr,
cp->addr.type, sec_level, auth_type);
else
- conn = hci_connect(hdev, LE_LINK, &cp->addr.bdaddr,
+ conn = hci_connect(hdev, LE_LINK, 0, &cp->addr.bdaddr,
cp->addr.type, sec_level, auth_type);
if (IS_ERR(conn)) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 19ba192e9dbf..3ca5e40fe390 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -436,7 +436,6 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
switch (d->state) {
case BT_CONNECT:
- case BT_CONFIG:
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
rfcomm_schedule();
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index c9ae6b703c13..de9c955b247a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -158,6 +158,7 @@ static int sco_connect(struct sock *sk)
{
bdaddr_t *src = &bt_sk(sk)->src;
bdaddr_t *dst = &bt_sk(sk)->dst;
+ __u16 pkt_type = sco_pi(sk)->pkt_type;
struct sco_conn *conn;
struct hci_conn *hcon;
struct hci_dev *hdev;
@@ -173,11 +174,13 @@ static int sco_connect(struct sock *sk)
if (lmp_esco_capable(hdev) && !disable_esco)
type = ESCO_LINK;
- else
+ else {
type = SCO_LINK;
+ pkt_type &= SCO_ESCO_MASK;
+ }
- hcon = hci_connect(hdev, type, dst, BDADDR_BREDR, BT_SECURITY_LOW,
- HCI_AT_NO_BONDING);
+ hcon = hci_connect(hdev, type, pkt_type, dst, BDADDR_BREDR,
+ BT_SECURITY_LOW, HCI_AT_NO_BONDING);
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto done;
@@ -445,17 +448,21 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
return 0;
}
-static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
{
- struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
+ struct sockaddr_sco sa;
struct sock *sk = sock->sk;
- int err = 0;
+ int len, err = 0;
- BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
+ BT_DBG("sk %p %pMR", sk, &sa.sco_bdaddr);
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ memset(&sa, 0, sizeof(sa));
+ len = min_t(unsigned int, sizeof(sa), alen);
+ memcpy(&sa, addr, len);
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
@@ -468,7 +475,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
goto done;
}
- bacpy(&bt_sk(sk)->src, &sa->sco_bdaddr);
+ bacpy(&bt_sk(sk)->src, &sa.sco_bdaddr);
+ sco_pi(sk)->pkt_type = sa.sco_pkt_type;
sk->sk_state = BT_BOUND;
@@ -479,26 +487,34 @@ done:
static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
{
- struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
- int err;
+ struct sockaddr_sco sa;
+ int len, err;
BT_DBG("sk %p", sk);
- if (alen < sizeof(struct sockaddr_sco) ||
- addr->sa_family != AF_BLUETOOTH)
+ if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
-
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ memset(&sa, 0, sizeof(sa));
+ len = min_t(unsigned int, sizeof(sa), alen);
+ memcpy(&sa, addr, len);
lock_sock(sk);
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
+
/* Set destination address and psm */
- bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr);
+ bacpy(&bt_sk(sk)->dst, &sa.sco_bdaddr);
+ sco_pi(sk)->pkt_type = sa.sco_pkt_type;
err = sco_connect(sk);
if (err)
@@ -622,6 +638,7 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len
bacpy(&sa->sco_bdaddr, &bt_sk(sk)->dst);
else
bacpy(&sa->sco_bdaddr, &bt_sk(sk)->src);
+ sa->sco_pkt_type = sco_pi(sk)->pkt_type;
return 0;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 967312803e41..239e0e84f9e6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -41,11 +41,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
}
#endif
- u64_stats_update_begin(&brstats->syncp);
- brstats->tx_packets++;
- brstats->tx_bytes += skb->len;
- u64_stats_update_end(&brstats->syncp);
-
if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid))
goto out;
@@ -54,6 +49,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
+ u64_stats_update_begin(&brstats->syncp);
+ brstats->tx_packets++;
+ /* Exclude ETH_HLEN from byte stats for consistency with Rx chain */
+ brstats->tx_bytes += skb->len;
+ u64_stats_update_end(&brstats->syncp);
+
if (is_broadcast_ether_addr(dest))
br_flood_deliver(br, skb);
else if (is_multicast_ether_addr(dest)) {
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 55e08e2de3a1..da78f5c6d295 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -31,6 +31,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->uid_start = INVALID_UID;
+ r->uid_end = INVALID_UID;
r->fr_net = hold_net(ops->fro_net);
/* The lock is not required here, the list in unreacheable
@@ -179,6 +181,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);
+static inline kuid_t fib_nl_uid(struct nlattr *nla)
+{
+ return make_kuid(current_user_ns(), nla_get_u32(nla));
+}
+
+static int nla_put_uid(struct sk_buff *skb, int idx, kuid_t uid)
+{
+ return nla_put_u32(skb, idx, from_kuid_munged(current_user_ns(), uid));
+}
+
+static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule)
+{
+ return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) ||
+ (uid_gte(fl->flowi_uid, rule->uid_start) &&
+ uid_lte(fl->flowi_uid, rule->uid_end));
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags)
{
@@ -193,6 +212,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
goto out;
+ if (!fib_uid_range_match(fl, rule))
+ goto out;
+
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -363,6 +385,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
+ /* UID start and end must either both be valid or both unspecified. */
+ rule->uid_start = rule->uid_end = INVALID_UID;
+ if (tb[FRA_UID_START] || tb[FRA_UID_END]) {
+ if (tb[FRA_UID_START] && tb[FRA_UID_END]) {
+ rule->uid_start = fib_nl_uid(tb[FRA_UID_START]);
+ rule->uid_end = fib_nl_uid(tb[FRA_UID_END]);
+ }
+ if (!uid_valid(rule->uid_start) ||
+ !uid_valid(rule->uid_end) ||
+ !uid_lte(rule->uid_start, rule->uid_end))
+ goto errout_free;
+ }
+
err = ops->configure(rule, skb, frh, tb);
if (err < 0)
goto errout_free;
@@ -469,6 +504,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
(rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
continue;
+ if (tb[FRA_UID_START] &&
+ !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START])))
+ continue;
+
+ if (tb[FRA_UID_END] &&
+ !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END])))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -525,7 +568,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_PRIORITY */
+ nla_total_size(4) /* FRA_TABLE */
+ nla_total_size(4) /* FRA_FWMARK */
- + nla_total_size(4); /* FRA_FWMASK */
+ + nla_total_size(4) /* FRA_FWMASK */
+ + nla_total_size(4) /* FRA_UID_START */
+ + nla_total_size(4); /* FRA_UID_END */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -579,7 +624,11 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
((rule->mark_mask || rule->mark) &&
nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
(rule->target &&
- nla_put_u32(skb, FRA_GOTO, rule->target)))
+ nla_put_u32(skb, FRA_GOTO, rule->target)) ||
+ (uid_valid(rule->uid_start) &&
+ nla_put_uid(skb, FRA_UID_START, rule->uid_start)) ||
+ (uid_valid(rule->uid_end) &&
+ nla_put_uid(skb, FRA_UID_END, rule->uid_end)))
goto nla_put_failure;
if (ops->fill(rule, skb, frh) < 0)
goto nla_put_failure;
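The UID-range matching added to net/core/fib_rules.c above can be summarised
with this plain-integer sketch (kuid_t handling and user-namespace mapping are
omitted; -1 stands in for INVALID_UID): a rule carrying no UID range matches
every flow, otherwise the flow's UID must fall inside [uid_start, uid_end].

#include <stdbool.h>
#include <stdio.h>

static bool uid_range_match(long flow_uid, long uid_start, long uid_end)
{
	if (uid_start < 0 && uid_end < 0)
		return true;	/* rule has no UID range */
	return flow_uid >= uid_start && flow_uid <= uid_end;
}

int main(void)
{
	printf("%d\n", uid_range_match(10023, -1, -1));		/* 1: no range */
	printf("%d\n", uid_range_match(10023, 10000, 19999));	/* 1: in range */
	printf("%d\n", uid_range_match(0, 10000, 19999));	/* 0: outside */
	return 0;
}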
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 089cb9f36387..5a9af0a9b0e6 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -15,6 +15,7 @@ obj-y := route.o inetpeer.o protocol.o \
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
+obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c4adc319cc2e..f022e0e97dc5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,6 +119,19 @@
#include <linux/mroute.h>
#endif
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+ return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+ return 1;
+}
+#endif
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
@@ -284,6 +297,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (!current_has_network())
+ return -EACCES;
+
if (unlikely(!inet_ehash_secret))
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
build_ehash_secret();
@@ -336,8 +352,7 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern &&
- !ns_capable(net->user_ns, CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -905,6 +920,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFPFLAGS:
case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
+ case SIOCKILLADDR:
err = devinet_ioctl(net, cmd, (void __user *)arg);
break;
default:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e40eef4ac697..b151e0ac7f27 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -59,6 +59,7 @@
#include <net/arp.h>
#include <net/ip.h>
+#include <net/tcp.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
@@ -918,6 +919,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFBRDADDR: /* Set the broadcast address */
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
+ case SIOCKILLADDR: /* Nuke all sockets on this address */
ret = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
goto out;
@@ -969,7 +971,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
}
ret = -EADDRNOTAVAIL;
- if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
+ if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
+ && cmd != SIOCKILLADDR)
goto done;
switch (cmd) {
@@ -1096,6 +1099,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
inet_insert_ifa(ifa);
}
break;
+ case SIOCKILLADDR: /* Nuke all connections on this address */
+ ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
+ break;
}
done:
rtnl_unlock();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4556cd25acde..4a7cf03a192c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = oif;
+ fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
@@ -531,6 +531,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_FLOW] = { .type = NLA_U32 },
+ [RTA_UID] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index bc773a10dca6..d00d8f9650e9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -629,6 +629,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
.daddr = nh->nh_gw,
.flowi4_scope = cfg->fc_scope + 1,
.flowi4_oif = nh->nh_oif,
+ .flowi4_iif = LOOPBACK_IFINDEX,
};
/* It is not necessary, but requires a bit of thinking */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ea78ef5ac352..5af8781b65e1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct sock *sk;
struct inet_sock *inet;
__be32 daddr, saddr;
+ u32 mark = IP4_REPLY_MARK(net, skb->mark);
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_param->data.icmph.checksum = 0;
inet->tos = ip_hdr(skb)->tos;
+ sk->sk_mark = mark;
daddr = ipc.addr = ip_hdr(skb)->saddr;
saddr = fib_compute_spec_dst(skb);
ipc.opt = NULL;
@@ -361,6 +363,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = saddr;
+ fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -379,7 +382,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
const struct iphdr *iph,
- __be32 saddr, u8 tos,
+ __be32 saddr, u8 tos, u32 mark,
int type, int code,
struct icmp_bxm *param)
{
@@ -391,6 +394,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->daddr = (param->replyopts.opt.opt.srr ?
param->replyopts.opt.opt.faddr : iph->saddr);
fl4->saddr = saddr;
+ fl4->flowi4_mark = mark;
fl4->flowi4_tos = RT_TOS(tos);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
@@ -488,6 +492,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct flowi4 fl4;
__be32 saddr;
u8 tos;
+ u32 mark;
struct net *net;
struct sock *sk;
@@ -584,6 +589,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
+ mark = IP4_REPLY_MARK(net, skb_in->mark);
if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
goto out_unlock;
@@ -600,11 +606,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.skb = skb_in;
icmp_param.offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos;
+ sk->sk_mark = mark;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts.opt;
ipc.tx_flags = 0;
- rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
+ rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
type, code, &icmp_param);
if (IS_ERR(rt))
goto out_unlock;
@@ -937,7 +944,8 @@ error:
void icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
- struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+ int offset = iph->ihl<<2;
+ struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
int type = icmp_hdr(skb)->type;
int code = icmp_hdr(skb)->code;
struct net *net = dev_net(skb->dev);
@@ -947,7 +955,7 @@ void icmp_err(struct sk_buff *skb, u32 info)
* triggered by ICMP_ECHOREPLY which sent from kernel.
*/
if (icmph->type != ICMP_ECHOREPLY) {
- ping_err(skb, info);
+ ping_err(skb, offset, info);
return;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6acb541c9091..6dfec2f18214 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -417,12 +417,13 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct net *net = sock_net(sk);
int flags = inet_sk_flowi_flags(sk);
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol,
flags,
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
- ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+ ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport,
+ sock_i_uid(sk));
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -454,11 +455,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rcu_read_lock();
opt = rcu_dereference(newinet->inet_opt);
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
- ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
+ ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport,
+ sock_i_uid(sk));
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -688,6 +690,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
newsk->sk_write_space = sk_stream_write_space;
+ newsk->sk_mark = inet_rsk(req)->ir_mark;
+
newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 57e745086302..47d236156f0f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1482,12 +1482,14 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
daddr = replyopts.opt.opt.faddr;
}
- flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+ flowi4_init_output(&fl4, arg->bound_dev_if,
+ IP4_REPLY_MARK(net, skb->mark),
RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
- tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+ arg->uid);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 56d079b63ad3..9fc74ec238de 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -454,7 +454,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
struct mr_table *mrt;
struct flowi4 fl4 = {
.flowi4_oif = dev->ifindex,
- .flowi4_iif = skb->skb_iif,
+ .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi4_mark = skb->mark,
};
int err;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e7916c193932..23dfd4a312ce 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -110,6 +110,18 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_REJECT_SKERR
+ bool "Force socket error when rejecting with icmp*"
+ depends on IP_NF_TARGET_REJECT
+ default n
+ help
+	  This option additionally turns a "--reject-with icmp*" into a
+	  matching error on the local socket.
+	  The REJECT target normally only sends an ICMP message and leaves
+	  the local socket unaware of any ingress rejects.
+
+ If unsure, say N.
+
config IP_NF_TARGET_ULOG
tristate "ULOG target support"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 04b18c1ac345..452e8a587c34 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -129,6 +129,14 @@ static void send_reset(struct sk_buff *oldskb, int hook)
static inline void send_unreach(struct sk_buff *skb_in, int code)
{
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+#ifdef CONFIG_IP_NF_TARGET_REJECT_SKERR
+ if (skb_in->sk) {
+ skb_in->sk->sk_err = icmp_err_convert[code].errno;
+ skb_in->sk->sk_error_report(skb_in->sk);
+ pr_debug("ipt_REJECT: sk_err=%d for skb=%p sk=%p\n",
+ skb_in->sk->sk_err, skb_in, skb_in->sk);
+ }
+#endif
}
static unsigned int
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c49dcd0284a0..4bfaedf9b34e 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ipv4_is_multicast(iph->daddr)) {
if (ipv4_is_zeronet(iph->saddr))
return ipv4_is_local_multicast(iph->daddr) ^ invert;
- flow.flowi4_iif = 0;
- } else {
- flow.flowi4_iif = LOOPBACK_IFINDEX;
}
-
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_oif = 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 459b957104a8..47d1c7e10d54 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -33,7 +33,6 @@
#include <linux/netdevice.h>
#include <net/snmp.h>
#include <net/ip.h>
-#include <net/ipv6.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -46,8 +45,18 @@
#include <net/inet_common.h>
#include <net/checksum.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#endif
-static struct ping_table ping_table;
+
+struct ping_table ping_table;
+struct pingv6_ops pingv6_ops;
+EXPORT_SYMBOL_GPL(pingv6_ops);
static u16 ping_port_rover;
@@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma
pr_debug("hash(%d) = %d\n", num, res);
return res;
}
+EXPORT_SYMBOL_GPL(ping_hash);
static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
struct net *net, unsigned int num)
@@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
}
-static int ping_v4_get_port(struct sock *sk, unsigned short ident)
+int ping_get_port(struct sock *sk, unsigned short ident)
{
struct hlist_nulls_node *node;
struct hlist_nulls_head *hlist;
@@ -103,6 +113,10 @@ next_port:
ping_portaddr_for_each_entry(sk2, node, hlist) {
isk2 = inet_sk(sk2);
+ /* BUG? Why is this reuse and not reuseaddr? ping.c
+ * doesn't turn off SO_REUSEADDR, and it doesn't expect
+ * that other ping processes can steal its packets.
+ */
if ((isk2->inet_num == ident) &&
(sk2 != sk) &&
(!sk2->sk_reuse || !sk->sk_reuse))
@@ -125,17 +139,18 @@ fail:
write_unlock_bh(&ping_table.lock);
return 1;
}
+EXPORT_SYMBOL_GPL(ping_get_port);
-static void ping_v4_hash(struct sock *sk)
+void ping_hash(struct sock *sk)
{
- pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
+ pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
}
-static void ping_v4_unhash(struct sock *sk)
+void ping_unhash(struct sock *sk)
{
struct inet_sock *isk = inet_sk(sk);
- pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+ pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
if (sk_hashed(sk)) {
write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
@@ -147,31 +162,63 @@ static void ping_v4_unhash(struct sock *sk)
write_unlock_bh(&ping_table.lock);
}
}
+EXPORT_SYMBOL_GPL(ping_unhash);
-static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr,
- u16 ident, int dif)
+static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
+ int dif = skb->dev->ifindex;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
+ (int)ident, &ip_hdr(skb)->daddr, dif);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
+ (int)ident, &ipv6_hdr(skb)->daddr, dif);
+#endif
+ }
- pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
- (int)ident, &daddr, dif);
read_lock_bh(&ping_table.lock);
ping_portaddr_for_each_entry(sk, hnode, hslot) {
isk = inet_sk(sk);
- pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk,
- (int)isk->inet_num, &isk->inet_rcv_saddr,
- sk->sk_bound_dev_if);
-
pr_debug("iterate\n");
if (isk->inet_num != ident)
continue;
- if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
+
+ if (skb->protocol == htons(ETH_P_IP) &&
+ sk->sk_family == AF_INET) {
+ pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk,
+ (int) isk->inet_num, &isk->inet_rcv_saddr,
+ sk->sk_bound_dev_if);
+
+ if (isk->inet_rcv_saddr &&
+ isk->inet_rcv_saddr != ip_hdr(skb)->daddr)
+ continue;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6) &&
+ sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk,
+ (int) isk->inet_num,
+ &inet6_sk(sk)->rcv_saddr,
+ sk->sk_bound_dev_if);
+
+ if (!ipv6_addr_any(&np->rcv_saddr) &&
+ !ipv6_addr_equal(&np->rcv_saddr,
+ &ipv6_hdr(skb)->daddr))
+ continue;
+#endif
+ } else {
continue;
+ }
+
if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
continue;
@@ -201,7 +248,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
}
-static int ping_init_sock(struct sock *sk)
+int ping_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
@@ -210,6 +257,9 @@ static int ping_init_sock(struct sock *sk)
kgid_t low, high;
int ret = 0;
+ if (sk->sk_family == AF_INET6)
+ inet6_sk(sk)->ipv6only = 1;
+
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
@@ -233,8 +283,9 @@ out_release_group:
put_group_info(group_info);
return ret;
}
+EXPORT_SYMBOL_GPL(ping_init_sock);
-static void ping_close(struct sock *sk, long timeout)
+void ping_close(struct sock *sk, long timeout)
{
pr_debug("ping_close(sk=%p,sk->num=%u)\n",
inet_sk(sk), inet_sk(sk)->inet_num);
@@ -242,36 +293,130 @@ static void ping_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
+EXPORT_SYMBOL_GPL(ping_close);
+
+/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */
+int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
+ struct sockaddr *uaddr, int addr_len) {
+ struct net *net = sock_net(sk);
+ if (sk->sk_family == AF_INET) {
+ struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ int chk_addr_ret;
+
+ if (addr_len < sizeof(*addr))
+ return -EINVAL;
+
+ if (addr->sin_family != AF_INET &&
+ !(addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr == htonl(INADDR_ANY)))
+ return -EAFNOSUPPORT;
+
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
+ sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+ chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ chk_addr_ret = RTN_LOCAL;
+
+ if ((sysctl_ip_nonlocal_bind == 0 &&
+ isk->freebind == 0 && isk->transparent == 0 &&
+ chk_addr_ret != RTN_LOCAL) ||
+ chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST)
+ return -EADDRNOTAVAIL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr;
+ int addr_type, scoped, has_addr;
+ struct net_device *dev = NULL;
+
+ if (addr_len < sizeof(*addr))
+ return -EINVAL;
+
+ if (addr->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
+ sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
+
+ addr_type = ipv6_addr_type(&addr->sin6_addr);
+ scoped = __ipv6_addr_needs_scope_id(addr_type);
+ if ((addr_type != IPV6_ADDR_ANY &&
+ !(addr_type & IPV6_ADDR_UNICAST)) ||
+ (scoped && !addr->sin6_scope_id))
+ return -EINVAL;
+
+ rcu_read_lock();
+ if (addr->sin6_scope_id) {
+ dev = dev_get_by_index_rcu(net, addr->sin6_scope_id);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
+ has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
+ scoped);
+ rcu_read_unlock();
+
+ if (!(isk->freebind || isk->transparent || has_addr ||
+ addr_type == IPV6_ADDR_ANY))
+ return -EADDRNOTAVAIL;
+
+ if (scoped)
+ sk->sk_bound_dev_if = addr->sin6_scope_id;
+#endif
+ } else {
+ return -EAFNOSUPPORT;
+ }
+ return 0;
+}
+
+void ping_set_saddr(struct sock *sk, struct sockaddr *saddr)
+{
+ if (saddr->sa_family == AF_INET) {
+ struct inet_sock *isk = inet_sk(sk);
+ struct sockaddr_in *addr = (struct sockaddr_in *) saddr;
+ isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (saddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ np->rcv_saddr = np->saddr = addr->sin6_addr;
+#endif
+ }
+}
+
+void ping_clear_saddr(struct sock *sk, int dif)
+{
+ sk->sk_bound_dev_if = dif;
+ if (sk->sk_family == AF_INET) {
+ struct inet_sock *isk = inet_sk(sk);
+ isk->inet_rcv_saddr = isk->inet_saddr = 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
+ memset(&np->saddr, 0, sizeof(np->saddr));
+#endif
+ }
+}
/*
* We need our own bind because there are no privileged id's == local ports.
* Moreover, we don't allow binding to multi- and broadcast addresses.
*/
-static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *isk = inet_sk(sk);
unsigned short snum;
- int chk_addr_ret;
int err;
+ int dif = sk->sk_bound_dev_if;
- if (addr_len < sizeof(struct sockaddr_in))
- return -EINVAL;
-
- pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
- sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
-
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
- if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
- chk_addr_ret = RTN_LOCAL;
-
- if ((sysctl_ip_nonlocal_bind == 0 &&
- isk->freebind == 0 && isk->transparent == 0 &&
- chk_addr_ret != RTN_LOCAL) ||
- chk_addr_ret == RTN_MULTICAST ||
- chk_addr_ret == RTN_BROADCAST)
- return -EADDRNOTAVAIL;
+ err = ping_check_bind_addr(sk, isk, uaddr, addr_len);
+ if (err)
+ return err;
lock_sock(sk);
@@ -280,42 +425,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
goto out;
err = -EADDRINUSE;
- isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
- snum = ntohs(addr->sin_port);
- if (ping_v4_get_port(sk, snum) != 0) {
- isk->inet_saddr = isk->inet_rcv_saddr = 0;
+ ping_set_saddr(sk, uaddr);
+ snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port);
+ if (ping_get_port(sk, snum) != 0) {
+ ping_clear_saddr(sk, dif);
goto out;
}
- pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n",
+ pr_debug("after bind(): num = %d, dif = %d\n",
(int)isk->inet_num,
- &isk->inet_rcv_saddr,
(int)sk->sk_bound_dev_if);
err = 0;
- if (isk->inet_rcv_saddr)
+ if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) ||
+ (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)))
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
isk->inet_sport = htons(isk->inet_num);
isk->inet_daddr = 0;
isk->inet_dport = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr));
+#endif
+
sk_dst_reset(sk);
out:
release_sock(sk);
pr_debug("ping_v4_bind -> %d\n", err);
return err;
}
+EXPORT_SYMBOL_GPL(ping_bind);
/*
* Is this a supported type of ICMP message?
*/
-static inline int ping_supported(int type, int code)
+static inline int ping_supported(int family, int type, int code)
{
- if (type == ICMP_ECHO && code == 0)
- return 1;
- return 0;
+ return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
+ (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0);
}
/*
@@ -323,30 +476,42 @@ static inline int ping_supported(int type, int code)
* sort of error condition.
*/
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-
-void ping_err(struct sk_buff *skb, u32 info)
+void ping_err(struct sk_buff *skb, int offset, u32 info)
{
- struct iphdr *iph = (struct iphdr *)skb->data;
- struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
+ int family;
+ struct icmphdr *icmph;
struct inet_sock *inet_sock;
- int type = icmp_hdr(skb)->type;
- int code = icmp_hdr(skb)->code;
+ int type;
+ int code;
struct net *net = dev_net(skb->dev);
struct sock *sk;
int harderr;
int err;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ family = AF_INET;
+ type = icmp_hdr(skb)->type;
+ code = icmp_hdr(skb)->code;
+ icmph = (struct icmphdr *)(skb->data + offset);
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ family = AF_INET6;
+ type = icmp6_hdr(skb)->icmp6_type;
+ code = icmp6_hdr(skb)->icmp6_code;
+ icmph = (struct icmphdr *) (skb->data + offset);
+ } else {
+ BUG();
+ }
+
/* We assume the packet has already been checked by icmp_unreach */
- if (!ping_supported(icmph->type, icmph->code))
+ if (!ping_supported(family, icmph->type, icmph->code))
return;
- pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
- code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
+ pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n",
+ skb->protocol, type, code, ntohs(icmph->un.echo.id),
+ ntohs(icmph->un.echo.sequence));
- sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
- ntohs(icmph->un.echo.id), skb->dev->ifindex);
+ sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
if (sk == NULL) {
pr_debug("no socket, dropping\n");
return; /* No socket for error */
@@ -357,72 +522,83 @@ void ping_err(struct sk_buff *skb, u32 info)
harderr = 0;
inet_sock = inet_sk(sk);
- switch (type) {
- default:
- case ICMP_TIME_EXCEEDED:
- err = EHOSTUNREACH;
- break;
- case ICMP_SOURCE_QUENCH:
- /* This is not a real error but ping wants to see it.
- * Report it with some fake errno. */
- err = EREMOTEIO;
- break;
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- harderr = 1;
- break;
- case ICMP_DEST_UNREACH:
- if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
- ipv4_sk_update_pmtu(skb, sk, info);
- if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
- err = EMSGSIZE;
- harderr = 1;
- break;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ switch (type) {
+ default:
+ case ICMP_TIME_EXCEEDED:
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_SOURCE_QUENCH:
+ /* This is not a real error but ping wants to see it.
+ * Report it with some fake errno.
+ */
+ err = EREMOTEIO;
+ break;
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ harderr = 1;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
+ ipv4_sk_update_pmtu(skb, sk, info);
+ if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
+ err = EMSGSIZE;
+ harderr = 1;
+ break;
+ }
+ goto out;
}
- goto out;
- }
- err = EHOSTUNREACH;
- if (code <= NR_ICMP_UNREACH) {
- harderr = icmp_err_convert[code].fatal;
- err = icmp_err_convert[code].errno;
+ err = EHOSTUNREACH;
+ if (code <= NR_ICMP_UNREACH) {
+ harderr = icmp_err_convert[code].fatal;
+ err = icmp_err_convert[code].errno;
+ }
+ break;
+ case ICMP_REDIRECT:
+ /* See ICMP_SOURCE_QUENCH */
+ ipv4_sk_redirect(skb, sk);
+ err = EREMOTEIO;
+ break;
}
- break;
- case ICMP_REDIRECT:
- /* See ICMP_SOURCE_QUENCH */
- ipv4_sk_redirect(skb, sk);
- err = EREMOTEIO;
- break;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ harderr = pingv6_ops.icmpv6_err_convert(type, code, &err);
+#endif
}
/*
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
- if (!inet_sock->recverr) {
+ if ((family == AF_INET && !inet_sock->recverr) ||
+ (family == AF_INET6 && !inet6_sk(sk)->recverr)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
- ip_icmp_error(sk, skb, err, 0 /* no remote port */,
- info, (u8 *)icmph);
+ if (family == AF_INET) {
+ ip_icmp_error(sk, skb, err, 0 /* no remote port */,
+ info, (u8 *)icmph);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ pingv6_ops.ipv6_icmp_error(sk, skb, err, 0,
+ info, (u8 *)icmph);
+#endif
+ }
}
sk->sk_err = err;
sk->sk_error_report(sk);
out:
sock_put(sk);
}
+EXPORT_SYMBOL_GPL(ping_err);
/*
- * Copy and checksum an ICMP Echo packet from user space into a buffer.
+ * Copy and checksum an ICMP Echo packet from user space into a buffer
+ * starting from the payload.
*/
-struct pingfakehdr {
- struct icmphdr icmph;
- struct iovec *iov;
- __wsum wcheck;
-};
-
-static int ping_getfrag(void *from, char *to,
- int offset, int fraglen, int odd, struct sk_buff *skb)
+int ping_getfrag(void *from, char *to,
+ int offset, int fraglen, int odd, struct sk_buff *skb)
{
struct pingfakehdr *pfh = (struct pingfakehdr *)from;
@@ -433,20 +609,33 @@ static int ping_getfrag(void *from, char *to,
pfh->iov, 0, fraglen - sizeof(struct icmphdr),
&pfh->wcheck))
return -EFAULT;
+ } else if (offset < sizeof(struct icmphdr)) {
+ BUG();
+ } else {
+ if (csum_partial_copy_fromiovecend
+ (to, pfh->iov, offset - sizeof(struct icmphdr),
+ fraglen, &pfh->wcheck))
+ return -EFAULT;
+ }
- return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ /* For IPv6, checksum each skb as we go along, as expected by
+ * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in
+ * wcheck; it will be finalized in ping_v4_push_pending_frames.
+ */
+ if (pfh->family == AF_INET6) {
+ skb->csum = pfh->wcheck;
+ skb->ip_summed = CHECKSUM_NONE;
+ pfh->wcheck = 0;
}
- if (offset < sizeof(struct icmphdr))
- BUG();
- if (csum_partial_copy_fromiovecend
- (to, pfh->iov, offset - sizeof(struct icmphdr),
- fraglen, &pfh->wcheck))
- return -EFAULT;
+#endif
+
return 0;
}
+EXPORT_SYMBOL_GPL(ping_getfrag);
-static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
- struct flowi4 *fl4)
+static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
+ struct flowi4 *fl4)
{
struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
@@ -458,24 +647,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
return ip_push_pending_frames(sk, fl4);
}
-static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len)
-{
- struct net *net = sock_net(sk);
- struct flowi4 fl4;
- struct inet_sock *inet = inet_sk(sk);
- struct ipcm_cookie ipc;
- struct icmphdr user_icmph;
- struct pingfakehdr pfh;
- struct rtable *rt = NULL;
- struct ip_options_data opt_copy;
- int free = 0;
- __be32 saddr, daddr, faddr;
- u8 tos;
- int err;
-
- pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
-
+int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
+ void *user_icmph, size_t icmph_len) {
+ u8 type, code;
if (len > 0xFFFF)
return -EMSGSIZE;
@@ -490,15 +664,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/*
* Fetch the ICMP header provided by the userland.
- * iovec is modified!
+ * iovec is modified! The ICMP header is consumed.
*/
-
- if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
- sizeof(struct icmphdr)))
+ if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len))
return -EFAULT;
- if (!ping_supported(user_icmph.type, user_icmph.code))
+
+ if (family == AF_INET) {
+ type = ((struct icmphdr *) user_icmph)->type;
+ code = ((struct icmphdr *) user_icmph)->code;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ type = ((struct icmp6hdr *) user_icmph)->icmp6_type;
+ code = ((struct icmp6hdr *) user_icmph)->icmp6_code;
+#endif
+ } else {
+ BUG();
+ }
+
+ if (!ping_supported(family, type, code))
return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ping_common_sendmsg);
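As a usage illustration, this is the header an IPv6 ping-socket sender is expected to supply. A minimal userspace sketch (error handling trimmed), assuming the caller's group falls within the ping_group_range sysctl that gates unprivileged ping sockets:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <arpa/inet.h>

int send_echo6(const char *dest)
{
	int fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6);
	struct sockaddr_in6 dst = { .sin6_family = AF_INET6 };
	struct icmp6_hdr hdr;	/* consumed by ping_common_sendmsg() */
	int ret;

	inet_pton(AF_INET6, dest, &dst.sin6_addr);
	memset(&hdr, 0, sizeof(hdr));
	hdr.icmp6_type = ICMP6_ECHO_REQUEST;	/* anything else fails ping_supported() */
	hdr.icmp6_code = 0;
	/* echo id and checksum are filled in by the kernel */
	ret = sendto(fd, &hdr, sizeof(hdr), 0,
		     (struct sockaddr *)&dst, sizeof(dst));
	close(fd);
	return ret < 0 ? -1 : 0;
}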
+
+int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct net *net = sock_net(sk);
+ struct flowi4 fl4;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipcm_cookie ipc;
+ struct icmphdr user_icmph;
+ struct pingfakehdr pfh;
+ struct rtable *rt = NULL;
+ struct ip_options_data opt_copy;
+ int free = 0;
+ __be32 saddr, daddr, faddr;
+ u8 tos;
+ int err;
+
+ pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
/*
* Get and verify the address.
*/
@@ -508,7 +720,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET)
- return -EINVAL;
+ return -EAFNOSUPPORT;
daddr = usin->sin_addr.s_addr;
/* no remote port */
} else {
@@ -570,7 +782,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
- inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
+ inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
+ sock_i_uid(sk));
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
@@ -603,13 +816,14 @@ back_from_confirm:
pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
pfh.iov = msg->msg_iov;
pfh.wcheck = 0;
+ pfh.family = AF_INET;
err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
0, &ipc, &rt, msg->msg_flags);
if (err)
ip_flush_pending_frames(sk);
else
- err = ping_push_pending_frames(sk, &pfh, &fl4);
+ err = ping_v4_push_pending_frames(sk, &pfh, &fl4);
release_sock(sk);
out:
@@ -630,10 +844,13 @@ do_confirm:
goto out;
}
-static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
+ int family = sk->sk_family;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
struct sk_buff *skb;
int copied, err;
@@ -643,8 +860,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (flags & MSG_ERRQUEUE)
- return ip_recv_error(sk, msg, len, addr_len);
+ if (addr_len) {
+ if (family == AF_INET)
+ *addr_len = sizeof(*sin);
+ else if (family == AF_INET6)
+ *addr_len = sizeof(*sin6);
+ }
+
+ if (flags & MSG_ERRQUEUE) {
+ if (family == AF_INET) {
+ return ip_recv_error(sk, msg, len, addr_len);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ return pingv6_ops.ipv6_recv_error(sk, msg, len);
+#endif
+ }
+ }
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -663,18 +894,44 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sock_recv_timestamp(msg, sk, skb);
- /* Copy the address. */
- if (msg->msg_name) {
- struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+ /* Copy the address and add cmsg data. */
+ if (family == AF_INET) {
+ sin = (struct sockaddr_in *) msg->msg_name;
+ if (sin) {
+ sin->sin_family = AF_INET;
+ sin->sin_port = 0 /* skb->h.uh->source */;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ }
+
+ if (isk->cmsg_flags)
+ ip_cmsg_recv(msg, skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *ip6 = ipv6_hdr(skb);
+ sin6 = (struct sockaddr_in6 *) msg->msg_name;
+
+ if (sin6) {
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = 0;
+ sin6->sin6_addr = ip6->saddr;
+ sin6->sin6_flowinfo = 0;
+ if (np->sndflow)
+ sin6->sin6_flowinfo = ip6_flowinfo(ip6);
+ sin6->sin6_scope_id =
+ ipv6_iface_scope_id(&sin6->sin6_addr,
+ IP6CB(skb)->iif);
+ }
- sin->sin_family = AF_INET;
- sin->sin_port = 0 /* skb->h.uh->source */;
- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
- memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- *addr_len = sizeof(*sin);
+ if (inet6_sk(sk)->rxopt.all)
+ pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb);
+#endif
+ } else {
+ BUG();
}
- if (isk->cmsg_flags)
- ip_cmsg_recv(msg, skb);
+
err = copied;
done:
@@ -683,8 +940,9 @@ out:
pr_debug("ping_recvmsg -> %d\n", err);
return err;
}
+EXPORT_SYMBOL_GPL(ping_recvmsg);
-static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
@@ -695,6 +953,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
return 0;
}
+EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
/*
@@ -705,10 +964,7 @@ void ping_rcv(struct sk_buff *skb)
{
struct sock *sk;
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = ip_hdr(skb);
struct icmphdr *icmph = icmp_hdr(skb);
- __be32 saddr = iph->saddr;
- __be32 daddr = iph->daddr;
/* We assume the packet has already been checked by icmp_rcv */
@@ -718,8 +974,7 @@ void ping_rcv(struct sk_buff *skb)
/* Push ICMP header back */
skb_push(skb, skb->data - (u8 *)icmph);
- sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
- skb->dev->ifindex);
+ sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
if (sk != NULL) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -733,6 +988,7 @@ void ping_rcv(struct sk_buff *skb)
/* We're called from icmp_rcv(). kfree_skb() is done there. */
}
+EXPORT_SYMBOL_GPL(ping_rcv);
struct proto ping_prot = {
.name = "PING",
@@ -743,14 +999,14 @@ struct proto ping_prot = {
.disconnect = udp_disconnect,
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
- .sendmsg = ping_sendmsg,
+ .sendmsg = ping_v4_sendmsg,
.recvmsg = ping_recvmsg,
.bind = ping_bind,
.backlog_rcv = ping_queue_rcv_skb,
.release_cb = ip4_datagram_release_cb,
- .hash = ping_v4_hash,
- .unhash = ping_v4_unhash,
- .get_port = ping_v4_get_port,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
.obj_size = sizeof(struct inet_sock),
};
EXPORT_SYMBOL(ping_prot);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b4a1c42a627f..2dfe804fd663 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -571,9 +571,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
- inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
- daddr, saddr, 0, 0);
+ inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+ daddr, saddr, 0, 0,
+ sock_i_uid(sk));
if (!inet->hdrincl) {
err = raw_probe_proto_opt(&fl4, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e23c5f64286b..577801102a70 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -515,7 +515,7 @@ void __ip_select_ident(struct iphdr *iph, int segs)
}
EXPORT_SYMBOL(__ip_select_ident);
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(struct flowi4 *fl4, struct sock *sk,
const struct iphdr *iph,
int oif, u8 tos,
u8 prot, u32 mark, int flow_flags)
@@ -531,11 +531,12 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
flowi4_init_output(fl4, oif, mark, tos,
RT_SCOPE_UNIVERSE, prot,
flow_flags,
- iph->daddr, iph->saddr, 0, 0);
+ iph->daddr, iph->saddr, 0, 0,
+ sk ? sock_i_uid(sk) : 0);
}
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
- const struct sock *sk)
+ struct sock *sk)
{
const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex;
@@ -546,7 +547,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}
-static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
+static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ip_options_rcu *inet_opt;
@@ -560,11 +561,12 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
- daddr, inet->inet_saddr, 0, 0);
+ daddr, inet->inet_saddr, 0, 0,
+ sock_i_uid(sk));
rcu_read_unlock();
}
-static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk,
const struct sk_buff *skb)
{
if (skb)
@@ -971,6 +973,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct flowi4 fl4;
struct rtable *rt;
+ if (!mark)
+ mark = IP4_REPLY_MARK(net, skb->mark);
+
__build_flow_key(&fl4, NULL, iph, oif,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
@@ -988,6 +993,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
struct rtable *rt;
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+ if (!fl4.flowi4_mark)
+ fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
rt = __ip_route_output_key(sock_net(sk), &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -2301,6 +2310,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
goto nla_put_failure;
+ if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+ nla_put_u32(skb, RTA_UID,
+ from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+ goto nla_put_failure;
+
error = rt->dst.error;
if (rt_is_input_route(rt)) {
@@ -2350,6 +2364,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
int err;
int mark;
struct sk_buff *skb;
+ kuid_t uid;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err < 0)
@@ -2377,6 +2392,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ if (tb[RTA_UID])
+ uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+ else
+ uid = (iif ? INVALID_UID : current_uid());
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
@@ -2384,6 +2403,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
fl4.flowi4_tos = rtm->rtm_tos;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
+ fl4.flowi4_uid = uid;
if (iif) {
struct net_device *dev;
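One consequence of the RTA_UID plumbing above: a unicast route query (RTM_GETROUTE) may now carry an RTA_UID attribute to ask which route a particular UID would get; when the attribute is absent, the requester's own UID is used for output-route lookups (and INVALID_UID when an input interface is given), and the UID actually used is echoed back in the reply as RTA_UID whenever it is a valid UID.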
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b05c96e7af8b..c94032b95c60 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -312,6 +312,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->rmt_port = th->source;
ireq->loc_addr = ip_hdr(skb)->daddr;
ireq->rmt_addr = ip_hdr(skb)->saddr;
+ ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
@@ -348,11 +349,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* hasn't changed since we received the original syn, but I see
* no easy way to do this.
*/
- flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
inet_sk_flowi_flags(sk),
(opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
- ireq->loc_addr, th->source, th->dest);
+ ireq->loc_addr, th->source, th->dest,
+ sock_i_uid(sk));
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 90b26beb84d4..2c707a91e04c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -138,6 +138,21 @@ static int ipv4_ping_group_range(ctl_table *table, int write,
return ret;
}
+/* Validate changes from /proc interface. */
+static int proc_tcp_default_init_rwnd(ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int old_value = *(int *)ctl->data;
+ int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ int new_value = *(int *)ctl->data;
+
+ if (write && ret == 0 && (new_value < 3 || new_value > 100))
+ *(int *)ctl->data = old_value;
+
+ return ret;
+}
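For example, writing 40 to /proc/sys/net/ipv4/tcp_default_init_rwnd takes effect, while writing 2 or 200 quietly puts the old value back; note that the handler still returns whatever proc_dointvec() returned, so an out-of-range write is not reported as an error to the writer.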
+
static int proc_tcp_congestion_control(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -737,7 +752,7 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
- {
+ {
.procname = "tcp_thin_dupack",
.data = &sysctl_tcp_thin_dupack,
.maxlen = sizeof(int),
@@ -763,6 +778,13 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &gso_max_segs,
},
{
+ .procname = "tcp_default_init_rwnd",
+ .data = &sysctl_tcp_default_init_rwnd,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_default_init_rwnd
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
@@ -851,6 +873,20 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = ipv4_tcp_mem,
},
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv4.sysctl_fwmark_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_fwmark_accept",
+ .data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c
new file mode 100644
index 000000000000..0cbbf10026a6
--- /dev/null
+++ b/net/ipv4/sysfs_net_ipv4.c
@@ -0,0 +1,88 @@
+/*
+ * net/ipv4/sysfs_net_ipv4.c
+ *
+ * sysfs-based networking knobs (so we can, unlike with sysctl, control perms)
+ *
+ * Copyright (C) 2008 Google, Inc.
+ *
+ * Robert Love <rlove@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <net/tcp.h>
+
+#define CREATE_IPV4_FILE(_name, _var) \
+static ssize_t _name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%d\n", _var); \
+} \
+static ssize_t _name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ int val, ret; \
+ ret = sscanf(buf, "%d", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ if (val < 0) \
+ return -EINVAL; \
+ _var = val; \
+ return count; \
+} \
+static struct kobj_attribute _name##_attr = \
+ __ATTR(_name, 0644, _name##_show, _name##_store)
+
+CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]);
+CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]);
+CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]);
+
+CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]);
+CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]);
+CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]);
+
+static struct attribute *ipv4_attrs[] = {
+ &tcp_wmem_min_attr.attr,
+ &tcp_wmem_def_attr.attr,
+ &tcp_wmem_max_attr.attr,
+ &tcp_rmem_min_attr.attr,
+ &tcp_rmem_def_attr.attr,
+ &tcp_rmem_max_attr.attr,
+ NULL
+};
+
+static struct attribute_group ipv4_attr_group = {
+ .attrs = ipv4_attrs,
+};
+
+static __init int sysfs_ipv4_init(void)
+{
+ struct kobject *ipv4_kobject;
+ int ret;
+
+ ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj);
+ if (!ipv4_kobject)
+ return -ENOMEM;
+
+ ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group);
+ if (ret) {
+ kobject_put(ipv4_kobject);
+ return ret;
+ }
+
+ return 0;
+}
+
+subsys_initcall(sysfs_ipv4_init);
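For clarity, each CREATE_IPV4_FILE() invocation above just stamps out a show/store pair plus a kobj_attribute; expanding the first one by hand gives roughly the following, and once sysfs_ipv4_init() registers the group on kernel_kobj the files appear as /sys/kernel/ipv4/tcp_wmem_min and friends:

static ssize_t tcp_wmem_min_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", sysctl_tcp_wmem[0]);
}
static ssize_t tcp_wmem_min_store(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  const char *buf, size_t count)
{
	int val, ret;
	ret = sscanf(buf, "%d", &val);
	if (ret != 1)
		return -EINVAL;
	if (val < 0)
		return -EINVAL;
	sysctl_tcp_wmem[0] = val;
	return count;
}
static struct kobj_attribute tcp_wmem_min_attr =
	__ATTR(tcp_wmem_min, 0644, tcp_wmem_min_show, tcp_wmem_min_store);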
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5d4bd6ca3ab1..8cc9b5499013 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -268,12 +268,16 @@
#include <linux/crypto.h>
#include <linux/time.h>
#include <linux/slab.h>
+#include <linux/uid_stat.h>
#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
#include <net/netdma.h>
#include <net/sock.h>
@@ -1240,6 +1244,9 @@ out:
tcp_push(sk, flags, mss_now, tp->nonagle);
out_nopush:
release_sock(sk);
+
+ if (copied + copied_syn)
+ uid_stat_tcp_snd(current_uid(), copied + copied_syn);
return copied + copied_syn;
do_fault:
@@ -1544,6 +1551,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (copied > 0) {
tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
+ uid_stat_tcp_rcv(current_uid(), copied);
}
return copied;
}
@@ -1948,6 +1956,9 @@ skip_copy:
tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
+
+ if (copied > 0)
+ uid_stat_tcp_rcv(current_uid(), copied);
return copied;
out:
@@ -1956,6 +1967,8 @@ out:
recv_urg:
err = tcp_recv_urg(sk, msg, len, flags);
+ if (err > 0)
+ uid_stat_tcp_rcv(current_uid(), err);
goto out;
recv_sndq:
@@ -3468,3 +3481,108 @@ void __init tcp_init(void)
tcp_tasklet_init();
}
+
+static int tcp_is_local(struct net *net, __be32 addr) {
+ struct rtable *rt;
+ struct flowi4 fl4 = { .daddr = addr };
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR_OR_NULL(rt))
+ return 0;
+ return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK);
+}
+
+#if defined(CONFIG_IPV6)
+static int tcp_is_local6(struct net *net, struct in6_addr *addr) {
+ struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0);
+ return rt6 && rt6->dst.dev && (rt6->dst.dev->flags & IFF_LOOPBACK);
+}
+#endif
+
+/*
+ * tcp_nuke_addr - destroy all sockets on the given local address
+ * If the local address is the unspecified address (0.0.0.0 or ::), destroy
+ * all sockets whose local addresses are not configured on the system.
+ */
+int tcp_nuke_addr(struct net *net, struct sockaddr *addr)
+{
+ int family = addr->sa_family;
+ unsigned int bucket;
+
+ struct in_addr *in;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ struct in6_addr *in6 = NULL;
+#endif
+ if (family == AF_INET) {
+ in = &((struct sockaddr_in *)addr)->sin_addr;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ } else if (family == AF_INET6) {
+ in6 = &((struct sockaddr_in6 *)addr)->sin6_addr;
+#endif
+ } else {
+ return -EAFNOSUPPORT;
+ }
+
+ for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
+
+restart:
+ spin_lock_bh(lock);
+ sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
+ continue;
+ if (sock_flag(sk, SOCK_DEAD))
+ continue;
+
+ if (family == AF_INET) {
+ __be32 s4 = inet->inet_rcv_saddr;
+ if (s4 == LOOPBACK4_IPV6)
+ continue;
+
+ if (in->s_addr != s4 &&
+ !(in->s_addr == INADDR_ANY &&
+ !tcp_is_local(net, s4)))
+ continue;
+ }
+
+#if defined(CONFIG_IPV6)
+ if (family == AF_INET6) {
+ struct in6_addr *s6;
+ if (!inet->pinet6)
+ continue;
+
+ s6 = &inet->pinet6->rcv_saddr;
+ if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED)
+ continue;
+
+ if (!ipv6_addr_equal(in6, s6) &&
+ !(ipv6_addr_equal(in6, &in6addr_any) &&
+ !tcp_is_local6(net, s6)))
+ continue;
+ }
+#endif
+
+ sock_hold(sk);
+ spin_unlock_bh(lock);
+
+ local_bh_disable();
+ bh_lock_sock(sk);
+ sk->sk_err = ETIMEDOUT;
+ sk->sk_error_report(sk);
+
+ tcp_done(sk);
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ sock_put(sk);
+
+ goto restart;
+ }
+ spin_unlock_bh(lock);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_nuke_addr);
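From userspace this is reached through the SIOCKILLADDR ioctl (see inet6_killaddr_ioctl() further down for the IPv6 side). A hypothetical sketch of a caller follows; the local struct mirrors struct in6_ifreq from linux/ipv6.h and the ioctl number must match the patched sockios.h, so treat both as assumptions to verify against your tree:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef SIOCKILLADDR
#define SIOCKILLADDR 0x8939	/* value used in Android trees; verify locally */
#endif

struct in6_ifreq_compat {	/* mirrors struct in6_ifreq (linux/ipv6.h) */
	struct in6_addr	ifr6_addr;
	uint32_t	ifr6_prefixlen;
	int		ifr6_ifindex;
};

/* Kill every TCP socket bound to addr6 (CAP_NET_ADMIN required). */
static int kill_sockets_on(const char *addr6)
{
	struct in6_ifreq_compat req;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0), ret;

	memset(&req, 0, sizeof(req));
	inet_pton(AF_INET6, addr6, &req.ifr6_addr);
	ret = ioctl(fd, SIOCKILLADDR, &req);
	close(fd);
	return ret;
}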
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8be45e4d34f..22fc869465e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,6 +98,7 @@ int sysctl_tcp_thin_dupack __read_mostly;
int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
int sysctl_tcp_early_retrans __read_mostly = 3;
+int sysctl_tcp_default_init_rwnd __read_mostly = TCP_DEFAULT_INIT_RCVWND;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -351,14 +352,14 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk)
{
u32 mss = tcp_sk(sk)->advmss;
- u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+ u32 icwnd = sysctl_tcp_default_init_rwnd;
int rcvmem;
/* Limit to 10 segments if mss <= 1460,
* or 14600/mss segments, with a minimum of two segments.
*/
if (mss > 1460)
- icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+ icwnd = max_t(u32, (1460 * icwnd) / mss, 2);
rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < mss)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7c3eec386a4b..d64453a81028 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1530,6 +1530,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
ireq->opt = tcp_v4_save_options(skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 76c80b59e80f..5653febcb2ac 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,14 +231,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set initial window to a value enough for senders starting with
- * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+ * initial congestion window of sysctl_tcp_default_init_rwnd. Place
* a limit on the initial window when mss is larger than 1460.
*/
if (mss > (1 << *rcv_wscale)) {
- int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+ int init_cwnd = sysctl_tcp_default_init_rwnd;
if (mss > 1460)
- init_cwnd =
- max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+ init_cwnd = max_t(u32, (1460 * init_cwnd) / mss, 2);
/* when initializing use the value from init_rcv_wnd
* rather than the default from above
*/
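A quick worked example of the formula above: with the default tcp_default_init_rwnd of 10, an mss of 1460 keeps the initial window at 10 segments, an mss of 4380 gives max(1460*10/4380, 2) = 3 segments, and very large mss values bottom out at the 2-segment floor; raising the sysctl scales all of these proportionally.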
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c3075b552248..58c5dbd5fb94 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -963,7 +963,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
- faddr, saddr, dport, inet->inet_sport);
+ faddr, saddr, dport, inet->inet_sport,
+ sock_i_uid(sk));
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 9af088d2cdaa..470a9c008e9b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
- raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d0912acd9522..69e8b50a45bd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -198,6 +198,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
+ .accept_ra_rt_table = 0,
.proxy_ndp = 0,
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
@@ -232,6 +233,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra_rt_info_max_plen = 0,
#endif
#endif
+ .accept_ra_rt_table = 0,
.proxy_ndp = 0,
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
@@ -1173,6 +1175,9 @@ enum {
#endif
IPV6_SADDR_RULE_ORCHID,
IPV6_SADDR_RULE_PREFIX,
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ IPV6_SADDR_RULE_NOT_OPTIMISTIC,
+#endif
IPV6_SADDR_RULE_MAX
};
@@ -1200,6 +1205,15 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
+static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1260,10 +1274,16 @@ static int ipv6_get_saddr_eval(struct net *net,
score->scopedist = ret;
break;
case IPV6_SADDR_RULE_PREFERRED:
+ {
/* Rule 3: Avoid deprecated and optimistic addresses */
+ u8 avoid = IFA_F_DEPRECATED;
+
+ if (!ipv6_use_optimistic_addr(score->ifa->idev))
+ avoid |= IFA_F_OPTIMISTIC;
ret = ipv6_saddr_preferred(score->addr_type) ||
- !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
+ !(score->ifa->flags & avoid);
break;
+ }
#ifdef CONFIG_IPV6_MIP6
case IPV6_SADDR_RULE_HOA:
{
@@ -1311,6 +1331,14 @@ static int ipv6_get_saddr_eval(struct net *net,
ret = score->ifa->prefix_len;
score->matchlen = ret;
break;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ case IPV6_SADDR_RULE_NOT_OPTIMISTIC:
+ /* Optimistic addresses still have lower precedence than other
+ * preferred addresses.
+ */
+ ret = !(score->ifa->flags & IFA_F_OPTIMISTIC);
+ break;
+#endif
default:
ret = 0;
}
@@ -1506,7 +1534,9 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
- !(ifp->flags&IFA_F_TENTATIVE) &&
+ (!(ifp->flags&IFA_F_TENTATIVE) ||
+ (ipv6_use_optimistic_addr(ifp->idev) &&
+ ifp->flags&IFA_F_OPTIMISTIC)) &&
(dev == NULL || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
rcu_read_unlock_bh();
@@ -1946,6 +1976,31 @@ static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp
}
#endif
+u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) {
+ /* Determines into what table to put autoconf PIO/RIO/default routes
+ * learned on this device.
+ *
+ * - If 0, use the same table for every device. This puts routes into
+ * one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route
+ * (but note that these three are currently all equal to
+ * RT6_TABLE_MAIN).
+ * - If > 0, use the specified table.
+ * - If < 0, put routes into table dev->ifindex + (-rt_table).
+ */
+ struct inet6_dev *idev = in6_dev_get(dev);
+ u32 table;
+ int sysctl = idev->cnf.accept_ra_rt_table;
+ if (sysctl == 0) {
+ table = default_table;
+ } else if (sysctl > 0) {
+ table = (u32) sysctl;
+ } else {
+ table = (unsigned) dev->ifindex + (-sysctl);
+ }
+ in6_dev_put(idev);
+ return table;
+}
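Worked example: with accept_ra_rt_table left at 0 nothing changes and RA-learned routes keep going to RT6_TABLE_MAIN; setting it to 100 sends them all to table 100; setting it to -1000 sends routes learned on an interface with ifindex 3 to table 1003, i.e. one routing table per interface.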
+
/*
* Add prefix route.
*/
@@ -1955,7 +2010,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
unsigned long expires, u32 flags)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_PREFIX,
+ .fc_table = addrconf_rt_table(dev, RT6_TABLE_PREFIX),
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_expires = expires,
@@ -1989,7 +2044,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
struct rt6_info *rt = NULL;
struct fib6_table *table;
- table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+ table = fib6_get_table(dev_net(dev),
+ addrconf_rt_table(dev, RT6_TABLE_PREFIX));
if (table == NULL)
return NULL;
@@ -3145,11 +3201,13 @@ static int addrconf_ifdown(struct net_device *dev, int how)
write_unlock_bh(&idev->lock);
- /* Step 5: Discard multicast list */
- if (how)
+ /* Step 5: Discard anycast and multicast list */
+ if (how) {
+ ipv6_ac_destroy_dev(idev);
ipv6_mc_destroy_dev(idev);
- else
+ } else {
ipv6_mc_down(idev);
+ }
idev->tstamp = jiffies;
@@ -3263,8 +3321,15 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp)
* Optimistic nodes can start receiving
* Frames right away
*/
- if (ifp->flags & IFA_F_OPTIMISTIC)
+ if (ifp->flags & IFA_F_OPTIMISTIC) {
ip6_ins_rt(ifp->rt);
+ if (ipv6_use_optimistic_addr(idev)) {
+ /* Because optimistic nodes can use this address,
+ * notify listeners. If DAD fails, RTM_DELADDR is sent.
+ */
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ }
+ }
addrconf_dad_kick(ifp);
out:
@@ -4205,10 +4270,12 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
#endif
+ array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table;
array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+ array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
@@ -4914,6 +4981,13 @@ static struct addrconf_sysctl_table
#endif
#endif
{
+ .procname = "accept_ra_rt_table",
+ .data = &ipv6_devconf.accept_ra_rt_table,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "proxy_ndp",
.data = &ipv6_devconf.proxy_ndp,
.maxlen = sizeof(int),
@@ -4936,6 +5010,14 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "use_optimistic",
+ .data = &ipv6_devconf.use_optimistic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif
#ifdef CONFIG_IPV6_MROUTE
{
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ab5c7ad482cd..d29ae19ae698 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,6 +49,7 @@
#include <net/udp.h>
#include <net/udplite.h>
#include <net/tcp.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/route.h>
@@ -62,6 +63,20 @@
#include <asm/uaccess.h>
#include <linux/mroute6.h>
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+ return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+ return 1;
+}
+#endif
+
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
MODULE_LICENSE("GPL");
@@ -108,6 +123,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (!current_has_network())
+ return -EACCES;
+
if (sock->type != SOCK_RAW &&
sock->type != SOCK_DGRAM &&
!inet_ehash_secret)
@@ -159,8 +177,7 @@ lookup_protocol:
}
err = -EPERM;
- if (sock->type == SOCK_RAW && !kern &&
- !ns_capable(net->user_ns, CAP_NET_RAW))
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -477,6 +494,21 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
EXPORT_SYMBOL(inet6_getname);
+int inet6_killaddr_ioctl(struct net *net, void __user *arg) {
+ struct in6_ifreq ireq;
+ struct sockaddr_in6 sin6;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EACCES;
+
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ return -EFAULT;
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ireq.ifr6_addr;
+ return tcp_nuke_addr(net, (struct sockaddr *) &sin6);
+}
+
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
@@ -500,6 +532,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return addrconf_del_ifaddr(net, (void __user *) arg);
case SIOCSIFDSTADDR:
return addrconf_set_dstaddr(net, (void __user *) arg);
+ case SIOCKILLADDR:
+ return inet6_killaddr_ioctl(net, (void __user *) arg);
default:
if (!sk->sk_prot->ioctl)
return -ENOIOCTLCMD;
@@ -660,6 +694,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ fl6.flowi6_uid = sock_i_uid(sk);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
final_p = fl6_update_dst(&fl6, np->opt, &final);
@@ -840,6 +875,9 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_udplite_proto;
+ err = proto_register(&pingv6_prot, 1);
+ if (err)
+ goto out_unregister_ping_proto;
/* We MUST register RAW sockets before we create the ICMP6,
* IGMP6, or NDISC control sockets.
@@ -930,6 +968,10 @@ static int __init inet6_init(void)
if (err)
goto ipv6_packet_fail;
+ err = pingv6_init();
+ if (err)
+ goto pingv6_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -942,6 +984,8 @@ out:
sysctl_fail:
ipv6_packet_cleanup();
#endif
+pingv6_fail:
+ pingv6_exit();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
@@ -985,6 +1029,8 @@ register_pernet_fail:
rtnl_unregister_all(PF_INET6);
out_sock_register_fail:
rawv6_exit();
+out_unregister_ping_proto:
+ proto_unregister(&pingv6_prot);
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
out_unregister_udplite_proto:
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index bb02e176cb70..b903e19463c9 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -630,7 +630,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, 0, 0);
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
xfrm_state_put(x);
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 5a80f15a9de2..1e97d0886597 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -341,6 +341,27 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
return __ipv6_dev_ac_dec(idev, addr);
}
+void ipv6_ac_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifacaddr6 *aca;
+
+ write_lock_bh(&idev->lock);
+ while ((aca = idev->ac_list) != NULL) {
+ idev->ac_list = aca->aca_next;
+ write_unlock_bh(&idev->lock);
+
+ addrconf_leave_solict(idev, &aca->aca_addr);
+
+ dst_hold(&aca->aca_rt->dst);
+ ip6_del_rt(aca->aca_rt);
+
+ aca_put(aca);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
/*
* check if the interface has this anycast address
* called with rcu_read_lock()
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ce17d3da9b2b..ae52918a3367 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,6 +162,7 @@ ipv4_connected:
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
+ fl6.flowi6_uid = sock_i_uid(sk);
if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
fl6.flowi6_oif = np->mcast_oif;
@@ -318,7 +319,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
/*
* Handle MSG_ERRQUEUE
*/
-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
@@ -369,7 +370,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
&sin->sin6_addr);
sin->sin6_scope_id = 0;
}
- *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40ffd72243a4..fdc81cb29e80 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -449,7 +449,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, 0, 0);
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
xfrm_state_put(x);
}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 51af9d0d019a..11de7379fb9a 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -166,15 +166,15 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv);
* to explore inner IPv6 header, eg. ICMPv6 error messages.
*
* If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
+ * number. Otherwise, return -ENOENT or -EBADMSG.
*
* If the first fragment doesn't contain the final protocol header or
* NEXTHDR_NONE it is considered invalid.
*
* Note that non-1st fragment is special case that "the protocol number
* of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
+ * *offset is meaningless. If fragoff is not NULL, the fragment offset is
+ * stored in *fragoff; if it is NULL, return -EINVAL.
*
* if flags is not NULL and it's a fragment, then the frag flag
* IP6_FH_F_FRAG will be set. If it's an AH header, the
@@ -253,9 +253,12 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
if (target < 0 &&
((!ipv6_ext_hdr(hp->nexthdr)) ||
hp->nexthdr == NEXTHDR_NONE)) {
- if (fragoff)
+ if (fragoff) {
*fragoff = _frag_off;
- return hp->nexthdr;
+ return hp->nexthdr;
+ } else {
+ return -EINVAL;
+ }
}
return -ENOENT;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2dee1d9d7305..c1b611cc55ae 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -57,6 +57,7 @@
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
+#include <net/ping.h>
#include <net/protocol.h>
#include <net/raw.h>
#include <net/rawv6.h>
@@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net)
static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
+ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
struct net *net = dev_net(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
else if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, 0, 0);
+
+ if (!(type & ICMPV6_INFOMSG_MASK))
+ if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
+ ping_err(skb, offset, info);
}
static int icmpv6_rcv(struct sk_buff *skb);
@@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len)
+int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
@@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
static inline void mip6_addr_swap(struct sk_buff *skb) {}
#endif
-static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
- struct sock *sk, struct flowi6 *fl6)
+struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb,
+ struct sock *sk, struct flowi6 *fl6)
{
struct dst_entry *dst, *dst2;
struct flowi6 fl2;
@@ -389,6 +397,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int len;
int hlimit;
int err = 0;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
(skb->network_header + sizeof(*hdr)) > skb->tail)
@@ -454,6 +463,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
fl6.daddr = hdr->saddr;
if (saddr)
fl6.saddr = *saddr;
+ fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
@@ -462,6 +472,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -543,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct dst_entry *dst;
int err = 0;
int hlimit;
+ u32 mark = IP6_REPLY_MARK(net, skb->mark);
saddr = &ipv6_hdr(skb)->daddr;
@@ -559,11 +571,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.saddr = *saddr;
fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
if (sk == NULL)
return;
+ sk->sk_mark = mark;
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
@@ -697,7 +711,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
IPPROTO_ICMPV6, 0));
if (__skb_checksum_complete(skb)) {
- LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
+ LIMIT_NETDEBUG(KERN_DEBUG
+ "ICMPv6 checksum failed [%pI6c > %pI6c]\n",
saddr, daddr);
goto csum_error;
}
@@ -718,7 +733,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- /* we couldn't care less */
+ ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e4311cbc8b4e..65a46058c854 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,9 +81,10 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
final_p = fl6_update_dst(fl6, np->opt, &final);
fl6->saddr = treq->loc_addr;
fl6->flowi6_oif = treq->iif;
- fl6->flowi6_mark = sk->sk_mark;
+ fl6->flowi6_mark = inet_rsk(req)->ir_mark;
fl6->fl6_dport = inet_rsk(req)->rmt_port;
fl6->fl6_sport = inet_rsk(req)->loc_port;
+ fl6->flowi6_uid = sock_i_uid(sk);
security_req_classify_flow(req, flowi6_to_flowi(fl6));
dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
@@ -211,6 +212,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_sport = inet->inet_sport;
fl6->fl6_dport = inet->inet_dport;
+ fl6->flowi6_uid = sock_i_uid(sk);
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
final_p = fl6_update_dst(fl6, np->opt, &final);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2c84072b1da7..63cf7cded36d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -701,7 +701,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct mr6_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
- .flowi6_iif = skb->skb_iif,
+ .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_mark = skb->mark,
};
int err;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d98..a1beb59a841e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -78,7 +78,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT)
ip6_redirect(skb, net, 0, 0);
else
- ip6_update_pmtu(skb, net, info, 0, 0);
+ ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID);
xfrm_state_put(x);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4433ab40e7de..7f45af3e8128 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -153,6 +153,18 @@ config IP6_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_TARGET_REJECT_SKERR
+ bool "Force socket error when rejecting with icmp*"
+ depends on IP6_NF_TARGET_REJECT
+ default n
+ help
+ This option additionally turns a "--reject-with icmp*" reject into a
+ matching error on the local socket.
+ The REJECT target normally only sends an ICMP message, which leaves
+ the local socket unaware that its own traffic was rejected.
+
+ If unsure, say N.
+
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 70f9abc0efe9..573c232239b0 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -180,6 +180,15 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
skb_in->dev = net->loopback_dev;
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
+#ifdef CONFIG_IP6_NF_TARGET_REJECT_SKERR
+ if (skb_in->sk) {
+ icmpv6_err_convert(ICMPV6_DEST_UNREACH, code,
+ &skb_in->sk->sk_err);
+ skb_in->sk->sk_error_report(skb_in->sk);
+ pr_debug("ip6t_REJECT: sk_err=%d for skb=%p sk=%p\n",
+ skb_in->sk->sk_err, skb_in, skb_in->sk);
+ }
+#endif
}
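In practice this means a locally generated connection attempt that hits a "--reject-with icmp6-port-unreachable" rule fails immediately with the error icmpv6_err_convert() maps to (ECONNREFUSED in that case), rather than leaving the application to wait for a timeout.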
static unsigned int
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
new file mode 100644
index 000000000000..857201f4936f
--- /dev/null
+++ b/net/ipv6/ping.c
@@ -0,0 +1,224 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * "Ping" sockets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Based on ipv4/ping.c code.
+ *
+ * Authors: Lorenzo Colitti (IPv6 support)
+ * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6),
+ * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32)
+ *
+ */
+
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/transp_v6.h>
+#include <net/ping.h>
+
+struct proto pingv6_prot = {
+ .name = "PINGv6",
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .sendmsg = ping_v6_sendmsg,
+ .recvmsg = ping_recvmsg,
+ .bind = ping_bind,
+ .backlog_rcv = ping_queue_rcv_skb,
+ .hash = ping_hash,
+ .unhash = ping_unhash,
+ .get_port = ping_get_port,
+ .obj_size = sizeof(struct raw6_sock),
+};
+EXPORT_SYMBOL_GPL(pingv6_prot);
+
+static struct inet_protosw pingv6_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMPV6,
+ .prot = &pingv6_prot,
+ .ops = &inet6_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+
+
+/* Compatibility glue so we can support IPv6 when it's compiled as a module */
+int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)
+{
+ return -EAFNOSUPPORT;
+}
+void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload) {}
+int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict)
+{
+ return 0;
+}
+
+int __init pingv6_init(void)
+{
+ pingv6_ops.ipv6_recv_error = ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = ipv6_chk_addr;
+ return inet6_register_protosw(&pingv6_protosw);
+}
+
+/* This never gets called because it's not possible to unload the ipv6 module,
+ * but just in case.
+ */
+void pingv6_exit(void)
+{
+ pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error;
+ pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl;
+ pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;
+ pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;
+ pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr;
+ inet6_unregister_protosw(&pingv6_protosw);
+}
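The pingv6_ops indirection above is what lets the shared code in net/ipv4/ping.c reach these IPv6 helpers without linking against the ipv6 module; its declaration lives in include/net/ping.h as part of this patch and, judging from the assignments and the dummy fallbacks, looks roughly like:

struct pingv6_ops {
	int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len);
	int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg,
				     struct sk_buff *skb);
	int (*icmpv6_err_convert)(u8 type, u8 code, int *err);
	void (*ipv6_icmp_error)(struct sock *sk, struct sk_buff *skb, int err,
				__be16 port, u32 info, u8 *payload);
	int (*ipv6_chk_addr)(struct net *net, const struct in6_addr *addr,
			     const struct net_device *dev, int strict);
};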
+
+int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct icmp6hdr user_icmph;
+ int addr_type;
+ struct in6_addr *daddr;
+ int iif = 0;
+ struct flowi6 fl6;
+ int err;
+ int hlimit;
+ struct dst_entry *dst;
+ struct rt6_info *rt;
+ struct pingfakehdr pfh;
+
+ pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
+
+ err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph,
+ sizeof(user_icmph));
+ if (err)
+ return err;
+
+ if (msg->msg_name) {
+ struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name;
+ if (msg->msg_namelen < sizeof(*u))
+ return -EINVAL;
+ if (u->sin6_family != AF_INET6) {
+ return -EAFNOSUPPORT;
+ }
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != u->sin6_scope_id) {
+ return -EINVAL;
+ }
+ daddr = &(u->sin6_addr);
+ iif = u->sin6_scope_id;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+ daddr = &np->daddr;
+ }
+
+ if (!iif)
+ iif = sk->sk_bound_dev_if;
+
+ addr_type = ipv6_addr_type(daddr);
+ if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
+ return -EINVAL;
+ if (addr_type & IPV6_ADDR_MAPPED)
+ return -EINVAL;
+
+ /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
+
+ memset(&fl6, 0, sizeof(fl6));
+
+ fl6.flowi6_proto = IPPROTO_ICMPV6;
+ fl6.saddr = np->saddr;
+ fl6.daddr = *daddr;
+ fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sock_i_uid(sk);
+ fl6.fl6_icmp_type = user_icmph.icmp6_type;
+ fl6.fl6_icmp_code = user_icmph.icmp6_code;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+ rt = (struct rt6_info *) dst;
+
+ np = inet6_sk(sk);
+ if (!np)
+ return -EBADF;
+
+ if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
+ fl6.flowi6_oif = np->mcast_oif;
+ else if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = np->ucast_oif;
+
+ pfh.icmph.type = user_icmph.icmp6_type;
+ pfh.icmph.code = user_icmph.icmp6_code;
+ pfh.icmph.checksum = 0;
+ pfh.icmph.un.echo.id = inet->inet_sport;
+ pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
+ pfh.iov = msg->msg_iov;
+ pfh.wcheck = 0;
+ pfh.family = AF_INET6;
+
+ if (ipv6_addr_is_multicast(&fl6.daddr))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = ip6_dst_hoplimit(dst);
+
+ lock_sock(sk);
+ err = ip6_append_data(sk, ping_getfrag, &pfh, len,
+ 0, hlimit,
+ np->tclass, NULL, &fl6, rt,
+ MSG_DONTWAIT, np->dontfrag);
+
+ if (err) {
+ ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev,
+ ICMP6_MIB_OUTERRORS);
+ ip6_flush_pending_frames(sk);
+ } else {
+ err = icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *) &pfh.icmph,
+ len);
+ }
+ release_sock(sk);
+
+ if (err)
+ return err;
+
+ return len;
+}
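
For context only, not part of the patch: the pingv6_prot/pingv6_protosw registration above lets an unprivileged process open an ICMPv6 echo socket with socket(AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6), provided its group falls inside the ping_group_range sysctl. A minimal userspace sketch (error handling omitted; as ping_v6_sendmsg shows, the kernel fills in the echo identifier and checksum):

    /* Hypothetical example, not patch content. */
    #include <arpa/inet.h>
    #include <netinet/icmp6.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6);
        struct icmp6_hdr req;
        struct sockaddr_in6 dst;
        char reply[1500];

        memset(&req, 0, sizeof(req));
        req.icmp6_type = ICMP6_ECHO_REQUEST;  /* kernel sets id and checksum */
        req.icmp6_seq = htons(1);

        memset(&dst, 0, sizeof(dst));
        dst.sin6_family = AF_INET6;
        inet_pton(AF_INET6, "::1", &dst.sin6_addr);

        sendto(fd, &req, sizeof(req), 0, (struct sockaddr *)&dst, sizeof(dst));
        if (recv(fd, reply, sizeof(reply), 0) >= 0)
            printf("got echo reply\n");
        close(fd);
        return 0;
    }
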
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 464b1c9c08e4..a4bf16d0d3d0 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -459,8 +459,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len, addr_len);
+ if (flags & MSG_ERRQUEUE) {
+ *addr_len = sizeof(struct sockaddr_in6);
+ return ipv6_recv_error(sk, msg, len);
+ }
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
@@ -759,6 +761,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sock_i_uid(sk);
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d94d224f7e68..2f127bd67a5d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -93,13 +93,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
#ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct rt6_info *rt6_add_route_info(struct net_device *dev,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex,
- unsigned int pref);
-static struct rt6_info *rt6_get_route_info(struct net *net,
+ const struct in6_addr *gwaddr, unsigned int pref);
+static struct rt6_info *rt6_get_route_info(struct net_device *dev,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex);
+ const struct in6_addr *gwaddr);
#endif
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -685,7 +684,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
const struct in6_addr *gwaddr)
{
- struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
unsigned int pref;
@@ -730,8 +728,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
if (rinfo->prefix_len == 0)
rt = rt6_get_dflt_router(gwaddr, dev);
else
- rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
- gwaddr, dev->ifindex);
+ rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr);
if (rt && !lifetime) {
ip6_del_rt(rt);
@@ -739,8 +736,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
if (!rt && lifetime)
- rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
- pref);
+ rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref);
else if (rt)
rt->rt6i_flags = RTF_ROUTEINFO |
(rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
@@ -1150,7 +1146,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
}
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
- int oif, u32 mark)
+ int oif, u32 mark, kuid_t uid)
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
@@ -1158,11 +1154,12 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
+ fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
fl6.flowi6_flags = 0;
fl6.daddr = iph->daddr;
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
+ fl6.flowi6_uid = uid;
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -1174,7 +1171,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark);
+ sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk));
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
@@ -1849,15 +1846,16 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
}
#ifdef CONFIG_IPV6_ROUTE_INFO
-static struct rt6_info *rt6_get_route_info(struct net *net,
+static struct rt6_info *rt6_get_route_info(struct net_device *dev,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex)
+ const struct in6_addr *gwaddr)
{
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
- table = fib6_get_table(net, RT6_TABLE_INFO);
+ table = fib6_get_table(dev_net(dev),
+ addrconf_rt_table(dev, RT6_TABLE_INFO));
if (!table)
return NULL;
@@ -1867,7 +1865,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
goto out;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->dst.dev->ifindex != ifindex)
+ if (rt->dst.dev->ifindex != dev->ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
@@ -1881,21 +1879,20 @@ out:
return rt;
}
-static struct rt6_info *rt6_add_route_info(struct net *net,
+static struct rt6_info *rt6_add_route_info(struct net_device *dev,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex,
- unsigned int pref)
+ const struct in6_addr *gwaddr, unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_INFO,
+ .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO),
.fc_metric = IP6_RT_PRIO_USER,
- .fc_ifindex = ifindex,
+ .fc_ifindex = dev->ifindex,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
- .fc_nlinfo.nl_net = net,
+ .fc_nlinfo.nl_net = dev_net(dev),
};
cfg.fc_dst = *prefix;
@@ -1907,7 +1904,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
ip6_route_add(&cfg);
- return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+ return rt6_get_route_info(dev, prefix, prefixlen, gwaddr);
}
#endif
@@ -1916,7 +1913,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
struct rt6_info *rt;
struct fib6_table *table;
- table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+ table = fib6_get_table(dev_net(dev),
+ addrconf_rt_table(dev, RT6_TABLE_MAIN));
if (!table)
return NULL;
@@ -1938,7 +1936,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_DFLT,
+ .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT),
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -1955,28 +1953,17 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
return rt6_get_dflt_router(gwaddr, dev);
}
-void rt6_purge_dflt_routers(struct net *net)
-{
- struct rt6_info *rt;
- struct fib6_table *table;
- /* NOTE: Keep consistent with rt6_get_dflt_router */
- table = fib6_get_table(net, RT6_TABLE_DFLT);
- if (!table)
- return;
+int rt6_addrconf_purge(struct rt6_info *rt, void *arg) {
+ if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
+ (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2))
+ return -1;
+ return 0;
+}
-restart:
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
- if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
- (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- ip6_del_rt(rt);
- goto restart;
- }
- }
- read_unlock_bh(&table->tb6_lock);
+void rt6_purge_dflt_routers(struct net *net)
+{
+ fib6_clean_all(net, rt6_addrconf_purge, 0, NULL);
}
static void rtmsg_to_fib6_config(struct net *net,
@@ -2261,6 +2248,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+ [RTA_UID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2647,6 +2635,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
if (tb[RTA_OIF])
oif = nla_get_u32(tb[RTA_OIF]);
+ if (tb[RTA_MARK])
+ fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
+
+ if (tb[RTA_UID])
+ fl6.flowi6_uid = make_kuid(current_user_ns(),
+ nla_get_u32(tb[RTA_UID]));
+ else
+ fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
+
if (iif) {
struct net_device *dev;
int flags = 0;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d5dda20bd717..ba8622daffd7 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -212,6 +212,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq6->iif = inet6_iif(skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+
req->expires = 0UL;
req->num_retrans = 0;
ireq->ecn_ok = ecn_ok;
@@ -238,9 +240,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
final_p = fl6_update_dst(&fl6, np->opt, &final);
fl6.saddr = ireq6->loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
+ fl6.flowi6_uid = sock_i_uid(sk);
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e85c48bd404f..53a9f5a64536 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -24,6 +24,13 @@ static ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fwmark_reflect",
+ .data = &init_net.ipv6.sysctl.fwmark_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
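
Not part of the patch: once this table entry is registered, fwmark_reflect becomes a per-namespace sysctl that makes kernel-generated IPv6 replies reuse the incoming packet's mark (see the IP6_REPLY_MARK uses in tcp_ipv6.c below). A sketch that simply enables it, assuming the conventional /proc/sys/net/ipv6/fwmark_reflect path:

    /* Hypothetical example, not patch content. */
    #include <fcntl.h>
    #include <unistd.h>

    static int enable_ipv6_fwmark_reflect(void)
    {
        int fd = open("/proc/sys/net/ipv6/fwmark_reflect", O_WRONLY);

        if (fd < 0)
            return -1;
        if (write(fd, "1\n", 2) != 2) {
            close(fd);
            return -1;
        }
        return close(fd);
    }
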
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4659b8ab55d9..a4738211c601 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -252,6 +252,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
+ fl6.flowi6_uid = sock_i_uid(sk);
final_p = fl6_update_dst(&fl6, np->opt, &final);
@@ -791,6 +792,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
fl6.flowi6_proto = IPPROTO_TCP;
if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
fl6.flowi6_oif = inet6_iif(skb);
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -999,6 +1001,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
TCP_ECN_create_request(req, skb, sock_net(sk));
treq->iif = sk->sk_bound_dev_if;
+ inet_rsk(req)->ir_mark = inet_request_mark(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6b298dc614e3..e06772c4bccb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -373,8 +373,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
int is_udp4;
bool slow;
- if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len, addr_len);
+ if (flags & MSG_ERRQUEUE) {
+ *addr_len = sizeof(struct sockaddr_in6);
+ return ipv6_recv_error(sk, msg, len);
+ }
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
@@ -1149,6 +1151,7 @@ do_udp_sendmsg:
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_uid = sock_i_uid(sk);
if (msg->msg_controllen) {
opt = &opt_space;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22cae5906..bdf9c60b296a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1089,6 +1089,8 @@ config NETFILTER_XT_MATCH_OWNER
based on who created the socket: the user or group. It is also
possible to check whether a socket actually exists.
+ Conflicts with the '"quota, tag, owner" match' (NETFILTER_XT_MATCH_QTAGUID).
+
config NETFILTER_XT_MATCH_POLICY
tristate 'IPsec "policy" match support'
depends on XFRM
@@ -1122,6 +1124,22 @@ config NETFILTER_XT_MATCH_PKTTYPE
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_MATCH_QTAGUID
+ bool '"quota, tag, owner" match and stats support'
+ depends on NETFILTER_XT_MATCH_SOCKET
+ depends on NETFILTER_XT_MATCH_OWNER=n
+ help
+ This option replaces the `owner' match. In addition to matching
+ on uid, it keeps stats based on a tag assigned to a socket.
+ The full tag is composed of a UID and an accounting tag.
+ Tags are assignable to sockets from user space (e.g. a download
+ manager can assign a socket to another UID for accounting).
+ Stats and control are done via /proc/net/xt_qtaguid/.
+ It takes the same arguments as the `owner' match, but the
+ iptables tool still needs to recognize it.
+
+ If unsure, say `N'.
+
config NETFILTER_XT_MATCH_QUOTA
tristate '"quota" match support'
depends on NETFILTER_ADVANCED
@@ -1132,6 +1150,30 @@ config NETFILTER_XT_MATCH_QUOTA
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+config NETFILTER_XT_MATCH_QUOTA2
+ tristate '"quota2" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `quota2' match, which allows matching on a
+ byte counter that is kept correctly across CPUs rather than per CPU.
+ It also allows naming the quotas.
+ This is based on http://xtables-addons.git.sourceforge.net
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+config NETFILTER_XT_MATCH_QUOTA2_LOG
+ bool '"quota2" Netfilter LOG support'
+ depends on NETFILTER_XT_MATCH_QUOTA2
+ depends on IP_NF_TARGET_ULOG=n # not yes, not module, just no
+ default n
+ help
+ This option allows `quota2' to log ONCE when a quota limit
+ is crossed. It logs via NETLINK using the NETLINK_NFLOG family.
+ It logs in the same way ipt_ULOG would, but without the packet data.
+
+ If unsure, say `N'.
+
config NETFILTER_XT_MATCH_RATEEST
tristate '"rateest" match support'
depends on NETFILTER_ADVANCED
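
Not part of the patch: the NETFILTER_XT_MATCH_QTAGUID help above says stats and control live under /proc/net/xt_qtaguid/ (the directory name matches module_procdirname in xt_qtaguid.c further down). A minimal reader sketch; the per-tag counters file name "stats" is an assumption here, since it is created outside this excerpt:

    /* Hypothetical example, not patch content. */
    #include <stdio.h>

    int main(void)
    {
        char line[512];
        FILE *f = fopen("/proc/net/xt_qtaguid/stats", "r");

        if (!f)
            return 1;
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);  /* one line per (interface, tag) counter set */
        fclose(f);
        return 0;
    }
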
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87d43bf..d9655f6f3466 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -127,7 +127,9 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QTAGUID) += xt_qtaguid_print.o xt_qtaguid.o
obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA2) += xt_quota2.o
obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o
obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o
obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index f407ebc13481..ddf77f7fbe24 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -5,6 +5,7 @@
* After timer expires a kevent will be sent.
*
* Copyright (C) 2004, 2010 Nokia Corporation
+ *
* Written by Timo Teras <ext-timo.teras@nokia.com>
*
* Converted to x_tables and reworked for upstream inclusion
@@ -38,8 +39,16 @@
#include <linux/netfilter/xt_IDLETIMER.h>
#include <linux/kdev_t.h>
#include <linux/kobject.h>
+#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/sysfs.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/suspend.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
struct idletimer_tg_attr {
struct attribute attr;
@@ -55,14 +64,110 @@ struct idletimer_tg {
struct kobject *kobj;
struct idletimer_tg_attr attr;
+ struct timespec delayed_timer_trigger;
+ struct timespec last_modified_timer;
+ struct timespec last_suspend_time;
+ struct notifier_block pm_nb;
+
+ int timeout;
unsigned int refcnt;
+ bool work_pending;
+ bool send_nl_msg;
+ bool active;
+ uid_t uid;
};
static LIST_HEAD(idletimer_tg_list);
static DEFINE_MUTEX(list_mutex);
+static DEFINE_SPINLOCK(timestamp_lock);
static struct kobject *idletimer_tg_kobj;
+static bool check_for_delayed_trigger(struct idletimer_tg *timer,
+ struct timespec *ts)
+{
+ bool state;
+ struct timespec temp;
+ spin_lock_bh(&timestamp_lock);
+ timer->work_pending = false;
+ if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout ||
+ timer->delayed_timer_trigger.tv_sec != 0) {
+ state = false;
+ temp.tv_sec = timer->timeout;
+ temp.tv_nsec = 0;
+ if (timer->delayed_timer_trigger.tv_sec != 0) {
+ temp = timespec_add(timer->delayed_timer_trigger, temp);
+ ts->tv_sec = temp.tv_sec;
+ ts->tv_nsec = temp.tv_nsec;
+ timer->delayed_timer_trigger.tv_sec = 0;
+ timer->work_pending = true;
+ schedule_work(&timer->work);
+ } else {
+ temp = timespec_add(timer->last_modified_timer, temp);
+ ts->tv_sec = temp.tv_sec;
+ ts->tv_nsec = temp.tv_nsec;
+ }
+ } else {
+ state = timer->active;
+ }
+ spin_unlock_bh(&timestamp_lock);
+ return state;
+}
+
+static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer)
+{
+ char iface_msg[NLMSG_MAX_SIZE];
+ char state_msg[NLMSG_MAX_SIZE];
+ char timestamp_msg[NLMSG_MAX_SIZE];
+ char uid_msg[NLMSG_MAX_SIZE];
+ char *envp[] = { iface_msg, state_msg, timestamp_msg, uid_msg, NULL };
+ int res;
+ struct timespec ts;
+ uint64_t time_ns;
+ bool state;
+
+ res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s",
+ iface);
+ if (NLMSG_MAX_SIZE <= res) {
+ pr_err("message too long (%d)", res);
+ return;
+ }
+
+ get_monotonic_boottime(&ts);
+ state = check_for_delayed_trigger(timer, &ts);
+ res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s",
+ state ? "active" : "inactive");
+
+ if (NLMSG_MAX_SIZE <= res) {
+ pr_err("message too long (%d)", res);
+ return;
+ }
+
+ if (state) {
+ res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=%u", timer->uid);
+ if (NLMSG_MAX_SIZE <= res)
+ pr_err("message too long (%d)", res);
+ } else {
+ res = snprintf(uid_msg, NLMSG_MAX_SIZE, "UID=");
+ if (NLMSG_MAX_SIZE <= res)
+ pr_err("message too long (%d)", res);
+ }
+
+ time_ns = timespec_to_ns(&ts);
+ res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns);
+ if (NLMSG_MAX_SIZE <= res) {
+ timestamp_msg[0] = '\0';
+ pr_err("message too long (%d)", res);
+ }
+
+ pr_debug("putting nlmsg: <%s> <%s> <%s> <%s>\n", iface_msg, state_msg,
+ timestamp_msg, uid_msg);
+ kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp);
+}
+
static
struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
{
@@ -83,6 +188,7 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
{
struct idletimer_tg *timer;
unsigned long expires = 0;
+ unsigned long now = jiffies;
mutex_lock(&list_mutex);
@@ -92,11 +198,15 @@ static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
mutex_unlock(&list_mutex);
- if (time_after(expires, jiffies))
+ if (time_after(expires, now))
return sprintf(buf, "%u\n",
- jiffies_to_msecs(expires - jiffies) / 1000);
+ jiffies_to_msecs(expires - now) / 1000);
- return sprintf(buf, "0\n");
+ if (timer->send_nl_msg)
+ return sprintf(buf, "0 %d\n",
+ jiffies_to_msecs(now - expires) / 1000);
+ else
+ return sprintf(buf, "0\n");
}
static void idletimer_tg_work(struct work_struct *work)
@@ -105,6 +215,9 @@ static void idletimer_tg_work(struct work_struct *work)
work);
sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+
+ if (timer->send_nl_msg)
+ notify_netlink_uevent(timer->attr.attr.name, timer);
}
static void idletimer_tg_expired(unsigned long data)
@@ -112,8 +225,55 @@ static void idletimer_tg_expired(unsigned long data)
struct idletimer_tg *timer = (struct idletimer_tg *) data;
pr_debug("timer %s expired\n", timer->attr.attr.name);
-
+ spin_lock_bh(&timestamp_lock);
+ timer->active = false;
+ timer->work_pending = true;
schedule_work(&timer->work);
+ spin_unlock_bh(&timestamp_lock);
+}
+
+static int idletimer_resume(struct notifier_block *notifier,
+ unsigned long pm_event, void *unused)
+{
+ struct timespec ts;
+ unsigned long time_diff, now = jiffies;
+ struct idletimer_tg *timer = container_of(notifier,
+ struct idletimer_tg, pm_nb);
+ if (!timer)
+ return NOTIFY_DONE;
+ switch (pm_event) {
+ case PM_SUSPEND_PREPARE:
+ get_monotonic_boottime(&timer->last_suspend_time);
+ break;
+ case PM_POST_SUSPEND:
+ spin_lock_bh(&timestamp_lock);
+ if (!timer->active) {
+ spin_unlock_bh(&timestamp_lock);
+ break;
+ }
+ /* Jiffies are not updated while suspended, so 'now' still
+ * reflects the time at which the system suspended. */
+ if (time_after(timer->timer.expires, now)) {
+ get_monotonic_boottime(&ts);
+ ts = timespec_sub(ts, timer->last_suspend_time);
+ time_diff = timespec_to_jiffies(&ts);
+ if (timer->timer.expires > (time_diff + now)) {
+ mod_timer_pending(&timer->timer,
+ (timer->timer.expires - time_diff));
+ } else {
+ del_timer(&timer->timer);
+ timer->timer.expires = 0;
+ timer->active = false;
+ timer->work_pending = true;
+ schedule_work(&timer->work);
+ }
+ }
+ spin_unlock_bh(&timestamp_lock);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
}
static int idletimer_tg_create(struct idletimer_tg_info *info)
@@ -145,6 +305,21 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
setup_timer(&info->timer->timer, idletimer_tg_expired,
(unsigned long) info->timer);
info->timer->refcnt = 1;
+ info->timer->send_nl_msg = info->send_nl_msg != 0;
+ info->timer->active = true;
+ info->timer->timeout = info->timeout;
+
+ info->timer->delayed_timer_trigger.tv_sec = 0;
+ info->timer->delayed_timer_trigger.tv_nsec = 0;
+ info->timer->work_pending = false;
+ info->timer->uid = 0;
+ get_monotonic_boottime(&info->timer->last_modified_timer);
+
+ info->timer->pm_nb.notifier_call = idletimer_resume;
+ ret = register_pm_notifier(&info->timer->pm_nb);
+ if (ret)
+ printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n",
+ __func__, ret);
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
@@ -161,6 +336,46 @@ out:
return ret;
}
+static void reset_timer(const struct idletimer_tg_info *info,
+ struct sk_buff *skb)
+{
+ unsigned long now = jiffies;
+ struct idletimer_tg *timer = info->timer;
+ bool timer_prev;
+
+ spin_lock_bh(&timestamp_lock);
+ timer_prev = timer->active;
+ timer->active = true;
+ /* timer_prev guards against jiffies wraparound in time_before() */
+ if (!timer_prev || time_before(timer->timer.expires, now)) {
+ pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n",
+ timer->timer.expires, now);
+
+ /* Store the uid responsible for waking up the radio */
+ if (skb && (skb->sk)) {
+ struct sock *sk = skb->sk;
+ read_lock_bh(&sk->sk_callback_lock);
+ if ((sk->sk_socket) && (sk->sk_socket->file) &&
+ (sk->sk_socket->file->f_cred))
+ timer->uid = sk->sk_socket->file->f_cred->uid;
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
+
+ /* Check whether there is a pending inactive notification */
+ if (timer->work_pending)
+ timer->delayed_timer_trigger = timer->last_modified_timer;
+ else {
+ timer->work_pending = true;
+ schedule_work(&timer->work);
+ }
+ }
+
+ get_monotonic_boottime(&timer->last_modified_timer);
+ mod_timer(&timer->timer,
+ msecs_to_jiffies(info->timeout * 1000) + now);
+ spin_unlock_bh(&timestamp_lock);
+}
+
/*
* The actual xt_tables plugin.
*/
@@ -168,15 +383,23 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
const struct xt_action_param *par)
{
const struct idletimer_tg_info *info = par->targinfo;
+ unsigned long now = jiffies;
pr_debug("resetting timer %s, timeout period %u\n",
info->label, info->timeout);
BUG_ON(!info->timer);
- mod_timer(&info->timer->timer,
- msecs_to_jiffies(info->timeout * 1000) + jiffies);
+ info->timer->active = true;
+ if (time_before(info->timer->timer.expires, now)) {
+ schedule_work(&info->timer->work);
+ pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n",
+ info->label, info->timer->timer.expires, now);
+ }
+
+ /* TODO: Avoid modifying timers on each packet */
+ reset_timer(info, skb);
return XT_CONTINUE;
}
@@ -185,7 +408,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
struct idletimer_tg_info *info = par->targinfo;
int ret;
- pr_debug("checkentry targinfo%s\n", info->label);
+ pr_debug("checkentry targinfo %s\n", info->label);
if (info->timeout == 0) {
pr_debug("timeout value is zero\n");
@@ -204,9 +427,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
info->timer = __idletimer_tg_find_by_label(info->label);
if (info->timer) {
info->timer->refcnt++;
- mod_timer(&info->timer->timer,
- msecs_to_jiffies(info->timeout * 1000) + jiffies);
-
+ reset_timer(info, NULL);
pr_debug("increased refcnt of timer %s to %u\n",
info->label, info->timer->refcnt);
} else {
@@ -219,6 +440,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
}
mutex_unlock(&list_mutex);
+
return 0;
}
@@ -236,11 +458,12 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
list_del(&info->timer->entry);
del_timer_sync(&info->timer->timer);
sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+ unregister_pm_notifier(&info->timer->pm_nb);
kfree(info->timer->attr.attr.name);
kfree(info->timer);
} else {
pr_debug("decreased refcnt of timer %s to %u\n",
- info->label, info->timer->refcnt);
+ info->label, info->timer->refcnt);
}
mutex_unlock(&list_mutex);
@@ -248,6 +471,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
static struct xt_target idletimer_tg __read_mostly = {
.name = "IDLETIMER",
+ .revision = 1,
.family = NFPROTO_UNSPEC,
.target = idletimer_tg_target,
.targetsize = sizeof(struct idletimer_tg_info),
@@ -313,3 +537,4 @@ MODULE_DESCRIPTION("Xtables: idle time monitor");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("ipt_IDLETIMER");
MODULE_ALIAS("ip6t_IDLETIMER");
+MODULE_ALIAS("arpt_IDLETIMER");
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 000000000000..1f576dab9472
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,3017 @@
+/*
+ * Kernel iptables module to track stats for packets based on user tags.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * There are run-time debug flags enabled via the debug_mask module param, or
+ * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
+ */
+#define DEBUG
+
+#include <linux/file.h>
+#include <linux/inetdevice.h>
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_qtaguid.h>
+#include <linux/ratelimit.h>
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <net/addrconf.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#endif
+
+#include <linux/netfilter/xt_socket.h>
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+#include "../../fs/proc/internal.h"
+
+/*
+ * Only call the xt_socket functions from the hooks listed below; keeping
+ * the calling context consistent avoids unexpected return values.
+ */
+#define XT_SOCKET_SUPPORTED_HOOKS \
+ ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
+
+
+static const char *module_procdirname = "xt_qtaguid";
+static struct proc_dir_entry *xt_qtaguid_procdir;
+
+static unsigned int proc_iface_perms = S_IRUGO;
+module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
+
+static struct proc_dir_entry *xt_qtaguid_stats_file;
+static unsigned int proc_stats_perms = S_IRUGO;
+module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
+
+static struct proc_dir_entry *xt_qtaguid_ctrl_file;
+
+/* Everybody can write. But proc_ctrl_write_limited is true by default, which
+ * limits what can be controlled. See the can_*() functions.
+ */
+static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
+module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
+
+/* Limited by default, so the gid of the ctrl and stats proc entries
+ * will limit what can be done. See the can_*() functions.
+ */
+static bool proc_stats_readall_limited = true;
+static bool proc_ctrl_write_limited = true;
+
+module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
+ S_IRUGO | S_IWUSR);
+module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
+ S_IRUGO | S_IWUSR);
+
+/*
+ * Limit the number of active tags (via socket tags) for a given UID.
+ * Multiple processes could share the UID.
+ */
+static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
+module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
+
+/*
+ * After the kernel has initialized this module, it is still possible
+ * to make it passive.
+ * Setting passive to Y:
+ * - the iface stats handling will not act on notifications.
+ * - iptables matches will never match.
+ * - ctrl commands silently succeed.
+ * - stats are always empty.
+ * This is mostly useful when a bug is suspected.
+ */
+static bool module_passive;
+module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
+
+/*
+ * Control how qtaguid data is tracked per proc/uid.
+ * Setting tag_tracking_passive to Y:
+ * - don't create proc specific structs to track tags
+ * - don't check that active tag stats exceed some limits.
+ * - don't clean up socket tags on process exits.
+ * This is mostly useful when a bug is suspected.
+ */
+static bool qtu_proc_handling_passive;
+module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
+ S_IRUGO | S_IWUSR);
+
+#define QTU_DEV_NAME "xt_qtaguid"
+
+uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
+module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
+
+/*---------------------------------------------------------------------------*/
+static const char *iface_stat_procdirname = "iface_stat";
+static struct proc_dir_entry *iface_stat_procdir;
+/*
+ * The iface_stat_all* entries will go away once userspace gets used to the
+ * new fields that have a format line.
+ */
+static const char *iface_stat_all_procfilename = "iface_stat_all";
+static struct proc_dir_entry *iface_stat_all_procfile;
+static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
+static struct proc_dir_entry *iface_stat_fmt_procfile;
+
+
+static LIST_HEAD(iface_stat_list);
+static DEFINE_SPINLOCK(iface_stat_list_lock);
+
+static struct rb_root sock_tag_tree = RB_ROOT;
+static DEFINE_SPINLOCK(sock_tag_list_lock);
+
+static struct rb_root tag_counter_set_tree = RB_ROOT;
+static DEFINE_SPINLOCK(tag_counter_set_list_lock);
+
+static struct rb_root uid_tag_data_tree = RB_ROOT;
+static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
+
+static struct rb_root proc_qtu_data_tree = RB_ROOT;
+/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
+
+static struct qtaguid_event_counts qtu_events;
+/*----------------------------------------------*/
+static bool can_manipulate_uids(void)
+{
+ /* root pwnd */
+ return in_egroup_p(xt_qtaguid_ctrl_file->gid)
+ || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
+ || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
+}
+
+static bool can_impersonate_uid(uid_t uid)
+{
+ return uid == current_fsuid() || can_manipulate_uids();
+}
+
+static bool can_read_other_uid_stats(uid_t uid)
+{
+ /* root pwnd */
+ return in_egroup_p(xt_qtaguid_stats_file->gid)
+ || unlikely(!current_fsuid()) || uid == current_fsuid()
+ || unlikely(!proc_stats_readall_limited)
+ || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
+}
+
+static inline void dc_add_byte_packets(struct data_counters *counters, int set,
+ enum ifs_tx_rx direction,
+ enum ifs_proto ifs_proto,
+ int bytes,
+ int packets)
+{
+ counters->bpc[set][direction][ifs_proto].bytes += bytes;
+ counters->bpc[set][direction][ifs_proto].packets += packets;
+}
+
+static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct tag_node *data = rb_entry(node, struct tag_node, node);
+ int result;
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+ " node=%p data=%p\n", tag, node, data);
+ result = tag_compare(tag, data->tag);
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+ " data.tag=0x%llx (uid=%u) res=%d\n",
+ tag, data->tag, get_uid_from_tag(data->tag), result);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct tag_node *this = rb_entry(*new, struct tag_node,
+ node);
+ int result = tag_compare(data->tag, this->tag);
+ RB_DEBUG("qtaguid: %s(): tag=0x%llx"
+ " (uid=%u)\n", __func__,
+ this->tag,
+ get_uid_from_tag(this->tag));
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_stat, tn.node);
+}
+
+static void tag_counter_set_tree_insert(struct tag_counter_set *data,
+ struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
+ tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_counter_set, tn.node);
+
+}
+
+static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_ref, tn.node);
+}
+
+static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
+ const struct sock *sk)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct sock_tag *data = rb_entry(node, struct sock_tag,
+ sock_node);
+ if (sk < data->sk)
+ node = node->rb_left;
+ else if (sk > data->sk)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct sock_tag *this = rb_entry(*new, struct sock_tag,
+ sock_node);
+ parent = *new;
+ if (data->sk < this->sk)
+ new = &((*new)->rb_left);
+ else if (data->sk > this->sk)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->sock_node, parent, new);
+ rb_insert_color(&data->sock_node, root);
+}
+
+static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
+{
+ struct rb_node *node;
+ struct sock_tag *st_entry;
+
+ node = rb_first(st_to_free_tree);
+ while (node) {
+ st_entry = rb_entry(node, struct sock_tag, sock_node);
+ node = rb_next(node);
+ CT_DEBUG("qtaguid: %s(): "
+ "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
+ st_entry->sk,
+ st_entry->tag,
+ get_uid_from_tag(st_entry->tag));
+ rb_erase(&st_entry->sock_node, st_to_free_tree);
+ sockfd_put(st_entry->socket);
+ kfree(st_entry);
+ }
+}
+
+static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
+ const pid_t pid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct proc_qtu_data *data = rb_entry(node,
+ struct proc_qtu_data,
+ node);
+ if (pid < data->pid)
+ node = node->rb_left;
+ else if (pid > data->pid)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
+ struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct proc_qtu_data *this = rb_entry(*new,
+ struct proc_qtu_data,
+ node);
+ parent = *new;
+ if (data->pid < this->pid)
+ new = &((*new)->rb_left);
+ else if (data->pid > this->pid)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void uid_tag_data_tree_insert(struct uid_tag_data *data,
+ struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct uid_tag_data *this = rb_entry(*new,
+ struct uid_tag_data,
+ node);
+ parent = *new;
+ if (data->uid < this->uid)
+ new = &((*new)->rb_left);
+ else if (data->uid > this->uid)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
+ uid_t uid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct uid_tag_data *data = rb_entry(node,
+ struct uid_tag_data,
+ node);
+ if (uid < data->uid)
+ node = node->rb_left;
+ else if (uid > data->uid)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+/*
+ * Allocates a new uid_tag_data struct if needed.
+ * Returns a pointer to the found or newly allocated uid_tag_data,
+ * or a PTR_ERR on allocation failure; this function takes no lock.
+ * If found_res is not NULL:
+ *   *found_res is set to true if the entry already existed,
+ *   and to false if it had to be allocated.
+ */
+struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
+{
+ struct uid_tag_data *utd_entry;
+
+ /* Look for top level uid_tag_data for the UID */
+ utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
+ DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
+
+ if (found_res)
+ *found_res = utd_entry;
+ if (utd_entry)
+ return utd_entry;
+
+ utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
+ if (!utd_entry) {
+ pr_err("qtaguid: get_uid_data(%u): "
+ "tag data alloc failed\n", uid);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ utd_entry->uid = uid;
+ utd_entry->tag_ref_tree = RB_ROOT;
+ uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
+ DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
+ return utd_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *new_tag_ref(tag_t new_tag,
+ struct uid_tag_data *utd_entry)
+{
+ struct tag_ref *tr_entry;
+ int res;
+
+ if (utd_entry->num_active_tags + 1 > max_sock_tags) {
+ pr_info("qtaguid: new_tag_ref(0x%llx): "
+ "tag ref alloc quota exceeded. max=%d\n",
+ new_tag, max_sock_tags);
+ res = -EMFILE;
+ goto err_res;
+
+ }
+
+ tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
+ if (!tr_entry) {
+ pr_err("qtaguid: new_tag_ref(0x%llx): "
+ "tag ref alloc failed\n",
+ new_tag);
+ res = -ENOMEM;
+ goto err_res;
+ }
+ tr_entry->tn.tag = new_tag;
+ /* tr_entry->num_sock_tags handled by caller */
+ utd_entry->num_active_tags++;
+ tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
+ DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
+ " inserted new tag ref %p\n",
+ new_tag, tr_entry);
+ return tr_entry;
+
+err_res:
+ return ERR_PTR(res);
+}
+
+static struct tag_ref *lookup_tag_ref(tag_t full_tag,
+ struct uid_tag_data **utd_res)
+{
+ struct uid_tag_data *utd_entry;
+ struct tag_ref *tr_entry;
+ bool found_utd;
+ uid_t uid = get_uid_from_tag(full_tag);
+
+ DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
+ full_tag, uid);
+
+ utd_entry = get_uid_data(uid, &found_utd);
+ if (IS_ERR_OR_NULL(utd_entry)) {
+ if (utd_res)
+ *utd_res = utd_entry;
+ return NULL;
+ }
+
+ tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
+ if (utd_res)
+ *utd_res = utd_entry;
+ DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
+ full_tag, utd_entry, tr_entry);
+ return tr_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *get_tag_ref(tag_t full_tag,
+ struct uid_tag_data **utd_res)
+{
+ struct uid_tag_data *utd_entry;
+ struct tag_ref *tr_entry;
+
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
+ full_tag);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ tr_entry = lookup_tag_ref(full_tag, &utd_entry);
+ BUG_ON(IS_ERR_OR_NULL(utd_entry));
+ if (!tr_entry)
+ tr_entry = new_tag_ref(full_tag, utd_entry);
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ if (utd_res)
+ *utd_res = utd_entry;
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
+ full_tag, utd_entry, tr_entry);
+ return tr_entry;
+}
+
+/* Checks and maybe frees the UID Tag Data entry */
+static void put_utd_entry(struct uid_tag_data *utd_entry)
+{
+ /* Are we done with the UID tag data entry? */
+ if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
+ !utd_entry->num_pqd) {
+ DR_DEBUG("qtaguid: %s(): "
+ "erase utd_entry=%p uid=%u "
+ "by pid=%u tgid=%u uid=%u\n", __func__,
+ utd_entry, utd_entry->uid,
+ current->pid, current->tgid, current_fsuid());
+ BUG_ON(utd_entry->num_active_tags);
+ rb_erase(&utd_entry->node, &uid_tag_data_tree);
+ kfree(utd_entry);
+ } else {
+ DR_DEBUG("qtaguid: %s(): "
+ "utd_entry=%p still has %d tags %d proc_qtu_data\n",
+ __func__, utd_entry, utd_entry->num_active_tags,
+ utd_entry->num_pqd);
+ BUG_ON(!(utd_entry->num_active_tags ||
+ utd_entry->num_pqd));
+ }
+}
+
+/*
+ * If no sock_tags are using this tag_ref,
+ * decrements refcount of utd_entry, removes tr_entry
+ * from utd_entry->tag_ref_tree and frees.
+ */
+static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
+ struct uid_tag_data *utd_entry)
+{
+ DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
+ tr_entry, tr_entry->tn.tag,
+ get_uid_from_tag(tr_entry->tn.tag));
+ if (!tr_entry->num_sock_tags) {
+ BUG_ON(!utd_entry->num_active_tags);
+ utd_entry->num_active_tags--;
+ rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
+ DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
+ kfree(tr_entry);
+ }
+}
+
+static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
+{
+ struct rb_node *node;
+ struct tag_ref *tr_entry;
+ tag_t acct_tag;
+
+ DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
+ full_tag, get_uid_from_tag(full_tag));
+ acct_tag = get_atag_from_tag(full_tag);
+ node = rb_first(&utd_entry->tag_ref_tree);
+ while (node) {
+ tr_entry = rb_entry(node, struct tag_ref, tn.node);
+ node = rb_next(node);
+ if (!acct_tag || tr_entry->tn.tag == full_tag)
+ free_tag_ref_from_utd_entry(tr_entry, utd_entry);
+ }
+}
+
+static ssize_t read_proc_u64(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ uint64_t *valuep = PDE_DATA(file_inode(file));
+ char tmp[24];
+ size_t tmp_size;
+
+ tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
+ return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t read_proc_bool(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ bool *valuep = PDE_DATA(file_inode(file));
+ char tmp[24];
+ size_t tmp_size;
+
+ tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
+ return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static int get_active_counter_set(tag_t tag)
+{
+ int active_set = 0;
+ struct tag_counter_set *tcs;
+
+ MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
+ " (uid=%u)\n",
+ tag, get_uid_from_tag(tag));
+ /* For now we only handle UID tags for active sets */
+ tag = get_utag_from_tag(tag);
+ spin_lock_bh(&tag_counter_set_list_lock);
+ tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+ if (tcs)
+ active_set = tcs->active_set;
+ spin_unlock_bh(&tag_counter_set_list_lock);
+ return active_set;
+}
+
+/*
+ * Find the entry for tracking the specified interface.
+ * Caller must hold iface_stat_list_lock
+ */
+static struct iface_stat *get_iface_entry(const char *ifname)
+{
+ struct iface_stat *iface_entry;
+
+ /* Nothing to look up for a NULL interface name */
+ if (ifname == NULL) {
+ pr_info("qtaguid: iface_stat: get() NULL device name\n");
+ return NULL;
+ }
+
+ /* Iterate over interfaces */
+ list_for_each_entry(iface_entry, &iface_stat_list, list) {
+ if (!strcmp(ifname, iface_entry->ifname))
+ goto done;
+ }
+ iface_entry = NULL;
+done:
+ return iface_entry;
+}
+
+/* This is for fmt2 only */
+static void pp_iface_stat_header(struct seq_file *m)
+{
+ seq_puts(m,
+ "ifname "
+ "total_skb_rx_bytes total_skb_rx_packets "
+ "total_skb_tx_bytes total_skb_tx_packets "
+ "rx_tcp_bytes rx_tcp_packets "
+ "rx_udp_bytes rx_udp_packets "
+ "rx_other_bytes rx_other_packets "
+ "tx_tcp_bytes tx_tcp_packets "
+ "tx_udp_bytes tx_udp_packets "
+ "tx_other_bytes tx_other_packets\n"
+ );
+}
+
+static void pp_iface_stat_line(struct seq_file *m,
+ struct iface_stat *iface_entry)
+{
+ struct data_counters *cnts;
+ int cnt_set = 0; /* We only use one set for the device */
+ cnts = &iface_entry->totals_via_skb;
+ seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
+ "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+ iface_entry->ifname,
+ dc_sum_bytes(cnts, cnt_set, IFS_RX),
+ dc_sum_packets(cnts, cnt_set, IFS_RX),
+ dc_sum_bytes(cnts, cnt_set, IFS_TX),
+ dc_sum_packets(cnts, cnt_set, IFS_TX),
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
+}
+
+struct proc_iface_stat_fmt_info {
+ long fmt;
+};
+
+static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_iface_stat_fmt_info *p = m->private;
+ loff_t n = *pos;
+
+ /*
+ * This lock will prevent iface_stat_update() from changing active,
+ * and in turn prevent an interface from unregistering itself.
+ */
+ spin_lock_bh(&iface_stat_list_lock);
+
+ if (unlikely(module_passive))
+ return NULL;
+
+ if (!n && p->fmt == 2)
+ pp_iface_stat_header(m);
+
+ return seq_list_start(&iface_stat_list, n);
+}
+
+static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &iface_stat_list, pos);
+}
+
+static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
+{
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
+{
+ struct proc_iface_stat_fmt_info *p = m->private;
+ struct iface_stat *iface_entry;
+ struct rtnl_link_stats64 dev_stats, *stats;
+ struct rtnl_link_stats64 no_dev_stats = {0};
+
+
+ CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ iface_entry = list_entry(v, struct iface_stat, list);
+
+ if (iface_entry->active) {
+ stats = dev_get_stats(iface_entry->net_dev,
+ &dev_stats);
+ } else {
+ stats = &no_dev_stats;
+ }
+ /*
+ * If the meaning of the data changes, then update the fmtX
+ * string.
+ */
+ if (p->fmt == 1) {
+ seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
+ iface_entry->ifname,
+ iface_entry->active,
+ iface_entry->totals_via_dev[IFS_RX].bytes,
+ iface_entry->totals_via_dev[IFS_RX].packets,
+ iface_entry->totals_via_dev[IFS_TX].bytes,
+ iface_entry->totals_via_dev[IFS_TX].packets,
+ stats->rx_bytes, stats->rx_packets,
+ stats->tx_bytes, stats->tx_packets
+ );
+ } else {
+ pp_iface_stat_line(m, iface_entry);
+ }
+ return 0;
+}
+
+static const struct file_operations read_u64_fops = {
+ .read = read_proc_u64,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations read_bool_fops = {
+ .read = read_proc_bool,
+ .llseek = default_llseek,
+};
+
+static void iface_create_proc_worker(struct work_struct *work)
+{
+ struct proc_dir_entry *proc_entry;
+ struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
+ iface_work);
+ struct iface_stat *new_iface = isw->iface_entry;
+
+ /* iface_entries are not deleted, so safe to manipulate. */
+ proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
+ if (IS_ERR_OR_NULL(proc_entry)) {
+ pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
+ kfree(isw);
+ return;
+ }
+
+ new_iface->proc_ptr = proc_entry;
+
+ proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_TX].bytes);
+ proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_RX].bytes);
+ proc_create_data("tx_packets", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_TX].packets);
+ proc_create_data("rx_packets", proc_iface_perms, proc_entry,
+ &read_u64_fops,
+ &new_iface->totals_via_dev[IFS_RX].packets);
+ proc_create_data("active", proc_iface_perms, proc_entry,
+ &read_bool_fops, &new_iface->active);
+
+ IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
+ "entry=%p dev=%s\n", new_iface, new_iface->ifname);
+ kfree(isw);
+}
+
+/*
+ * Will set the entry's active state, and
+ * update the net_dev accordingly also.
+ */
+static void _iface_stat_set_active(struct iface_stat *entry,
+ struct net_device *net_dev,
+ bool activate)
+{
+ if (activate) {
+ entry->net_dev = net_dev;
+ entry->active = true;
+ IF_DEBUG("qtaguid: %s(%s): "
+ "enable tracking. rfcnt=%d\n", __func__,
+ entry->ifname,
+ __this_cpu_read(*net_dev->pcpu_refcnt));
+ } else {
+ entry->active = false;
+ entry->net_dev = NULL;
+ IF_DEBUG("qtaguid: %s(%s): "
+ "disable tracking. rfcnt=%d\n", __func__,
+ entry->ifname,
+ __this_cpu_read(*net_dev->pcpu_refcnt));
+
+ }
+}
+
+/* Caller must hold iface_stat_list_lock */
+static struct iface_stat *iface_alloc(struct net_device *net_dev)
+{
+ struct iface_stat *new_iface;
+ struct iface_stat_work *isw;
+
+ new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
+ if (new_iface == NULL) {
+ pr_err("qtaguid: iface_stat: create(%s): "
+ "iface_stat alloc failed\n", net_dev->name);
+ return NULL;
+ }
+ new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
+ if (new_iface->ifname == NULL) {
+ pr_err("qtaguid: iface_stat: create(%s): "
+ "ifname alloc failed\n", net_dev->name);
+ kfree(new_iface);
+ return NULL;
+ }
+ spin_lock_init(&new_iface->tag_stat_list_lock);
+ new_iface->tag_stat_tree = RB_ROOT;
+ _iface_stat_set_active(new_iface, net_dev, true);
+
+ /*
+ * ipv6 notifier chains are atomic :(
+ * No create_proc_read_entry() for you!
+ */
+ isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
+ if (!isw) {
+ pr_err("qtaguid: iface_stat: create(%s): "
+ "work alloc failed\n", new_iface->ifname);
+ _iface_stat_set_active(new_iface, net_dev, false);
+ kfree(new_iface->ifname);
+ kfree(new_iface);
+ return NULL;
+ }
+ isw->iface_entry = new_iface;
+ INIT_WORK(&isw->iface_work, iface_create_proc_worker);
+ schedule_work(&isw->iface_work);
+ list_add(&new_iface->list, &iface_stat_list);
+ return new_iface;
+}
+
+static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
+ struct iface_stat *iface)
+{
+ struct rtnl_link_stats64 dev_stats, *stats;
+ bool stats_rewound;
+
+ stats = dev_get_stats(net_dev, &dev_stats);
+ /* No empty packets */
+ stats_rewound =
+ (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
+ || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
+
+ IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
+ "bytes rx/tx=%llu/%llu "
+ "active=%d last_known=%d "
+ "stats_rewound=%d\n", __func__,
+ net_dev ? net_dev->name : "?",
+ iface, net_dev,
+ stats->rx_bytes, stats->tx_bytes,
+ iface->active, iface->last_known_valid, stats_rewound);
+
+ if (iface->active && iface->last_known_valid && stats_rewound) {
+ pr_warn_once("qtaguid: iface_stat: %s(%s): "
+ "iface reset its stats unexpectedly\n", __func__,
+ net_dev->name);
+
+ iface->totals_via_dev[IFS_TX].bytes +=
+ iface->last_known[IFS_TX].bytes;
+ iface->totals_via_dev[IFS_TX].packets +=
+ iface->last_known[IFS_TX].packets;
+ iface->totals_via_dev[IFS_RX].bytes +=
+ iface->last_known[IFS_RX].bytes;
+ iface->totals_via_dev[IFS_RX].packets +=
+ iface->last_known[IFS_RX].packets;
+ iface->last_known_valid = false;
+ IF_DEBUG("qtaguid: %s(%s): iface=%p "
+ "used last known bytes rx/tx=%llu/%llu\n", __func__,
+ iface->ifname, iface, iface->last_known[IFS_RX].bytes,
+ iface->last_known[IFS_TX].bytes);
+ }
+}
+
+/*
+ * Create a new entry for tracking the specified interface.
+ * Do nothing if the entry already exists.
+ * Called when an interface is configured with a valid IP address.
+ */
+static void iface_stat_create(struct net_device *net_dev,
+ struct in_ifaddr *ifa)
+{
+ struct in_device *in_dev = NULL;
+ const char *ifname;
+ struct iface_stat *entry;
+ __be32 ipaddr = 0;
+ struct iface_stat *new_iface;
+
+ IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
+ net_dev ? net_dev->name : "?",
+ ifa, net_dev);
+ if (!net_dev) {
+ pr_err("qtaguid: iface_stat: create(): no net dev\n");
+ return;
+ }
+
+ ifname = net_dev->name;
+ if (!ifa) {
+ in_dev = in_dev_get(net_dev);
+ if (!in_dev) {
+ pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
+ ifname);
+ return;
+ }
+ IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
+ ifname, in_dev);
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ IF_DEBUG("qtaguid: iface_stat: create(%s): "
+ "ifa=%p ifa_label=%s\n",
+ ifname, ifa,
+ ifa->ifa_label ? ifa->ifa_label : "(null)");
+ if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
+ break;
+ }
+ }
+
+ if (!ifa) {
+ IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
+ ifname);
+ goto done_put;
+ }
+ ipaddr = ifa->ifa_local;
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(ifname);
+ if (entry != NULL) {
+ IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
+ ifname, entry);
+ iface_check_stats_reset_and_adjust(net_dev, entry);
+ _iface_stat_set_active(entry, net_dev, true);
+ IF_DEBUG("qtaguid: %s(%s): "
+ "tracking now %d on ip=%pI4\n", __func__,
+ entry->ifname, true, &ipaddr);
+ goto done_unlock_put;
+ }
+
+ new_iface = iface_alloc(net_dev);
+ IF_DEBUG("qtaguid: iface_stat: create(%s): done "
+ "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
+done_unlock_put:
+ spin_unlock_bh(&iface_stat_list_lock);
+done_put:
+ if (in_dev)
+ in_dev_put(in_dev);
+}
+
+static void iface_stat_create_ipv6(struct net_device *net_dev,
+ struct inet6_ifaddr *ifa)
+{
+ struct in_device *in_dev;
+ const char *ifname;
+ struct iface_stat *entry;
+ struct iface_stat *new_iface;
+ int addr_type;
+
+ IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
+ ifa, net_dev, net_dev ? net_dev->name : "");
+ if (!net_dev) {
+ pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
+ return;
+ }
+ ifname = net_dev->name;
+
+ in_dev = in_dev_get(net_dev);
+ if (!in_dev) {
+ pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
+ ifname);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
+ ifname, in_dev);
+
+ if (!ifa) {
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
+ ifname);
+ goto done_put;
+ }
+ addr_type = ipv6_addr_type(&ifa->addr);
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(ifname);
+ if (entry != NULL) {
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ ifname, entry);
+ iface_check_stats_reset_and_adjust(net_dev, entry);
+ _iface_stat_set_active(entry, net_dev, true);
+ IF_DEBUG("qtaguid: %s(%s): "
+ "tracking now %d on ip=%pI6c\n", __func__,
+ entry->ifname, true, &ifa->addr);
+ goto done_unlock_put;
+ }
+
+ new_iface = iface_alloc(net_dev);
+ IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
+ "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
+
+done_unlock_put:
+ spin_unlock_bh(&iface_stat_list_lock);
+done_put:
+ in_dev_put(in_dev);
+}
+
+static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
+{
+ MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
+ return sock_tag_tree_search(&sock_tag_tree, sk);
+}
+
+static struct sock_tag *get_sock_stat(const struct sock *sk)
+{
+ struct sock_tag *sock_tag_entry;
+ MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
+ if (!sk)
+ return NULL;
+ spin_lock_bh(&sock_tag_list_lock);
+ sock_tag_entry = get_sock_stat_nl(sk);
+ spin_unlock_bh(&sock_tag_list_lock);
+ return sock_tag_entry;
+}
+
+static int ipx_proto(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ int thoff = 0, tproto;
+
+ switch (par->family) {
+ case NFPROTO_IPV6:
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+ if (tproto < 0)
+ MT_DEBUG("%s(): transport header not found in ipv6"
+ " skb=%p\n", __func__, skb);
+ break;
+ case NFPROTO_IPV4:
+ tproto = ip_hdr(skb)->protocol;
+ break;
+ default:
+ tproto = IPPROTO_RAW;
+ }
+ return tproto;
+}
+
+static void
+data_counters_update(struct data_counters *dc, int set,
+ enum ifs_tx_rx direction, int proto, int bytes)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
+ break;
+ case IPPROTO_UDP:
+ dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
+ break;
+ case IPPROTO_IP:
+ default:
+ dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
+ 1);
+ break;
+ }
+}
+
+/*
+ * Update stats for the specified interface. Do nothing if the entry
+ * does not exist (when a device was never configured with an IP address).
+ * Called when a device is being unregistered.
+ */
+static void iface_stat_update(struct net_device *net_dev, bool stash_only)
+{
+ struct rtnl_link_stats64 dev_stats, *stats;
+ struct iface_stat *entry;
+
+ stats = dev_get_stats(net_dev, &dev_stats);
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(net_dev->name);
+ if (entry == NULL) {
+ IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
+ net_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ net_dev->name, entry);
+ if (!entry->active) {
+ IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
+ net_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ if (stash_only) {
+ entry->last_known[IFS_TX].bytes = stats->tx_bytes;
+ entry->last_known[IFS_TX].packets = stats->tx_packets;
+ entry->last_known[IFS_RX].bytes = stats->rx_bytes;
+ entry->last_known[IFS_RX].packets = stats->rx_packets;
+ entry->last_known_valid = true;
+ IF_DEBUG("qtaguid: %s(%s): "
+ "dev stats stashed rx/tx=%llu/%llu\n", __func__,
+ net_dev->name, stats->rx_bytes, stats->tx_bytes);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+ entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
+ entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
+ entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
+ entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
+ /* We don't need the last_known[] anymore */
+ entry->last_known_valid = false;
+ _iface_stat_set_active(entry, net_dev, false);
+ IF_DEBUG("qtaguid: %s(%s): "
+ "disable tracking. rx/tx=%llu/%llu\n", __func__,
+ net_dev->name, stats->rx_bytes, stats->tx_bytes);
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+/*
+ * Update stats for the specified interface from the skb.
+ * Do nothing if the entry does not exist
+ * (when a device was never configured with an IP address).
+ * Called on each sk.
+ */
+static void iface_stat_update_from_skb(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ struct iface_stat *entry;
+ const struct net_device *el_dev;
+ enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
+ int bytes = skb->len;
+ int proto;
+
+ if (!skb->dev) {
+ MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+ el_dev = par->in ? : par->out;
+ } else {
+ const struct net_device *other_dev;
+ el_dev = skb->dev;
+ other_dev = par->in ? : par->out;
+ if (el_dev != other_dev) {
+ MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+ "par->(in/out)=%p %s\n",
+ par->hooknum, el_dev, el_dev->name, other_dev,
+ other_dev->name);
+ }
+ }
+
+ if (unlikely(!el_dev)) {
+ pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
+ par->hooknum, __func__);
+ BUG();
+ } else if (unlikely(!el_dev->name)) {
+ pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
+ par->hooknum, __func__);
+ BUG();
+ } else {
+ proto = ipx_proto(skb, par);
+ MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+ par->hooknum, el_dev->name, el_dev->type,
+ par->family, proto);
+ }
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(el_dev->name);
+ if (entry == NULL) {
+ IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
+ __func__, el_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ el_dev->name, entry);
+
+ data_counters_update(&entry->totals_via_skb, 0, direction, proto,
+ bytes);
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+static void tag_stat_update(struct tag_stat *tag_entry,
+ enum ifs_tx_rx direction, int proto, int bytes)
+{
+ int active_set;
+ active_set = get_active_counter_set(tag_entry->tn.tag);
+ MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
+ "dir=%d proto=%d bytes=%d)\n",
+ tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
+ active_set, direction, proto, bytes);
+ data_counters_update(&tag_entry->counters, active_set, direction,
+ proto, bytes);
+ if (tag_entry->parent_counters)
+ data_counters_update(tag_entry->parent_counters, active_set,
+ direction, proto, bytes);
+}
+
+/*
+ * Create a new entry for tracking the specified {acct_tag,uid_tag} within
+ * the interface.
+ * iface_entry->tag_stat_list_lock should be held.
+ */
+static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
+ tag_t tag)
+{
+ struct tag_stat *new_tag_stat_entry = NULL;
+ IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
+ " (uid=%u)\n", __func__,
+ iface_entry, tag, get_uid_from_tag(tag));
+ new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
+ if (!new_tag_stat_entry) {
+ pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
+ goto done;
+ }
+ new_tag_stat_entry->tn.tag = tag;
+ tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
+done:
+ return new_tag_stat_entry;
+}
+
+static void if_tag_stat_update(const char *ifname, uid_t uid,
+ const struct sock *sk, enum ifs_tx_rx direction,
+ int proto, int bytes)
+{
+ struct tag_stat *tag_stat_entry;
+ tag_t tag, acct_tag;
+ tag_t uid_tag;
+ struct data_counters *uid_tag_counters;
+ struct sock_tag *sock_tag_entry;
+ struct iface_stat *iface_entry;
+ struct tag_stat *new_tag_stat = NULL;
+ MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
+ "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
+ ifname, uid, sk, direction, proto, bytes);
+
+
+ iface_entry = get_iface_entry(ifname);
+ if (!iface_entry) {
+ pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
+ "%s not found\n", ifname);
+ return;
+ }
+ /* It is ok to process data when an iface_entry is inactive */
+
+ MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
+ ifname, iface_entry);
+
+ /*
+ * Look for a tagged sock.
+	 * It will have an acct_tag.
+ */
+ sock_tag_entry = get_sock_stat(sk);
+ if (sock_tag_entry) {
+ tag = sock_tag_entry->tag;
+ acct_tag = get_atag_from_tag(tag);
+ uid_tag = get_utag_from_tag(tag);
+ } else {
+ acct_tag = make_atag_from_value(0);
+ tag = combine_atag_with_uid(acct_tag, uid);
+ uid_tag = make_tag_from_uid(uid);
+ }
+ MT_DEBUG("qtaguid: iface_stat: stat_update(): "
+ " looking for tag=0x%llx (uid=%u) in ife=%p\n",
+ tag, get_uid_from_tag(tag), iface_entry);
+ /* Loop over tag list under this interface for {acct_tag,uid_tag} */
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+
+ tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
+ tag);
+ if (tag_stat_entry) {
+ /*
+ * Updating the {acct_tag, uid_tag} entry handles both stats:
+ * {0, uid_tag} will also get updated.
+ */
+ tag_stat_update(tag_stat_entry, direction, proto, bytes);
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ return;
+ }
+
+ /* Loop over tag list under this interface for {0,uid_tag} */
+ tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
+ uid_tag);
+ if (!tag_stat_entry) {
+ /* Here: the base uid_tag did not exist */
+ /*
+ * No parent counters. So
+ * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
+ */
+ new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
+ if (!new_tag_stat)
+ goto unlock;
+ uid_tag_counters = &new_tag_stat->counters;
+ } else {
+ uid_tag_counters = &tag_stat_entry->counters;
+ }
+
+ if (acct_tag) {
+ /* Create the child {acct_tag, uid_tag} and hook up parent. */
+ new_tag_stat = create_if_tag_stat(iface_entry, tag);
+ if (!new_tag_stat)
+ goto unlock;
+ new_tag_stat->parent_counters = uid_tag_counters;
+ } else {
+ /*
+		 * For new_tag_stat to still be NULL here would require:
+ * {0, uid_tag} exists
+ * and {acct_tag, uid_tag} doesn't exist
+ * AND acct_tag == 0.
+ * Impossible. This reassures us that new_tag_stat
+ * below will always be assigned.
+ */
+ BUG_ON(!new_tag_stat);
+ }
+ tag_stat_update(new_tag_stat, direction, proto, bytes);
+unlock:
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+}
+
+static int iface_netdev_event_handler(struct notifier_block *nb,
+ unsigned long event, void *ptr) {
+ struct net_device *dev = ptr;
+
+ if (unlikely(module_passive))
+ return NOTIFY_DONE;
+
+ IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
+ "ev=0x%lx/%s netdev=%p->name=%s\n",
+ event, netdev_evt_str(event), dev, dev ? dev->name : "");
+
+ switch (event) {
+ case NETDEV_UP:
+ iface_stat_create(dev, NULL);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ iface_stat_update(dev, event == NETDEV_DOWN);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static int iface_inet6addr_event_handler(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct net_device *dev;
+
+ if (unlikely(module_passive))
+ return NOTIFY_DONE;
+
+ IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
+ "ev=0x%lx/%s ifa=%p\n",
+ event, netdev_evt_str(event), ifa);
+
+ switch (event) {
+ case NETDEV_UP:
+ BUG_ON(!ifa || !ifa->idev);
+ dev = (struct net_device *)ifa->idev->dev;
+ iface_stat_create_ipv6(dev, ifa);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ BUG_ON(!ifa || !ifa->idev);
+ dev = (struct net_device *)ifa->idev->dev;
+ iface_stat_update(dev, event == NETDEV_DOWN);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static int iface_inetaddr_event_handler(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *dev;
+
+ if (unlikely(module_passive))
+ return NOTIFY_DONE;
+
+ IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
+ "ev=0x%lx/%s ifa=%p\n",
+ event, netdev_evt_str(event), ifa);
+
+ switch (event) {
+ case NETDEV_UP:
+ BUG_ON(!ifa || !ifa->ifa_dev);
+ dev = ifa->ifa_dev->dev;
+ iface_stat_create(dev, ifa);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ BUG_ON(!ifa || !ifa->ifa_dev);
+ dev = ifa->ifa_dev->dev;
+ iface_stat_update(dev, event == NETDEV_DOWN);
+ atomic64_inc(&qtu_events.iface_events);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block iface_netdev_notifier_blk = {
+ .notifier_call = iface_netdev_event_handler,
+};
+
+static struct notifier_block iface_inetaddr_notifier_blk = {
+ .notifier_call = iface_inetaddr_event_handler,
+};
+
+static struct notifier_block iface_inet6addr_notifier_blk = {
+ .notifier_call = iface_inet6addr_event_handler,
+};
+
+static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
+ .start = iface_stat_fmt_proc_start,
+ .next = iface_stat_fmt_proc_next,
+ .stop = iface_stat_fmt_proc_stop,
+ .show = iface_stat_fmt_proc_show,
+};
+
+static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
+{
+ struct proc_iface_stat_fmt_info *s;
+
+ s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
+ sizeof(struct proc_iface_stat_fmt_info));
+ if (!s)
+ return -ENOMEM;
+
+ s->fmt = (uintptr_t)PDE_DATA(inode);
+ return 0;
+}
+
+static const struct file_operations proc_iface_stat_fmt_fops = {
+ .open = proc_iface_stat_fmt_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
+{
+ int err;
+
+ iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
+ if (!iface_stat_procdir) {
+ pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
+ err = -1;
+ goto err;
+ }
+
+ iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
+ proc_iface_perms,
+ parent_procdir,
+ &proc_iface_stat_fmt_fops,
+ (void *)1 /* fmt1 */);
+ if (!iface_stat_all_procfile) {
+ pr_err("qtaguid: iface_stat: init "
+ " failed to create stat_old proc entry\n");
+ err = -1;
+ goto err_zap_entry;
+ }
+
+ iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
+ proc_iface_perms,
+ parent_procdir,
+ &proc_iface_stat_fmt_fops,
+ (void *)2 /* fmt2 */);
+ if (!iface_stat_fmt_procfile) {
+ pr_err("qtaguid: iface_stat: init "
+ " failed to create stat_all proc entry\n");
+ err = -1;
+ goto err_zap_all_stats_entry;
+ }
+
+
+ err = register_netdevice_notifier(&iface_netdev_notifier_blk);
+ if (err) {
+ pr_err("qtaguid: iface_stat: init "
+ "failed to register dev event handler\n");
+ goto err_zap_all_stats_entries;
+ }
+ err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
+ if (err) {
+ pr_err("qtaguid: iface_stat: init "
+ "failed to register ipv4 dev event handler\n");
+ goto err_unreg_nd;
+ }
+
+ err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
+ if (err) {
+ pr_err("qtaguid: iface_stat: init "
+ "failed to register ipv6 dev event handler\n");
+ goto err_unreg_ip4_addr;
+ }
+ return 0;
+
+err_unreg_ip4_addr:
+ unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
+err_unreg_nd:
+ unregister_netdevice_notifier(&iface_netdev_notifier_blk);
+err_zap_all_stats_entries:
+ remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
+err_zap_all_stats_entry:
+ remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
+err_zap_entry:
+ remove_proc_entry(iface_stat_procdirname, parent_procdir);
+err:
+ return err;
+}
+
+static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ struct sock *sk;
+ unsigned int hook_mask = (1 << par->hooknum);
+
+ MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
+ par->hooknum, par->family);
+
+ /*
+	 * Let's not abuse the xt_socket_get*_sk(), or else it will
+ * return garbage SKs.
+ */
+ if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
+ return NULL;
+
+ switch (par->family) {
+ case NFPROTO_IPV6:
+ sk = xt_socket_get6_sk(skb, par);
+ break;
+ case NFPROTO_IPV4:
+ sk = xt_socket_get4_sk(skb, par);
+ break;
+ default:
+ return NULL;
+ }
+
+ if (sk) {
+ MT_DEBUG("qtaguid: %p->sk_proto=%u "
+ "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
+ /*
+ * When in TCP_TIME_WAIT the sk is not a "struct sock" but
+ * "struct inet_timewait_sock" which is missing fields.
+ */
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ xt_socket_put_sk(sk);
+ sk = NULL;
+ }
+ }
+ return sk;
+}
+
+static void account_for_uid(const struct sk_buff *skb,
+ const struct sock *alternate_sk, uid_t uid,
+ struct xt_action_param *par)
+{
+ const struct net_device *el_dev;
+
+ if (!skb->dev) {
+ MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+ el_dev = par->in ? : par->out;
+ } else {
+ const struct net_device *other_dev;
+ el_dev = skb->dev;
+ other_dev = par->in ? : par->out;
+ if (el_dev != other_dev) {
+ MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+ "par->(in/out)=%p %s\n",
+ par->hooknum, el_dev, el_dev->name, other_dev,
+ other_dev->name);
+ }
+ }
+
+ if (unlikely(!el_dev)) {
+ pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
+ } else if (unlikely(!el_dev->name)) {
+ pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
+ } else {
+ int proto = ipx_proto(skb, par);
+ MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+ par->hooknum, el_dev->name, el_dev->type,
+ par->family, proto);
+
+ if_tag_stat_update(el_dev->name, uid,
+ skb->sk ? skb->sk : alternate_sk,
+ par->in ? IFS_RX : IFS_TX,
+ proto, skb->len);
+ }
+}
+
+static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_qtaguid_match_info *info = par->matchinfo;
+ const struct file *filp;
+ bool got_sock = false;
+ struct sock *sk;
+ uid_t sock_uid;
+ bool res;
+ bool set_sk_callback_lock = false;
+
+ if (unlikely(module_passive))
+ return (info->match ^ info->invert) == 0;
+
+ MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
+ par->hooknum, skb, par->in, par->out, par->family);
+
+ atomic64_inc(&qtu_events.match_calls);
+ if (skb == NULL) {
+ res = (info->match ^ info->invert) == 0;
+ goto ret_res;
+ }
+
+ switch (par->hooknum) {
+ case NF_INET_PRE_ROUTING:
+ case NF_INET_POST_ROUTING:
+ atomic64_inc(&qtu_events.match_calls_prepost);
+ iface_stat_update_from_skb(skb, par);
+ /*
+ * We are done in pre/post. The skb will get processed
+		 * further later.
+ */
+ res = (info->match ^ info->invert);
+ goto ret_res;
+ break;
+	/* default: Fall through and do UID related work */
+ }
+
+ sk = skb->sk;
+ /*
+ * When in TCP_TIME_WAIT the sk is not a "struct sock" but
+ * "struct inet_timewait_sock" which is missing fields.
+ * So we ignore it.
+ */
+ if (sk && sk->sk_state == TCP_TIME_WAIT)
+ sk = NULL;
+ if (sk == NULL) {
+ /*
+ * A missing sk->sk_socket happens when packets are in-flight
+ * and the matching socket is already closed and gone.
+ */
+ sk = qtaguid_find_sk(skb, par);
+ /*
+ * If we got the socket from the find_sk(), we will need to put
+ * it back, as nf_tproxy_get_sock_v4() got it.
+ */
+ got_sock = sk;
+ if (sk)
+ atomic64_inc(&qtu_events.match_found_sk_in_ct);
+ else
+ atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
+ } else {
+ atomic64_inc(&qtu_events.match_found_sk);
+ }
+ MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
+ par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
+ if (sk != NULL) {
+ set_sk_callback_lock = true;
+ read_lock_bh(&sk->sk_callback_lock);
+ MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
+ par->hooknum, sk, sk->sk_socket,
+ sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
+ filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+ MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
+ par->hooknum, filp ? filp->f_cred->fsuid : -1);
+ }
+
+ if (sk == NULL || sk->sk_socket == NULL) {
+ /*
+ * Here, the qtaguid_find_sk() using connection tracking
+ * couldn't find the owner, so for now we just count them
+ * against the system.
+ */
+ /*
+ * TODO: unhack how to force just accounting.
+ * For now we only do iface stats when the uid-owner is not
+ * requested.
+ */
+ if (!(info->match & XT_QTAGUID_UID))
+ account_for_uid(skb, sk, 0, par);
+ MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
+ par->hooknum,
+ sk ? sk->sk_socket : NULL);
+ res = (info->match ^ info->invert) == 0;
+ atomic64_inc(&qtu_events.match_no_sk);
+ goto put_sock_ret_res;
+ } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
+ res = false;
+ goto put_sock_ret_res;
+ }
+ filp = sk->sk_socket->file;
+ if (filp == NULL) {
+ MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
+ account_for_uid(skb, sk, 0, par);
+ res = ((info->match ^ info->invert) &
+ (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
+ atomic64_inc(&qtu_events.match_no_sk_file);
+ goto put_sock_ret_res;
+ }
+ sock_uid = filp->f_cred->fsuid;
+ /*
+ * TODO: unhack how to force just accounting.
+ * For now we only do iface stats when the uid-owner is not requested
+ */
+ if (!(info->match & XT_QTAGUID_UID))
+ account_for_uid(skb, sk, sock_uid, par);
+
+ /*
+ * The following two tests fail the match when:
+ * id not in range AND no inverted condition requested
+ * or id in range AND inverted condition requested
+ * Thus (!a && b) || (a && !b) == a ^ b
+ */
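+	/*
+	 * Truth table for the checks below (a = id in range, b = inverted
+	 * condition requested):
+	 *   a=1,b=0 -> match;  a=0,b=0 -> fail;
+	 *   a=1,b=1 -> fail;   a=0,b=1 -> match.
+	 */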
+ if (info->match & XT_QTAGUID_UID)
+ if ((filp->f_cred->fsuid >= info->uid_min &&
+ filp->f_cred->fsuid <= info->uid_max) ^
+ !(info->invert & XT_QTAGUID_UID)) {
+ MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
+ par->hooknum);
+ res = false;
+ goto put_sock_ret_res;
+ }
+ if (info->match & XT_QTAGUID_GID)
+ if ((filp->f_cred->fsgid >= info->gid_min &&
+ filp->f_cred->fsgid <= info->gid_max) ^
+ !(info->invert & XT_QTAGUID_GID)) {
+ MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
+ par->hooknum);
+ res = false;
+ goto put_sock_ret_res;
+ }
+
+ MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
+ res = true;
+
+put_sock_ret_res:
+ if (got_sock)
+ xt_socket_put_sk(sk);
+ if (set_sk_callback_lock)
+ read_unlock_bh(&sk->sk_callback_lock);
+ret_res:
+ MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
+ return res;
+}
+
+#ifdef DDEBUG
+/* This function is not in xt_qtaguid_print.c because of lock visibility */
+static void prdebug_full_state(int indent_level, const char *fmt, ...)
+{
+ va_list args;
+ char *fmt_buff;
+ char *buff;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ fmt_buff = kasprintf(GFP_ATOMIC,
+ "qtaguid: %s(): %s {\n", __func__, fmt);
+ BUG_ON(!fmt_buff);
+ va_start(args, fmt);
+ buff = kvasprintf(GFP_ATOMIC,
+ fmt_buff, args);
+ BUG_ON(!buff);
+ pr_debug("%s", buff);
+ kfree(fmt_buff);
+ kfree(buff);
+ va_end(args);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
+ prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+ spin_lock_bh(&iface_stat_list_lock);
+ prdebug_iface_stat_list(indent_level, &iface_stat_list);
+ spin_unlock_bh(&iface_stat_list_lock);
+
+ pr_debug("qtaguid: %s(): }\n", __func__);
+}
+#else
+static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
+#endif
+
+struct proc_ctrl_print_info {
+ struct sock *sk; /* socket found by reading to sk_pos */
+ loff_t sk_pos;
+};
+
+static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct proc_ctrl_print_info *pcpi = m->private;
+ struct sock_tag *sock_tag_entry = v;
+ struct rb_node *node;
+
+ (*pos)++;
+
+ if (!v || v == SEQ_START_TOKEN)
+ return NULL;
+
+ node = rb_next(&sock_tag_entry->sock_node);
+ if (!node) {
+ pcpi->sk = NULL;
+ sock_tag_entry = SEQ_START_TOKEN;
+ } else {
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ pcpi->sk = sock_tag_entry->sk;
+ }
+ pcpi->sk_pos = *pos;
+ return sock_tag_entry;
+}
+
+static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_ctrl_print_info *pcpi = m->private;
+ struct sock_tag *sock_tag_entry;
+ struct rb_node *node;
+
+ spin_lock_bh(&sock_tag_list_lock);
+
+ if (unlikely(module_passive))
+ return NULL;
+
+ if (*pos == 0) {
+ pcpi->sk_pos = 0;
+ node = rb_first(&sock_tag_tree);
+ if (!node) {
+ pcpi->sk = NULL;
+ return SEQ_START_TOKEN;
+ }
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ pcpi->sk = sock_tag_entry->sk;
+ } else {
+ sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
+ NULL) ?: SEQ_START_TOKEN;
+ if (*pos != pcpi->sk_pos) {
+ /* seq_read skipped a next call */
+ *pos = pcpi->sk_pos;
+ return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
+ }
+ }
+ return sock_tag_entry;
+}
+
+static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
+{
+ spin_unlock_bh(&sock_tag_list_lock);
+}
+
+/*
+ * Procfs reader to get all active socket tags using style "1)" as described in
+ * fs/proc/generic.c
+ */
+static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
+{
+ struct sock_tag *sock_tag_entry = v;
+ uid_t uid;
+ long f_count;
+
+ CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ if (sock_tag_entry != SEQ_START_TOKEN) {
+ uid = get_uid_from_tag(sock_tag_entry->tag);
+ CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
+ "pid=%u\n",
+ sock_tag_entry->sk,
+ sock_tag_entry->tag,
+ uid,
+ sock_tag_entry->pid
+ );
+ f_count = atomic_long_read(
+ &sock_tag_entry->socket->file->f_count);
+ seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
+ "f_count=%lu\n",
+ sock_tag_entry->sk,
+ sock_tag_entry->tag, uid,
+ sock_tag_entry->pid, f_count);
+ } else {
+ seq_printf(m, "events: sockets_tagged=%llu "
+ "sockets_untagged=%llu "
+ "counter_set_changes=%llu "
+ "delete_cmds=%llu "
+ "iface_events=%llu "
+ "match_calls=%llu "
+ "match_calls_prepost=%llu "
+ "match_found_sk=%llu "
+ "match_found_sk_in_ct=%llu "
+ "match_found_no_sk_in_ct=%llu "
+ "match_no_sk=%llu "
+ "match_no_sk_file=%llu\n",
+ (u64)atomic64_read(&qtu_events.sockets_tagged),
+ (u64)atomic64_read(&qtu_events.sockets_untagged),
+ (u64)atomic64_read(&qtu_events.counter_set_changes),
+ (u64)atomic64_read(&qtu_events.delete_cmds),
+ (u64)atomic64_read(&qtu_events.iface_events),
+ (u64)atomic64_read(&qtu_events.match_calls),
+ (u64)atomic64_read(&qtu_events.match_calls_prepost),
+ (u64)atomic64_read(&qtu_events.match_found_sk),
+ (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
+ (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
+ (u64)atomic64_read(&qtu_events.match_no_sk),
+ (u64)atomic64_read(&qtu_events.match_no_sk_file));
+
+ /* Count the following as part of the last item_index */
+ prdebug_full_state(0, "proc ctrl");
+ }
+
+ return 0;
+}
+
+/*
+ * Delete socket tags, and stat tags associated with a given
+ * accounting tag and uid.
+ */
+static int ctrl_cmd_delete(const char *input)
+{
+ char cmd;
+ uid_t uid;
+ uid_t entry_uid;
+ tag_t acct_tag;
+ tag_t tag;
+ int res, argc;
+ struct iface_stat *iface_entry;
+ struct rb_node *node;
+ struct sock_tag *st_entry;
+ struct rb_root st_to_free_tree = RB_ROOT;
+ struct tag_stat *ts_entry;
+ struct tag_counter_set *tcs_entry;
+ struct tag_ref *tr_entry;
+ struct uid_tag_data *utd_entry;
+
+ argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
+ CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
+ "user_tag=0x%llx uid=%u\n", input, argc, cmd,
+ acct_tag, uid);
+ if (argc < 2) {
+ res = -EINVAL;
+ goto err;
+ }
+ if (!valid_atag(acct_tag)) {
+ pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
+ res = -EINVAL;
+ goto err;
+ }
+ if (argc < 3) {
+ uid = current_fsuid();
+ } else if (!can_impersonate_uid(uid)) {
+ pr_info("qtaguid: ctrl_delete(%s): "
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+ res = -EPERM;
+ goto err;
+ }
+
+ tag = combine_atag_with_uid(acct_tag, uid);
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "looking for tag=0x%llx (uid=%u)\n",
+ input, tag, uid);
+
+ /* Delete socket tags */
+ spin_lock_bh(&sock_tag_list_lock);
+ node = rb_first(&sock_tag_tree);
+ while (node) {
+ st_entry = rb_entry(node, struct sock_tag, sock_node);
+ entry_uid = get_uid_from_tag(st_entry->tag);
+ node = rb_next(node);
+ if (entry_uid != uid)
+ continue;
+
+ CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
+ input, st_entry->tag, entry_uid);
+
+ if (!acct_tag || st_entry->tag == tag) {
+ rb_erase(&st_entry->sock_node, &sock_tag_tree);
+ /* Can't sockfd_put() within spinlock, do it later. */
+ sock_tag_tree_insert(st_entry, &st_to_free_tree);
+ tr_entry = lookup_tag_ref(st_entry->tag, NULL);
+ BUG_ON(tr_entry->num_sock_tags <= 0);
+ tr_entry->num_sock_tags--;
+ /*
+ * TODO: remove if, and start failing.
+ * This is a hack to work around the fact that in some
+ * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
+ * and are trying to work around apps
+ * that didn't open the /dev/xt_qtaguid.
+ */
+ if (st_entry->list.next && st_entry->list.prev)
+ list_del(&st_entry->list);
+ }
+ }
+ spin_unlock_bh(&sock_tag_list_lock);
+
+ sock_tag_tree_erase(&st_to_free_tree);
+
+ /* Delete tag counter-sets */
+ spin_lock_bh(&tag_counter_set_list_lock);
+ /* Counter sets are only on the uid tag, not full tag */
+ tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+ if (tcs_entry) {
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
+ input,
+ tcs_entry->tn.tag,
+ get_uid_from_tag(tcs_entry->tn.tag),
+ tcs_entry->active_set);
+ rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
+ kfree(tcs_entry);
+ }
+ spin_unlock_bh(&tag_counter_set_list_lock);
+
+ /*
+ * If acct_tag is 0, then all entries belonging to uid are
+ * erased.
+ */
+ spin_lock_bh(&iface_stat_list_lock);
+ list_for_each_entry(iface_entry, &iface_stat_list, list) {
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+ node = rb_first(&iface_entry->tag_stat_tree);
+ while (node) {
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ entry_uid = get_uid_from_tag(ts_entry->tn.tag);
+ node = rb_next(node);
+
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "ts tag=0x%llx (uid=%u)\n",
+ input, ts_entry->tn.tag, entry_uid);
+
+ if (entry_uid != uid)
+ continue;
+ if (!acct_tag || ts_entry->tn.tag == tag) {
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "erase ts: %s 0x%llx %u\n",
+ input, iface_entry->ifname,
+ get_atag_from_tag(ts_entry->tn.tag),
+ entry_uid);
+ rb_erase(&ts_entry->tn.node,
+ &iface_entry->tag_stat_tree);
+ kfree(ts_entry);
+ }
+ }
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ }
+ spin_unlock_bh(&iface_stat_list_lock);
+
+ /* Cleanup the uid_tag_data */
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ node = rb_first(&uid_tag_data_tree);
+ while (node) {
+ utd_entry = rb_entry(node, struct uid_tag_data, node);
+ entry_uid = utd_entry->uid;
+ node = rb_next(node);
+
+ CT_DEBUG("qtaguid: ctrl_delete(%s): "
+ "utd uid=%u\n",
+ input, entry_uid);
+
+ if (entry_uid != uid)
+ continue;
+ /*
+ * Go over the tag_refs, and those that don't have
+ * sock_tags using them are freed.
+ */
+ put_tag_ref_tree(tag, utd_entry);
+ put_utd_entry(utd_entry);
+ }
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+
+ atomic64_inc(&qtu_events.delete_cmds);
+ res = 0;
+
+err:
+ return res;
+}
+
+static int ctrl_cmd_counter_set(const char *input)
+{
+ char cmd;
+ uid_t uid = 0;
+ tag_t tag;
+ int res, argc;
+ struct tag_counter_set *tcs;
+ int counter_set;
+
+ argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
+ CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
+ "set=%d uid=%u\n", input, argc, cmd,
+ counter_set, uid);
+ if (argc != 3) {
+ res = -EINVAL;
+ goto err;
+ }
+ if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
+ pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
+ input);
+ res = -EINVAL;
+ goto err;
+ }
+ if (!can_manipulate_uids()) {
+ pr_info("qtaguid: ctrl_counterset(%s): "
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+ res = -EPERM;
+ goto err;
+ }
+
+ tag = make_tag_from_uid(uid);
+ spin_lock_bh(&tag_counter_set_list_lock);
+ tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
+ if (!tcs) {
+ tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
+ if (!tcs) {
+ spin_unlock_bh(&tag_counter_set_list_lock);
+ pr_err("qtaguid: ctrl_counterset(%s): "
+ "failed to alloc counter set\n",
+ input);
+ res = -ENOMEM;
+ goto err;
+ }
+ tcs->tn.tag = tag;
+ tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
+ CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
+ "(uid=%u) set=%d\n",
+ input, tag, get_uid_from_tag(tag), counter_set);
+ }
+ tcs->active_set = counter_set;
+ spin_unlock_bh(&tag_counter_set_list_lock);
+ atomic64_inc(&qtu_events.counter_set_changes);
+ res = 0;
+
+err:
+ return res;
+}
+
+static int ctrl_cmd_tag(const char *input)
+{
+ char cmd;
+ int sock_fd = 0;
+ uid_t uid = 0;
+ tag_t acct_tag = make_atag_from_value(0);
+ tag_t full_tag;
+ struct socket *el_socket;
+ int res, argc;
+ struct sock_tag *sock_tag_entry;
+ struct tag_ref *tag_ref_entry;
+ struct uid_tag_data *uid_tag_data_entry;
+ struct proc_qtu_data *pqd_entry;
+
+ /* Unassigned args will get defaulted later. */
+ argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
+ CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
+ "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
+ acct_tag, uid);
+ if (argc < 2) {
+ res = -EINVAL;
+ goto err;
+ }
+ el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
+ if (!el_socket) {
+ pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
+ " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+ input, sock_fd, res, current->pid, current->tgid,
+ current_fsuid());
+ goto err;
+ }
+ CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
+ input, atomic_long_read(&el_socket->file->f_count),
+ el_socket->sk);
+ if (argc < 3) {
+ acct_tag = make_atag_from_value(0);
+ } else if (!valid_atag(acct_tag)) {
+ pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
+ res = -EINVAL;
+ goto err_put;
+ }
+ CT_DEBUG("qtaguid: ctrl_tag(%s): "
+ "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
+ "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
+ input, current->pid, current->tgid, current_uid(),
+ current_euid(), current_fsuid(),
+ xt_qtaguid_ctrl_file->gid,
+ in_group_p(xt_qtaguid_ctrl_file->gid),
+ in_egroup_p(xt_qtaguid_ctrl_file->gid));
+ if (argc < 4) {
+ uid = current_fsuid();
+ } else if (!can_impersonate_uid(uid)) {
+ pr_info("qtaguid: ctrl_tag(%s): "
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+ res = -EPERM;
+ goto err_put;
+ }
+ full_tag = combine_atag_with_uid(acct_tag, uid);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+ tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
+ if (IS_ERR(tag_ref_entry)) {
+ res = PTR_ERR(tag_ref_entry);
+ spin_unlock_bh(&sock_tag_list_lock);
+ goto err_put;
+ }
+ tag_ref_entry->num_sock_tags++;
+ if (sock_tag_entry) {
+ struct tag_ref *prev_tag_ref_entry;
+
+ CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
+ "st@%p ...->f_count=%ld\n",
+ input, el_socket->sk, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count));
+ /*
+ * This is a re-tagging, so release the sock_fd that was
+ * locked at the time of the 1st tagging.
+ * There is still the ref from this call's sockfd_lookup() so
+ * it can be done within the spinlock.
+ */
+ sockfd_put(sock_tag_entry->socket);
+ prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
+ &uid_tag_data_entry);
+ BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
+ BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
+ prev_tag_ref_entry->num_sock_tags--;
+ sock_tag_entry->tag = full_tag;
+ } else {
+ CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
+ input, el_socket->sk);
+ sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
+ GFP_ATOMIC);
+ if (!sock_tag_entry) {
+ pr_err("qtaguid: ctrl_tag(%s): "
+ "socket tag alloc failed\n",
+ input);
+ spin_unlock_bh(&sock_tag_list_lock);
+ res = -ENOMEM;
+ goto err_tag_unref_put;
+ }
+ sock_tag_entry->sk = el_socket->sk;
+ sock_tag_entry->socket = el_socket;
+ sock_tag_entry->pid = current->tgid;
+ sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
+ uid);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ pqd_entry = proc_qtu_data_tree_search(
+ &proc_qtu_data_tree, current->tgid);
+ /*
+ * TODO: remove if, and start failing.
+ * At first, we want to catch user-space code that is not
+ * opening the /dev/xt_qtaguid.
+ */
+ if (IS_ERR_OR_NULL(pqd_entry))
+ pr_warn_once(
+ "qtaguid: %s(): "
+ "User space forgot to open /dev/xt_qtaguid? "
+ "pid=%u tgid=%u uid=%u\n", __func__,
+ current->pid, current->tgid,
+ current_fsuid());
+ else
+ list_add(&sock_tag_entry->list,
+ &pqd_entry->sock_tag_list);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+
+ sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
+ atomic64_inc(&qtu_events.sockets_tagged);
+ }
+ spin_unlock_bh(&sock_tag_list_lock);
+ /* We keep the ref to the socket (file) until it is untagged */
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
+ input, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count));
+ return 0;
+
+err_tag_unref_put:
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ tag_ref_entry->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
+err_put:
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
+ input, atomic_long_read(&el_socket->file->f_count) - 1);
+ /* Release the sock_fd that was grabbed by sockfd_lookup(). */
+ sockfd_put(el_socket);
+ return res;
+
+err:
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
+ return res;
+}
+
+static int ctrl_cmd_untag(const char *input)
+{
+ char cmd;
+ int sock_fd = 0;
+ struct socket *el_socket;
+ int res, argc;
+ struct sock_tag *sock_tag_entry;
+ struct tag_ref *tag_ref_entry;
+ struct uid_tag_data *utd_entry;
+ struct proc_qtu_data *pqd_entry;
+
+ argc = sscanf(input, "%c %d", &cmd, &sock_fd);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
+ input, argc, cmd, sock_fd);
+ if (argc < 2) {
+ res = -EINVAL;
+ goto err;
+ }
+ el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
+ if (!el_socket) {
+ pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
+ " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+ input, sock_fd, res, current->pid, current->tgid,
+ current_fsuid());
+ goto err;
+ }
+ CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
+ input, atomic_long_read(&el_socket->file->f_count),
+ el_socket->sk);
+ spin_lock_bh(&sock_tag_list_lock);
+ sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+ if (!sock_tag_entry) {
+ spin_unlock_bh(&sock_tag_list_lock);
+ res = -EINVAL;
+ goto err_put;
+ }
+ /*
+ * The socket already belongs to the current process
+ * so it can do whatever it wants to it.
+ */
+ rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
+
+ tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
+ BUG_ON(!tag_ref_entry);
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ pqd_entry = proc_qtu_data_tree_search(
+ &proc_qtu_data_tree, current->tgid);
+ /*
+ * TODO: remove if, and start failing.
+ * At first, we want to catch user-space code that is not
+ * opening the /dev/xt_qtaguid.
+ */
+ if (IS_ERR_OR_NULL(pqd_entry))
+ pr_warn_once("qtaguid: %s(): "
+ "User space forgot to open /dev/xt_qtaguid? "
+ "pid=%u tgid=%u uid=%u\n", __func__,
+ current->pid, current->tgid, current_fsuid());
+ else
+ list_del(&sock_tag_entry->list);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ /*
+ * We don't free tag_ref from the utd_entry here,
+ * only during a cmd_delete().
+ */
+ tag_ref_entry->num_sock_tags--;
+ spin_unlock_bh(&sock_tag_list_lock);
+ /*
+ * Release the sock_fd that was grabbed at tag time,
+ * and once more for the sockfd_lookup() here.
+ */
+ sockfd_put(sock_tag_entry->socket);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
+ input, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count) - 1);
+ sockfd_put(el_socket);
+
+ kfree(sock_tag_entry);
+ atomic64_inc(&qtu_events.sockets_untagged);
+
+ return 0;
+
+err_put:
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
+ input, atomic_long_read(&el_socket->file->f_count) - 1);
+ /* Release the sock_fd that was grabbed by sockfd_lookup(). */
+ sockfd_put(el_socket);
+ return res;
+
+err:
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
+ return res;
+}
+
+static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
+{
+ char cmd;
+ ssize_t res;
+
+ CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+
+ cmd = input[0];
+ /* Collect params for commands */
+ switch (cmd) {
+ case 'd':
+ res = ctrl_cmd_delete(input);
+ break;
+
+ case 's':
+ res = ctrl_cmd_counter_set(input);
+ break;
+
+ case 't':
+ res = ctrl_cmd_tag(input);
+ break;
+
+ case 'u':
+ res = ctrl_cmd_untag(input);
+ break;
+
+ default:
+ res = -EINVAL;
+ goto err;
+ }
+ if (!res)
+ res = count;
+err:
+ CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
+ return res;
+}
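+
+/*
+ * For reference, the command strings accepted above (as parsed by the
+ * sscanf formats in the ctrl_cmd_*() handlers); bracketed arguments default
+ * as described in the handlers:
+ *   "t <sock_fd> [<acct_tag> [<uid>]]"   tag the socket given by sock_fd
+ *   "u <sock_fd>"                        untag the socket
+ *   "d <acct_tag> [<uid>]"               delete matching tags and stats
+ *   "s <counter_set> <uid>"              select the active counter set
+ */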
+
+#define MAX_QTAGUID_CTRL_INPUT_LEN 255
+static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
+
+ if (unlikely(module_passive))
+ return count;
+
+ if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
+ return -EINVAL;
+
+ if (copy_from_user(input_buf, buffer, count))
+ return -EFAULT;
+
+ input_buf[count] = '\0';
+ return qtaguid_ctrl_parse(input_buf, count);
+}
+
+struct proc_print_info {
+ struct iface_stat *iface_entry;
+ int item_index;
+ tag_t tag; /* tag found by reading to tag_pos */
+ off_t tag_pos;
+ int tag_item_index;
+};
+
+static void pp_stats_header(struct seq_file *m)
+{
+ seq_puts(m,
+ "idx iface acct_tag_hex uid_tag_int cnt_set "
+ "rx_bytes rx_packets "
+ "tx_bytes tx_packets "
+ "rx_tcp_bytes rx_tcp_packets "
+ "rx_udp_bytes rx_udp_packets "
+ "rx_other_bytes rx_other_packets "
+ "tx_tcp_bytes tx_tcp_packets "
+ "tx_udp_bytes tx_udp_packets "
+ "tx_other_bytes tx_other_packets\n");
+}
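+
+/*
+ * A line emitted by pp_stats_line() below then looks like (hypothetical
+ * values, fields in the header order above):
+ *   2 wlan0 0x0 10003 0 10240 12 2048 9 8192 10 2048 2 0 0 1024 7 1024 2 0 0
+ */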
+
+static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
+ int cnt_set)
+{
+ int ret;
+ struct data_counters *cnts;
+ tag_t tag = ts_entry->tn.tag;
+ uid_t stat_uid = get_uid_from_tag(tag);
+ struct proc_print_info *ppi = m->private;
+ /* Detailed tags are not available to everybody */
+ if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) {
+ CT_DEBUG("qtaguid: stats line: "
+ "%s 0x%llx %u: insufficient priv "
+ "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
+ ppi->iface_entry->ifname,
+ get_atag_from_tag(tag), stat_uid,
+ current->pid, current->tgid, current_fsuid(),
+ xt_qtaguid_stats_file->gid);
+ return 0;
+ }
+ ppi->item_index++;
+ cnts = &ts_entry->counters;
+ ret = seq_printf(m, "%d %s 0x%llx %u %u "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu "
+ "%llu %llu\n",
+ ppi->item_index,
+ ppi->iface_entry->ifname,
+ get_atag_from_tag(tag),
+ stat_uid,
+ cnt_set,
+ dc_sum_bytes(cnts, cnt_set, IFS_RX),
+ dc_sum_packets(cnts, cnt_set, IFS_RX),
+ dc_sum_bytes(cnts, cnt_set, IFS_TX),
+ dc_sum_packets(cnts, cnt_set, IFS_TX),
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
+ cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
+ return ret ?: 1;
+}
+
+static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
+{
+ int ret;
+ int counter_set;
+ for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
+ counter_set++) {
+ ret = pp_stats_line(m, ts_entry, counter_set);
+ if (ret < 0)
+ return false;
+ }
+ return true;
+}
+
+static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
+{
+ struct iface_stat *iface_entry;
+
+ if (!ptr)
+ return false;
+
+ list_for_each_entry(iface_entry, &iface_stat_list, list)
+ if (iface_entry == ptr)
+ return true;
+ return false;
+}
+
+static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
+{
+ spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
+ spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ return;
+ }
+ ppi->iface_entry = NULL;
+}
+
+static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct proc_print_info *ppi = m->private;
+ struct tag_stat *ts_entry;
+ struct rb_node *node;
+
+ if (!v) {
+ pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
+ return NULL;
+ }
+
+ (*pos)++;
+
+ if (!ppi->iface_entry || unlikely(module_passive))
+ return NULL;
+
+ if (v == SEQ_START_TOKEN)
+ node = rb_first(&ppi->iface_entry->tag_stat_tree);
+ else
+ node = rb_next(&((struct tag_stat *)v)->tn.node);
+
+ while (!node) {
+ qtaguid_stats_proc_next_iface_entry(ppi);
+ if (!ppi->iface_entry)
+ return NULL;
+ node = rb_first(&ppi->iface_entry->tag_stat_tree);
+ }
+
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ ppi->tag = ts_entry->tn.tag;
+ ppi->tag_pos = *pos;
+ ppi->tag_item_index = ppi->item_index;
+ return ts_entry;
+}
+
+static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct proc_print_info *ppi = m->private;
+ struct tag_stat *ts_entry = NULL;
+
+ spin_lock_bh(&iface_stat_list_lock);
+
+ if (*pos == 0) {
+ ppi->item_index = 1;
+ ppi->tag_pos = 0;
+ if (list_empty(&iface_stat_list)) {
+ ppi->iface_entry = NULL;
+ } else {
+ ppi->iface_entry = list_first_entry(&iface_stat_list,
+ struct iface_stat,
+ list);
+ spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ }
+ return SEQ_START_TOKEN;
+ }
+ if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
+ if (ppi->iface_entry) {
+ pr_err("qtaguid: %s(): iface_entry %p not found\n",
+ __func__, ppi->iface_entry);
+ ppi->iface_entry = NULL;
+ }
+ return NULL;
+ }
+
+ spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
+
+ if (!ppi->tag_pos) {
+ /* seq_read skipped first next call */
+ ts_entry = SEQ_START_TOKEN;
+ } else {
+ ts_entry = tag_stat_tree_search(
+ &ppi->iface_entry->tag_stat_tree, ppi->tag);
+ if (!ts_entry) {
+ pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
+ __func__, ppi->tag);
+ return NULL;
+ }
+ }
+
+ if (*pos == ppi->tag_pos) { /* normal resume */
+ ppi->item_index = ppi->tag_item_index;
+ } else {
+ /* seq_read skipped a next call */
+ *pos = ppi->tag_pos;
+ ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
+ }
+
+ return ts_entry;
+}
+
+static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
+{
+ struct proc_print_info *ppi = m->private;
+ if (ppi->iface_entry)
+ spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
+/*
+ * Procfs reader to get all tag stats using style "1)" as described in
+ * fs/proc/generic.c
+ * Groups all protocols tx/rx bytes.
+ */
+static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
+{
+ struct tag_stat *ts_entry = v;
+
+ if (v == SEQ_START_TOKEN)
+ pp_stats_header(m);
+ else
+ pp_sets(m, ts_entry);
+
+ return 0;
+}
+
+/*------------------------------------------*/
+static int qtudev_open(struct inode *inode, struct file *file)
+{
+ struct uid_tag_data *utd_entry;
+ struct proc_qtu_data *pqd_entry;
+ struct proc_qtu_data *new_pqd_entry;
+ int res;
+ bool utd_entry_found;
+
+ if (unlikely(qtu_proc_handling_passive))
+ return 0;
+
+ DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ spin_lock_bh(&uid_tag_data_tree_lock);
+
+ /* Look for existing uid data, or alloc one. */
+ utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
+ if (IS_ERR_OR_NULL(utd_entry)) {
+ res = PTR_ERR(utd_entry);
+ goto err_unlock;
+ }
+
+ /* Look for existing PID based proc_data */
+ pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
+ current->tgid);
+ if (pqd_entry) {
+ pr_err("qtaguid: qtudev_open(): %u/%u %u "
+ "%s already opened\n",
+ current->pid, current->tgid, current_fsuid(),
+ QTU_DEV_NAME);
+ res = -EBUSY;
+ goto err_unlock_free_utd;
+ }
+
+ new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
+ if (!new_pqd_entry) {
+ pr_err("qtaguid: qtudev_open(): %u/%u %u: "
+ "proc data alloc failed\n",
+ current->pid, current->tgid, current_fsuid());
+ res = -ENOMEM;
+ goto err_unlock_free_utd;
+ }
+ new_pqd_entry->pid = current->tgid;
+ INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
+ new_pqd_entry->parent_tag_data = utd_entry;
+ utd_entry->num_pqd++;
+
+ proc_qtu_data_tree_insert(new_pqd_entry,
+ &proc_qtu_data_tree);
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
+ current_fsuid(), new_pqd_entry);
+ file->private_data = new_pqd_entry;
+ return 0;
+
+err_unlock_free_utd:
+ if (!utd_entry_found) {
+ rb_erase(&utd_entry->node, &uid_tag_data_tree);
+ kfree(utd_entry);
+ }
+err_unlock:
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ return res;
+}
+
+static int qtudev_release(struct inode *inode, struct file *file)
+{
+ struct proc_qtu_data *pqd_entry = file->private_data;
+ struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
+ struct sock_tag *st_entry;
+ struct rb_root st_to_free_tree = RB_ROOT;
+ struct list_head *entry, *next;
+ struct tag_ref *tr;
+
+ if (unlikely(qtu_proc_handling_passive))
+ return 0;
+
+ /*
+ * Do not trust the current->pid, it might just be a kworker cleaning
+ * up after a dead proc.
+ */
+ DR_DEBUG("qtaguid: qtudev_release(): "
+ "pid=%u tgid=%u uid=%u "
+ "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
+ current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
+ pqd_entry, pqd_entry->pid, utd_entry,
+ utd_entry->num_active_tags);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+
+ list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
+ st_entry = list_entry(entry, struct sock_tag, list);
+ DR_DEBUG("qtaguid: %s(): "
+ "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
+ __func__,
+ st_entry, st_entry->sk,
+ current->pid, current->tgid,
+ pqd_entry->parent_tag_data->uid);
+
+ utd_entry = uid_tag_data_tree_search(
+ &uid_tag_data_tree,
+ get_uid_from_tag(st_entry->tag));
+ BUG_ON(IS_ERR_OR_NULL(utd_entry));
+ DR_DEBUG("qtaguid: %s(): "
+ "looking for tag=0x%llx in utd_entry=%p\n", __func__,
+ st_entry->tag, utd_entry);
+ tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
+ st_entry->tag);
+ BUG_ON(!tr);
+ BUG_ON(tr->num_sock_tags <= 0);
+ tr->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tr, utd_entry);
+
+ rb_erase(&st_entry->sock_node, &sock_tag_tree);
+ list_del(&st_entry->list);
+ /* Can't sockfd_put() within spinlock, do it later. */
+ sock_tag_tree_insert(st_entry, &st_to_free_tree);
+
+ /*
+ * Try to free the utd_entry if no other proc_qtu_data is
+ * using it (num_pqd is 0) and it doesn't have active tags
+ * (num_active_tags is 0).
+ */
+ put_utd_entry(utd_entry);
+ }
+
+ rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
+ BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
+ pqd_entry->parent_tag_data->num_pqd--;
+ put_utd_entry(pqd_entry->parent_tag_data);
+ kfree(pqd_entry);
+ file->private_data = NULL;
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+
+ sock_tag_tree_erase(&st_to_free_tree);
+
+ prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
+ current->pid, current->tgid);
+ return 0;
+}
+
+/*------------------------------------------*/
+static const struct file_operations qtudev_fops = {
+ .owner = THIS_MODULE,
+ .open = qtudev_open,
+ .release = qtudev_release,
+};
+
+static struct miscdevice qtu_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = QTU_DEV_NAME,
+ .fops = &qtudev_fops,
+ /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
+};
+
+static const struct seq_operations proc_qtaguid_ctrl_seqops = {
+ .start = qtaguid_ctrl_proc_start,
+ .next = qtaguid_ctrl_proc_next,
+ .stop = qtaguid_ctrl_proc_stop,
+ .show = qtaguid_ctrl_proc_show,
+};
+
+static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
+ sizeof(struct proc_ctrl_print_info));
+}
+
+static const struct file_operations proc_qtaguid_ctrl_fops = {
+ .open = proc_qtaguid_ctrl_open,
+ .read = seq_read,
+ .write = qtaguid_ctrl_proc_write,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static const struct seq_operations proc_qtaguid_stats_seqops = {
+ .start = qtaguid_stats_proc_start,
+ .next = qtaguid_stats_proc_next,
+ .stop = qtaguid_stats_proc_stop,
+ .show = qtaguid_stats_proc_show,
+};
+
+static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
+{
+ return seq_open_private(file, &proc_qtaguid_stats_seqops,
+ sizeof(struct proc_print_info));
+}
+
+static const struct file_operations proc_qtaguid_stats_fops = {
+ .open = proc_qtaguid_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*------------------------------------------*/
+static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
+{
+ int ret;
+ *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
+ if (!*res_procdir) {
+ pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
+ ret = -ENOMEM;
+ goto no_dir;
+ }
+
+ xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
+ *res_procdir,
+ &proc_qtaguid_ctrl_fops,
+ NULL);
+ if (!xt_qtaguid_ctrl_file) {
+ pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
+ " file\n");
+ ret = -ENOMEM;
+ goto no_ctrl_entry;
+ }
+
+ xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
+ *res_procdir,
+ &proc_qtaguid_stats_fops,
+ NULL);
+ if (!xt_qtaguid_stats_file) {
+ pr_err("qtaguid: failed to create xt_qtaguid/stats "
+ "file\n");
+ ret = -ENOMEM;
+ goto no_stats_entry;
+ }
+ /*
+	 * TODO: add support for counter hacking
+ * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
+ */
+ return 0;
+
+no_stats_entry:
+ remove_proc_entry("ctrl", *res_procdir);
+no_ctrl_entry:
+ remove_proc_entry("xt_qtaguid", NULL);
+no_dir:
+ return ret;
+}
+
+static struct xt_match qtaguid_mt_reg __read_mostly = {
+ /*
+ * This module masquerades as the "owner" module so that iptables
+ * tools can deal with it.
+ */
+ .name = "owner",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .match = qtaguid_mt,
+ .matchsize = sizeof(struct xt_qtaguid_match_info),
+ .me = THIS_MODULE,
+};
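+
+/*
+ * Hypothetical userspace usage (not mandated by this file): since the match
+ * registers under the name "owner", rules are written with the standard
+ * owner options, e.g.
+ *   iptables -A OUTPUT -m owner --uid-owner 10003 -j REJECT
+ * while tagging, untagging and counter-set selection go through the
+ * "ctrl" proc file created by qtaguid_proc_register().
+ */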
+
+static int __init qtaguid_mt_init(void)
+{
+ if (qtaguid_proc_register(&xt_qtaguid_procdir)
+ || iface_stat_init(xt_qtaguid_procdir)
+ || xt_register_match(&qtaguid_mt_reg)
+ || misc_register(&qtu_device))
+ return -1;
+ return 0;
+}
+
+/*
+ * TODO: allow unloading of the module.
+ * For now stats are permanent.
+ * Kconfig forces 'y/n' and never an 'm'.
+ */
+
+module_init(qtaguid_mt_init);
+MODULE_AUTHOR("jpa <jpa@google.com>");
+MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_owner");
+MODULE_ALIAS("ip6t_owner");
+MODULE_ALIAS("ipt_qtaguid");
+MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
new file mode 100644
index 000000000000..6dc14a9c6889
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -0,0 +1,352 @@
+/*
+ * Kernel iptables module to track stats for packets based on user tags.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __XT_QTAGUID_INTERNAL_H__
+#define __XT_QTAGUID_INTERNAL_H__
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock_types.h>
+#include <linux/workqueue.h>
+
+/* Iface handling */
+#define IDEBUG_MASK (1<<0)
+/* Iptable Matching. Per packet. */
+#define MDEBUG_MASK (1<<1)
+/* Red-black tree handling. Per packet. */
+#define RDEBUG_MASK (1<<2)
+/* procfs ctrl/stats handling */
+#define CDEBUG_MASK (1<<3)
+/* dev and resource tracking */
+#define DDEBUG_MASK (1<<4)
+
+/* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
+#define DEFAULT_DEBUG_MASK 0
+
+/*
+ * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
+ * All undef: text size ~ 0x3030; all def: ~ 0x4404.
+ */
+#define IDEBUG
+#define MDEBUG
+#define RDEBUG
+#define CDEBUG
+#define DDEBUG
+
+#define MSK_DEBUG(mask, ...) do { \
+ if (unlikely(qtaguid_debug_mask & (mask))) \
+ pr_debug(__VA_ARGS__); \
+ } while (0)
+#ifdef IDEBUG
+#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
+#else
+#define IF_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef MDEBUG
+#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
+#else
+#define MT_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef RDEBUG
+#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
+#else
+#define RB_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef CDEBUG
+#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
+#else
+#define CT_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+#ifdef DDEBUG
+#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
+#else
+#define DR_DEBUG(...) no_printk(__VA_ARGS__)
+#endif
+
+extern uint qtaguid_debug_mask;
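+
+/*
+ * Editor's note (illustrative, not part of the original header): with
+ * qtaguid_debug_mask == (IDEBUG_MASK | CDEBUG_MASK) == 0x09, only IF_DEBUG()
+ * and CT_DEBUG() reach pr_debug(); the other *_DEBUG() macros still compile
+ * in but are filtered out by the mask test in MSK_DEBUG(). The mask is
+ * expected to be runtime-adjustable through the module parameter registered
+ * for it in xt_qtaguid.c (parameter name not shown here, assumption).
+ */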
+
+/*---------------------------------------------------------------------------*/
+/*
+ * Tags:
+ *
+ * They represent what the data usage counters will be tracked against.
+ * By default a tag is just based on the UID.
+ * The UID is used as the base for policing, and cannot be ignored.
+ * So a tag will always at least represent a UID (uid_tag).
+ *
+ * A tag can be augmented with an "accounting tag" which is associated
+ * with a UID.
+ * User space can set the acct_tag portion of the tag which is then used
+ * with sockets: all data belonging to that socket will be counted against the
+ * tag. The policing is then based on the tag's uid_tag portion,
+ * and stats are collected for the acct_tag portion separately.
+ *
+ * There could be
+ * a: {acct_tag=1, uid_tag=10003}
+ * b: {acct_tag=2, uid_tag=10003}
+ * c: {acct_tag=3, uid_tag=10003}
+ * d: {acct_tag=0, uid_tag=10003}
+ * a, b, and c represent tags associated with specific sockets.
+ * d is for the totals for that uid, including all untagged traffic.
+ * Typically d is used with policing/quota rules.
+ *
+ * We want tag_t big enough to distinguish uid_t and acct_tag.
+ * It might become a struct if needed.
+ * Nothing should be using it as an int.
+ */
+typedef uint64_t tag_t; /* Only used via accessors */
+
+#define TAG_UID_MASK 0xFFFFFFFFULL
+#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
+
+static inline int tag_compare(tag_t t1, tag_t t2)
+{
+ return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
+}
+
+static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
+{
+ return acct_tag | uid;
+}
+static inline tag_t make_tag_from_uid(uid_t uid)
+{
+ return uid;
+}
+static inline uid_t get_uid_from_tag(tag_t tag)
+{
+ return tag & TAG_UID_MASK;
+}
+static inline tag_t get_utag_from_tag(tag_t tag)
+{
+ return tag & TAG_UID_MASK;
+}
+static inline tag_t get_atag_from_tag(tag_t tag)
+{
+ return tag & TAG_ACCT_MASK;
+}
+
+static inline bool valid_atag(tag_t tag)
+{
+ return !(tag & TAG_UID_MASK);
+}
+static inline tag_t make_atag_from_value(uint32_t value)
+{
+ return (uint64_t)value << 32;
+}
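+
+/*
+ * Editor's illustration (not part of the original header): how the accessors
+ * above compose. For acct_tag value 2 on uid 10003:
+ *
+ *   make_atag_from_value(2)                      == 0x0000000200000000ULL
+ *   combine_atag_with_uid(0x200000000ULL, 10003) == 0x0000000200002713ULL
+ *   get_uid_from_tag(0x0000000200002713ULL)      == 10003
+ *   get_atag_from_tag(0x0000000200002713ULL)     == 0x0000000200000000ULL
+ *   valid_atag(0x0000000200000000ULL)            == true
+ */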
+/*---------------------------------------------------------------------------*/
+
+/*
+ * Maximum number of socket tags that a UID is allowed to have active.
+ * Multiple processes belonging to the same UID contribute towards this limit.
+ * Special UIDs that can impersonate a UID also contribute (e.g. download
+ * manager, ...)
+ */
+#define DEFAULT_MAX_SOCK_TAGS 1024
+
+/*
+ * For now we only track 2 sets of counters.
+ * The default set is 0.
+ * Userspace can activate another set for a given uid being tracked.
+ */
+#define IFS_MAX_COUNTER_SETS 2
+
+enum ifs_tx_rx {
+ IFS_TX,
+ IFS_RX,
+ IFS_MAX_DIRECTIONS
+};
+
+/* For now, TCP, UDP, the rest */
+enum ifs_proto {
+ IFS_TCP,
+ IFS_UDP,
+ IFS_PROTO_OTHER,
+ IFS_MAX_PROTOS
+};
+
+struct byte_packet_counters {
+ uint64_t bytes;
+ uint64_t packets;
+};
+
+struct data_counters {
+ struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
+};
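+
+/*
+ * Editor's sketch (not part of the original header): a received TCP skb
+ * billed against counter set 0 would be accounted roughly as
+ *
+ *   dc->bpc[0][IFS_RX][IFS_TCP].bytes   += skb->len;
+ *   dc->bpc[0][IFS_RX][IFS_TCP].packets += 1;
+ *
+ * The actual accounting path lives in xt_qtaguid.c.
+ */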
+
+static inline uint64_t dc_sum_bytes(struct data_counters *counters,
+ int set,
+ enum ifs_tx_rx direction)
+{
+ return counters->bpc[set][direction][IFS_TCP].bytes
+ + counters->bpc[set][direction][IFS_UDP].bytes
+ + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
+}
+
+static inline uint64_t dc_sum_packets(struct data_counters *counters,
+ int set,
+ enum ifs_tx_rx direction)
+{
+ return counters->bpc[set][direction][IFS_TCP].packets
+ + counters->bpc[set][direction][IFS_UDP].packets
+ + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
+}
+
+
+/* Generic X based nodes used as a base for rb_tree ops */
+struct tag_node {
+ struct rb_node node;
+ tag_t tag;
+};
+
+struct tag_stat {
+ struct tag_node tn;
+ struct data_counters counters;
+ /*
+ * If this tag is acct_tag based, we need to count against the
+ * matching parent uid_tag.
+ */
+ struct data_counters *parent_counters;
+};
+
+struct iface_stat {
+ struct list_head list; /* in iface_stat_list */
+ char *ifname;
+ bool active;
+ /* net_dev is only valid for active iface_stat */
+ struct net_device *net_dev;
+
+ struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
+ struct data_counters totals_via_skb;
+ /*
+ * We keep the last_known, because some devices reset their counters
+ * just before NETDEV_UP, while some will reset just before
+ * NETDEV_REGISTER (which is more normal).
+ * So now, if the device didn't do a NETDEV_UNREGISTER and we see
+ * its current dev stats smaller than what was previously known, we
+ * assume an UNREGISTER and just use the last_known.
+ */
+ struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
+ /* last_known is usable when last_known_valid is true */
+ bool last_known_valid;
+
+ struct proc_dir_entry *proc_ptr;
+
+ struct rb_root tag_stat_tree;
+ spinlock_t tag_stat_list_lock;
+};
+
+/* This is needed to create proc_dir_entries from atomic context. */
+struct iface_stat_work {
+ struct work_struct iface_work;
+ struct iface_stat *iface_entry;
+};
+
+/*
+ * Track the tag that this socket is transferring data for, and not necessarily
+ * the uid that owns the socket.
+ * This is the tag against which tag_stat.counters will be billed.
+ * These structs need to be looked up by sock and pid.
+ */
+struct sock_tag {
+ struct rb_node sock_node;
+ struct sock *sk; /* Only used as a number, never dereferenced */
+ /* The socket is needed for sockfd_put() */
+ struct socket *socket;
+ /* Used to associate with a given pid */
+ struct list_head list; /* in proc_qtu_data.sock_tag_list */
+ pid_t pid;
+
+ tag_t tag;
+};
+
+struct qtaguid_event_counts {
+ /* Various successful events */
+ atomic64_t sockets_tagged;
+ atomic64_t sockets_untagged;
+ atomic64_t counter_set_changes;
+ atomic64_t delete_cmds;
+ atomic64_t iface_events; /* Number of NETDEV_* events handled */
+
+ atomic64_t match_calls; /* Number of times iptables called mt */
+ /* Number of times iptables called mt from pre or post routing hooks */
+ atomic64_t match_calls_prepost;
+ /*
+ * match_found_sk_*: numbers related to the netfilter matching
+ * function finding a sock for the sk_buff.
+ * Total skbs processed is sum(match_found*).
+ */
+ atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
+ /* The connection tracker had or didn't have the sk. */
+ atomic64_t match_found_sk_in_ct;
+ atomic64_t match_found_no_sk_in_ct;
+ /*
+ * No sk could be found. No apparent owner. Could happen with
+ * unsolicited traffic.
+ */
+ atomic64_t match_no_sk;
+ /*
+ * The file ptr in the sk_socket wasn't there.
+ * This might happen for traffic while the socket is being closed.
+ */
+ atomic64_t match_no_sk_file;
+};
+
+/* Track the active counter set (active_set) for the given tag. */
+struct tag_counter_set {
+ struct tag_node tn;
+ int active_set;
+};
+
+/*----------------------------------------------*/
+/*
+ * The qtu uid data is used to track resources that are created directly or
+ * indirectly by processes (uid tracked).
+ * It is shared by the processes with the same uid.
+ * Some of the resources will be counted to prevent further rogue allocations,
+ * some will need freeing once the owner process (uid) exits.
+ */
+struct uid_tag_data {
+ struct rb_node node;
+ uid_t uid;
+
+ /*
+ * For the uid, how many accounting tags have been set.
+ */
+ int num_active_tags;
+ /* Track the number of proc_qtu_data that reference it */
+ int num_pqd;
+ struct rb_root tag_ref_tree;
+ /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
+};
+
+struct tag_ref {
+ struct tag_node tn;
+
+ /*
+ * This tracks the number of active sockets that have a tag on them
+ * which matches this tag_ref.tn.tag.
+ * A tag ref can live on after the sockets are untagged.
+ * A tag ref can only be removed during a tag delete command.
+ */
+ int num_sock_tags;
+};
+
+struct proc_qtu_data {
+ struct rb_node node;
+ pid_t pid;
+
+ struct uid_tag_data *parent_tag_data;
+
+ /* Tracks the sock_tags that need freeing upon this proc's death */
+ struct list_head sock_tag_list;
+ /* No spinlock_t sock_tag_list_lock; use the global one. */
+};
+
+/*----------------------------------------------*/
+#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
new file mode 100644
index 000000000000..f6a00a3520ed
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -0,0 +1,566 @@
+/*
+ * Pretty printing Support for iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Most of the functions in this file just waste time if DEBUG is not defined.
+ * The matching xt_qtaguid_print.h provides empty static inline funcs if the
+ * needed debug flags are not defined.
+ * Those funcs that fail to allocate memory will panic as there is no need to
+ * hobble along just pretending to do the requested work.
+ */
+
+#define DEBUG
+
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/net.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+
+
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+
+#ifdef DDEBUG
+
+static void _bug_on_err_or_null(void *ptr)
+{
+ if (IS_ERR_OR_NULL(ptr)) {
+ pr_err("qtaguid: kmalloc failed\n");
+ BUG();
+ }
+}
+
+char *pp_tag_t(tag_t *tag)
+{
+ char *res;
+
+ if (!tag)
+ res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
+ else
+ res = kasprintf(GFP_ATOMIC,
+ "tag_t@%p{tag=0x%llx, uid=%u}",
+ tag, *tag, get_uid_from_tag(*tag));
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+ char *res;
+
+ if (!dc)
+ res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
+ else if (showValues)
+ res = kasprintf(
+ GFP_ATOMIC, "data_counters@%p{"
+ "set0{"
+ "rx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}, "
+ "tx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}}, "
+ "set1{"
+ "rx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}, "
+ "tx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}}}",
+ dc,
+ dc->bpc[0][IFS_RX][IFS_TCP].bytes,
+ dc->bpc[0][IFS_RX][IFS_TCP].packets,
+ dc->bpc[0][IFS_RX][IFS_UDP].bytes,
+ dc->bpc[0][IFS_RX][IFS_UDP].packets,
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
+ dc->bpc[0][IFS_TX][IFS_TCP].bytes,
+ dc->bpc[0][IFS_TX][IFS_TCP].packets,
+ dc->bpc[0][IFS_TX][IFS_UDP].bytes,
+ dc->bpc[0][IFS_TX][IFS_UDP].packets,
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
+ dc->bpc[1][IFS_RX][IFS_TCP].bytes,
+ dc->bpc[1][IFS_RX][IFS_TCP].packets,
+ dc->bpc[1][IFS_RX][IFS_UDP].bytes,
+ dc->bpc[1][IFS_RX][IFS_UDP].packets,
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
+ dc->bpc[1][IFS_TX][IFS_TCP].bytes,
+ dc->bpc[1][IFS_TX][IFS_TCP].packets,
+ dc->bpc[1][IFS_TX][IFS_UDP].bytes,
+ dc->bpc[1][IFS_TX][IFS_UDP].packets,
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
+ else
+ res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_tag_node(struct tag_node *tn)
+{
+ char *tag_str;
+ char *res;
+
+ if (!tn) {
+ res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tag_str = pp_tag_t(&tn->tag);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_node@%p{tag=%s}",
+ tn, tag_str);
+ _bug_on_err_or_null(res);
+ kfree(tag_str);
+ return res;
+}
+
+char *pp_tag_ref(struct tag_ref *tr)
+{
+ char *tn_str;
+ char *res;
+
+ if (!tr) {
+ res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tn_str = pp_tag_node(&tr->tn);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_ref@%p{%s, num_sock_tags=%d}",
+ tr, tn_str, tr->num_sock_tags);
+ _bug_on_err_or_null(res);
+ kfree(tn_str);
+ return res;
+}
+
+char *pp_tag_stat(struct tag_stat *ts)
+{
+ char *tn_str;
+ char *counters_str;
+ char *parent_counters_str;
+ char *res;
+
+ if (!ts) {
+ res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tn_str = pp_tag_node(&ts->tn);
+ counters_str = pp_data_counters(&ts->counters, true);
+ parent_counters_str = pp_data_counters(ts->parent_counters, false);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
+ ts, tn_str, counters_str, parent_counters_str);
+ _bug_on_err_or_null(res);
+ kfree(tn_str);
+ kfree(counters_str);
+ kfree(parent_counters_str);
+ return res;
+}
+
+char *pp_iface_stat(struct iface_stat *is)
+{
+ char *res;
+ if (!is) {
+ res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
+ } else {
+ struct data_counters *cnts = &is->totals_via_skb;
+ res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
+ "list=list_head{...}, "
+ "ifname=%s, "
+ "total_dev={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "total_skb={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "last_known_valid=%d, "
+ "last_known={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "active=%d, "
+ "net_dev=%p, "
+ "proc_ptr=%p, "
+ "tag_stat_tree=rb_root{...}}",
+ is,
+ is->ifname,
+ is->totals_via_dev[IFS_RX].bytes,
+ is->totals_via_dev[IFS_RX].packets,
+ is->totals_via_dev[IFS_TX].bytes,
+ is->totals_via_dev[IFS_TX].packets,
+ dc_sum_bytes(cnts, 0, IFS_RX),
+ dc_sum_packets(cnts, 0, IFS_RX),
+ dc_sum_bytes(cnts, 0, IFS_TX),
+ dc_sum_packets(cnts, 0, IFS_TX),
+ is->last_known_valid,
+ is->last_known[IFS_RX].bytes,
+ is->last_known[IFS_RX].packets,
+ is->last_known[IFS_TX].bytes,
+ is->last_known[IFS_TX].packets,
+ is->active,
+ is->net_dev,
+ is->proc_ptr);
+ }
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_sock_tag(struct sock_tag *st)
+{
+ char *tag_str;
+ char *res;
+
+ if (!st) {
+ res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ tag_str = pp_tag_t(&st->tag);
+ res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
+ "sock_node=rb_node{...}, "
+ "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
+ "pid=%u, tag=%s}",
+ st, st->sk, st->socket, atomic_long_read(
+ &st->socket->file->f_count),
+ st->pid, tag_str);
+ _bug_on_err_or_null(res);
+ kfree(tag_str);
+ return res;
+}
+
+char *pp_uid_tag_data(struct uid_tag_data *utd)
+{
+ char *res;
+
+ if (!utd)
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
+ else
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
+ "uid=%u, num_active_acct_tags=%d, "
+ "num_pqd=%d, "
+ "tag_node_tree=rb_root{...}, "
+ "proc_qtu_data_tree=rb_root{...}}",
+ utd, utd->uid,
+ utd->num_active_tags, utd->num_pqd);
+ _bug_on_err_or_null(res);
+ return res;
+}
+
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+ char *parent_tag_data_str;
+ char *res;
+
+ if (!pqd) {
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
+ _bug_on_err_or_null(res);
+ return res;
+ }
+ parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
+ "node=rb_node{...}, pid=%u, "
+ "parent_tag_data=%s, "
+ "sock_tag_list=list_head{...}}",
+ pqd, pqd->pid, parent_tag_data_str
+ );
+ _bug_on_err_or_null(res);
+ kfree(parent_tag_data_str);
+ return res;
+}
+
+/*------------------------------------------*/
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree)
+{
+ struct rb_node *node;
+ struct sock_tag *sock_tag_entry;
+ char *str;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(sock_tag_tree)) {
+ str = "sock_tag_tree=rb_root{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "sock_tag_tree=rb_root{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(sock_tag_tree);
+ node;
+ node = rb_next(node)) {
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ str = pp_sock_tag(sock_tag_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list)
+{
+ struct sock_tag *sock_tag_entry;
+ char *str;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (list_empty(sock_tag_list)) {
+ str = "sock_tag_list=list_head{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "sock_tag_list=list_head{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
+ str = pp_sock_tag(sock_tag_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct proc_qtu_data *proc_qtu_data_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
+ str = "proc_qtu_data_tree=rb_root{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "proc_qtu_data_tree=rb_root{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(proc_qtu_data_tree);
+ node;
+ node = rb_next(node)) {
+ proc_qtu_data_entry = rb_entry(node,
+ struct proc_qtu_data,
+ node);
+ str = pp_proc_qtu_data(proc_qtu_data_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ indent_level++;
+ prdebug_sock_tag_list(indent_level,
+ &proc_qtu_data_entry->sock_tag_list);
+ indent_level--;
+
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct tag_ref *tag_ref_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(tag_ref_tree)) {
+ str = "tag_ref_tree{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "tag_ref_tree{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(tag_ref_tree);
+ node;
+ node = rb_next(node)) {
+ tag_ref_entry = rb_entry(node,
+ struct tag_ref,
+ tn.node);
+ str = pp_tag_ref(tag_ref_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct uid_tag_data *uid_tag_data_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
+ str = "uid_tag_data_tree=rb_root{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "uid_tag_data_tree=rb_root{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(uid_tag_data_tree);
+ node;
+ node = rb_next(node)) {
+ uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
+ node);
+ str = pp_uid_tag_data(uid_tag_data_entry);
+ pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
+ indent_level++;
+ prdebug_tag_ref_tree(indent_level,
+ &uid_tag_data_entry->tag_ref_tree);
+ indent_level--;
+ }
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct tag_stat *ts_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (RB_EMPTY_ROOT(tag_stat_tree)) {
+ str = "tag_stat_tree{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "tag_stat_tree{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(tag_stat_tree);
+ node;
+ node = rb_next(node)) {
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ str = pp_tag_stat(ts_entry);
+ pr_debug("%*d: %s\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list)
+{
+ char *str;
+ struct iface_stat *iface_entry;
+
+ if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
+ return;
+
+ if (list_empty(iface_stat_list)) {
+ str = "iface_stat_list=list_head{}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ return;
+ }
+
+ str = "iface_stat_list=list_head{";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ list_for_each_entry(iface_entry, iface_stat_list, list) {
+ str = pp_iface_stat(iface_entry);
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+ kfree(str);
+
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+ if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
+ indent_level++;
+ prdebug_tag_stat_tree(indent_level,
+ &iface_entry->tag_stat_tree);
+ indent_level--;
+ }
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ }
+ indent_level--;
+ str = "}";
+ pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+#endif /* ifdef DDEBUG */
+/*------------------------------------------*/
+static const char * const netdev_event_strings[] = {
+ "netdev_unknown",
+ "NETDEV_UP",
+ "NETDEV_DOWN",
+ "NETDEV_REBOOT",
+ "NETDEV_CHANGE",
+ "NETDEV_REGISTER",
+ "NETDEV_UNREGISTER",
+ "NETDEV_CHANGEMTU",
+ "NETDEV_CHANGEADDR",
+ "NETDEV_GOING_DOWN",
+ "NETDEV_CHANGENAME",
+ "NETDEV_FEAT_CHANGE",
+ "NETDEV_BONDING_FAILOVER",
+ "NETDEV_PRE_UP",
+ "NETDEV_PRE_TYPE_CHANGE",
+ "NETDEV_POST_TYPE_CHANGE",
+ "NETDEV_POST_INIT",
+ "NETDEV_UNREGISTER_BATCH",
+ "NETDEV_RELEASE",
+ "NETDEV_NOTIFY_PEERS",
+ "NETDEV_JOIN",
+};
+
+const char *netdev_evt_str(int netdev_event)
+{
+ if (netdev_event < 0
+ || netdev_event >= ARRAY_SIZE(netdev_event_strings))
+ return "bad event num";
+ return netdev_event_strings[netdev_event];
+}
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
new file mode 100644
index 000000000000..b63871a0be5a
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.h
@@ -0,0 +1,120 @@
+/*
+ * Pretty printing Support for iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __XT_QTAGUID_PRINT_H__
+#define __XT_QTAGUID_PRINT_H__
+
+#include "xt_qtaguid_internal.h"
+
+#ifdef DDEBUG
+
+char *pp_tag_t(tag_t *tag);
+char *pp_data_counters(struct data_counters *dc, bool showValues);
+char *pp_tag_node(struct tag_node *tn);
+char *pp_tag_ref(struct tag_ref *tr);
+char *pp_tag_stat(struct tag_stat *ts);
+char *pp_iface_stat(struct iface_stat *is);
+char *pp_sock_tag(struct sock_tag *st);
+char *pp_uid_tag_data(struct uid_tag_data *qtd);
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
+
+/*------------------------------------------*/
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list);
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree);
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree);
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree);
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree);
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list);
+
+#else
+
+/*------------------------------------------*/
+static inline char *pp_tag_t(tag_t *tag)
+{
+ return NULL;
+}
+static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+ return NULL;
+}
+static inline char *pp_tag_node(struct tag_node *tn)
+{
+ return NULL;
+}
+static inline char *pp_tag_ref(struct tag_ref *tr)
+{
+ return NULL;
+}
+static inline char *pp_tag_stat(struct tag_stat *ts)
+{
+ return NULL;
+}
+static inline char *pp_iface_stat(struct iface_stat *is)
+{
+ return NULL;
+}
+static inline char *pp_sock_tag(struct sock_tag *st)
+{
+ return NULL;
+}
+static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
+{
+ return NULL;
+}
+static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+ return NULL;
+}
+
+/*------------------------------------------*/
+static inline
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list)
+{
+}
+static inline
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree)
+{
+}
+static inline
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree)
+{
+}
+static inline
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+}
+static inline
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree)
+{
+}
+static inline
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree)
+{
+}
+static inline
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list)
+{
+}
+#endif
+/*------------------------------------------*/
+const char *netdev_evt_str(int netdev_event);
+#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 000000000000..4328562572f6
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,385 @@
+/*
+ * xt_quota2 - enhanced xt_quota that can count upwards and in packets
+ * as a minimal accounting match.
+ * by Jan Engelhardt <jengelh@medozas.de>, 2008
+ *
+ * Originally based on xt_quota.c:
+ * netfilter module to enforce network quotas
+ * Sam Johnston <samj@samj.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License; either
+ * version 2 of the License, as published by the Free Software Foundation.
+ */
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_quota2.h>
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#endif
+
+/**
+ * @lock: lock to protect quota writers from each other
+ */
+struct xt_quota_counter {
+ u_int64_t quota;
+ spinlock_t lock;
+ struct list_head list;
+ atomic_t ref;
+ char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
+ struct proc_dir_entry *procfs_entry;
+};
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+/* Harald's favorite number +1 :D From ipt_ULOG.c */
+static int qlog_nl_event = 112;
+module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(event_num,
+ "Event number for NETLINK_NFLOG message. 0 disables log."
+ "111 is what ipt_ULOG uses.");
+static struct sock *nflognl;
+#endif
+
+static LIST_HEAD(counter_list);
+static DEFINE_SPINLOCK(counter_list_lock);
+
+static struct proc_dir_entry *proc_xt_quota;
+static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
+static unsigned int quota_list_uid = 0;
+static unsigned int quota_list_gid = 0;
+module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
+module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR);
+module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR);
+
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+static void quota2_log(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix)
+{
+ ulog_packet_msg_t *pm;
+ struct sk_buff *log_skb;
+ size_t size;
+ struct nlmsghdr *nlh;
+
+ if (!qlog_nl_event)
+ return;
+
+ size = NLMSG_SPACE(sizeof(*pm));
+ size = max(size, (size_t)NLMSG_GOODSIZE);
+ log_skb = alloc_skb(size, GFP_ATOMIC);
+ if (!log_skb) {
+ pr_err("xt_quota2: cannot alloc skb for logging\n");
+ return;
+ }
+
+ nlh = nlmsg_put(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
+ sizeof(*pm), 0);
+ if (!nlh) {
+ pr_err("xt_quota2: nlmsg_put failed\n");
+ kfree_skb(log_skb);
+ return;
+ }
+ pm = nlmsg_data(nlh);
+ if (skb->tstamp.tv64 == 0)
+ __net_timestamp((struct sk_buff *)skb);
+ pm->data_len = 0;
+ pm->hook = hooknum;
+ if (prefix != NULL)
+ strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
+ else
+ *(pm->prefix) = '\0';
+ if (in)
+ strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
+ else
+ pm->indev_name[0] = '\0';
+
+ if (out)
+ strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
+ else
+ pm->outdev_name[0] = '\0';
+
+ NETLINK_CB(log_skb).dst_group = 1;
+ pr_debug("throwing 1 packets to netlink group 1\n");
+ netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
+}
+#else
+static void quota2_log(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix)
+{
+}
+#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
+
+static ssize_t quota_proc_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+ char tmp[24];
+ size_t tmp_size;
+
+ spin_lock_bh(&e->lock);
+ tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", e->quota);
+ spin_unlock_bh(&e->lock);
+ return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
+}
+
+static ssize_t quota_proc_write(struct file *file, const char __user *input,
+ size_t size, loff_t *ppos)
+{
+ struct xt_quota_counter *e = PDE_DATA(file_inode(file));
+ char buf[sizeof("18446744073709551616")];
+
+ if (size > sizeof(buf))
+ size = sizeof(buf);
+ if (copy_from_user(buf, input, size) != 0)
+ return -EFAULT;
+ buf[sizeof(buf)-1] = '\0';
+
+ spin_lock_bh(&e->lock);
+ e->quota = simple_strtoull(buf, NULL, 0);
+ spin_unlock_bh(&e->lock);
+ return size;
+}
+
+static const struct file_operations q2_counter_fops = {
+ .read = quota_proc_read,
+ .write = quota_proc_write,
+ .llseek = default_llseek,
+};
+
+static struct xt_quota_counter *
+q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
+{
+ struct xt_quota_counter *e;
+ unsigned int size;
+
+ /* Do not need all the procfs things for anonymous counters. */
+ size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
+ e = kmalloc(size, GFP_KERNEL);
+ if (e == NULL)
+ return NULL;
+
+ e->quota = q->quota;
+ spin_lock_init(&e->lock);
+ if (!anon) {
+ INIT_LIST_HEAD(&e->list);
+ atomic_set(&e->ref, 1);
+ strlcpy(e->name, q->name, sizeof(e->name));
+ }
+ return e;
+}
+
+/**
+ * q2_get_counter - get ref to counter or create new
+ * @name: name of counter
+ */
+static struct xt_quota_counter *
+q2_get_counter(const struct xt_quota_mtinfo2 *q)
+{
+ struct proc_dir_entry *p;
+ struct xt_quota_counter *e = NULL;
+ struct xt_quota_counter *new_e;
+
+ if (*q->name == '\0')
+ return q2_new_counter(q, true);
+
+ /* No need to hold a lock while getting a new counter */
+ new_e = q2_new_counter(q, false);
+ if (new_e == NULL)
+ goto out;
+
+ spin_lock_bh(&counter_list_lock);
+ list_for_each_entry(e, &counter_list, list)
+ if (strcmp(e->name, q->name) == 0) {
+ atomic_inc(&e->ref);
+ spin_unlock_bh(&counter_list_lock);
+ kfree(new_e);
+ pr_debug("xt_quota2: old counter name=%s", e->name);
+ return e;
+ }
+ e = new_e;
+ pr_debug("xt_quota2: new_counter name=%s", e->name);
+ list_add_tail(&e->list, &counter_list);
+ /* The entry having a refcount of 1 is not directly destructible.
+ * This func has not yet returned the new entry, thus iptables
+ * has no references for destroying this entry.
+ * For another rule to destroy it, this func would first need to be
+ * re-invoked and acquire a new ref for the same named quota.
+ * Nobody will access the e->procfs_entry either.
+ * So release the lock. */
+ spin_unlock_bh(&counter_list_lock);
+
+ /* proc_create_data() is not spin_lock happy */
+ p = e->procfs_entry = proc_create_data(e->name, quota_list_perms,
+ proc_xt_quota, &q2_counter_fops, e);
+
+ if (IS_ERR_OR_NULL(p)) {
+ spin_lock_bh(&counter_list_lock);
+ list_del(&e->list);
+ spin_unlock_bh(&counter_list_lock);
+ goto out;
+ }
+ proc_set_user(p, quota_list_uid, quota_list_gid);
+ return e;
+
+ out:
+ kfree(e);
+ return NULL;
+}
+
+static int quota_mt2_check(const struct xt_mtchk_param *par)
+{
+ struct xt_quota_mtinfo2 *q = par->matchinfo;
+
+ pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
+
+ if (q->flags & ~XT_QUOTA_MASK)
+ return -EINVAL;
+
+ q->name[sizeof(q->name)-1] = '\0';
+ if (*q->name == '.' || strchr(q->name, '/') != NULL) {
+ printk(KERN_ERR "xt_quota.3: illegal name\n");
+ return -EINVAL;
+ }
+
+ q->master = q2_get_counter(q);
+ if (q->master == NULL) {
+ printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
+{
+ struct xt_quota_mtinfo2 *q = par->matchinfo;
+ struct xt_quota_counter *e = q->master;
+
+ if (*q->name == '\0') {
+ kfree(e);
+ return;
+ }
+
+ spin_lock_bh(&counter_list_lock);
+ if (!atomic_dec_and_test(&e->ref)) {
+ spin_unlock_bh(&counter_list_lock);
+ return;
+ }
+
+ list_del(&e->list);
+ remove_proc_entry(e->name, proc_xt_quota);
+ spin_unlock_bh(&counter_list_lock);
+ kfree(e);
+}
+
+static bool
+quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
+ struct xt_quota_counter *e = q->master;
+ bool ret = q->flags & XT_QUOTA_INVERT;
+
+ spin_lock_bh(&e->lock);
+ if (q->flags & XT_QUOTA_GROW) {
+ /*
+ * While no_change is pointless in "grow" mode, we will
+ * implement it here simply to have a consistent behavior.
+ */
+ if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
+ e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+ }
+ ret = true;
+ } else {
+ if (e->quota >= skb->len) {
+ if (!(q->flags & XT_QUOTA_NO_CHANGE))
+ e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
+ ret = !ret;
+ } else {
+ /* We are transitioning, log that fact. */
+ if (e->quota) {
+ quota2_log(par->hooknum,
+ skb,
+ par->in,
+ par->out,
+ q->name);
+ }
+ /* we do not allow even small packets from now on */
+ e->quota = 0;
+ }
+ }
+ spin_unlock_bh(&e->lock);
+ return ret;
+}
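+
+/*
+ * Usage sketch (editor's illustration; the option names assume the matching
+ * xtables-addons userspace library for quota2):
+ *
+ *   iptables -A OUTPUT -m quota2 --name q0 --quota 1000000 -j ACCEPT
+ *
+ * creates /proc/net/xt_quota/q0 holding the remaining byte budget, which
+ * quota_mt2() above decrements as packets match; writing a new value to that
+ * file (handled by quota_proc_write()) refills the counter. With --grow the
+ * counter counts upwards instead and the match always succeeds.
+ */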
+
+static struct xt_match quota_mt2_reg[] __read_mostly = {
+ {
+ .name = "quota2",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .checkentry = quota_mt2_check,
+ .match = quota_mt2,
+ .destroy = quota_mt2_destroy,
+ .matchsize = sizeof(struct xt_quota_mtinfo2),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "quota2",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .checkentry = quota_mt2_check,
+ .match = quota_mt2,
+ .destroy = quota_mt2_destroy,
+ .matchsize = sizeof(struct xt_quota_mtinfo2),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init quota_mt2_init(void)
+{
+ int ret;
+ pr_debug("xt_quota2: init()");
+
+#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
+ nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, NULL);
+ if (!nflognl)
+ return -ENOMEM;
+#endif
+
+ proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
+ if (proc_xt_quota == NULL)
+ return -EACCES;
+
+ ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+ if (ret < 0)
+ remove_proc_entry("xt_quota", init_net.proc_net);
+ pr_debug("xt_quota2: init() %d", ret);
+ return ret;
+}
+
+static void __exit quota_mt2_exit(void)
+{
+ xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
+ remove_proc_entry("xt_quota", init_net.proc_net);
+}
+
+module_init(quota_mt2_init);
+module_exit(quota_mt2_exit);
+MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
+MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_quota2");
+MODULE_ALIAS("ip6t_quota2");
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 63b2bdb59e95..030f33cd9ee9 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,7 +35,7 @@
#include <net/netfilter/nf_conntrack.h>
#endif
-static void
+void
xt_socket_put_sk(struct sock *sk)
{
if (sk->sk_state == TCP_TIME_WAIT)
@@ -43,6 +43,7 @@ xt_socket_put_sk(struct sock *sk)
else
sock_put(sk);
}
+EXPORT_SYMBOL(xt_socket_put_sk);
static int
extract_icmp4_fields(const struct sk_buff *skb,
@@ -101,9 +102,8 @@ extract_icmp4_fields(const struct sk_buff *skb,
return 0;
}
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
- const struct xt_socket_mtinfo1 *info)
+struct sock*
+xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr _hdr, *hp = NULL;
@@ -120,7 +120,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
protocol = iph->protocol;
saddr = iph->saddr;
@@ -131,9 +131,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
&sport, &dport))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
#ifdef XT_SOCKET_HAVE_CONNTRACK
@@ -157,6 +157,23 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
+
+ pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
+ protocol, &saddr, ntohs(sport),
+ &daddr, ntohs(dport),
+ &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
+
+ return sk;
+}
+EXPORT_SYMBOL(xt_socket_get4_sk);
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sock *sk;
+
+ sk = xt_socket_get4_sk(skb, par);
if (sk != NULL) {
bool wildcard;
bool transparent = true;
@@ -179,11 +196,6 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = NULL;
}
- pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
- protocol, &saddr, ntohs(sport),
- &daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
return (sk != NULL);
}
@@ -255,8 +267,8 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 0;
}
-static bool
-socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+struct sock*
+xt_socket_get6_sk(const struct sk_buff *skb, struct xt_action_param *par)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct udphdr _hdr, *hp = NULL;
@@ -264,7 +276,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
int thoff = 0, uninitialized_var(tproto);
- const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
@@ -276,7 +287,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
hp = skb_header_pointer(skb, thoff,
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
saddr = &iph->saddr;
sport = hp->source;
@@ -286,13 +297,30 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
} else if (tproto == IPPROTO_ICMPV6) {
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
+ pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
+ "(orig %pI6:%hu) sock %p\n",
+ tproto, saddr, ntohs(sport),
+ daddr, ntohs(dport),
+ &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
+ return sk;
+}
+EXPORT_SYMBOL(xt_socket_get6_sk);
+
+static bool
+socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct sock *sk;
+ const struct xt_socket_mtinfo1 *info;
+
+ info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ sk = xt_socket_get6_sk(skb, par);
if (sk != NULL) {
bool wildcard;
bool transparent = true;
@@ -315,12 +343,6 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
- "(orig %pI6:%hu) sock %p\n",
- tproto, saddr, ntohs(sport),
- daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
return (sk != NULL);
}
#endif
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 78efe895b663..8e12c8a2b82b 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -10,6 +10,11 @@ menuconfig RFKILL
To compile this driver as a module, choose M here: the
module will be called rfkill.
+config RFKILL_PM
+ bool "Power off on suspend"
+ depends on RFKILL && PM
+ default y
+
# LED trigger support
config RFKILL_LEDS
bool
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1cec5e4f3a5e..c099b4fffd93 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -792,6 +792,7 @@ void rfkill_pause_polling(struct rfkill *rfkill)
}
EXPORT_SYMBOL(rfkill_pause_polling);
+#ifdef CONFIG_RFKILL_PM
void rfkill_resume_polling(struct rfkill *rfkill)
{
BUG_ON(!rfkill);
@@ -826,14 +827,17 @@ static int rfkill_resume(struct device *dev)
return 0;
}
+#endif
static struct class rfkill_class = {
.name = "rfkill",
.dev_release = rfkill_release,
.dev_attrs = rfkill_dev_attrs,
.dev_uevent = rfkill_dev_uevent,
+#ifdef CONFIG_RFKILL_PM
.suspend = rfkill_suspend,
.resume = rfkill_resume,
+#endif
};
bool rfkill_blocked(struct rfkill *rfkill)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5356b120dbf8..77d251e02593 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -254,7 +254,7 @@ static int rpc_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- freezable_schedule();
+ freezable_schedule_unsafe();
return 0;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 75e198d029d2..c80c107139f1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,7 @@
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
+#include <linux/freezer.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
@@ -1896,7 +1897,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
unix_state_unlock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 16d08b399210..4c602d100480 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -166,3 +166,14 @@ config LIB80211_DEBUG
from lib80211.
If unsure, say N.
+
+config CFG80211_ALLOW_RECONNECT
+ bool "Allow reconnect while already connected"
+ depends on CFG80211
+ default n
+ help
+ The cfg80211 stack does not allow connecting while already
+ connected. This option allows making a new connection in that case.
+
+ Select this option ONLY for wlan drivers that are specifically
+ built for such purposes.
diff --git a/net/wireless/core.h b/net/wireless/core.h
index fd35dae547c4..15b4bb7ac046 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -77,9 +77,7 @@ struct cfg80211_registered_device {
struct mutex sched_scan_mtx;
-#ifdef CONFIG_NL80211_TESTMODE
- struct genl_info *testmode_info;
-#endif
+ struct genl_info *cur_cmd_info;
struct work_struct conn_work;
struct work_struct event_work;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 90a0e552cb32..e63d881d31b7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -378,6 +378,12 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_MDID] = { .type = NLA_U16 },
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
+ [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY },
+ [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY },
+ [NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 },
+ [NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 },
+ [NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
};
/* policy for the key attributes */
@@ -1520,6 +1526,39 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
dev->wiphy.max_acl_mac_addrs))
goto nla_put_failure;
+ if (dev->wiphy.n_vendor_commands) {
+ const struct nl80211_vendor_cmd_info *info;
+ struct nlattr *nested;
+
+ nested = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA);
+ if (!nested)
+ goto nla_put_failure;
+
+ for (i = 0; i < dev->wiphy.n_vendor_commands; i++) {
+ info = &dev->wiphy.vendor_commands[i].info;
+ if (nla_put(msg, i + 1, sizeof(*info), info))
+ goto nla_put_failure;
+ }
+ nla_nest_end(msg, nested);
+ }
+
+ if (dev->wiphy.n_vendor_events) {
+ const struct nl80211_vendor_cmd_info *info;
+ struct nlattr *nested;
+
+ nested = nla_nest_start(msg,
+ NL80211_ATTR_VENDOR_EVENTS);
+ if (!nested)
+ goto nla_put_failure;
+
+ for (i = 0; i < dev->wiphy.n_vendor_events; i++) {
+ info = &dev->wiphy.vendor_events[i];
+ if (nla_put(msg, i + 1, sizeof(*info), info))
+ goto nla_put_failure;
+ }
+ nla_nest_end(msg, nested);
+ }
+
/*
* Any information below this point is only available to
* applications that can deal with it being split. This
@@ -2638,8 +2677,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_NEW_KEY);
- if (IS_ERR(hdr))
- return PTR_ERR(hdr);
+ if (!hdr)
+ goto nla_put_failure;
cookie.msg = msg;
cookie.idx = key_idx;
@@ -6402,12 +6441,62 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static struct sk_buff *
+__cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev,
+ int approxlen, u32 portid, u32 seq,
+ enum nl80211_commands cmd,
+ enum nl80211_attrs attr,
+ const struct nl80211_vendor_cmd_info *info,
+ gfp_t gfp)
+{
+ struct sk_buff *skb;
+ void *hdr;
+ struct nlattr *data;
+
+ skb = nlmsg_new(approxlen + 100, gfp);
+ if (!skb)
+ return NULL;
+
+ hdr = nl80211hdr_put(skb, portid, seq, 0, cmd);
+ if (!hdr) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+ goto nla_put_failure;
+
+ if (info) {
+ if (nla_put_u32(skb, NL80211_ATTR_VENDOR_ID,
+ info->vendor_id))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, NL80211_ATTR_VENDOR_SUBCMD,
+ info->subcmd))
+ goto nla_put_failure;
+ }
+
+ data = nla_nest_start(skb, attr);
+
+ ((void **)skb->cb)[0] = rdev;
+ ((void **)skb->cb)[1] = hdr;
+ ((void **)skb->cb)[2] = data;
+
+ return skb;
+
+ nla_put_failure:
+ kfree_skb(skb);
+ return NULL;
+}
#ifdef CONFIG_NL80211_TESTMODE
static struct genl_multicast_group nl80211_testmode_mcgrp = {
.name = "testmode",
};
+static struct genl_multicast_group nl80211_vendor_mcgrp = {
+ .name = "vendor",
+};
+
static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6418,11 +6507,11 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
err = -EOPNOTSUPP;
if (rdev->ops->testmode_cmd) {
- rdev->testmode_info = info;
+ rdev->cur_cmd_info = info;
err = rdev_testmode_cmd(rdev,
nla_data(info->attrs[NL80211_ATTR_TESTDATA]),
nla_len(info->attrs[NL80211_ATTR_TESTDATA]));
- rdev->testmode_info = NULL;
+ rdev->cur_cmd_info = NULL;
}
return err;
@@ -6491,6 +6580,9 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
NL80211_CMD_TESTMODE);
struct nlattr *tmdata;
+ if (!hdr)
+ break;
+
if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
genlmsg_cancel(skb, hdr);
break;
@@ -6523,84 +6615,37 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
return err;
}
-static struct sk_buff *
-__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev,
- int approxlen, u32 portid, u32 seq, gfp_t gfp)
-{
- struct sk_buff *skb;
- void *hdr;
- struct nlattr *data;
-
- skb = nlmsg_new(approxlen + 100, gfp);
- if (!skb)
- return NULL;
-
- hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE);
- if (!hdr) {
- kfree_skb(skb);
- return NULL;
- }
-
- if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
- goto nla_put_failure;
- data = nla_nest_start(skb, NL80211_ATTR_TESTDATA);
-
- ((void **)skb->cb)[0] = rdev;
- ((void **)skb->cb)[1] = hdr;
- ((void **)skb->cb)[2] = data;
-
- return skb;
-
- nla_put_failure:
- kfree_skb(skb);
- return NULL;
-}
-
-struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy,
- int approxlen)
+struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
+ enum nl80211_commands cmd,
+ enum nl80211_attrs attr,
+ int vendor_event_idx,
+ int approxlen, gfp_t gfp)
{
struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+ const struct nl80211_vendor_cmd_info *info;
- if (WARN_ON(!rdev->testmode_info))
+ switch (cmd) {
+ case NL80211_CMD_TESTMODE:
+ if (WARN_ON(vendor_event_idx != -1))
+ return NULL;
+ info = NULL;
+ break;
+ case NL80211_CMD_VENDOR:
+ if (WARN_ON(vendor_event_idx < 0 ||
+ vendor_event_idx >= wiphy->n_vendor_events))
+ return NULL;
+ info = &wiphy->vendor_events[vendor_event_idx];
+ break;
+ default:
+ WARN_ON(1);
return NULL;
-
- return __cfg80211_testmode_alloc_skb(rdev, approxlen,
- rdev->testmode_info->snd_portid,
- rdev->testmode_info->snd_seq,
- GFP_KERNEL);
-}
-EXPORT_SYMBOL(cfg80211_testmode_alloc_reply_skb);
-
-int cfg80211_testmode_reply(struct sk_buff *skb)
-{
- struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
- void *hdr = ((void **)skb->cb)[1];
- struct nlattr *data = ((void **)skb->cb)[2];
-
- /* clear CB data for netlink core to own from now on */
- memset(skb->cb, 0, sizeof(skb->cb));
-
- if (WARN_ON(!rdev->testmode_info)) {
- kfree_skb(skb);
- return -EINVAL;
}
-
- nla_nest_end(skb, data);
- genlmsg_end(skb, hdr);
- return genlmsg_reply(skb, rdev->testmode_info);
-}
-EXPORT_SYMBOL(cfg80211_testmode_reply);
-
-struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy,
- int approxlen, gfp_t gfp)
-{
- struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
- return __cfg80211_testmode_alloc_skb(rdev, approxlen, 0, 0, gfp);
+ return __cfg80211_alloc_vendor_skb(rdev, approxlen, 0, 0,
+ cmd, attr, info, gfp);
}
-EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb);
+EXPORT_SYMBOL(__cfg80211_alloc_event_skb);
-void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
+void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
{
struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
void *hdr = ((void **)skb->cb)[1];
@@ -6611,10 +6656,15 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
nla_nest_end(skb, data);
genlmsg_end(skb, hdr);
- genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
- nl80211_testmode_mcgrp.id, gfp);
+
+ if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
+ genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
+ nl80211_vendor_mcgrp.id, gfp);
+ else
+ genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
+ nl80211_testmode_mcgrp.id, gfp);
}
-EXPORT_SYMBOL(cfg80211_testmode_event);
+EXPORT_SYMBOL(__cfg80211_send_event_skb);
#endif
static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
@@ -6935,9 +6985,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_REMAIN_ON_CHANNEL);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
@@ -7224,9 +7273,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_FRAME);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
}
@@ -8087,9 +8135,8 @@ static int nl80211_probe_client(struct sk_buff *skb,
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_PROBE_CLIENT);
-
- if (IS_ERR(hdr)) {
- err = PTR_ERR(hdr);
+ if (!hdr) {
+ err = -ENOBUFS;
goto free_msg;
}
@@ -8306,6 +8353,108 @@ static int nl80211_crit_protocol_stop(struct sk_buff *skb,
return 0;
}
+static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev =
+ __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs);
+ int i, err;
+ u32 vid, subcmd;
+
+ if (!rdev || !rdev->wiphy.vendor_commands)
+ return -EOPNOTSUPP;
+
+ if (IS_ERR(wdev)) {
+ err = PTR_ERR(wdev);
+ if (err != -EINVAL)
+ return err;
+ wdev = NULL;
+ } else if (wdev->wiphy != &rdev->wiphy) {
+ return -EINVAL;
+ }
+
+ if (!info->attrs[NL80211_ATTR_VENDOR_ID] ||
+ !info->attrs[NL80211_ATTR_VENDOR_SUBCMD])
+ return -EINVAL;
+
+ vid = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_ID]);
+ subcmd = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_SUBCMD]);
+ for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) {
+ const struct wiphy_vendor_command *vcmd;
+ void *data = NULL;
+ int len = 0;
+
+ vcmd = &rdev->wiphy.vendor_commands[i];
+
+ if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
+ continue;
+
+ if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
+ WIPHY_VENDOR_CMD_NEED_NETDEV)) {
+ if (!wdev)
+ return -EINVAL;
+ if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
+ !wdev->netdev)
+ return -EINVAL;
+
+ if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
+ if (!wdev->netdev ||
+ !netif_running(wdev->netdev))
+ return -ENETDOWN;
+ }
+ } else {
+ wdev = NULL;
+ }
+
+ if (info->attrs[NL80211_ATTR_VENDOR_DATA]) {
+ data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]);
+ len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]);
+ }
+
+ rdev->cur_cmd_info = info;
+ err = rdev->wiphy.vendor_commands[i].doit(&rdev->wiphy, wdev,
+ data, len);
+ rdev->cur_cmd_info = NULL;
+ return err;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
+ enum nl80211_commands cmd,
+ enum nl80211_attrs attr,
+ int approxlen)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ if (WARN_ON(!rdev->cur_cmd_info))
+ return NULL;
+
+ return __cfg80211_alloc_vendor_skb(rdev, approxlen,
+ 0,
+ 0,
+ cmd, attr, NULL, GFP_KERNEL);
+}
+EXPORT_SYMBOL(__cfg80211_alloc_reply_skb);
+
+int cfg80211_vendor_cmd_reply(struct sk_buff *skb)
+{
+ struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
+ void *hdr = ((void **)skb->cb)[1];
+ struct nlattr *data = ((void **)skb->cb)[2];
+
+ if (WARN_ON(!rdev->cur_cmd_info)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ nla_nest_end(skb, data);
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, rdev->cur_cmd_info);
+}
+EXPORT_SYMBOL(cfg80211_vendor_cmd_reply);
+
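For context on how the new exports are meant to be consumed, a driver-side vendor command might look roughly like the sketch below. It is illustrative only: the vendor OUI (0x001234), subcommand number, inner attribute id and echo payload are invented, the exact doit() prototype comes from the include/net/cfg80211.h half of this change (not shown in this hunk), and __cfg80211_alloc_reply_skb() is called directly only to keep the sketch self-contained.

/* illustrative sketch, not part of this patch; assumes <net/cfg80211.h> */
static int example_vendor_echo(struct wiphy *wiphy, struct wireless_dev *wdev,
			       const void *data, int data_len)
{
	struct sk_buff *reply;
	u32 val = 0;

	if (data && data_len >= sizeof(val))
		memcpy(&val, data, sizeof(val));

	/* the reply skb is bound to rdev->cur_cmd_info, so it is only
	 * valid while this doit() handler is running */
	reply = __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR,
					   NL80211_ATTR_VENDOR_DATA,
					   sizeof(val));
	if (!reply)
		return -ENOMEM;

	/* attributes added here land inside the NL80211_ATTR_VENDOR_DATA
	 * nest that cfg80211_vendor_cmd_reply() closes; the attribute id
	 * space is private to the vendor, 1 is a made-up example */
	if (nla_put_u32(reply, 1, val)) {
		kfree_skb(reply);
		return -ENOBUFS;
	}

	return cfg80211_vendor_cmd_reply(reply);
}

static const struct wiphy_vendor_command example_vendor_cmds[] = {
	{
		.info = { .vendor_id = 0x001234, .subcmd = 0x1 },
		.flags = WIPHY_VENDOR_CMD_NEED_WDEV,
		.doit = example_vendor_echo,
	},
};

/* at wiphy setup:
 *	wiphy->vendor_commands = example_vendor_cmds;
 *	wiphy->n_vendor_commands = ARRAY_SIZE(example_vendor_cmds);
 */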
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -9010,7 +9159,15 @@ static struct genl_ops nl80211_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
- }
+ },
+ {
+ .cmd = NL80211_CMD_VENDOR,
+ .doit = nl80211_vendor_cmd,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -10829,6 +10986,10 @@ int nl80211_init(void)
err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp);
if (err)
goto err_out;
+
+ err = genl_register_mc_group(&nl80211_fam, &nl80211_vendor_mcgrp);
+ if (err)
+ goto err_out;
#endif
err = netlink_register_notifier(&nl80211_netlink_notifier);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 81019ee3ddc8..ab406d0462b7 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -55,7 +55,7 @@
* also linked into the probe response struct.
*/
-#define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ)
+#define IEEE80211_SCAN_RESULT_EXPIRE (7 * HZ)
static void bss_free(struct cfg80211_internal_bss *bss)
{
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3ed35c345cae..e2f74e66a169 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -707,8 +707,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
return;
+#ifndef CONFIG_CFG80211_ALLOW_RECONNECT
if (wdev->sme_state != CFG80211_SME_CONNECTED)
return;
+#endif
if (wdev->current_bss) {
cfg80211_unhold_bss(wdev->current_bss);
@@ -785,10 +787,14 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
+#ifndef CONFIG_CFG80211_ALLOW_RECONNECT
if (wdev->sme_state != CFG80211_SME_IDLE)
return -EALREADY;
if (WARN_ON(wdev->connect_keys)) {
+#else
+ if (wdev->connect_keys) {
+#endif
kfree(wdev->connect_keys);
wdev->connect_keys = NULL;
}
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index c4b37f6b5478..76fec0940545 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -274,6 +274,18 @@ $(obj)/%.dtb: $(src)/%.dts FORCE
dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp)
+$(obj)/%.dtb: $(src)/%.dts FORCE
+ $(call if_changed_dep,dtc)
+
+dtc-tmp = $(subst $(comma),_,$(dot-target).dts)
+
+quiet_cmd_dtc_cpp = DTC+CPP $@
+cmd_dtc_cpp = $(CPP) $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
+ $(objtree)/scripts/dtc/dtc -O dtb -o $@ -b 0 $(DTC_FLAGS) $(dtc-tmp)
+
+$(obj)/%.dtb: $(src)/%.dtsp FORCE
+ $(call if_changed_dep,dtc_cpp)
+
# Bzip2
# ---------------------------------------------------------------------------
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index ecbb44797e28..8900c5c5390d 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -24,7 +24,7 @@ quiet_cmd_modules_install = INSTALL $@
INSTALL_MOD_DIR ?= extra
ext-mod-dir = $(INSTALL_MOD_DIR)$(subst $(patsubst %/,%,$(KBUILD_EXTMOD)),,$(@D))
-modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
+modinst_dir ?= $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
$(modules):
$(call cmd,modules_install,$(MODLIB)/$(modinst_dir))
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 43eda40c3838..35e0f164ef81 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -1083,7 +1083,7 @@ static void randomize_choice_values(struct symbol *csym)
csym->flags &= ~(SYMBOL_VALID);
}
-static void set_all_choice_values(struct symbol *csym)
+void set_all_choice_values(struct symbol *csym)
{
struct property *prop;
struct symbol *sym;
@@ -1100,7 +1100,7 @@ static void set_all_choice_values(struct symbol *csym)
}
csym->flags |= SYMBOL_DEF_USER;
/* clear VALID to get value calculated */
- csym->flags &= ~(SYMBOL_VALID);
+ csym->flags &= ~(SYMBOL_VALID | SYMBOL_NEED_SET_CHOICE_VALUES);
}
void conf_set_all_new_symbols(enum conf_def_mode mode)
@@ -1202,6 +1202,14 @@ void conf_set_all_new_symbols(enum conf_def_mode mode)
* selected in a choice block and we set it to yes,
* and the rest to no.
*/
+ if (mode != def_random) {
+ for_all_symbols(i, csym) {
+ if ((sym_is_choice(csym) && !sym_has_value(csym)) ||
+ sym_is_choice_value(csym))
+ csym->flags |= SYMBOL_NEED_SET_CHOICE_VALUES;
+ }
+ }
+
for_all_symbols(i, csym) {
if (sym_has_value(csym) || !sym_is_choice(csym))
continue;
@@ -1209,7 +1217,5 @@ void conf_set_all_new_symbols(enum conf_def_mode mode)
sym_calc_value(csym);
if (mode == def_random)
randomize_choice_values(csym);
- else
- set_all_choice_values(csym);
}
}
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index cdd48600e02a..df198a5f4822 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -106,6 +106,9 @@ struct symbol {
#define SYMBOL_DEF3 0x40000 /* symbol.def[S_DEF_3] is valid */
#define SYMBOL_DEF4 0x80000 /* symbol.def[S_DEF_4] is valid */
+/* choice values need to be set before calculating this symbol value */
+#define SYMBOL_NEED_SET_CHOICE_VALUES 0x100000
+
#define SYMBOL_MAXLENGTH 256
#define SYMBOL_HASHSIZE 9973
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index f8aee5fc6d5e..0c8d4191ca87 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -87,6 +87,7 @@ char *conf_get_default_confname(void);
void sym_set_change_count(int count);
void sym_add_change_count(int count);
void conf_set_all_new_symbols(enum conf_def_mode mode);
+void set_all_choice_values(struct symbol *csym);
struct conf_printer {
void (*print_symbol)(FILE *, struct symbol *, const char *, void *);
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index ecc5aa5f865d..ab8f4c835933 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -300,6 +300,14 @@ void sym_calc_value(struct symbol *sym)
if (sym->flags & SYMBOL_VALID)
return;
+
+ if (sym_is_choice_value(sym) &&
+ sym->flags & SYMBOL_NEED_SET_CHOICE_VALUES) {
+ sym->flags &= ~SYMBOL_NEED_SET_CHOICE_VALUES;
+ prop = sym_get_choice_prop(sym);
+ sym_calc_value(prop_get_symbol(prop));
+ }
+
sym->flags |= SYMBOL_VALID;
oldval = sym->curr;
@@ -425,6 +433,9 @@ void sym_calc_value(struct symbol *sym)
if (sym->flags & SYMBOL_AUTO)
sym->flags &= ~SYMBOL_WRITE;
+
+ if (sym->flags & SYMBOL_NEED_SET_CHOICE_VALUES)
+ set_all_choice_values(sym);
}
void sym_clear_all_valid(void)
diff --git a/security/Kconfig b/security/Kconfig
index e9c6ac724fef..beb86b500adf 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -103,7 +103,7 @@ config INTEL_TXT
config LSM_MMAP_MIN_ADDR
int "Low address space for LSM to protect from user allocation"
depends on SECURITY && SECURITY_SELINUX
- default 32768 if ARM
+ default 32768 if ARM || (ARM64 && COMPAT)
default 65536
help
This is the portion of low virtual memory which should be protected
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 859abdaac1ea..9aaa4e72cc1f 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -629,7 +629,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
* There is no exception for unconfined as change_hat is not
* available.
*/
- if (current->no_new_privs)
+ if (task_no_new_privs(current))
return -EPERM;
/* released below */
@@ -780,7 +780,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
* no_new_privs is set because this always results in a reduction
* of permissions.
*/
- if (current->no_new_privs && !unconfined(profile)) {
+ if (task_no_new_privs(current) && !unconfined(profile)) {
put_cred(cred);
return -EPERM;
}
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 40aedd9f73ea..4a8cbfeef8b3 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -65,7 +65,6 @@ extern int apparmor_initialized __initdata;
char *aa_split_fqname(char *args, char **ns_name);
void aa_info_message(const char *str);
void *kvmalloc(size_t size);
-void kvfree(void *buffer);
/**
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 7430298116d6..ce8d9a84ab2d 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -103,35 +103,3 @@ void *kvmalloc(size_t size)
}
return buffer;
}
-
-/**
- * do_vfree - workqueue routine for freeing vmalloced memory
- * @work: data to be freed
- *
- * The work_struct is overlaid to the data being freed, as at the point
- * the work is scheduled the data is no longer valid, be its freeing
- * needs to be delayed until safe.
- */
-static void do_vfree(struct work_struct *work)
-{
- vfree(work);
-}
-
-/**
- * kvfree - free an allocation do by kvmalloc
- * @buffer: buffer to free (MAYBE_NULL)
- *
- * Free a buffer allocated by kvmalloc
- */
-void kvfree(void *buffer)
-{
- if (is_vmalloc_addr(buffer)) {
- /* Data is no longer valid so just use the allocated space
- * as the work_struct
- */
- struct work_struct *work = (struct work_struct *) buffer;
- INIT_WORK(work, do_vfree);
- schedule_work(work);
- } else
- kfree(buffer);
-}
diff --git a/security/capability.c b/security/capability.c
index 1728d4e375db..6e4fc776badf 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -12,6 +12,26 @@
#include <linux/security.h>
+static int cap_binder_set_context_mgr(struct task_struct *mgr)
+{
+ return 0;
+}
+
+static int cap_binder_transaction(struct task_struct *from, struct task_struct *to)
+{
+ return 0;
+}
+
+static int cap_binder_transfer_binder(struct task_struct *from, struct task_struct *to)
+{
+ return 0;
+}
+
+static int cap_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file)
+{
+ return 0;
+}
+
static int cap_syslog(int type)
{
return 0;
@@ -903,6 +923,10 @@ static void cap_audit_rule_free(void *lsmrule)
void __init security_fixup_ops(struct security_operations *ops)
{
+ set_to_cap_if_null(ops, binder_set_context_mgr);
+ set_to_cap_if_null(ops, binder_transaction);
+ set_to_cap_if_null(ops, binder_transfer_binder);
+ set_to_cap_if_null(ops, binder_transfer_file);
set_to_cap_if_null(ops, ptrace_access_check);
set_to_cap_if_null(ops, ptrace_traceme);
set_to_cap_if_null(ops, capget);
diff --git a/security/commoncap.c b/security/commoncap.c
index c9219a66b7c6..0405522995c5 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -31,6 +31,10 @@
#include <linux/binfmts.h>
#include <linux/personality.h>
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+#endif
+
/*
* If a non-root user executes a setuid-root binary in
* !secure(SECURE_NOROOT) mode, then we raise capabilities.
@@ -78,6 +82,13 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
{
struct user_namespace *ns = targ_ns;
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+ if (cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW))
+ return 0;
+ if (cap == CAP_NET_ADMIN && in_egroup_p(AID_NET_ADMIN))
+ return 0;
+#endif
+
/* See if cred has the capability in the target user namespace
* by examining the target user namespace and all of the target
* user namespace's parents.
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 8d8d97dbb389..8f6b8e8a4cc8 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -245,6 +245,21 @@ static void dump_common_audit_data(struct audit_buffer *ab,
}
break;
}
+ case LSM_AUDIT_DATA_IOCTL_OP: {
+ struct inode *inode;
+
+ audit_log_d_path(ab, " path=", &a->u.op->path);
+
+ inode = a->u.op->path.dentry->d_inode;
+ if (inode) {
+ audit_log_format(ab, " dev=");
+ audit_log_untrustedstring(ab, inode->i_sb->s_id);
+ audit_log_format(ab, " ino=%lu", inode->i_ino);
+ }
+
+ audit_log_format(ab, " ioctlcmd=%hx", a->u.op->cmd);
+ break;
+ }
case LSM_AUDIT_DATA_DENTRY: {
struct inode *inode;
diff --git a/security/security.c b/security/security.c
index a3dce87d1aef..d6ab2d2b363d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -134,6 +134,26 @@ int __init register_security(struct security_operations *ops)
/* Security operations */
+int security_binder_set_context_mgr(struct task_struct *mgr)
+{
+ return security_ops->binder_set_context_mgr(mgr);
+}
+
+int security_binder_transaction(struct task_struct *from, struct task_struct *to)
+{
+ return security_ops->binder_transaction(from, to);
+}
+
+int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to)
+{
+ return security_ops->binder_transfer_binder(from, to);
+}
+
+int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file)
+{
+ return security_ops->binder_transfer_file(from, to, file);
+}
+
int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
{
#ifdef CONFIG_SECURITY_YAMA_STACKED
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index dad36a6ab45f..f3dbbc0f15dd 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/percpu.h>
+#include <linux/list.h>
#include <net/sock.h>
#include <linux/un.h>
#include <net/af_unix.h>
@@ -48,6 +49,7 @@ struct avc_entry {
u32 tsid;
u16 tclass;
struct av_decision avd;
+ struct avc_operation_node *ops_node;
};
struct avc_node {
@@ -64,6 +66,16 @@ struct avc_cache {
u32 latest_notif; /* latest revocation notification */
};
+struct avc_operation_decision_node {
+ struct operation_decision od;
+ struct list_head od_list;
+};
+
+struct avc_operation_node {
+ struct operation ops;
+ struct list_head od_head; /* list of operation_decision_node */
+};
+
struct avc_callback_node {
int (*callback) (u32 event);
u32 events;
@@ -80,6 +92,9 @@ DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 };
static struct avc_cache avc_cache;
static struct avc_callback_node *avc_callbacks;
static struct kmem_cache *avc_node_cachep;
+static struct kmem_cache *avc_operation_decision_node_cachep;
+static struct kmem_cache *avc_operation_node_cachep;
+static struct kmem_cache *avc_operation_perm_cachep;
static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
@@ -171,6 +186,16 @@ void __init avc_init(void)
avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node),
0, SLAB_PANIC, NULL);
+ avc_operation_node_cachep = kmem_cache_create("avc_operation_node",
+ sizeof(struct avc_operation_node),
+ 0, SLAB_PANIC, NULL);
+ avc_operation_decision_node_cachep = kmem_cache_create(
+ "avc_operation_decision_node",
+ sizeof(struct avc_operation_decision_node),
+ 0, SLAB_PANIC, NULL);
+ avc_operation_perm_cachep = kmem_cache_create("avc_operation_perm",
+ sizeof(struct operation_perm),
+ 0, SLAB_PANIC, NULL);
audit_log(current->audit_context, GFP_KERNEL, AUDIT_KERNEL, "AVC INITIALIZED\n");
}
@@ -205,9 +230,271 @@ int avc_get_hash_stats(char *page)
slots_used, AVC_CACHE_SLOTS, max_chain_len);
}
+/*
+ * using a linked list for operation_decision lookup because the list is
+ * always small, i.e. fewer than 5 entries, typically 1
+ */
+static struct operation_decision *avc_operation_lookup(u8 type,
+ struct avc_operation_node *ops_node)
+{
+ struct avc_operation_decision_node *od_node;
+ struct operation_decision *od = NULL;
+
+ list_for_each_entry(od_node, &ops_node->od_head, od_list) {
+ if (od_node->od.type != type)
+ continue;
+ od = &od_node->od;
+ break;
+ }
+ return od;
+}
+
+static inline unsigned int avc_operation_has_perm(struct operation_decision *od,
+ u16 cmd, u8 specified)
+{
+ unsigned int rc = 0;
+ u8 num = cmd & 0xff;
+
+ if ((specified == OPERATION_ALLOWED) &&
+ (od->specified & OPERATION_ALLOWED))
+ rc = security_operation_test(od->allowed->perms, num);
+ else if ((specified == OPERATION_AUDITALLOW) &&
+ (od->specified & OPERATION_AUDITALLOW))
+ rc = security_operation_test(od->auditallow->perms, num);
+ else if ((specified == OPERATION_DONTAUDIT) &&
+ (od->specified & OPERATION_DONTAUDIT))
+ rc = security_operation_test(od->dontaudit->perms, num);
+ return rc;
+}
+
+static void avc_operation_allow_perm(struct avc_operation_node *node, u16 cmd)
+{
+ struct operation_decision *od;
+ u8 type;
+ u8 num;
+
+ type = cmd >> 8;
+ num = cmd & 0xff;
+ security_operation_set(node->ops.type, type);
+ od = avc_operation_lookup(type, node);
+ if (od && od->allowed)
+ security_operation_set(od->allowed->perms, num);
+}
+
+static void avc_operation_decision_free(
+ struct avc_operation_decision_node *od_node)
+{
+ struct operation_decision *od;
+
+ od = &od_node->od;
+ if (od->allowed)
+ kmem_cache_free(avc_operation_perm_cachep, od->allowed);
+ if (od->auditallow)
+ kmem_cache_free(avc_operation_perm_cachep, od->auditallow);
+ if (od->dontaudit)
+ kmem_cache_free(avc_operation_perm_cachep, od->dontaudit);
+ kmem_cache_free(avc_operation_decision_node_cachep, od_node);
+}
+
+static void avc_operation_free(struct avc_operation_node *ops_node)
+{
+ struct avc_operation_decision_node *od_node, *tmp;
+
+ if (!ops_node)
+ return;
+
+ list_for_each_entry_safe(od_node, tmp, &ops_node->od_head, od_list) {
+ list_del(&od_node->od_list);
+ avc_operation_decision_free(od_node);
+ }
+ kmem_cache_free(avc_operation_node_cachep, ops_node);
+}
+
+static void avc_copy_operation_decision(struct operation_decision *dest,
+ struct operation_decision *src)
+{
+ dest->type = src->type;
+ dest->specified = src->specified;
+ if (dest->specified & OPERATION_ALLOWED)
+ memcpy(dest->allowed->perms, src->allowed->perms,
+ sizeof(src->allowed->perms));
+ if (dest->specified & OPERATION_AUDITALLOW)
+ memcpy(dest->auditallow->perms, src->auditallow->perms,
+ sizeof(src->auditallow->perms));
+ if (dest->specified & OPERATION_DONTAUDIT)
+ memcpy(dest->dontaudit->perms, src->dontaudit->perms,
+ sizeof(src->dontaudit->perms));
+}
+
+/*
+ * similar to avc_copy_operation_decision, but only copy decision
+ * information relevant to this command
+ */
+static inline void avc_quick_copy_operation_decision(u16 cmd,
+ struct operation_decision *dest,
+ struct operation_decision *src)
+{
+ /*
+ * compute the index of the u32, within the 256-bit (8 u32) map, that
+ * holds this command's permission bit
+ */
+ u8 i = (0xff & cmd) >> 5;
+
+ dest->specified = src->specified;
+ if (dest->specified & OPERATION_ALLOWED)
+ dest->allowed->perms[i] = src->allowed->perms[i];
+ if (dest->specified & OPERATION_AUDITALLOW)
+ dest->auditallow->perms[i] = src->auditallow->perms[i];
+ if (dest->specified & OPERATION_DONTAUDIT)
+ dest->dontaudit->perms[i] = src->dontaudit->perms[i];
+}
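To make the index arithmetic concrete: for the socket ioctl quoted as an example further down (16-bit command value 0x8927), the function number is 0x27 = 39, so i = 39 >> 5 = 1 and the quick copy touches at most three u32 words (one per map) instead of the twenty-four a full copy would. A stand-alone check of that arithmetic, written as ordinary userspace C rather than kernel code:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint16_t cmd = 0x8927;		/* type (code) 0x89, number (function) 0x27 */
	uint8_t num  = cmd & 0xff;	/* 0x27 = 39 */
	uint8_t i    = num >> 5;	/* 39 / 32 = 1: the u32 word holding this bit */
	uint8_t bit  = num & 0x1f;	/* 39 % 32 = 7: position inside that word */

	assert(num == 39 && i == 1 && bit == 7);
	return 0;
}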
+
+static struct avc_operation_decision_node
+ *avc_operation_decision_alloc(u8 specified)
+{
+ struct avc_operation_decision_node *node;
+ struct operation_decision *od;
+
+ node = kmem_cache_zalloc(avc_operation_decision_node_cachep,
+ GFP_ATOMIC | __GFP_NOMEMALLOC);
+ if (!node)
+ return NULL;
+
+ od = &node->od;
+ if (specified & OPERATION_ALLOWED) {
+ od->allowed = kmem_cache_zalloc(avc_operation_perm_cachep,
+ GFP_ATOMIC | __GFP_NOMEMALLOC);
+ if (!od->allowed)
+ goto error;
+ }
+ if (specified & OPERATION_AUDITALLOW) {
+ od->auditallow = kmem_cache_zalloc(avc_operation_perm_cachep,
+ GFP_ATOMIC | __GFP_NOMEMALLOC);
+ if (!od->auditallow)
+ goto error;
+ }
+ if (specified & OPERATION_DONTAUDIT) {
+ od->dontaudit = kmem_cache_zalloc(avc_operation_perm_cachep,
+ GFP_ATOMIC | __GFP_NOMEMALLOC);
+ if (!od->dontaudit)
+ goto error;
+ }
+ return node;
+error:
+ avc_operation_decision_free(node);
+ return NULL;
+}
+
+static int avc_add_operation(struct avc_node *node,
+ struct operation_decision *od)
+{
+ struct avc_operation_decision_node *dest_od;
+
+ node->ae.ops_node->ops.len++;
+ dest_od = avc_operation_decision_alloc(od->specified);
+ if (!dest_od)
+ return -ENOMEM;
+ avc_copy_operation_decision(&dest_od->od, od);
+ list_add(&dest_od->od_list, &node->ae.ops_node->od_head);
+ return 0;
+}
+
+static struct avc_operation_node *avc_operation_alloc(void)
+{
+ struct avc_operation_node *ops;
+
+ ops = kmem_cache_zalloc(avc_operation_node_cachep,
+ GFP_ATOMIC|__GFP_NOMEMALLOC);
+ if (!ops)
+ return ops;
+ INIT_LIST_HEAD(&ops->od_head);
+ return ops;
+}
+
+static int avc_operation_populate(struct avc_node *node,
+ struct avc_operation_node *src)
+{
+ struct avc_operation_node *dest;
+ struct avc_operation_decision_node *dest_od;
+ struct avc_operation_decision_node *src_od;
+
+ if (src->ops.len == 0)
+ return 0;
+ dest = avc_operation_alloc();
+ if (!dest)
+ return -ENOMEM;
+
+ memcpy(dest->ops.type, &src->ops.type, sizeof(dest->ops.type));
+ dest->ops.len = src->ops.len;
+
+ /* for each source od allocate a destination od and copy */
+ list_for_each_entry(src_od, &src->od_head, od_list) {
+ dest_od = avc_operation_decision_alloc(src_od->od.specified);
+ if (!dest_od)
+ goto error;
+ avc_copy_operation_decision(&dest_od->od, &src_od->od);
+ list_add(&dest_od->od_list, &dest->od_head);
+ }
+ node->ae.ops_node = dest;
+ return 0;
+error:
+ avc_operation_free(dest);
+ return -ENOMEM;
+
+}
+
+static inline u32 avc_operation_audit_required(u32 requested,
+ struct av_decision *avd,
+ struct operation_decision *od,
+ u16 cmd,
+ int result,
+ u32 *deniedp)
+{
+ u32 denied, audited;
+
+ denied = requested & ~avd->allowed;
+ if (unlikely(denied)) {
+ audited = denied & avd->auditdeny;
+ if (audited && od) {
+ if (avc_operation_has_perm(od, cmd,
+ OPERATION_DONTAUDIT))
+ audited &= ~requested;
+ }
+ } else if (result) {
+ audited = denied = requested;
+ } else {
+ audited = requested & avd->auditallow;
+ if (audited && od) {
+ if (!avc_operation_has_perm(od, cmd,
+ OPERATION_AUDITALLOW))
+ audited &= ~requested;
+ }
+ }
+
+ *deniedp = denied;
+ return audited;
+}
+
+static inline int avc_operation_audit(u32 ssid, u32 tsid, u16 tclass,
+ u32 requested, struct av_decision *avd,
+ struct operation_decision *od,
+ u16 cmd, int result,
+ struct common_audit_data *ad)
+{
+ u32 audited, denied;
+
+ audited = avc_operation_audit_required(
+ requested, avd, od, cmd, result, &denied);
+ if (likely(!audited))
+ return 0;
+ return slow_avc_audit(ssid, tsid, tclass, requested,
+ audited, denied, result, ad, 0);
+}
+
static void avc_node_free(struct rcu_head *rhead)
{
struct avc_node *node = container_of(rhead, struct avc_node, rhead);
+ avc_operation_free(node->ae.ops_node);
kmem_cache_free(avc_node_cachep, node);
avc_cache_stats_incr(frees);
}
@@ -221,6 +508,7 @@ static void avc_node_delete(struct avc_node *node)
static void avc_node_kill(struct avc_node *node)
{
+ avc_operation_free(node->ae.ops_node);
kmem_cache_free(avc_node_cachep, node);
avc_cache_stats_incr(frees);
atomic_dec(&avc_cache.active_nodes);
@@ -367,6 +655,7 @@ static int avc_latest_notif_update(int seqno, int is_insert)
* @tsid: target security identifier
* @tclass: target security class
* @avd: resulting av decision
+ * @ops: resulting operation decisions
*
* Insert an AVC entry for the SID pair
* (@ssid, @tsid) and class @tclass.
@@ -378,7 +667,9 @@ static int avc_latest_notif_update(int seqno, int is_insert)
* the access vectors into a cache entry, returns
* avc_node inserted. Otherwise, this function returns NULL.
*/
-static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd)
+static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass,
+ struct av_decision *avd,
+ struct avc_operation_node *ops_node)
{
struct avc_node *pos, *node = NULL;
int hvalue;
@@ -391,10 +682,15 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec
if (node) {
struct hlist_head *head;
spinlock_t *lock;
+ int rc = 0;
hvalue = avc_hash(ssid, tsid, tclass);
avc_node_populate(node, ssid, tsid, tclass, avd);
-
+ rc = avc_operation_populate(node, ops_node);
+ if (rc) {
+ kmem_cache_free(avc_node_cachep, node);
+ return NULL;
+ }
head = &avc_cache.slots[hvalue];
lock = &avc_cache.slots_lock[hvalue];
@@ -444,11 +740,15 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
avc_dump_query(ab, ad->selinux_audit_data->ssid,
ad->selinux_audit_data->tsid,
ad->selinux_audit_data->tclass);
+ if (ad->selinux_audit_data->denied) {
+ audit_log_format(ab, " permissive=%u",
+ ad->selinux_audit_data->result ? 0 : 1);
+ }
}
/* This is the slow part of avc audit with big stack footprint */
noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass,
- u32 requested, u32 audited, u32 denied,
+ u32 requested, u32 audited, u32 denied, int result,
struct common_audit_data *a,
unsigned flags)
{
@@ -477,6 +777,7 @@ noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass,
sad.tsid = tsid;
sad.audited = audited;
sad.denied = denied;
+ sad.result = result;
a->selinux_audit_data = &sad;
@@ -523,14 +824,17 @@ static inline int avc_sidcmp(u32 x, u32 y)
* @perms : Permission mask bits
* @ssid,@tsid,@tclass : identifier of an AVC entry
* @seqno : sequence number when decision was made
+ * @od: operation_decision to be added to the node
*
* if a valid AVC entry doesn't exist, this function returns -ENOENT.
* if kmalloc() called internally returns NULL, this function returns -ENOMEM.
* otherwise, this function updates the AVC entry. The original AVC-entry object
* will be released later by RCU.
*/
-static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass,
- u32 seqno)
+static int avc_update_node(u32 event, u32 perms, u16 cmd, u32 ssid, u32 tsid,
+ u16 tclass, u32 seqno,
+ struct operation_decision *od,
+ u32 flags)
{
int hvalue, rc = 0;
unsigned long flag;
@@ -574,9 +878,19 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass,
avc_node_populate(node, ssid, tsid, tclass, &orig->ae.avd);
+ if (orig->ae.ops_node) {
+ rc = avc_operation_populate(node, orig->ae.ops_node);
+ if (rc) {
+ kmem_cache_free(avc_node_cachep, node);
+ goto out_unlock;
+ }
+ }
+
switch (event) {
case AVC_CALLBACK_GRANT:
node->ae.avd.allowed |= perms;
+ if (node->ae.ops_node && (flags & AVC_OPERATION_CMD))
+ avc_operation_allow_perm(node->ae.ops_node, cmd);
break;
case AVC_CALLBACK_TRY_REVOKE:
case AVC_CALLBACK_REVOKE:
@@ -594,6 +908,9 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass,
case AVC_CALLBACK_AUDITDENY_DISABLE:
node->ae.avd.auditdeny &= ~perms;
break;
+ case AVC_CALLBACK_ADD_OPERATION:
+ avc_add_operation(node, od);
+ break;
}
avc_node_replace(node, orig);
out_unlock:
@@ -665,18 +982,20 @@ int avc_ss_reset(u32 seqno)
* results in a bigger stack frame.
*/
static noinline struct avc_node *avc_compute_av(u32 ssid, u32 tsid,
- u16 tclass, struct av_decision *avd)
+ u16 tclass, struct av_decision *avd,
+ struct avc_operation_node *ops_node)
{
rcu_read_unlock();
- security_compute_av(ssid, tsid, tclass, avd);
+ INIT_LIST_HEAD(&ops_node->od_head);
+ security_compute_av(ssid, tsid, tclass, avd, &ops_node->ops);
rcu_read_lock();
- return avc_insert(ssid, tsid, tclass, avd);
+ return avc_insert(ssid, tsid, tclass, avd, ops_node);
}
static noinline int avc_denied(u32 ssid, u32 tsid,
- u16 tclass, u32 requested,
- unsigned flags,
- struct av_decision *avd)
+ u16 tclass, u32 requested,
+ u16 cmd, unsigned flags,
+ struct av_decision *avd)
{
if (flags & AVC_STRICT)
return -EACCES;
@@ -684,11 +1003,92 @@ static noinline int avc_denied(u32 ssid, u32 tsid,
if (selinux_enforcing && !(avd->flags & AVD_FLAGS_PERMISSIVE))
return -EACCES;
- avc_update_node(AVC_CALLBACK_GRANT, requested, ssid,
- tsid, tclass, avd->seqno);
+ avc_update_node(AVC_CALLBACK_GRANT, requested, cmd, ssid,
+ tsid, tclass, avd->seqno, NULL, flags);
return 0;
}
+/*
+ * ioctl commands are comprised of four fields, direction, size, type, and
+ * number. The avc operation logic filters based on two of them:
+ *
+ * type: or code, typically unique to each driver
+ * number: or function
+ *
+ * For example, 0x89 is a socket type, and number 0x27 is the get hardware
+ * address function.
+ */
+int avc_has_operation(u32 ssid, u32 tsid, u16 tclass, u32 requested,
+ u16 cmd, struct common_audit_data *ad)
+{
+ struct avc_node *node;
+ struct av_decision avd;
+ u32 denied;
+ struct operation_decision *od = NULL;
+ struct operation_decision od_local;
+ struct operation_perm allowed;
+ struct operation_perm auditallow;
+ struct operation_perm dontaudit;
+ struct avc_operation_node local_ops_node;
+ struct avc_operation_node *ops_node;
+ u8 type = cmd >> 8;
+ int rc = 0, rc2;
+
+ ops_node = &local_ops_node;
+ BUG_ON(!requested);
+
+ rcu_read_lock();
+
+ node = avc_lookup(ssid, tsid, tclass);
+ if (unlikely(!node)) {
+ node = avc_compute_av(ssid, tsid, tclass, &avd, ops_node);
+ } else {
+ memcpy(&avd, &node->ae.avd, sizeof(avd));
+ ops_node = node->ae.ops_node;
+ }
+ /* if operations are not defined, only consider av_decision */
+ if (!ops_node || !ops_node->ops.len)
+ goto decision;
+
+ od_local.allowed = &allowed;
+ od_local.auditallow = &auditallow;
+ od_local.dontaudit = &dontaudit;
+
+ /* lookup operation decision */
+ od = avc_operation_lookup(type, ops_node);
+ if (unlikely(!od)) {
+ /* Compute operation decision if type is flagged */
+ if (!security_operation_test(ops_node->ops.type, type)) {
+ avd.allowed &= ~requested;
+ goto decision;
+ }
+ rcu_read_unlock();
+ security_compute_operation(ssid, tsid, tclass, type, &od_local);
+ rcu_read_lock();
+ avc_update_node(AVC_CALLBACK_ADD_OPERATION, requested, cmd,
+ ssid, tsid, tclass, avd.seqno, &od_local, 0);
+ } else {
+ avc_quick_copy_operation_decision(cmd, &od_local, od);
+ }
+ od = &od_local;
+
+ if (!avc_operation_has_perm(od, cmd, OPERATION_ALLOWED))
+ avd.allowed &= ~requested;
+
+decision:
+ denied = requested & ~(avd.allowed);
+ if (unlikely(denied))
+ rc = avc_denied(ssid, tsid, tclass, requested, cmd,
+ AVC_OPERATION_CMD, &avd);
+
+ rcu_read_unlock();
+
+ rc2 = avc_operation_audit(ssid, tsid, tclass, requested,
+ &avd, od, cmd, rc, ad);
+ if (rc2)
+ return rc2;
+ return rc;
+}
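The two-level filter described in the comment above avc_has_operation() can be modelled in a few lines: the high byte of the command is gated by a 256-bit type map, and only when that passes is the per-type 256-bit number map consulted. The toy below is a deliberately simplified userspace model, not the kernel code: it re-declares the bit macros locally, ignores caching and the auditallow/dontaudit maps, and assumes the policy defines operations for the SID pair at all (otherwise only the ordinary av decision applies).

#include <assert.h>
#include <stdint.h>

static uint32_t type_map[8];		/* mirrors struct operation.type */
static uint32_t allowed_num_map[8];	/* mirrors struct operation_perm.perms */

#define op_set(map, x)  ((map)[(x) >> 5] |= 1u << ((x) & 0x1f))
#define op_test(map, x) (1u & ((map)[(x) >> 5] >> ((x) & 0x1f)))

static int toy_ioctl_allowed(uint16_t cmd)
{
	uint8_t type = cmd >> 8;
	uint8_t num  = cmd & 0xff;

	if (!op_test(type_map, type))	/* type never flagged: denied */
		return 0;
	return op_test(allowed_num_map, num);
}

int main(void)
{
	op_set(type_map, 0x89);		/* socket ioctls carry a decision... */
	op_set(allowed_num_map, 0x27);	/* ...that only allows function 0x27 */

	assert(toy_ioctl_allowed(0x8927) == 1);
	assert(toy_ioctl_allowed(0x8946) == 0);	/* same type, other number */
	assert(toy_ioctl_allowed(0x5401) == 0);	/* type not flagged at all */
	return 0;
}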
/**
* avc_has_perm_noaudit - Check permissions but perform no auditing.
@@ -716,6 +1116,7 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
struct av_decision *avd)
{
struct avc_node *node;
+ struct avc_operation_node ops_node;
int rc = 0;
u32 denied;
@@ -724,16 +1125,14 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
rcu_read_lock();
node = avc_lookup(ssid, tsid, tclass);
- if (unlikely(!node)) {
- node = avc_compute_av(ssid, tsid, tclass, avd);
- } else {
+ if (unlikely(!node))
+ node = avc_compute_av(ssid, tsid, tclass, avd, &ops_node);
+ else
memcpy(avd, &node->ae.avd, sizeof(*avd));
- avd = &node->ae.avd;
- }
denied = requested & ~(avd->allowed);
if (unlikely(denied))
- rc = avc_denied(ssid, tsid, tclass, requested, flags, avd);
+ rc = avc_denied(ssid, tsid, tclass, requested, 0, flags, avd);
rcu_read_unlock();
return rc;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index fdd6e4f8be39..607c3d58a691 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -147,6 +147,17 @@ static int selinux_secmark_enabled(void)
return (atomic_read(&selinux_secmark_refcount) > 0);
}
+static int selinux_netcache_avc_callback(u32 event)
+{
+ if (event == AVC_CALLBACK_RESET) {
+ sel_netif_flush();
+ sel_netnode_flush();
+ sel_netport_flush();
+ synchronize_net();
+ }
+ return 0;
+}
+
/*
* initialise the security for the init task
*/
@@ -419,8 +430,11 @@ static int sb_finish_set_opts(struct super_block *sb)
sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
sbsec->flags &= ~SE_SBLABELSUPP;
- /* Special handling for sysfs. Is genfs but also has setxattr handler*/
- if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+ /* Special handling. Is genfs but also has in-core setxattr handler */
+ if (!strcmp(sb->s_type->name, "sysfs") ||
+ !strcmp(sb->s_type->name, "pstore") ||
+ !strcmp(sb->s_type->name, "debugfs") ||
+ !strcmp(sb->s_type->name, "rootfs"))
sbsec->flags |= SE_SBLABELSUPP;
/* Initialize the root inode. */
@@ -1856,6 +1870,65 @@ static inline u32 open_file_to_av(struct file *file)
/* Hook functions begin here. */
+static int selinux_binder_set_context_mgr(struct task_struct *mgr)
+{
+ u32 mysid = current_sid();
+ u32 mgrsid = task_sid(mgr);
+
+ return avc_has_perm(mysid, mgrsid, SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL);
+}
+
+static int selinux_binder_transaction(struct task_struct *from, struct task_struct *to)
+{
+ u32 mysid = current_sid();
+ u32 fromsid = task_sid(from);
+ u32 tosid = task_sid(to);
+ int rc;
+
+ if (mysid != fromsid) {
+ rc = avc_has_perm(mysid, fromsid, SECCLASS_BINDER, BINDER__IMPERSONATE, NULL);
+ if (rc)
+ return rc;
+ }
+
+ return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL);
+}
+
+static int selinux_binder_transfer_binder(struct task_struct *from, struct task_struct *to)
+{
+ u32 fromsid = task_sid(from);
+ u32 tosid = task_sid(to);
+ return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, NULL);
+}
+
+static int selinux_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file)
+{
+ u32 sid = task_sid(to);
+ struct file_security_struct *fsec = file->f_security;
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode_security_struct *isec = inode->i_security;
+ struct common_audit_data ad;
+ int rc;
+
+ ad.type = LSM_AUDIT_DATA_PATH;
+ ad.u.path = file->f_path;
+
+ if (sid != fsec->sid) {
+ rc = avc_has_perm(sid, fsec->sid,
+ SECCLASS_FD,
+ FD__USE,
+ &ad);
+ if (rc)
+ return rc;
+ }
+
+ if (unlikely(IS_PRIVATE(inode)))
+ return 0;
+
+ return avc_has_perm(sid, isec->sid, isec->sclass, file_to_av(file),
+ &ad);
+}
+
static int selinux_ptrace_access_check(struct task_struct *child,
unsigned int mode)
{
@@ -2672,6 +2745,7 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na
static noinline int audit_inode_permission(struct inode *inode,
u32 perms, u32 audited, u32 denied,
+ int result,
unsigned flags)
{
struct common_audit_data ad;
@@ -2682,7 +2756,7 @@ static noinline int audit_inode_permission(struct inode *inode,
ad.u.inode = inode;
rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms,
- audited, denied, &ad, flags);
+ audited, denied, result, &ad, flags);
if (rc)
return rc;
return 0;
@@ -2724,7 +2798,7 @@ static int selinux_inode_permission(struct inode *inode, int mask)
if (likely(!audited))
return rc;
- rc2 = audit_inode_permission(inode, perms, audited, denied, flags);
+ rc2 = audit_inode_permission(inode, perms, audited, denied, rc, flags);
if (rc2)
return rc2;
return rc;
@@ -3035,6 +3109,44 @@ static void selinux_file_free_security(struct file *file)
file_free_security(file);
}
+/*
+ * Check whether a task has both the ioctl permission and the specific
+ * cmd operation on an inode.
+ */
+int ioctl_has_perm(const struct cred *cred, struct file *file,
+ u32 requested, u16 cmd)
+{
+ struct common_audit_data ad;
+ struct file_security_struct *fsec = file->f_security;
+ struct inode *inode = file_inode(file);
+ struct inode_security_struct *isec = inode->i_security;
+ struct lsm_ioctlop_audit ioctl;
+ u32 ssid = cred_sid(cred);
+ int rc;
+
+ ad.type = LSM_AUDIT_DATA_IOCTL_OP;
+ ad.u.op = &ioctl;
+ ad.u.op->cmd = cmd;
+ ad.u.op->path = file->f_path;
+
+ if (ssid != fsec->sid) {
+ rc = avc_has_perm(ssid, fsec->sid,
+ SECCLASS_FD,
+ FD__USE,
+ &ad);
+ if (rc)
+ goto out;
+ }
+
+ if (unlikely(IS_PRIVATE(inode)))
+ return 0;
+
+ rc = avc_has_operation(ssid, isec->sid, isec->sclass,
+ requested, cmd, &ad);
+out:
+ return rc;
+}
+
static int selinux_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -3077,7 +3189,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
* to the file's ioctl() function.
*/
default:
- error = file_has_perm(cred, file, FILE__IOCTL);
+ error = ioctl_has_perm(cred, file, FILE__IOCTL, (u16) cmd);
}
return error;
}
@@ -3121,24 +3233,20 @@ error:
static int selinux_mmap_addr(unsigned long addr)
{
- int rc = 0;
- u32 sid = current_sid();
+ int rc;
+
+ /* do DAC check on address space usage */
+ rc = cap_mmap_addr(addr);
+ if (rc)
+ return rc;
- /*
- * notice that we are intentionally putting the SELinux check before
- * the secondary cap_file_mmap check. This is such a likely attempt
- * at bad behaviour/exploit that we always want to get the AVC, even
- * if DAC would have also denied the operation.
- */
if (addr < CONFIG_LSM_MMAP_MIN_ADDR) {
+ u32 sid = current_sid();
rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT,
MEMPROTECT__MMAP_ZERO, NULL);
- if (rc)
- return rc;
}
- /* do DAC check on address space usage */
- return cap_mmap_addr(addr);
+ return rc;
}
static int selinux_mmap_file(struct file *file, unsigned long reqprot,
@@ -4153,15 +4261,15 @@ static int selinux_socket_unix_may_send(struct socket *sock,
&ad);
}
-static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family,
- u32 peer_sid,
+static int selinux_inet_sys_rcv_skb(struct net *ns, int ifindex,
+ char *addrp, u16 family, u32 peer_sid,
struct common_audit_data *ad)
{
int err;
u32 if_sid;
u32 node_sid;
- err = sel_netif_sid(ifindex, &if_sid);
+ err = sel_netif_sid(ns, ifindex, &if_sid);
if (err)
return err;
err = avc_has_perm(peer_sid, if_sid,
@@ -4254,8 +4362,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
err = selinux_skb_peerlbl_sid(skb, family, &peer_sid);
if (err)
return err;
- err = selinux_inet_sys_rcv_skb(skb->skb_iif, addrp, family,
- peer_sid, &ad);
+ err = selinux_inet_sys_rcv_skb(sock_net(sk), skb->skb_iif,
+ addrp, family, peer_sid, &ad);
if (err) {
selinux_netlbl_err(skb, err, 0);
return err;
@@ -4598,7 +4706,8 @@ out:
#ifdef CONFIG_NETFILTER
-static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
+static unsigned int selinux_ip_forward(struct sk_buff *skb,
+ const struct net_device *indev,
u16 family)
{
int err;
@@ -4624,14 +4733,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
ad.type = LSM_AUDIT_DATA_NET;
ad.u.net = &net;
- ad.u.net->netif = ifindex;
+ ad.u.net->netif = indev->ifindex;
ad.u.net->family = family;
if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
return NF_DROP;
if (peerlbl_active) {
- err = selinux_inet_sys_rcv_skb(ifindex, addrp, family,
- peer_sid, &ad);
+ err = selinux_inet_sys_rcv_skb(dev_net(indev), indev->ifindex,
+ addrp, family, peer_sid, &ad);
if (err) {
selinux_netlbl_err(skb, err, 1);
return NF_DROP;
@@ -4660,7 +4769,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return selinux_ip_forward(skb, in->ifindex, PF_INET);
+ return selinux_ip_forward(skb, in, PF_INET);
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -4670,7 +4779,7 @@ static unsigned int selinux_ipv6_forward(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return selinux_ip_forward(skb, in->ifindex, PF_INET6);
+ return selinux_ip_forward(skb, in, PF_INET6);
}
#endif /* IPV6 */
@@ -4758,11 +4867,13 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
return NF_ACCEPT;
}
-static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
+static unsigned int selinux_ip_postroute(struct sk_buff *skb,
+ const struct net_device *outdev,
u16 family)
{
u32 secmark_perm;
u32 peer_sid;
+ int ifindex = outdev->ifindex;
struct sock *sk;
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
@@ -4875,7 +4986,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
u32 if_sid;
u32 node_sid;
- if (sel_netif_sid(ifindex, &if_sid))
+ if (sel_netif_sid(dev_net(outdev), ifindex, &if_sid))
return NF_DROP;
if (avc_has_perm(peer_sid, if_sid,
SECCLASS_NETIF, NETIF__EGRESS, &ad))
@@ -4897,7 +5008,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return selinux_ip_postroute(skb, out->ifindex, PF_INET);
+ return selinux_ip_postroute(skb, out, PF_INET);
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -4907,7 +5018,7 @@ static unsigned int selinux_ipv6_postroute(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return selinux_ip_postroute(skb, out->ifindex, PF_INET6);
+ return selinux_ip_postroute(skb, out, PF_INET6);
}
#endif /* IPV6 */
@@ -5656,6 +5767,11 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer)
static struct security_operations selinux_ops = {
.name = "selinux",
+ .binder_set_context_mgr = selinux_binder_set_context_mgr,
+ .binder_transaction = selinux_binder_transaction,
+ .binder_transfer_binder = selinux_binder_transfer_binder,
+ .binder_transfer_file = selinux_binder_transfer_file,
+
.ptrace_access_check = selinux_ptrace_access_check,
.ptrace_traceme = selinux_ptrace_traceme,
.capget = selinux_capget,
@@ -5882,6 +5998,9 @@ static __init int selinux_init(void)
if (register_security(&selinux_ops))
panic("SELinux: Unable to register with kernel.\n");
+ if (avc_add_callback(selinux_netcache_avc_callback, AVC_CALLBACK_RESET))
+ panic("SELinux: Unable to register AVC netcache callback\n");
+
if (selinux_enforcing)
printk(KERN_DEBUG "SELinux: Starting in enforcing mode\n");
else
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index 92d0ab561db8..8109ad846e99 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -102,7 +102,7 @@ static inline u32 avc_audit_required(u32 requested,
}
int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass,
- u32 requested, u32 audited, u32 denied,
+ u32 requested, u32 audited, u32 denied, int result,
struct common_audit_data *a,
unsigned flags);
@@ -137,16 +137,20 @@ static inline int avc_audit(u32 ssid, u32 tsid,
if (likely(!audited))
return 0;
return slow_avc_audit(ssid, tsid, tclass,
- requested, audited, denied,
+ requested, audited, denied, result,
a, flags);
}
#define AVC_STRICT 1 /* Ignore permissive mode. */
+#define AVC_OPERATION_CMD 2 /* ignore command when updating operations */
int avc_has_perm_noaudit(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
unsigned flags,
struct av_decision *avd);
+int avc_has_operation(u32 ssid, u32 tsid, u16 tclass, u32 requested,
+ u16 cmd, struct common_audit_data *ad);
+
int avc_has_perm_flags(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
struct common_audit_data *auditdata,
@@ -169,6 +173,7 @@ u32 avc_policy_seqno(void);
#define AVC_CALLBACK_AUDITALLOW_DISABLE 32
#define AVC_CALLBACK_AUDITDENY_ENABLE 64
#define AVC_CALLBACK_AUDITDENY_DISABLE 128
+#define AVC_CALLBACK_ADD_OPERATION 256
int avc_add_callback(int (*callback)(u32 event), u32 events);
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 14d04e63b1f0..c32ff7bde81a 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -151,5 +151,6 @@ struct security_class_mapping secclass_map[] = {
{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
{ "tun_socket",
{ COMMON_SOCK_PERMS, "attach_queue", NULL } },
+ { "binder", { "impersonate", "call", "set_context_mgr", "transfer", NULL } },
{ NULL }
};
diff --git a/security/selinux/include/netif.h b/security/selinux/include/netif.h
index 43d507242b42..c72145444090 100644
--- a/security/selinux/include/netif.h
+++ b/security/selinux/include/netif.h
@@ -17,7 +17,11 @@
#ifndef _SELINUX_NETIF_H_
#define _SELINUX_NETIF_H_
-int sel_netif_sid(int ifindex, u32 *sid);
+#include <net/net_namespace.h>
+
+void sel_netif_flush(void);
+
+int sel_netif_sid(struct net *ns, int ifindex, u32 *sid);
#endif /* _SELINUX_NETIF_H_ */
diff --git a/security/selinux/include/netnode.h b/security/selinux/include/netnode.h
index df7a5ed6c694..937668dd3024 100644
--- a/security/selinux/include/netnode.h
+++ b/security/selinux/include/netnode.h
@@ -27,6 +27,8 @@
#ifndef _SELINUX_NETNODE_H
#define _SELINUX_NETNODE_H
+void sel_netnode_flush(void);
+
int sel_netnode_sid(void *addr, u16 family, u32 *sid);
#endif
diff --git a/security/selinux/include/netport.h b/security/selinux/include/netport.h
index 4d965b83d735..d1ce896b2cb0 100644
--- a/security/selinux/include/netport.h
+++ b/security/selinux/include/netport.h
@@ -26,6 +26,8 @@
#ifndef _SELINUX_NETPORT_H
#define _SELINUX_NETPORT_H
+void sel_netport_flush(void);
+
int sel_netport_sid(u8 protocol, u16 pnum, u32 *sid);
#endif
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 6fd9dd256a62..2a3be7adc071 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -24,6 +24,7 @@
#include <linux/binfmts.h>
#include <linux/in.h>
#include <linux/spinlock.h>
+#include <net/net_namespace.h>
#include "flask.h"
#include "avc.h"
@@ -78,6 +79,7 @@ struct ipc_security_struct {
};
struct netif_security_struct {
+ struct net *ns; /* network namespace */
int ifindex; /* device index */
u32 sid; /* SID for this interface */
};
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 6d3885165d14..e72d8de93725 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -33,13 +33,15 @@
#define POLICYDB_VERSION_ROLETRANS 26
#define POLICYDB_VERSION_NEW_OBJECT_DEFAULTS 27
#define POLICYDB_VERSION_DEFAULT_TYPE 28
+#define POLICYDB_VERSION_CONSTRAINT_NAMES 29
+#define POLICYDB_VERSION_IOCTL_OPERATIONS 30
/* Range of policy versions we understand*/
#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE
#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
#else
-#define POLICYDB_VERSION_MAX POLICYDB_VERSION_DEFAULT_TYPE
+#define POLICYDB_VERSION_MAX POLICYDB_VERSION_IOCTL_OPERATIONS
#endif
/* Mask for just the mount related flags */
@@ -102,11 +104,40 @@ struct av_decision {
u32 flags;
};
+#define security_operation_set(perms, x) (perms[x >> 5] |= 1 << (x & 0x1f))
+#define security_operation_test(perms, x) (1 & (perms[x >> 5] >> (x & 0x1f)))
+
+struct operation_perm {
+ u32 perms[8];
+};
+
+struct operation_decision {
+ u8 type;
+ u8 specified;
+ struct operation_perm *allowed;
+ struct operation_perm *auditallow;
+ struct operation_perm *dontaudit;
+};
+
+#define OPERATION_ALLOWED 1
+#define OPERATION_AUDITALLOW 2
+#define OPERATION_DONTAUDIT 4
+#define OPERATION_ALL (OPERATION_ALLOWED | OPERATION_AUDITALLOW |\
+ OPERATION_DONTAUDIT)
+struct operation {
+ u16 len; /* length of operation decision chain */
+ u32 type[8]; /* 256 types */
+};
+
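Each of these maps is 8 u32s, i.e. 256 bits, so the x >> 5 / x & 0x1f addressing used by security_operation_set()/security_operation_test() reaches every possible 8-bit type and function number exactly once, at a cost of 32 bytes per map. A trivial stand-alone check of that geometry, with the structs re-declared locally in userspace C:

#include <assert.h>
#include <stdint.h>

struct operation_perm { uint32_t perms[8]; };		/* one bit per function number */
struct operation { uint16_t len; uint32_t type[8]; };	/* one bit per type */

int main(void)
{
	for (unsigned int x = 0; x < 256; x++) {
		assert((x >> 5) < 8);		/* word index stays inside perms[8] */
		assert((x & 0x1f) < 32);	/* bit index stays inside a u32 */
	}
	assert(sizeof(struct operation_perm) == 32);	/* 256 bits per map */
	return 0;
}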
/* definitions of av_decision.flags */
#define AVD_FLAGS_PERMISSIVE 0x0001
void security_compute_av(u32 ssid, u32 tsid,
- u16 tclass, struct av_decision *avd);
+ u16 tclass, struct av_decision *avd,
+ struct operation *ops);
+
+void security_compute_operation(u32 ssid, u32 tsid, u16 tclass,
+ u8 type, struct operation_decision *od);
void security_compute_av_user(u32 ssid, u32 tsid,
u16 tclass, struct av_decision *avd);
diff --git a/security/selinux/netif.c b/security/selinux/netif.c
index 47a49d1a6f6a..548651a73876 100644
--- a/security/selinux/netif.c
+++ b/security/selinux/netif.c
@@ -45,6 +45,7 @@ static struct list_head sel_netif_hash[SEL_NETIF_HASH_SIZE];
/**
* sel_netif_hashfn - Hashing function for the interface table
+ * @ns: the network namespace
* @ifindex: the network interface
*
* Description:
@@ -52,13 +53,14 @@ static struct list_head sel_netif_hash[SEL_NETIF_HASH_SIZE];
* bucket number for the given interface.
*
*/
-static inline u32 sel_netif_hashfn(int ifindex)
+static inline u32 sel_netif_hashfn(const struct net *ns, int ifindex)
{
- return (ifindex & (SEL_NETIF_HASH_SIZE - 1));
+ return (((uintptr_t)ns + ifindex) & (SEL_NETIF_HASH_SIZE - 1));
}
/**
* sel_netif_find - Search for an interface record
+ * @ns: the network namespace
* @ifindex: the network interface
*
* Description:
@@ -66,15 +68,15 @@ static inline u32 sel_netif_hashfn(int ifindex)
* If an entry can not be found in the table return NULL.
*
*/
-static inline struct sel_netif *sel_netif_find(int ifindex)
+static inline struct sel_netif *sel_netif_find(const struct net *ns,
+ int ifindex)
{
- int idx = sel_netif_hashfn(ifindex);
+ int idx = sel_netif_hashfn(ns, ifindex);
struct sel_netif *netif;
list_for_each_entry_rcu(netif, &sel_netif_hash[idx], list)
- /* all of the devices should normally fit in the hash, so we
- * optimize for that case */
- if (likely(netif->nsec.ifindex == ifindex))
+ if (net_eq(netif->nsec.ns, ns) &&
+ netif->nsec.ifindex == ifindex)
return netif;
return NULL;
@@ -96,7 +98,7 @@ static int sel_netif_insert(struct sel_netif *netif)
if (sel_netif_total >= SEL_NETIF_HASH_MAX)
return -ENOSPC;
- idx = sel_netif_hashfn(netif->nsec.ifindex);
+ idx = sel_netif_hashfn(netif->nsec.ns, netif->nsec.ifindex);
list_add_rcu(&netif->list, &sel_netif_hash[idx]);
sel_netif_total++;
@@ -120,6 +122,7 @@ static void sel_netif_destroy(struct sel_netif *netif)
/**
* sel_netif_sid_slow - Lookup the SID of a network interface using the policy
+ * @ns: the network namespace
* @ifindex: the network interface
* @sid: interface SID
*
@@ -130,7 +133,7 @@ static void sel_netif_destroy(struct sel_netif *netif)
* failure.
*
*/
-static int sel_netif_sid_slow(int ifindex, u32 *sid)
+static int sel_netif_sid_slow(struct net *ns, int ifindex, u32 *sid)
{
int ret;
struct sel_netif *netif;
@@ -140,7 +143,7 @@ static int sel_netif_sid_slow(int ifindex, u32 *sid)
/* NOTE: we always use init's network namespace since we don't
* currently support containers */
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(ns, ifindex);
if (unlikely(dev == NULL)) {
printk(KERN_WARNING
"SELinux: failure in sel_netif_sid_slow(),"
@@ -149,7 +152,7 @@ static int sel_netif_sid_slow(int ifindex, u32 *sid)
}
spin_lock_bh(&sel_netif_lock);
- netif = sel_netif_find(ifindex);
+ netif = sel_netif_find(ns, ifindex);
if (netif != NULL) {
*sid = netif->nsec.sid;
ret = 0;
@@ -163,6 +166,7 @@ static int sel_netif_sid_slow(int ifindex, u32 *sid)
ret = security_netif_sid(dev->name, &new->nsec.sid);
if (ret != 0)
goto out;
+ new->nsec.ns = ns;
new->nsec.ifindex = ifindex;
ret = sel_netif_insert(new);
if (ret != 0)
@@ -184,6 +188,7 @@ out:
/**
* sel_netif_sid - Lookup the SID of a network interface
+ * @ns: the network namespace
* @ifindex: the network interface
* @sid: interface SID
*
@@ -195,12 +200,12 @@ out:
* on failure.
*
*/
-int sel_netif_sid(int ifindex, u32 *sid)
+int sel_netif_sid(struct net *ns, int ifindex, u32 *sid)
{
struct sel_netif *netif;
rcu_read_lock();
- netif = sel_netif_find(ifindex);
+ netif = sel_netif_find(ns, ifindex);
if (likely(netif != NULL)) {
*sid = netif->nsec.sid;
rcu_read_unlock();
@@ -208,11 +213,12 @@ int sel_netif_sid(int ifindex, u32 *sid)
}
rcu_read_unlock();
- return sel_netif_sid_slow(ifindex, sid);
+ return sel_netif_sid_slow(ns, ifindex, sid);
}
/**
* sel_netif_kill - Remove an entry from the network interface table
+ * @ns: the network namespace
* @ifindex: the network interface
*
* Description:
@@ -220,13 +226,13 @@ int sel_netif_sid(int ifindex, u32 *sid)
* table if it exists.
*
*/
-static void sel_netif_kill(int ifindex)
+static void sel_netif_kill(const struct net *ns, int ifindex)
{
struct sel_netif *netif;
rcu_read_lock();
spin_lock_bh(&sel_netif_lock);
- netif = sel_netif_find(ifindex);
+ netif = sel_netif_find(ns, ifindex);
if (netif)
sel_netif_destroy(netif);
spin_unlock_bh(&sel_netif_lock);
@@ -240,7 +246,7 @@ static void sel_netif_kill(int ifindex)
* Remove all entries from the network interface table.
*
*/
-static void sel_netif_flush(void)
+void sel_netif_flush(void)
{
int idx;
struct sel_netif *netif;
@@ -252,25 +258,13 @@ static void sel_netif_flush(void)
spin_unlock_bh(&sel_netif_lock);
}
-static int sel_netif_avc_callback(u32 event)
-{
- if (event == AVC_CALLBACK_RESET) {
- sel_netif_flush();
- synchronize_net();
- }
- return 0;
-}
-
static int sel_netif_netdev_notifier_handler(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- if (dev_net(dev) != &init_net)
- return NOTIFY_DONE;
-
if (event == NETDEV_DOWN)
- sel_netif_kill(dev->ifindex);
+ sel_netif_kill(dev_net(dev), dev->ifindex);
return NOTIFY_DONE;
}
@@ -291,10 +285,6 @@ static __init int sel_netif_init(void)
register_netdevice_notifier(&sel_netif_netdev_notifier);
- err = avc_add_callback(sel_netif_avc_callback, AVC_CALLBACK_RESET);
- if (err)
- panic("avc_add_callback() failed, error %d\n", err);
-
return err;
}
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index c5454c0477c3..bb8de9d9b429 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -281,7 +281,7 @@ int sel_netnode_sid(void *addr, u16 family, u32 *sid)
* Remove all entries from the network address table.
*
*/
-static void sel_netnode_flush(void)
+void sel_netnode_flush(void)
{
unsigned int idx;
struct sel_netnode *node, *node_tmp;
@@ -298,15 +298,6 @@ static void sel_netnode_flush(void)
spin_unlock_bh(&sel_netnode_lock);
}
-static int sel_netnode_avc_callback(u32 event)
-{
- if (event == AVC_CALLBACK_RESET) {
- sel_netnode_flush();
- synchronize_net();
- }
- return 0;
-}
-
static __init int sel_netnode_init(void)
{
int iter;
@@ -320,10 +311,6 @@ static __init int sel_netnode_init(void)
sel_netnode_hash[iter].size = 0;
}
- ret = avc_add_callback(sel_netnode_avc_callback, AVC_CALLBACK_RESET);
- if (ret != 0)
- panic("avc_add_callback() failed, error %d\n", ret);
-
return ret;
}
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
index d35379781c2c..73ac6784d091 100644
--- a/security/selinux/netport.c
+++ b/security/selinux/netport.c
@@ -217,7 +217,7 @@ int sel_netport_sid(u8 protocol, u16 pnum, u32 *sid)
* Remove all entries from the network address table.
*
*/
-static void sel_netport_flush(void)
+void sel_netport_flush(void)
{
unsigned int idx;
struct sel_netport *port, *port_tmp;
@@ -234,15 +234,6 @@ static void sel_netport_flush(void)
spin_unlock_bh(&sel_netport_lock);
}
-static int sel_netport_avc_callback(u32 event)
-{
- if (event == AVC_CALLBACK_RESET) {
- sel_netport_flush();
- synchronize_net();
- }
- return 0;
-}
-
static __init int sel_netport_init(void)
{
int iter;
@@ -256,10 +247,6 @@ static __init int sel_netport_init(void)
sel_netport_hash[iter].size = 0;
}
- ret = avc_add_callback(sel_netport_avc_callback, AVC_CALLBACK_RESET);
- if (ret != 0)
- panic("avc_add_callback() failed, error %d\n", ret);
-
return ret;
}
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 855e464e92ef..4c29bcc7fad2 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -17,6 +17,7 @@
#include <linux/inet_diag.h>
#include <linux/xfrm.h>
#include <linux/audit.h>
+#include <linux/sock_diag.h>
#include "flask.h"
#include "av_permissions.h"
@@ -78,6 +79,7 @@ static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
{
{ TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
{ DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
+ { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
};
static struct nlmsg_perm nlmsg_xfrm_perms[] =
@@ -98,6 +100,13 @@ static struct nlmsg_perm nlmsg_xfrm_perms[] =
{ XFRM_MSG_FLUSHPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE },
{ XFRM_MSG_NEWAE, NETLINK_XFRM_SOCKET__NLMSG_WRITE },
{ XFRM_MSG_GETAE, NETLINK_XFRM_SOCKET__NLMSG_READ },
+ { XFRM_MSG_REPORT, NETLINK_XFRM_SOCKET__NLMSG_READ },
+ { XFRM_MSG_MIGRATE, NETLINK_XFRM_SOCKET__NLMSG_WRITE },
+ { XFRM_MSG_NEWSADINFO, NETLINK_XFRM_SOCKET__NLMSG_READ },
+ { XFRM_MSG_GETSADINFO, NETLINK_XFRM_SOCKET__NLMSG_READ },
+ { XFRM_MSG_NEWSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_WRITE },
+ { XFRM_MSG_GETSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_READ },
+ { XFRM_MSG_MAPPING, NETLINK_XFRM_SOCKET__NLMSG_READ },
};
static struct nlmsg_perm nlmsg_audit_perms[] =
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index a3dd9faa19c0..dd7466cb2021 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -24,6 +24,7 @@
#include "policydb.h"
static struct kmem_cache *avtab_node_cachep;
+static struct kmem_cache *avtab_operation_cachep;
static inline int avtab_hash(struct avtab_key *keyp, u16 mask)
{
@@ -37,11 +38,24 @@ avtab_insert_node(struct avtab *h, int hvalue,
struct avtab_key *key, struct avtab_datum *datum)
{
struct avtab_node *newnode;
+ struct avtab_operation *ops;
newnode = kmem_cache_zalloc(avtab_node_cachep, GFP_KERNEL);
if (newnode == NULL)
return NULL;
newnode->key = *key;
- newnode->datum = *datum;
+
+ if (key->specified & AVTAB_OP) {
+ ops = kmem_cache_zalloc(avtab_operation_cachep, GFP_KERNEL);
+ if (ops == NULL) {
+ kmem_cache_free(avtab_node_cachep, newnode);
+ return NULL;
+ }
+ *ops = *(datum->u.ops);
+ newnode->datum.u.ops = ops;
+ } else {
+ newnode->datum.u.data = datum->u.data;
+ }
+
if (prev) {
newnode->next = prev->next;
prev->next = newnode;
@@ -70,8 +84,11 @@ static int avtab_insert(struct avtab *h, struct avtab_key *key, struct avtab_dat
if (key->source_type == cur->key.source_type &&
key->target_type == cur->key.target_type &&
key->target_class == cur->key.target_class &&
- (specified & cur->key.specified))
+ (specified & cur->key.specified)) {
+ if (specified & AVTAB_OPNUM)
+ break;
return -EEXIST;
+ }
if (key->source_type < cur->key.source_type)
break;
if (key->source_type == cur->key.source_type &&
@@ -232,6 +249,9 @@ void avtab_destroy(struct avtab *h)
while (cur) {
temp = cur;
cur = cur->next;
+ if (temp->key.specified & AVTAB_OP)
+ kmem_cache_free(avtab_operation_cachep,
+ temp->datum.u.ops);
kmem_cache_free(avtab_node_cachep, temp);
}
h->htable[i] = NULL;
@@ -320,7 +340,13 @@ static uint16_t spec_order[] = {
AVTAB_AUDITALLOW,
AVTAB_TRANSITION,
AVTAB_CHANGE,
- AVTAB_MEMBER
+ AVTAB_MEMBER,
+ AVTAB_OPNUM_ALLOWED,
+ AVTAB_OPNUM_AUDITALLOW,
+ AVTAB_OPNUM_DONTAUDIT,
+ AVTAB_OPTYPE_ALLOWED,
+ AVTAB_OPTYPE_AUDITALLOW,
+ AVTAB_OPTYPE_DONTAUDIT
};
int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
@@ -330,10 +356,11 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
{
__le16 buf16[4];
u16 enabled;
- __le32 buf32[7];
u32 items, items2, val, vers = pol->policyvers;
struct avtab_key key;
struct avtab_datum datum;
+ struct avtab_operation ops;
+ __le32 buf32[ARRAY_SIZE(ops.op.perms)];
int i, rc;
unsigned set;
@@ -390,11 +417,15 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
printk(KERN_ERR "SELinux: avtab: entry has both access vectors and types\n");
return -EINVAL;
}
+ if (val & AVTAB_OP) {
+ printk(KERN_ERR "SELinux: avtab: entry has operations\n");
+ return -EINVAL;
+ }
for (i = 0; i < ARRAY_SIZE(spec_order); i++) {
if (val & spec_order[i]) {
key.specified = spec_order[i] | enabled;
- datum.data = le32_to_cpu(buf32[items++]);
+ datum.u.data = le32_to_cpu(buf32[items++]);
rc = insertf(a, &key, &datum, p);
if (rc)
return rc;
@@ -413,7 +444,6 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
printk(KERN_ERR "SELinux: avtab: truncated entry\n");
return rc;
}
-
items = 0;
key.source_type = le16_to_cpu(buf16[items++]);
key.target_type = le16_to_cpu(buf16[items++]);
@@ -437,14 +467,32 @@ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol,
return -EINVAL;
}
- rc = next_entry(buf32, fp, sizeof(u32));
- if (rc) {
- printk(KERN_ERR "SELinux: avtab: truncated entry\n");
- return rc;
+ if ((vers < POLICYDB_VERSION_IOCTL_OPERATIONS)
+ || !(key.specified & AVTAB_OP)) {
+ rc = next_entry(buf32, fp, sizeof(u32));
+ if (rc) {
+ printk(KERN_ERR "SELinux: avtab: truncated entry\n");
+ return rc;
+ }
+ datum.u.data = le32_to_cpu(*buf32);
+ } else {
+ memset(&ops, 0, sizeof(struct avtab_operation));
+ rc = next_entry(&ops.type, fp, sizeof(u8));
+ if (rc) {
+ printk(KERN_ERR "SELinux: avtab: truncated entry\n");
+ return rc;
+ }
+ rc = next_entry(buf32, fp, sizeof(u32)*ARRAY_SIZE(ops.op.perms));
+ if (rc) {
+ printk(KERN_ERR "SELinux: avtab: truncated entry\n");
+ return rc;
+ }
+ for (i = 0; i < ARRAY_SIZE(ops.op.perms); i++)
+ ops.op.perms[i] = le32_to_cpu(buf32[i]);
+ datum.u.ops = &ops;
}
- datum.data = le32_to_cpu(*buf32);
if ((key.specified & AVTAB_TYPE) &&
- !policydb_type_isvalid(pol, datum.data)) {
+ !policydb_type_isvalid(pol, datum.u.data)) {
printk(KERN_ERR "SELinux: avtab: invalid type\n");
return -EINVAL;
}
@@ -504,8 +552,9 @@ bad:
int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp)
{
__le16 buf16[4];
- __le32 buf32[1];
+ __le32 buf32[ARRAY_SIZE(cur->datum.u.ops->op.perms)];
int rc;
+ unsigned int i;
buf16[0] = cpu_to_le16(cur->key.source_type);
buf16[1] = cpu_to_le16(cur->key.target_type);
@@ -514,8 +563,19 @@ int avtab_write_item(struct policydb *p, struct avtab_node *cur, void *fp)
rc = put_entry(buf16, sizeof(u16), 4, fp);
if (rc)
return rc;
- buf32[0] = cpu_to_le32(cur->datum.data);
- rc = put_entry(buf32, sizeof(u32), 1, fp);
+
+ if (cur->key.specified & AVTAB_OP) {
+ rc = put_entry(&cur->datum.u.ops->type, sizeof(u8), 1, fp);
+ if (rc)
+ return rc;
+ for (i = 0; i < ARRAY_SIZE(cur->datum.u.ops->op.perms); i++)
+ buf32[i] = cpu_to_le32(cur->datum.u.ops->op.perms[i]);
+ rc = put_entry(buf32, sizeof(u32),
+ ARRAY_SIZE(cur->datum.u.ops->op.perms), fp);
+ } else {
+ buf32[0] = cpu_to_le32(cur->datum.u.data);
+ rc = put_entry(buf32, sizeof(u32), 1, fp);
+ }
if (rc)
return rc;
return 0;
@@ -548,9 +608,13 @@ void avtab_cache_init(void)
avtab_node_cachep = kmem_cache_create("avtab_node",
sizeof(struct avtab_node),
0, SLAB_PANIC, NULL);
+ avtab_operation_cachep = kmem_cache_create("avtab_operation",
+ sizeof(struct avtab_operation),
+ 0, SLAB_PANIC, NULL);
}
void avtab_cache_destroy(void)
{
kmem_cache_destroy(avtab_node_cachep);
+ kmem_cache_destroy(avtab_operation_cachep);
}
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h
index 63ce2f9e441d..97acd6fa705e 100644
--- a/security/selinux/ss/avtab.h
+++ b/security/selinux/ss/avtab.h
@@ -23,6 +23,8 @@
#ifndef _SS_AVTAB_H_
#define _SS_AVTAB_H_
+#include "security.h"
+
struct avtab_key {
u16 source_type; /* source type */
u16 target_type; /* target type */
@@ -35,13 +37,34 @@ struct avtab_key {
#define AVTAB_MEMBER 0x0020
#define AVTAB_CHANGE 0x0040
#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE)
+#define AVTAB_OPNUM_ALLOWED 0x0100
+#define AVTAB_OPNUM_AUDITALLOW 0x0200
+#define AVTAB_OPNUM_DONTAUDIT 0x0400
+#define AVTAB_OPNUM (AVTAB_OPNUM_ALLOWED | \
+ AVTAB_OPNUM_AUDITALLOW | \
+ AVTAB_OPNUM_DONTAUDIT)
+#define AVTAB_OPTYPE_ALLOWED 0x1000
+#define AVTAB_OPTYPE_AUDITALLOW 0x2000
+#define AVTAB_OPTYPE_DONTAUDIT 0x4000
+#define AVTAB_OPTYPE (AVTAB_OPTYPE_ALLOWED | \
+ AVTAB_OPTYPE_AUDITALLOW | \
+ AVTAB_OPTYPE_DONTAUDIT)
+#define AVTAB_OP (AVTAB_OPNUM | AVTAB_OPTYPE)
#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for use in cond_avtab */
#define AVTAB_ENABLED 0x8000 /* reserved for use in cond_avtab */
u16 specified; /* what field is specified */
};
+struct avtab_operation {
+ u8 type;
+ struct operation_perm op;
+};
+
struct avtab_datum {
- u32 data; /* access vector or type value */
+ union {
+ u32 data; /* access vector or type value */
+ struct avtab_operation *ops; /* ioctl operations */
+ } u;
};
struct avtab_node {
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 377d148e7157..16651c7a1541 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -15,6 +15,7 @@
#include "security.h"
#include "conditional.h"
+#include "services.h"
/*
* cond_evaluate_expr evaluates a conditional expr
@@ -617,21 +618,39 @@ int cond_write_list(struct policydb *p, struct cond_node *list, void *fp)
return 0;
}
+
+void cond_compute_operation(struct avtab *ctab, struct avtab_key *key,
+ struct operation_decision *od)
+{
+ struct avtab_node *node;
+
+ if (!ctab || !key || !od)
+ return;
+
+ for (node = avtab_search_node(ctab, key); node;
+ node = avtab_search_node_next(node, key->specified)) {
+ if (node->key.specified & AVTAB_ENABLED)
+ services_compute_operation_num(od, node);
+ }
+ return;
+
+}
/* Determine whether additional permissions are granted by the conditional
* av table, and if so, add them to the result
*/
-void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd)
+void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
+ struct av_decision *avd, struct operation *ops)
{
struct avtab_node *node;
- if (!ctab || !key || !avd)
+ if (!ctab || !key || !avd || !ops)
return;
for (node = avtab_search_node(ctab, key); node;
node = avtab_search_node_next(node, key->specified)) {
if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) ==
(node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED)))
- avd->allowed |= node->datum.data;
+ avd->allowed |= node->datum.u.data;
if ((u16)(AVTAB_AUDITDENY|AVTAB_ENABLED) ==
(node->key.specified & (AVTAB_AUDITDENY|AVTAB_ENABLED)))
/* Since a '0' in an auditdeny mask represents a
@@ -639,10 +658,13 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi
* the '&' operand to ensure that all '0's in the mask
* are retained (much unlike the allow and auditallow cases).
*/
- avd->auditdeny &= node->datum.data;
+ avd->auditdeny &= node->datum.u.data;
if ((u16)(AVTAB_AUDITALLOW|AVTAB_ENABLED) ==
(node->key.specified & (AVTAB_AUDITALLOW|AVTAB_ENABLED)))
- avd->auditallow |= node->datum.data;
+ avd->auditallow |= node->datum.u.data;
+ if ((node->key.specified & AVTAB_ENABLED) &&
+ (node->key.specified & AVTAB_OP))
+ services_compute_operation_type(ops, node);
}
return;
}
diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h
index 4d1f87466508..80ee2bb20eee 100644
--- a/security/selinux/ss/conditional.h
+++ b/security/selinux/ss/conditional.h
@@ -73,8 +73,10 @@ int cond_read_list(struct policydb *p, void *fp);
int cond_write_bool(void *key, void *datum, void *ptr);
int cond_write_list(struct policydb *p, struct cond_node *list, void *fp);
-void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decision *avd);
-
+void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
+ struct av_decision *avd, struct operation *ops);
+void cond_compute_operation(struct avtab *ctab, struct avtab_key *key,
+ struct operation_decision *od);
int evaluate_cond_node(struct policydb *p, struct cond_node *node);
#endif /* _CONDITIONAL_H_ */
diff --git a/security/selinux/ss/constraint.h b/security/selinux/ss/constraint.h
index 149dda731fd3..96fd947c494b 100644
--- a/security/selinux/ss/constraint.h
+++ b/security/selinux/ss/constraint.h
@@ -48,6 +48,7 @@ struct constraint_expr {
u32 op; /* operator */
struct ebitmap names; /* names */
+ struct type_set *type_names;
struct constraint_expr *next; /* next expression */
};
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index bcdca73033f3..a683475e1635 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -143,6 +143,16 @@ static struct policydb_compat_info policydb_compat[] = {
.sym_num = SYM_NUM,
.ocon_num = OCON_NUM,
},
+ {
+ .version = POLICYDB_VERSION_CONSTRAINT_NAMES,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
+ },
+ {
+ .version = POLICYDB_VERSION_IOCTL_OPERATIONS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
+ },
};
static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -613,6 +623,19 @@ static int common_destroy(void *key, void *datum, void *p)
return 0;
}
+static void constraint_expr_destroy(struct constraint_expr *expr)
+{
+ if (expr) {
+ ebitmap_destroy(&expr->names);
+ if (expr->type_names) {
+ ebitmap_destroy(&expr->type_names->types);
+ ebitmap_destroy(&expr->type_names->negset);
+ kfree(expr->type_names);
+ }
+ kfree(expr);
+ }
+}
+
static int cls_destroy(void *key, void *datum, void *p)
{
struct class_datum *cladatum;
@@ -628,10 +651,9 @@ static int cls_destroy(void *key, void *datum, void *p)
while (constraint) {
e = constraint->expr;
while (e) {
- ebitmap_destroy(&e->names);
etmp = e;
e = e->next;
- kfree(etmp);
+ constraint_expr_destroy(etmp);
}
ctemp = constraint;
constraint = constraint->next;
@@ -642,16 +664,14 @@ static int cls_destroy(void *key, void *datum, void *p)
while (constraint) {
e = constraint->expr;
while (e) {
- ebitmap_destroy(&e->names);
etmp = e;
e = e->next;
- kfree(etmp);
+ constraint_expr_destroy(etmp);
}
ctemp = constraint;
constraint = constraint->next;
kfree(ctemp);
}
-
kfree(cladatum->comkey);
}
kfree(datum);
@@ -1156,8 +1176,34 @@ bad:
return rc;
}
-static int read_cons_helper(struct constraint_node **nodep, int ncons,
- int allowxtarget, void *fp)
+static void type_set_init(struct type_set *t)
+{
+ ebitmap_init(&t->types);
+ ebitmap_init(&t->negset);
+}
+
+static int type_set_read(struct type_set *t, void *fp)
+{
+ __le32 buf[1];
+ int rc;
+
+ if (ebitmap_read(&t->types, fp))
+ return -EINVAL;
+ if (ebitmap_read(&t->negset, fp))
+ return -EINVAL;
+
+ rc = next_entry(buf, fp, sizeof(u32));
+ if (rc < 0)
+ return -EINVAL;
+ t->flags = le32_to_cpu(buf[0]);
+
+ return 0;
+}
+
+
+static int read_cons_helper(struct policydb *p,
+ struct constraint_node **nodep,
+ int ncons, int allowxtarget, void *fp)
{
struct constraint_node *c, *lc;
struct constraint_expr *e, *le;
@@ -1225,6 +1271,18 @@ static int read_cons_helper(struct constraint_node **nodep, int ncons,
rc = ebitmap_read(&e->names, fp);
if (rc)
return rc;
+ if (p->policyvers >=
+ POLICYDB_VERSION_CONSTRAINT_NAMES) {
+ e->type_names = kzalloc(sizeof
+ (*e->type_names),
+ GFP_KERNEL);
+ if (!e->type_names)
+ return -ENOMEM;
+ type_set_init(e->type_names);
+ rc = type_set_read(e->type_names, fp);
+ if (rc)
+ return rc;
+ }
break;
default:
return -EINVAL;
@@ -1301,7 +1359,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
goto bad;
}
- rc = read_cons_helper(&cladatum->constraints, ncons, 0, fp);
+ rc = read_cons_helper(p, &cladatum->constraints, ncons, 0, fp);
if (rc)
goto bad;
@@ -1311,7 +1369,8 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
if (rc)
goto bad;
ncons = le32_to_cpu(buf[0]);
- rc = read_cons_helper(&cladatum->validatetrans, ncons, 1, fp);
+ rc = read_cons_helper(p, &cladatum->validatetrans,
+ ncons, 1, fp);
if (rc)
goto bad;
}
@@ -2762,6 +2821,24 @@ static int common_write(void *vkey, void *datum, void *ptr)
return 0;
}
+static int type_set_write(struct type_set *t, void *fp)
+{
+ int rc;
+ __le32 buf[1];
+
+ if (ebitmap_write(&t->types, fp))
+ return -EINVAL;
+ if (ebitmap_write(&t->negset, fp))
+ return -EINVAL;
+
+ buf[0] = cpu_to_le32(t->flags);
+ rc = put_entry(buf, sizeof(u32), 1, fp);
+ if (rc)
+ return -EINVAL;
+
+ return 0;
+}
+
static int write_cons_helper(struct policydb *p, struct constraint_node *node,
void *fp)
{
@@ -2793,6 +2870,12 @@ static int write_cons_helper(struct policydb *p, struct constraint_node *node,
rc = ebitmap_write(&e->names, fp);
if (rc)
return rc;
+ if (p->policyvers >=
+ POLICYDB_VERSION_CONSTRAINT_NAMES) {
+ rc = type_set_write(e->type_names, fp);
+ if (rc)
+ return rc;
+ }
break;
default:
break;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index da637471d4ce..725d5945a97e 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -154,6 +154,17 @@ struct cond_bool_datum {
struct cond_node;
/*
+ * type set preserves data needed to determine constraint info from
+ * policy source. This is not used by the kernel policy but allows
+ * utilities such as audit2allow to determine constraint denials.
+ */
+struct type_set {
+ struct ebitmap types;
+ struct ebitmap negset;
+ u32 flags;
+};
+
+/*
* The configuration data includes security contexts for
* initial SIDs, unlabeled file systems, TCP and UDP port numbers,
* network interfaces, and nodes. This structure stores the
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 18caa16de27b..88178da2b834 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -92,9 +92,10 @@ static int context_struct_to_string(struct context *context, char **scontext,
u32 *scontext_len);
static void context_struct_compute_av(struct context *scontext,
- struct context *tcontext,
- u16 tclass,
- struct av_decision *avd);
+ struct context *tcontext,
+ u16 tclass,
+ struct av_decision *avd,
+ struct operation *ops);
struct selinux_mapping {
u16 value; /* policy value */
@@ -564,7 +565,8 @@ static void type_attribute_bounds_av(struct context *scontext,
context_struct_compute_av(&lo_scontext,
tcontext,
tclass,
- &lo_avd);
+ &lo_avd,
+ NULL);
if ((lo_avd.allowed & avd->allowed) == avd->allowed)
return; /* no masked permission */
masked = ~lo_avd.allowed & avd->allowed;
@@ -579,7 +581,8 @@ static void type_attribute_bounds_av(struct context *scontext,
context_struct_compute_av(scontext,
&lo_tcontext,
tclass,
- &lo_avd);
+ &lo_avd,
+ NULL);
if ((lo_avd.allowed & avd->allowed) == avd->allowed)
return; /* no masked permission */
masked = ~lo_avd.allowed & avd->allowed;
@@ -595,7 +598,8 @@ static void type_attribute_bounds_av(struct context *scontext,
context_struct_compute_av(&lo_scontext,
&lo_tcontext,
tclass,
- &lo_avd);
+ &lo_avd,
+ NULL);
if ((lo_avd.allowed & avd->allowed) == avd->allowed)
return; /* no masked permission */
masked = ~lo_avd.allowed & avd->allowed;
@@ -611,14 +615,39 @@ static void type_attribute_bounds_av(struct context *scontext,
}
}
+/* flag ioctl types that have operation permissions */
+void services_compute_operation_type(
+ struct operation *ops,
+ struct avtab_node *node)
+{
+ u8 type;
+ unsigned int i;
+
+ if (node->key.specified & AVTAB_OPTYPE) {
+ /* if allowing one or more complete types */
+ for (i = 0; i < ARRAY_SIZE(ops->type); i++)
+ ops->type[i] |= node->datum.u.ops->op.perms[i];
+ } else {
+ /* if allowing operations within a type */
+ type = node->datum.u.ops->type;
+ security_operation_set(ops->type, type);
+ }
+
+ /* If no ioctl commands are allowed, ignore auditallow and auditdeny */
+ if (node->key.specified & AVTAB_OPTYPE_ALLOWED ||
+ node->key.specified & AVTAB_OPNUM_ALLOWED)
+ ops->len = 1;
+}
+
/*
- * Compute access vectors based on a context structure pair for
- * the permissions in a particular class.
+ * Compute access vectors and operations ranges based on a context
+ * structure pair for the permissions in a particular class.
*/
static void context_struct_compute_av(struct context *scontext,
- struct context *tcontext,
- u16 tclass,
- struct av_decision *avd)
+ struct context *tcontext,
+ u16 tclass,
+ struct av_decision *avd,
+ struct operation *ops)
{
struct constraint_node *constraint;
struct role_allow *ra;
@@ -632,6 +661,10 @@ static void context_struct_compute_av(struct context *scontext,
avd->allowed = 0;
avd->auditallow = 0;
avd->auditdeny = 0xffffffff;
+ if (ops) {
+ memset(&ops->type, 0, sizeof(ops->type));
+ ops->len = 0;
+ }
if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) {
if (printk_ratelimit())
@@ -646,7 +679,7 @@ static void context_struct_compute_av(struct context *scontext,
* this permission check, then use it.
*/
avkey.target_class = tclass;
- avkey.specified = AVTAB_AV;
+ avkey.specified = AVTAB_AV | AVTAB_OP;
sattr = flex_array_get(policydb.type_attr_map_array, scontext->type - 1);
BUG_ON(!sattr);
tattr = flex_array_get(policydb.type_attr_map_array, tcontext->type - 1);
@@ -659,15 +692,17 @@ static void context_struct_compute_av(struct context *scontext,
node;
node = avtab_search_node_next(node, avkey.specified)) {
if (node->key.specified == AVTAB_ALLOWED)
- avd->allowed |= node->datum.data;
+ avd->allowed |= node->datum.u.data;
else if (node->key.specified == AVTAB_AUDITALLOW)
- avd->auditallow |= node->datum.data;
+ avd->auditallow |= node->datum.u.data;
else if (node->key.specified == AVTAB_AUDITDENY)
- avd->auditdeny &= node->datum.data;
+ avd->auditdeny &= node->datum.u.data;
+ else if (ops && (node->key.specified & AVTAB_OP))
+ services_compute_operation_type(ops, node);
}
/* Check conditional av table for additional permissions */
- cond_compute_av(&policydb.te_cond_avtab, &avkey, avd);
+ cond_compute_av(&policydb.te_cond_avtab, &avkey, avd, ops);
}
}
@@ -898,13 +933,138 @@ static void avd_init(struct av_decision *avd)
avd->flags = 0;
}
+void services_compute_operation_num(struct operation_decision *od,
+ struct avtab_node *node)
+{
+ unsigned int i;
+ if (node->key.specified & AVTAB_OPNUM) {
+ if (od->type != node->datum.u.ops->type)
+ return;
+ } else {
+ if (!security_operation_test(node->datum.u.ops->op.perms,
+ od->type))
+ return;
+ }
+
+ if (node->key.specified == AVTAB_OPTYPE_ALLOWED) {
+ od->specified |= OPERATION_ALLOWED;
+ memset(od->allowed->perms, 0xff,
+ sizeof(od->allowed->perms));
+ } else if (node->key.specified == AVTAB_OPTYPE_AUDITALLOW) {
+ od->specified |= OPERATION_AUDITALLOW;
+ memset(od->auditallow->perms, 0xff,
+ sizeof(od->auditallow->perms));
+ } else if (node->key.specified == AVTAB_OPTYPE_DONTAUDIT) {
+ od->specified |= OPERATION_DONTAUDIT;
+ memset(od->dontaudit->perms, 0xff,
+ sizeof(od->dontaudit->perms));
+ } else if (node->key.specified == AVTAB_OPNUM_ALLOWED) {
+ od->specified |= OPERATION_ALLOWED;
+ for (i = 0; i < ARRAY_SIZE(od->allowed->perms); i++)
+ od->allowed->perms[i] |=
+ node->datum.u.ops->op.perms[i];
+ } else if (node->key.specified == AVTAB_OPNUM_AUDITALLOW) {
+ od->specified |= OPERATION_AUDITALLOW;
+ for (i = 0; i < ARRAY_SIZE(od->auditallow->perms); i++)
+ od->auditallow->perms[i] |=
+ node->datum.u.ops->op.perms[i];
+ } else if (node->key.specified == AVTAB_OPNUM_DONTAUDIT) {
+ od->specified |= OPERATION_DONTAUDIT;
+ for (i = 0; i < ARRAY_SIZE(od->dontaudit->perms); i++)
+ od->dontaudit->perms[i] |=
+ node->datum.u.ops->op.perms[i];
+ } else {
+ BUG();
+ }
+}
+
+void security_compute_operation(u32 ssid,
+ u32 tsid,
+ u16 orig_tclass,
+ u8 type,
+ struct operation_decision *od)
+{
+ u16 tclass;
+ struct context *scontext, *tcontext;
+ struct avtab_key avkey;
+ struct avtab_node *node;
+ struct ebitmap *sattr, *tattr;
+ struct ebitmap_node *snode, *tnode;
+ unsigned int i, j;
+
+ od->type = type;
+ od->specified = 0;
+ memset(od->allowed->perms, 0, sizeof(od->allowed->perms));
+ memset(od->auditallow->perms, 0, sizeof(od->auditallow->perms));
+ memset(od->dontaudit->perms, 0, sizeof(od->dontaudit->perms));
+
+ read_lock(&policy_rwlock);
+ if (!ss_initialized)
+ goto allow;
+
+ scontext = sidtab_search(&sidtab, ssid);
+ if (!scontext) {
+ printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n",
+ __func__, ssid);
+ goto out;
+ }
+
+ tcontext = sidtab_search(&sidtab, tsid);
+ if (!tcontext) {
+ printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n",
+ __func__, tsid);
+ goto out;
+ }
+
+ tclass = unmap_class(orig_tclass);
+ if (unlikely(orig_tclass && !tclass)) {
+ if (policydb.allow_unknown)
+ goto allow;
+ goto out;
+ }
+
+
+ if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) {
+ pr_warn_ratelimited("SELinux: Invalid class %hu\n", tclass);
+ goto out;
+ }
+
+ avkey.target_class = tclass;
+ avkey.specified = AVTAB_OP;
+ sattr = flex_array_get(policydb.type_attr_map_array,
+ scontext->type - 1);
+ BUG_ON(!sattr);
+ tattr = flex_array_get(policydb.type_attr_map_array,
+ tcontext->type - 1);
+ BUG_ON(!tattr);
+ ebitmap_for_each_positive_bit(sattr, snode, i) {
+ ebitmap_for_each_positive_bit(tattr, tnode, j) {
+ avkey.source_type = i + 1;
+ avkey.target_type = j + 1;
+ for (node = avtab_search_node(&policydb.te_avtab, &avkey);
+ node;
+ node = avtab_search_node_next(node, avkey.specified))
+ services_compute_operation_num(od, node);
+
+ cond_compute_operation(&policydb.te_cond_avtab,
+ &avkey, od);
+ }
+ }
+out:
+ read_unlock(&policy_rwlock);
+ return;
+allow:
+ memset(od->allowed->perms, 0xff, sizeof(od->allowed->perms));
+ goto out;
+}
/**
* security_compute_av - Compute access vector decisions.
* @ssid: source security identifier
* @tsid: target security identifier
* @tclass: target security class
* @avd: access vector decisions
+ * @od: operation decisions
*
* Compute a set of access vector decisions based on the
* SID pair (@ssid, @tsid) for the permissions in @tclass.
@@ -912,13 +1072,15 @@ static void avd_init(struct av_decision *avd)
void security_compute_av(u32 ssid,
u32 tsid,
u16 orig_tclass,
- struct av_decision *avd)
+ struct av_decision *avd,
+ struct operation *ops)
{
u16 tclass;
struct context *scontext = NULL, *tcontext = NULL;
read_lock(&policy_rwlock);
avd_init(avd);
+ ops->len = 0;
if (!ss_initialized)
goto allow;
@@ -946,7 +1108,7 @@ void security_compute_av(u32 ssid,
goto allow;
goto out;
}
- context_struct_compute_av(scontext, tcontext, tclass, avd);
+ context_struct_compute_av(scontext, tcontext, tclass, avd, ops);
map_decision(orig_tclass, avd, policydb.allow_unknown);
out:
read_unlock(&policy_rwlock);
@@ -992,7 +1154,7 @@ void security_compute_av_user(u32 ssid,
goto out;
}
- context_struct_compute_av(scontext, tcontext, tclass, avd);
+ context_struct_compute_av(scontext, tcontext, tclass, avd, NULL);
out:
read_unlock(&policy_rwlock);
return;
@@ -1512,7 +1674,7 @@ static int security_compute_sid(u32 ssid,
if (avdatum) {
/* Use the type from the type transition/member/change rule. */
- newcontext.type = avdatum->data;
+ newcontext.type = avdatum->u.data;
}
/* if we have an objname this is a file trans check so check those rules */
diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h
index e8d907e903cd..569757484d05 100644
--- a/security/selinux/ss/services.h
+++ b/security/selinux/ss/services.h
@@ -11,5 +11,11 @@
extern struct policydb policydb;
+void services_compute_operation_type(struct operation *ops,
+ struct avtab_node *node);
+
+void services_compute_operation_num(struct operation_decision *od,
+ struct avtab_node *node);
+
#endif /* _SS_SERVICES_H_ */
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 3fdf998ad057..0d430adee88d 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -490,9 +490,6 @@ static int snd_compress_check_input(struct snd_compr_params *params)
if (params->codec.ch_in == 0 || params->codec.ch_out == 0)
return -EINVAL;
- if (!(params->codec.sample_rate & SNDRV_PCM_RATE_8000_192000))
- return -EINVAL;
-
return 0;
}
diff --git a/tools/gator/daemon/Android.mk b/tools/gator/daemon/Android.mk
index c5c8bdc1ff29..99342115424d 100644
--- a/tools/gator/daemon/Android.mk
+++ b/tools/gator/daemon/Android.mk
@@ -9,6 +9,7 @@ XML_H := $(shell cd $(LOCAL_PATH) && make events_xml.h defaults_xml.h SrcMd5.cpp
LOCAL_SRC_FILES := \
AnnotateListener.cpp \
+ AtraceDriver.cpp \
Buffer.cpp \
CCNDriver.cpp \
CapturedXML.cpp \
@@ -20,11 +21,11 @@ LOCAL_SRC_FILES := \
DriverSource.cpp \
DynBuf.cpp \
EventsXML.cpp \
+ ExternalDriver.cpp \
ExternalSource.cpp \
FSDriver.cpp \
Fifo.cpp \
FtraceDriver.cpp \
- FtraceSource.cpp \
HwmonDriver.cpp \
KMod.cpp \
LocalCapture.cpp \
diff --git a/tools/gator/daemon/AtraceDriver.cpp b/tools/gator/daemon/AtraceDriver.cpp
new file mode 100644
index 000000000000..78fe58a5c46a
--- /dev/null
+++ b/tools/gator/daemon/AtraceDriver.cpp
@@ -0,0 +1,136 @@
+/**
+ * Copyright (C) ARM Limited 2014-2015. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "AtraceDriver.h"
+
+#include <unistd.h>
+
+/*
+#include <regex.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "DriverSource.h"
+#include "Setup.h"
+*/
+
+#include "Logging.h"
+#include "OlyUtility.h"
+#include "SessionData.h"
+
+class AtraceCounter : public DriverCounter {
+public:
+ AtraceCounter(DriverCounter *next, char *name, int flag);
+ ~AtraceCounter();
+
+ int getFlag() const { return mFlag; }
+
+private:
+ const int mFlag;
+
+ // Intentionally unimplemented
+ AtraceCounter(const AtraceCounter &);
+ AtraceCounter &operator=(const AtraceCounter &);
+};
+
+AtraceCounter::AtraceCounter(DriverCounter *next, char *name, int flag) : DriverCounter(next, name), mFlag(flag) {
+}
+
+AtraceCounter::~AtraceCounter() {
+}
+
+AtraceDriver::AtraceDriver() : mSupported(false), mNotifyPath() {
+}
+
+AtraceDriver::~AtraceDriver() {
+}
+
+void AtraceDriver::readEvents(mxml_node_t *const xml) {
+ if (!gSessionData->mFtraceDriver.isSupported()) {
+ logg->logMessage("Atrace support disabled, ftrace support is required");
+ return;
+ }
+ if (access("/system/bin/setprop", X_OK) != 0) {
+ logg->logMessage("Atrace support disabled, setprop is not found, this is not an Android target");
+ return;
+ }
+
+ if (util->getApplicationFullPath(mNotifyPath, sizeof(mNotifyPath)) != 0) {
+ logg->logMessage("Unable to determine the full path of gatord, the cwd will be used");
+ }
+ strncat(mNotifyPath, "notify.dex", sizeof(mNotifyPath) - strlen(mNotifyPath) - 1);
+ if (access(mNotifyPath, W_OK) != 0) {
+ logg->logMessage("Atrace support disabled, unable to locate notify.dex");
+ return;
+ }
+
+ mSupported = true;
+
+ mxml_node_t *node = xml;
+ while (true) {
+ node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
+ if (node == NULL) {
+ break;
+ }
+ const char *counter = mxmlElementGetAttr(node, "counter");
+ if (counter == NULL) {
+ continue;
+ }
+
+ if (strncmp(counter, "atrace_", 7) != 0) {
+ continue;
+ }
+
+ const char *flag = mxmlElementGetAttr(node, "flag");
+ if (flag == NULL) {
+ logg->logError("The atrace counter %s is missing the required flag attribute", counter);
+ handleException();
+ }
+ setCounters(new AtraceCounter(getCounters(), strdup(counter), strtol(flag, NULL, 16)));
+ }
+}
+
+void AtraceDriver::setAtrace(const int flags) {
+ logg->logMessage("Setting atrace flags to %i\n", flags);
+ pid_t pid = fork();
+ if (pid < 0) {
+ logg->logError("fork failed");
+ handleException();
+ } else if (pid == 0) {
+ char buf[1<<10];
+ snprintf(buf, sizeof(buf), "setprop debug.atrace.tags.enableflags %i; "
+ "dalvikvm -cp %s com.android.internal.util.WithFramework Notify", flags, mNotifyPath);
+ execlp("sh", "sh", "-c", buf, NULL);
+ exit(0);
+ }
+}
+
+void AtraceDriver::start() {
+ if (!mSupported) {
+ return;
+ }
+
+ int flags = 0;
+ for (AtraceCounter *counter = static_cast<AtraceCounter *>(getCounters()); counter != NULL; counter = static_cast<AtraceCounter *>(counter->getNext())) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ flags |= counter->getFlag();
+ }
+
+ setAtrace(flags);
+}
+
+void AtraceDriver::stop() {
+ if (!mSupported) {
+ return;
+ }
+
+ setAtrace(0);
+}
diff --git a/tools/gator/daemon/AtraceDriver.h b/tools/gator/daemon/AtraceDriver.h
new file mode 100644
index 000000000000..0a068580caf3
--- /dev/null
+++ b/tools/gator/daemon/AtraceDriver.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) ARM Limited 2015. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ATRACEDRIVER_H
+#define ATRACEDRIVER_H
+
+#include "mxml/mxml.h"
+
+#include "Driver.h"
+
+class AtraceDriver : public SimpleDriver {
+public:
+ AtraceDriver();
+ ~AtraceDriver();
+
+ void readEvents(mxml_node_t *const xml);
+
+ void start();
+ void stop();
+
+ bool isSupported() const { return mSupported; }
+
+private:
+ void setAtrace(const int flags);
+
+ bool mSupported;
+ char mNotifyPath[256];
+
+ // Intentionally unimplemented
+ AtraceDriver(const AtraceDriver &);
+ AtraceDriver &operator=(const AtraceDriver &);
+};
+
+#endif // ATRACEDRIVER_H
diff --git a/tools/gator/daemon/Buffer.cpp b/tools/gator/daemon/Buffer.cpp
index c4ced9f607f9..26deb2e43723 100644
--- a/tools/gator/daemon/Buffer.cpp
+++ b/tools/gator/daemon/Buffer.cpp
@@ -143,6 +143,16 @@ int Buffer::contiguousSpaceAvailable() const {
}
}
+bool Buffer::hasUncommittedMessages() const {
+ const int typeLength = gSessionData->mLocalCapture ? 0 : 1;
+ int length = mWritePos - mCommitPos;
+ if (length < 0) {
+ length += mSize;
+ }
+ length = length - typeLength - sizeof(int32_t);
+ return length > FRAME_HEADER_SIZE;
+}
+
void Buffer::commit(const uint64_t time, const bool force) {
// post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload
const int typeLength = gSessionData->mLocalCapture ? 0 : 1;
@@ -263,7 +273,7 @@ void Buffer::frame() {
}
}
-void Buffer::summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname) {
+void Buffer::summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname, const long pageSize) {
packInt(MESSAGE_SUMMARY);
writeString(NEWLINE_CANARY);
packInt64(timestamp);
@@ -271,6 +281,10 @@ void Buffer::summary(const uint64_t currTime, const int64_t timestamp, const int
packInt64(monotonicDelta);
writeString("uname");
writeString(uname);
+ writeString("PAGESIZE");
+ char buf[32];
+ snprintf(buf, sizeof(buf), "%li", pageSize);
+ writeString(buf);
writeString("");
check(currTime);
}
diff --git a/tools/gator/daemon/Buffer.h b/tools/gator/daemon/Buffer.h
index 13c44e1fd359..ea250a910e45 100644
--- a/tools/gator/daemon/Buffer.h
+++ b/tools/gator/daemon/Buffer.h
@@ -36,11 +36,12 @@ public:
int bytesAvailable() const;
int contiguousSpaceAvailable() const;
+ bool hasUncommittedMessages() const;
void commit(const uint64_t time, const bool force = false);
void check(const uint64_t time);
// Summary messages
- void summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname);
+ void summary(const uint64_t currTime, const int64_t timestamp, const int64_t uptime, const int64_t monotonicDelta, const char *const uname, const long pageSize);
void coreName(const uint64_t currTime, const int core, const int cpuid, const char *const name);
// Block Counter messages
diff --git a/tools/gator/daemon/CCNDriver.cpp b/tools/gator/daemon/CCNDriver.cpp
index d77513acc4e4..24eda03b8091 100644
--- a/tools/gator/daemon/CCNDriver.cpp
+++ b/tools/gator/daemon/CCNDriver.cpp
@@ -19,6 +19,7 @@
#include "Config.h"
#include "DriverSource.h"
#include "Logging.h"
+#include "SessionData.h"
static const char TAG_CATEGORY[] = "category";
static const char TAG_COUNTER_SET[] = "counter_set";
@@ -44,6 +45,8 @@ static const char RNI_REGION[] = "RN-I_Region";
static const char SBAS_REGION[] = "SBAS_Region";
static const char CCN_5XX[] = "CCN-5xx";
#define ARM_CCN_5XX "ARM_CCN_5XX_"
+#define CCN_COUNT 8
+static const char ARM_CCN_5XX_CNT[] = ARM_CCN_5XX "cnt";
static const char *const VC_TYPES[] = { "REQ", "RSP", "SNP", "DAT" };
static const char *const XP_EVENT_NAMES[] = { NULL, "H-bit", "S-bit", "P-Cnt", "TknV" };
@@ -173,12 +176,12 @@ int CCNDriver::writeCounters(mxml_node_t *const) const {
void CCNDriver::writeEvents(mxml_node_t *const root) const {
mxml_node_t *const counter_set = mxmlNewElement(root, TAG_COUNTER_SET);
- mxmlElementSetAttr(counter_set, ATTR_NAME, ARM_CCN_5XX "cnt");
- mxmlElementSetAttr(counter_set, ATTR_COUNT, "8");
+ mxmlElementSetAttr(counter_set, ATTR_NAME, ARM_CCN_5XX_CNT);
+ mxmlElementSetAttr(counter_set, ATTR_COUNT, STRIFY(CCN_COUNT));
mxml_node_t *const category = mxmlNewElement(root, TAG_CATEGORY);
mxmlElementSetAttr(category, ATTR_NAME, CCN_5XX);
- mxmlElementSetAttr(category, TAG_COUNTER_SET, ARM_CCN_5XX "cnt");
+ mxmlElementSetAttr(category, TAG_COUNTER_SET, ARM_CCN_5XX_CNT);
mxml_node_t *const clock_event = mxmlNewElement(category, TAG_EVENT);
mxmlElementSetAttr(clock_event, ATTR_COUNTER, ARM_CCN_5XX "ccnt");
@@ -292,3 +295,35 @@ void CCNDriver::writeEvents(mxml_node_t *const root) const {
}
}
}
+
+void CCNDriver::validateCounters() const {
+ int counts[CCN_COUNT][2] = { { 0 } };
+ const unsigned int mask = getConfig(0xff, 0xff, 0, 0, 0);
+
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->mCounters); ++i) {
+ const Counter *const counter = &gSessionData->mCounters[i];
+
+ if (!counter->isEnabled()) {
+ continue;
+ }
+
+ if (strncmp(counter->getType(), ARM_CCN_5XX_CNT, sizeof(ARM_CCN_5XX_CNT) - 1) == 0) {
+ const int node = counter->getEvent() & mask;
+
+ for (int j = 0; j < ARRAY_LENGTH(counts); ++j) {
+ if (counts[j][0] == 0) {
+ counts[j][0] = node;
+ }
+ if (counts[j][0] == node) {
+ ++counts[j][1];
+ if (counts[j][1] > 4) {
+ if (asprintf(&gSessionData->mCountersError, "More than 4 events are assigned to the same CCN node") <= 0) {
+ logg->logError("asprintf failed");
+ handleException();
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/tools/gator/daemon/CCNDriver.h b/tools/gator/daemon/CCNDriver.h
index 06ac33f07a36..8a155a7fc837 100644
--- a/tools/gator/daemon/CCNDriver.h
+++ b/tools/gator/daemon/CCNDriver.h
@@ -24,6 +24,8 @@ public:
int writeCounters(mxml_node_t *const root) const;
void writeEvents(mxml_node_t *const) const;
+ void validateCounters() const;
+
private:
enum NodeType {
NT_UNKNOWN,
diff --git a/tools/gator/daemon/CapturedXML.cpp b/tools/gator/daemon/CapturedXML.cpp
index 1854c77dcb0e..e87c909afd56 100644
--- a/tools/gator/daemon/CapturedXML.cpp
+++ b/tools/gator/daemon/CapturedXML.cpp
@@ -32,7 +32,7 @@ mxml_node_t* CapturedXML::getTree(bool includeTime) {
captured = mxmlNewElement(xml, "captured");
mxmlElementSetAttr(captured, "version", "1");
- if (gSessionData->perf.isSetup()) {
+ if (gSessionData->mPerf.isSetup()) {
mxmlElementSetAttr(captured, "type", "Perf");
}
mxmlElementSetAttrf(captured, "protocol", "%d", PROTOCOL_VERSION);
diff --git a/tools/gator/daemon/Child.cpp b/tools/gator/daemon/Child.cpp
index a19e9cf86805..551ec6bd25f6 100644
--- a/tools/gator/daemon/Child.cpp
+++ b/tools/gator/daemon/Child.cpp
@@ -20,7 +20,6 @@
#include "Driver.h"
#include "DriverSource.h"
#include "ExternalSource.h"
-#include "FtraceSource.h"
#include "LocalCapture.h"
#include "Logging.h"
#include "OlySocket.h"
@@ -35,7 +34,6 @@ static sem_t haltPipeline, senderThreadStarted, startProfile, senderSem; // Shar
static Source *primarySource = NULL;
static Source *externalSource = NULL;
static Source *userSpaceSource = NULL;
-static Source *ftraceSource = NULL;
static Sender* sender = NULL; // Shared by Child.cpp and spawned threads
Child* child = NULL; // shared by Child.cpp and main.cpp
@@ -44,7 +42,8 @@ void handleException() {
if (child && child->numExceptions++ > 0) {
// it is possible one of the below functions itself can cause an exception, thus allow only one exception
logg->logMessage("Received multiple exceptions, terminating the child");
- exit(1);
+ // Something is really wrong, exit immediately
+ _exit(1);
}
fprintf(stderr, "%s", logg->getLastError());
@@ -67,8 +66,9 @@ void handleException() {
}
}
- if (gSessionData->mLocalCapture)
+ if (gSessionData->mLocalCapture) {
cleanUp();
+ }
exit(1);
}
@@ -152,7 +152,6 @@ static void *senderThread(void *) {
while (!externalSource->isDone() ||
(userSpaceSource != NULL && !userSpaceSource->isDone()) ||
- (ftraceSource != NULL && !ftraceSource->isDone()) ||
!primarySource->isDone()) {
sem_wait(&senderSem);
@@ -160,9 +159,6 @@ static void *senderThread(void *) {
if (userSpaceSource != NULL) {
userSpaceSource->write(sender);
}
- if (ftraceSource != NULL) {
- ftraceSource->write(sender);
- }
primarySource->write(sender);
}
@@ -213,9 +209,6 @@ void Child::endSession() {
if (userSpaceSource != NULL) {
userSpaceSource->interrupt();
}
- if (ftraceSource != NULL) {
- ftraceSource->interrupt();
- }
sem_post(&haltPipeline);
}
@@ -240,7 +233,7 @@ void Child::run() {
{ ConfigurationXML configuration; }
// Set up the driver; must be done after gSessionData->mPerfCounterType[] is populated
- if (!gSessionData->perf.isSetup()) {
+ if (!gSessionData->mPerf.isSetup()) {
primarySource = new DriverSource(&senderSem, &startProfile);
} else {
primarySource = new PerfSource(&senderSem, &startProfile);
@@ -279,25 +272,24 @@ void Child::run() {
free(xmlString);
}
- if (gSessionData->kmod.isMaliCapture() && (gSessionData->mSampleRate == 0)) {
+ if (gSessionData->mKmod.isMaliCapture() && (gSessionData->mSampleRate == 0)) {
logg->logError("Mali counters are not supported with Sample Rate: None.");
handleException();
}
// Initialize ftrace source before child as it's slow and depends on nothing else
// If initialized later, using gator with ftrace has time sync issues
- if (gSessionData->ftraceDriver.countersEnabled()) {
- ftraceSource = new FtraceSource(&senderSem);
- if (!ftraceSource->prepare()) {
- logg->logError("Unable to prepare userspace source for capture");
- handleException();
- }
- ftraceSource->start();
+ // Must be initialized before senderThread is started as senderThread checks externalSource
+ externalSource = new ExternalSource(&senderSem);
+ if (!externalSource->prepare()) {
+ logg->logError("Unable to prepare external source for capture");
+ handleException();
}
+ externalSource->start();
// Must be after session XML is parsed
if (!primarySource->prepare()) {
- if (gSessionData->perf.isSetup()) {
+ if (gSessionData->mPerf.isSetup()) {
logg->logError("Unable to communicate with the perf API, please ensure that CONFIG_TRACING and CONFIG_CONTEXT_SWITCH_TRACER are enabled. Please refer to README_Streamline.txt for more information.");
} else {
logg->logError("Unable to prepare gator driver for capture");
@@ -308,14 +300,6 @@ void Child::run() {
// Sender thread shall be halted until it is signaled for one shot mode
sem_init(&haltPipeline, 0, gSessionData->mOneShot ? 0 : 2);
- // Must be initialized before senderThread is started as senderThread checks externalSource
- externalSource = new ExternalSource(&senderSem);
- if (!externalSource->prepare()) {
- logg->logError("Unable to prepare external source for capture");
- handleException();
- }
- externalSource->start();
-
// Create the duration, stop, and sender threads
bool thread_creation_success = true;
if (gSessionData->mDuration > 0 && pthread_create(&durationThreadID, NULL, durationThread, NULL)) {
@@ -327,8 +311,8 @@ void Child::run() {
}
bool startUSSource = false;
- for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
- if (gSessionData->usDrivers[i]->countersEnabled()) {
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->mUsDrivers); ++i) {
+ if (gSessionData->mUsDrivers[i]->countersEnabled()) {
startUSSource = true;
}
}
@@ -360,9 +344,6 @@ void Child::run() {
primarySource->run();
// Wait for the other threads to exit
- if (ftraceSource != NULL) {
- ftraceSource->join();
- }
if (userSpaceSource != NULL) {
userSpaceSource->join();
}
@@ -384,7 +365,6 @@ void Child::run() {
logg->logMessage("Profiling ended.");
- delete ftraceSource;
delete userSpaceSource;
delete externalSource;
delete primarySource;
diff --git a/tools/gator/daemon/Command.cpp b/tools/gator/daemon/Command.cpp
index 0a6e3b9901ee..22f8be703598 100644
--- a/tools/gator/daemon/Command.cpp
+++ b/tools/gator/daemon/Command.cpp
@@ -9,6 +9,7 @@
#include "Command.h"
#include <fcntl.h>
+#include <grp.h>
#include <pwd.h>
#include <stdio.h>
#include <sys/prctl.h>
@@ -23,14 +24,14 @@
#include "Logging.h"
#include "SessionData.h"
-static int getUid(const char *const name, char *const shPath, const char *const tmpDir) {
+static int getUid(const char *const name, const char *const tmpDir, uid_t *const uid) {
// Lookups may fail when using a different libc or a statically compiled executable
char gatorTemp[32];
snprintf(gatorTemp, sizeof(gatorTemp), "%s/gator_temp", tmpDir);
const int fd = open(gatorTemp, 600, O_CREAT | O_CLOEXEC);
if (fd < 0) {
- return -1;
+ return false;
}
close(fd);
@@ -43,15 +44,7 @@ static int getUid(const char *const name, char *const shPath, const char *const
handleException();
}
if (pid == 0) {
- char cargv1[] = "-c";
- char *cargv[] = {
- shPath,
- cargv1,
- cmd,
- NULL,
- };
-
- execv(cargv[0], cargv);
+ execlp("sh", "sh", "-c", cmd, NULL);
exit(-1);
}
while ((waitpid(pid, NULL, 0) < 0) && (errno == EINTR));
@@ -62,38 +55,42 @@ static int getUid(const char *const name, char *const shPath, const char *const
result = st.st_uid;
}
unlink(gatorTemp);
- return result;
+ *uid = result;
+ return true;
}
-static int getUid(const char *const name) {
+static bool getUid(const char *const name, uid_t *const uid, gid_t *const gid) {
// Look up the username
struct passwd *const user = getpwnam(name);
if (user != NULL) {
- return user->pw_uid;
+ *uid = user->pw_uid;
+ *gid = user->pw_gid;
+ return true;
}
+ // Unable to get the gid without getpwnam, so create a unique gid by adding a fixed number to the pid
+ *gid = 0x484560f8 + getpid();
// Are we on Linux
- char cargv0l[] = "/bin/sh";
- if ((access(cargv0l, X_OK) == 0) && (access("/tmp", W_OK) == 0)) {
- return getUid(name, cargv0l, "/tmp");
+ if (access("/tmp", W_OK) == 0) {
+ return getUid(name, "/tmp", uid);
}
// Are we on android
- char cargv0a[] = "/system/bin/sh";
- if ((access(cargv0a, X_OK) == 0) && (access("/data", W_OK) == 0)) {
- return getUid(name, cargv0a, "/data");
+ if (access("/data", W_OK) == 0) {
+ return getUid(name, "/data", uid);
}
- return -1;
+ return false;
}
void *commandThread(void *) {
prctl(PR_SET_NAME, (unsigned long)&"gatord-command", 0, 0, 0);
const char *const name = gSessionData->mCaptureUser == NULL ? "nobody" : gSessionData->mCaptureUser;
- const int uid = getUid(name);
- if (uid < 0) {
+ uid_t uid;
+ gid_t gid;
+ if (!getUid(name, &uid, &gid)) {
logg->logError("Unable to look up the user %s, please double check that the user exists", name);
handleException();
}
@@ -113,16 +110,6 @@ void *commandThread(void *) {
handleException();
}
if (pid == 0) {
- char cargv0l[] = "/bin/sh";
- char cargv0a[] = "/system/bin/sh";
- char cargv1[] = "-c";
- char *cargv[] = {
- cargv0l,
- cargv1,
- gSessionData->mCaptureCommand,
- NULL,
- };
-
buf[0] = '\0';
close(pipefd[0]);
@@ -132,8 +119,16 @@ void *commandThread(void *) {
goto fail_exit;
}
- if (setuid(uid) != 0) {
- snprintf(buf, sizeof(buf), "setuid failed");
+ if (setgroups(1, &gid) != 0) {
+ snprintf(buf, sizeof(buf), "setgroups failed");
+ goto fail_exit;
+ }
+ if (setresgid(gid, gid, gid) != 0) {
+ snprintf(buf, sizeof(buf), "setresgid failed");
+ goto fail_exit;
+ }
+ if (setresuid(uid, uid, uid) != 0) {
+ snprintf(buf, sizeof(buf), "setresuid failed");
goto fail_exit;
}
@@ -145,9 +140,7 @@ void *commandThread(void *) {
}
}
- execv(cargv[0], cargv);
- cargv[0] = cargv0a;
- execv(cargv[0], cargv);
+ execlp("sh", "sh", "-c", gSessionData->mCaptureCommand, NULL);
snprintf(buf, sizeof(buf), "execv failed");
fail_exit:
diff --git a/tools/gator/daemon/Config.h b/tools/gator/daemon/Config.h
index eb31556e6426..3c6752eedfdc 100644
--- a/tools/gator/daemon/Config.h
+++ b/tools/gator/daemon/Config.h
@@ -9,12 +9,19 @@
#ifndef CONFIG_H
#define CONFIG_H
+#define STRIFY2(ARG) #ARG
+#define STRIFY(ARG) STRIFY2(ARG)
+
#define ARRAY_LENGTH(A) static_cast<int>(sizeof(A)/sizeof((A)[0]))
#define ACCESS_ONCE(x) (*(volatile typeof(x)*)&(x))
#define MAX_PERFORMANCE_COUNTERS 50
#define NR_CPUS 32
+// If debugfs is not mounted at /sys/kernel/debug, update TRACING_PATH
+#define TRACING_PATH "/sys/kernel/debug/tracing"
+#define EVENTS_PATH TRACING_PATH "/events"
+
template<typename T>
static inline T min(const T a, const T b) {
return (a < b ? a : b);
diff --git a/tools/gator/daemon/ConfigurationXML.cpp b/tools/gator/daemon/ConfigurationXML.cpp
index be224a4f2b1f..b433811fc482 100644
--- a/tools/gator/daemon/ConfigurationXML.cpp
+++ b/tools/gator/daemon/ConfigurationXML.cpp
@@ -68,8 +68,10 @@ int ConfigurationXML::parse(const char* configurationXML) {
mxml_node_t *tree, *node;
int ret;
- // clear counter overflow
- gSessionData->mCounterOverflow = 0;
+ if (gSessionData->mCountersError != NULL) {
+ free(gSessionData->mCountersError);
+ gSessionData->mCountersError = NULL;
+ }
gSessionData->mIsEBS = false;
mIndex = 0;
@@ -98,6 +100,14 @@ int ConfigurationXML::parse(const char* configurationXML) {
mxmlDelete(tree);
+ if (gSessionData->mCountersError == NULL && mIndex > MAX_PERFORMANCE_COUNTERS) {
+ if (asprintf(&gSessionData->mCountersError, "Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, mIndex) <= 0) {
+ logg->logError("asprintf failed");
+ handleException();
+ }
+ }
+ gSessionData->mCcnDriver.validateCounters();
+
return ret;
}
@@ -149,7 +159,6 @@ void ConfigurationXML::configurationTag(mxml_node_t *node) {
// handle all other performance counters
if (mIndex >= MAX_PERFORMANCE_COUNTERS) {
mIndex++;
- gSessionData->mCounterOverflow = mIndex;
return;
}
diff --git a/tools/gator/daemon/DiskIODriver.cpp b/tools/gator/daemon/DiskIODriver.cpp
index af62bb9a95bd..bd18bbd08de9 100644
--- a/tools/gator/daemon/DiskIODriver.cpp
+++ b/tools/gator/daemon/DiskIODriver.cpp
@@ -53,7 +53,7 @@ DiskIODriver::~DiskIODriver() {
void DiskIODriver::readEvents(mxml_node_t *const) {
// Only for use with perf
- if (!gSessionData->perf.isSetup()) {
+ if (!gSessionData->mPerf.isSetup()) {
return;
}
diff --git a/tools/gator/daemon/DriverSource.cpp b/tools/gator/daemon/DriverSource.cpp
index 34920cee92fb..caa956ff75b5 100644
--- a/tools/gator/daemon/DriverSource.cpp
+++ b/tools/gator/daemon/DriverSource.cpp
@@ -28,33 +28,8 @@
extern Child *child;
DriverSource::DriverSource(sem_t *senderSem, sem_t *startProfile) : mBuffer(NULL), mFifo(NULL), mSenderSem(senderSem), mStartProfile(startProfile), mBufferSize(0), mBufferFD(0), mLength(1) {
- int driver_version = 0;
-
mBuffer = new Buffer(0, FRAME_PERF_ATTRS, 4*1024*1024, senderSem);
- if (readIntDriver("/dev/gator/version", &driver_version) == -1) {
- logg->logError("Error reading gator driver version");
- handleException();
- }
-
- // Verify the driver version matches the daemon version
- if (driver_version != PROTOCOL_VERSION) {
- if ((driver_version > PROTOCOL_DEV) || (PROTOCOL_VERSION > PROTOCOL_DEV)) {
- // One of the mismatched versions is development version
- logg->logError(
- "DEVELOPMENT BUILD MISMATCH: gator driver version \"%d\" is not in sync with gator daemon version \"%d\".\n"
- ">> The following must be synchronized from engineering repository:\n"
- ">> * gator driver\n"
- ">> * gator daemon\n"
- ">> * Streamline", driver_version, PROTOCOL_VERSION);
- handleException();
- } else {
- // Release version mismatch
- logg->logError(
- "gator driver version \"%d\" is different than gator daemon version \"%d\".\n"
- ">> Please upgrade the driver and daemon to the latest versions.", driver_version, PROTOCOL_VERSION);
- handleException();
- }
- }
+ checkVersion();
int enable = -1;
if (readIntDriver("/dev/gator/enable", &enable) != 0 || enable != 0) {
@@ -85,6 +60,35 @@ DriverSource::~DriverSource() {
}
}
+void DriverSource::checkVersion() {
+ int driverVersion = 0;
+
+ if (readIntDriver("/dev/gator/version", &driverVersion) == -1) {
+ logg->logError("Error reading gator driver version");
+ handleException();
+ }
+
+ // Verify the driver version matches the daemon version
+ if (driverVersion != PROTOCOL_VERSION) {
+ if ((driverVersion > PROTOCOL_DEV) || (PROTOCOL_VERSION > PROTOCOL_DEV)) {
+ // One of the mismatched versions is development version
+ logg->logError(
+ "DEVELOPMENT BUILD MISMATCH: gator driver version \"%d\" is not in sync with gator daemon version \"%d\".\n"
+ ">> The following must be synchronized from engineering repository:\n"
+ ">> * gator driver\n"
+ ">> * gator daemon\n"
+ ">> * Streamline", driverVersion, PROTOCOL_VERSION);
+ handleException();
+ } else {
+ // Release version mismatch
+ logg->logError(
+ "gator driver version \"%d\" is different than gator daemon version \"%d\".\n"
+ ">> Please upgrade the driver and daemon to the latest versions.", driverVersion, PROTOCOL_VERSION);
+ handleException();
+ }
+ }
+}
+
bool DriverSource::prepare() {
// Create user-space buffers, add 5 to the size to account for the 1-byte type and 4-byte length
logg->logMessage("Created %d MB collector buffer with a %d-byte ragged end", gSessionData->mTotalBufferSize, mBufferSize);
diff --git a/tools/gator/daemon/DriverSource.h b/tools/gator/daemon/DriverSource.h
index 32d983d6d036..971a4d970e30 100644
--- a/tools/gator/daemon/DriverSource.h
+++ b/tools/gator/daemon/DriverSource.h
@@ -29,6 +29,7 @@ public:
bool isDone();
void write(Sender *sender);
+ static void checkVersion();
static int readIntDriver(const char *fullpath, int *value);
static int readInt64Driver(const char *fullpath, int64_t *value);
static int writeDriver(const char *fullpath, const char *data);
diff --git a/tools/gator/daemon/EventsXML.cpp b/tools/gator/daemon/EventsXML.cpp
index cec08d5a6fff..d8044d8b8abd 100644
--- a/tools/gator/daemon/EventsXML.cpp
+++ b/tools/gator/daemon/EventsXML.cpp
@@ -50,7 +50,7 @@ mxml_node_t *EventsXML::getTree() {
// Load the provided or default events xml
if (gSessionData->mEventsXMLPath) {
strncpy(path, gSessionData->mEventsXMLPath, PATH_MAX);
- fl = fopen(path, "r");
+ fl = fopen_cloexec(path, "r");
if (fl) {
xml = mxmlLoadFile(NULL, fl, MXML_NO_CALLBACK);
fclose(fl);
@@ -63,7 +63,7 @@ mxml_node_t *EventsXML::getTree() {
// Append additional events XML
if (gSessionData->mEventsXMLAppend) {
- fl = fopen(gSessionData->mEventsXMLAppend, "r");
+ fl = fopen_cloexec(gSessionData->mEventsXMLAppend, "r");
if (fl == NULL) {
logg->logError("Unable to open additional events XML %s", gSessionData->mEventsXMLAppend);
handleException();
diff --git a/tools/gator/daemon/ExternalDriver.cpp b/tools/gator/daemon/ExternalDriver.cpp
new file mode 100644
index 000000000000..c1b798352512
--- /dev/null
+++ b/tools/gator/daemon/ExternalDriver.cpp
@@ -0,0 +1,269 @@
+/**
+ * Copyright (C) ARM Limited 2010-2015. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ExternalDriver.h"
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "Buffer.h"
+#include "Logging.h"
+#include "OlySocket.h"
+#include "SessionData.h"
+
+static const char MALI_UTGARD_SETUP[] = "\0mali-utgard-setup";
+static const char SETUP_VERSION[] = "ANNOTATE_SETUP 1\n";
+static const size_t HEADER_SIZE = 1 + sizeof(uint32_t);
+
+#define HEADER_ERROR 0x80
+#define HEADER_ACK 0x81
+#define HEADER_REQUEST_COUNTERS 0x82
+#define HEADER_COUNTERS 0x83
+#define HEADER_ENABLE_COUNTERS 0x84
+#define HEADER_START 0x85
+
+static uint32_t readLEInt(char *const buf)
+{
+ size_t i;
+ uint32_t v;
+
+ v = 0;
+ for (i = 0; i < sizeof(v); ++i)
+ v |= (uint32_t)buf[i] << 8*i;
+
+ return v;
+}
+
+static int readPackedInt(char *const buf, const size_t bufSize, size_t *const pos, uint64_t *const l)
+{
+ uint8_t shift = 0;
+ uint8_t b = ~0;
+
+ *l = 0;
+ while ((b & 0x80) != 0) {
+ if (*pos >= bufSize) {
+ return -1;
+ }
+ b = buf[*pos];
+ *pos += 1;
+ *l |= (uint64_t)(b & 0x7f) << shift;
+ shift += 7;
+ }
+
+ if (shift < 8*sizeof(*l) && (b & 0x40) != 0) {
+ *l |= -(1 << shift);
+ }
+
+ return 0;
+}
+
+class ExternalCounter : public DriverCounter {
+public:
+ ExternalCounter(DriverCounter *next, const char *name, int cores) : DriverCounter(next, name), mCores(cores), mEvent(-1) {}
+
+ ~ExternalCounter() {
+ }
+
+ int getCores() const { return mCores; }
+ void setEvent(const int event) { mEvent = event; }
+ int getEvent() const { return mEvent; }
+
+private:
+ const int mCores;
+ int mEvent;
+
+ // Intentionally undefined
+ ExternalCounter(const ExternalCounter &);
+ ExternalCounter &operator=(const ExternalCounter &);
+};
+
+ExternalDriver::ExternalDriver() : mUds(-1), mQueried(false), mStarted(false) {
+}
+
+bool ExternalDriver::connect() const {
+ if (mUds < 0) {
+ mUds = OlySocket::connect(MALI_UTGARD_SETUP, sizeof(MALI_UTGARD_SETUP));
+ if (mUds >= 0 && !writeAll(mUds, SETUP_VERSION, sizeof(SETUP_VERSION) - 1)) {
+ logg->logError("Unable to send setup version");
+ handleException();
+ }
+ }
+ return mUds >= 0;
+}
+
+void ExternalDriver::disconnect() {
+ if (mUds >= 0) {
+ close(mUds);
+ mUds = -1;
+ mStarted = false;
+ }
+}
+
+void ExternalDriver::query() const {
+ if (mQueried) {
+ return;
+ }
+ // Only try once even if it fails, otherwise not all the possible counters may be shown
+ mQueried = true;
+
+ char *const buf = gSessionData->mSharedData->mMaliUtgardCounters;
+ const size_t bufSize = sizeof(gSessionData->mSharedData->mMaliUtgardCounters);
+ size_t size = 0;
+
+ if (!connect()) {
+ size = gSessionData->mSharedData->mMaliUtgardCountersSize;
+ logg->logMessage("Unable to connect, using cached version; size: %zi", size);
+ } else {
+ gSessionData->mSharedData->mMaliUtgardCountersSize = 0;
+
+ buf[0] = HEADER_REQUEST_COUNTERS;
+ size_t pos = HEADER_SIZE;
+ Buffer::writeLEInt((unsigned char *)(buf + 1), pos);
+ if (!writeAll(mUds, buf, pos)) {
+ logg->logError("Unable to send request counters message");
+ handleException();
+ }
+
+ if (!readAll(mUds, buf, HEADER_SIZE) || buf[0] != (char)HEADER_COUNTERS) {
+ logg->logError("Unable to read request counters response header");
+ handleException();
+ }
+ size = readLEInt(buf + 1);
+ if (size > bufSize || !readAll(mUds, buf, size - HEADER_SIZE)) {
+ logg->logError("Unable to read request counters response");
+ handleException();
+ }
+
+ size -= HEADER_SIZE;
+ gSessionData->mSharedData->mMaliUtgardCountersSize = size;
+ logg->logMessage("Requested counters; size: %zi", size);
+ }
+
+ size_t pos = 0;
+ while (pos < size) {
+ size_t begin = pos;
+ char *name = NULL;
+ uint64_t cores = -1;
+ while (pos < size && buf[pos] != '\0') {
+ ++pos;
+ }
+ if (pos > begin) {
+ name = strndup(buf + begin, pos - begin);
+ }
+ if (pos < size && buf[pos] == '\0') {
+ ++pos;
+ }
+ ;
+ if (name != NULL && readPackedInt(buf, bufSize, &pos, &cores) == 0) {
+ // Cheat so that this can be 'const'
+ ((ExternalDriver *)(this))->setCounters(new ExternalCounter(getCounters(), name, cores));
+ }
+ }
+
+ if (pos != size) {
+ logg->logError("Unable to parse request counters response");
+ handleException();
+ }
+}
+
+void ExternalDriver::start() {
+ if (!connect()) {
+ return;
+ }
+
+ if (mStarted) {
+ return;
+ }
+ // Only start once
+ mStarted = true;
+
+ char buf[1<<12];
+ int pos;
+
+ buf[0] = HEADER_ENABLE_COUNTERS;
+ pos = HEADER_SIZE;
+ for (ExternalCounter *counter = static_cast<ExternalCounter *>(getCounters()); counter != NULL; counter = static_cast<ExternalCounter *>(counter->getNext())) {
+ if (!counter->isEnabled()) {
+ continue;
+ }
+ size_t nameLen = strlen(counter->getName());
+ if (pos + nameLen + 1 + 2*Buffer::MAXSIZE_PACK32 > sizeof(buf)) {
+ logg->logError("Unable to enable counters, message is too large");
+ handleException();
+ }
+ memcpy(buf + pos, counter->getName(), nameLen + 1);
+ pos += nameLen + 1;
+ Buffer::packInt(buf, sizeof(buf), pos, counter->getEvent());
+ Buffer::packInt(buf, sizeof(buf), pos, counter->getKey());
+ }
+ Buffer::writeLEInt((unsigned char *)(buf + 1), pos);
+ if (!writeAll(mUds, buf, pos)) {
+ logg->logError("Unable to send enable counters message");
+ handleException();
+ }
+
+ size_t size = 0;
+ if (!readAll(mUds, buf, HEADER_SIZE) || buf[0] != (char)HEADER_ACK) {
+ logg->logError("Unable to read enable counters response header");
+ handleException();
+ }
+ size = readLEInt(buf + 1);
+ if (size != HEADER_SIZE) {
+ logg->logError("Unable to parse enable counters response");
+ handleException();
+ }
+
+ buf[0] = HEADER_START;
+ pos = HEADER_SIZE;
+ // ns/sec / samples/sec = ns/sample
+ // For sample rate of none, sample every 100ms
+ Buffer::packInt(buf, sizeof(buf), pos, NS_PER_S / (gSessionData->mSampleRate == 0 ? 10 : gSessionData->mSampleRate));
+ Buffer::packInt(buf, sizeof(buf), pos, gSessionData->mLiveRate);
+ Buffer::writeLEInt((unsigned char *)(buf + 1), pos);
+ if (!writeAll(mUds, buf, pos)) {
+ logg->logError("Unable to send start message");
+ handleException();
+ }
+
+ size = 0;
+ if (!readAll(mUds, buf, HEADER_SIZE) || buf[0] != (char)HEADER_ACK) {
+ logg->logError("Unable to read start response header");
+ handleException();
+ }
+ size = readLEInt(buf + 1);
+ if (size != HEADER_SIZE) {
+ logg->logError("Unable to parse start response");
+ handleException();
+ }
+}
+
+bool ExternalDriver::claimCounter(const Counter &counter) const {
+ query();
+ return super::claimCounter(counter);
+}
+
+void ExternalDriver::setupCounter(Counter &counter) {
+ ExternalCounter *const externalCounter = static_cast<ExternalCounter *>(findCounter(counter));
+ if (externalCounter == NULL) {
+ counter.setEnabled(false);
+ return;
+ }
+ externalCounter->setEnabled(true);
+ counter.setKey(externalCounter->getKey());
+ if (counter.getEvent() != -1) {
+ externalCounter->setEvent(counter.getEvent());
+ }
+ if (externalCounter->getCores() > 0) {
+ counter.setCores(externalCounter->getCores());
+ }
+}
+
+void ExternalDriver::resetCounters() {
+ query();
+ super::resetCounters();
+}
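
The counter list parsed by ExternalDriver::query() above is a sequence of NUL-terminated counter names, each followed by a signed base-128 "packed int" giving the core count, decoded by readPackedInt(). A minimal standalone sketch of that decoding, assuming the same wire format (7 data bits per byte, 0x80 as the continuation flag, 0x40 of the final byte as the sign bit) and not the daemon's actual helper:

    // Sketch of the signed packed-int decoding used by readPackedInt() above.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static int decodePackedInt(const uint8_t *buf, size_t bufSize, size_t *pos, int64_t *out) {
        uint64_t value = 0;
        uint8_t b = 0;
        unsigned shift = 0;
        do {
            if (*pos >= bufSize || shift >= 64) {
                return -1; // truncated or over-long encoding
            }
            b = buf[(*pos)++];
            value |= (uint64_t)(b & 0x7f) << shift;
            shift += 7;
        } while ((b & 0x80) != 0);
        if (shift < 64 && (b & 0x40) != 0) {
            value |= ~(uint64_t)0 << shift; // sign-extend the final byte
        }
        *out = (int64_t)value;
        return 0;
    }

    int main() {
        const uint8_t buf[] = { 0x7e }; // single-byte encoding of -2
        size_t pos = 0;
        int64_t value;
        if (decodePackedInt(buf, sizeof(buf), &pos, &value) == 0) {
            printf("%lld\n", (long long)value); // prints -2
        }
        return 0;
    }
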
diff --git a/tools/gator/daemon/ExternalDriver.h b/tools/gator/daemon/ExternalDriver.h
new file mode 100644
index 000000000000..d88f9e125f9c
--- /dev/null
+++ b/tools/gator/daemon/ExternalDriver.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) ARM Limited 2010-2015. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXTERNALDRIVER_H
+#define EXTERNALDRIVER_H
+
+#include "Driver.h"
+
+class ExternalDriver : public SimpleDriver {
+public:
+ ExternalDriver();
+
+ bool claimCounter(const Counter &counter) const;
+ void resetCounters();
+ void setupCounter(Counter &counter);
+
+ void start();
+
+ void disconnect();
+
+private:
+ typedef SimpleDriver super;
+
+ bool connect() const;
+ void query() const;
+
+ mutable int mUds;
+ mutable bool mQueried;
+ bool mStarted;
+
+ // Intentionally unimplemented
+ ExternalDriver(const ExternalDriver &);
+ ExternalDriver &operator=(const ExternalDriver &);
+};
+
+#endif // EXTERNALDRIVER_H
diff --git a/tools/gator/daemon/ExternalSource.cpp b/tools/gator/daemon/ExternalSource.cpp
index 8d71b6de3d5e..e87fb9ef02ba 100644
--- a/tools/gator/daemon/ExternalSource.cpp
+++ b/tools/gator/daemon/ExternalSource.cpp
@@ -13,6 +13,7 @@
#include <unistd.h>
#include "Child.h"
+#include "DriverSource.h"
#include "Logging.h"
#include "OlySocket.h"
#include "SessionData.h"
@@ -26,25 +27,10 @@ static const char MALI_VIDEO_V1[] = "MALI_VIDEO 1\n";
static const char MALI_GRAPHICS[] = "\0mali_thirdparty_server";
static const char MALI_GRAPHICS_STARTUP[] = "\0mali_thirdparty_client";
static const char MALI_GRAPHICS_V1[] = "MALI_GRAPHICS 1\n";
+static const char MALI_UTGARD_STARTUP[] = "\0mali-utgard-startup";
+static const char FTRACE_V1[] = "FTRACE 1\n";
-static bool setNonblock(const int fd) {
- int flags;
-
- flags = fcntl(fd, F_GETFL);
- if (flags < 0) {
- logg->logMessage("fcntl getfl failed");
- return false;
- }
-
- if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != 0) {
- logg->logMessage("fcntl setfl failed");
- return false;
- }
-
- return true;
-}
-
-ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mMaliStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mAnnotate(8083), mAnnotateUds(STREAMLINE_ANNOTATE, sizeof(STREAMLINE_ANNOTATE), true), mInterruptFd(-1), mMaliUds(-1), mMveUds(-1) {
+ExternalSource::ExternalSource(sem_t *senderSem) : mBuffer(0, FRAME_EXTERNAL, 128*1024, senderSem), mMonitor(), mMveStartupUds(MALI_VIDEO_STARTUP, sizeof(MALI_VIDEO_STARTUP)), mMaliStartupUds(MALI_GRAPHICS_STARTUP, sizeof(MALI_GRAPHICS_STARTUP)), mUtgardStartupUds(MALI_UTGARD_STARTUP, sizeof(MALI_UTGARD_STARTUP)), mAnnotate(8083), mAnnotateUds(STREAMLINE_ANNOTATE, sizeof(STREAMLINE_ANNOTATE), true), mInterruptFd(-1), mMaliUds(-1), mMveUds(-1), mFtraceFd(-1) {
sem_init(&mBufferSem, 0, 0);
}
@@ -91,7 +77,7 @@ bool ExternalSource::connectMali() {
}
bool ExternalSource::connectMve() {
- if (!gSessionData->maliVideo.countersEnabled()) {
+ if (!gSessionData->mMaliVideo.countersEnabled()) {
return true;
}
@@ -100,7 +86,7 @@ bool ExternalSource::connectMve() {
return false;
}
- if (!gSessionData->maliVideo.start(mMveUds)) {
+ if (!gSessionData->mMaliVideo.start(mMveUds)) {
return false;
}
@@ -109,10 +95,27 @@ bool ExternalSource::connectMve() {
return true;
}
+void ExternalSource::connectFtrace() {
+ if (!gSessionData->mFtraceDriver.isSupported()) {
+ return;
+ }
+
+ gSessionData->mFtraceDriver.prepare();
+
+ mFtraceFd = open(TRACING_PATH "/trace_pipe", O_RDONLY | O_CLOEXEC);
+ if (mFtraceFd < 0) {
+ logg->logError("Unable to open trace_pipe");
+ handleException();
+ }
+
+ configureConnection(mFtraceFd, FTRACE_V1, sizeof(FTRACE_V1));
+}
+
bool ExternalSource::prepare() {
if (!mMonitor.init() ||
!setNonblock(mMveStartupUds.getFd()) || !mMonitor.add(mMveStartupUds.getFd()) ||
!setNonblock(mMaliStartupUds.getFd()) || !mMonitor.add(mMaliStartupUds.getFd()) ||
+ !setNonblock(mUtgardStartupUds.getFd()) || !mMonitor.add(mUtgardStartupUds.getFd()) ||
!setNonblock(mAnnotate.getFd()) || !mMonitor.add(mAnnotate.getFd()) ||
!setNonblock(mAnnotateUds.getFd()) || !mMonitor.add(mAnnotateUds.getFd()) ||
false) {
@@ -121,6 +124,8 @@ bool ExternalSource::prepare() {
connectMali();
connectMve();
+ connectFtrace();
+ gSessionData->mExternalDriver.start();
return true;
}
@@ -147,6 +152,30 @@ void ExternalSource::run() {
logg->logMessage("Writing to annotate pipe failed");
}
+ if (mFtraceFd >= 0) {
+ gSessionData->mAtraceDriver.start();
+
+ if (DriverSource::writeDriver(TRACING_PATH "/tracing_on", "1") != 0) {
+ logg->logError("Unable to turn ftrace on");
+ handleException();
+ }
+ }
+
+ // Wait until monotonicStarted is set before sending data
+ int64_t monotonicStarted = 0;
+ while (monotonicStarted <= 0 && gSessionData->mSessionIsActive) {
+ usleep(10);
+
+ if (gSessionData->mPerf.isSetup()) {
+ monotonicStarted = gSessionData->mMonotonicStarted;
+ } else {
+ if (DriverSource::readInt64Driver("/dev/gator/started", &monotonicStarted) == -1) {
+ logg->logError("Error reading gator driver start time");
+ handleException();
+ }
+ }
+ }
+
while (gSessionData->mSessionIsActive) {
struct epoll_event events[16];
// Clear any pending sem posts
@@ -179,6 +208,13 @@ void ExternalSource::run() {
logg->logError("Unable to configure incoming Mali graphics connection");
handleException();
}
+ } else if (fd == mUtgardStartupUds.getFd()) {
+ // Mali Utgard says it's alive
+ int client = mUtgardStartupUds.acceptConnection();
+ // Don't read from this connection; configure Utgard and expect it to reconnect with annotations
+ close(client);
+ gSessionData->mExternalDriver.disconnect();
+ gSessionData->mExternalDriver.start();
} else if (fd == mAnnotate.getFd()) {
int client = mAnnotate.acceptConnection();
if (!setNonblock(client) || !mMonitor.add(client)) {
@@ -243,8 +279,14 @@ void ExternalSource::run() {
mBuffer.setDone();
+ if (mFtraceFd >= 0) {
+ gSessionData->mFtraceDriver.stop();
+ gSessionData->mAtraceDriver.stop();
+ close(mFtraceFd);
+ }
+
if (mMveUds >= 0) {
- gSessionData->maliVideo.stop(mMveUds);
+ gSessionData->mMaliVideo.stop(mMveUds);
}
mInterruptFd = -1;
diff --git a/tools/gator/daemon/ExternalSource.h b/tools/gator/daemon/ExternalSource.h
index 25ae7cd4c4ab..69d16afeb76c 100644
--- a/tools/gator/daemon/ExternalSource.h
+++ b/tools/gator/daemon/ExternalSource.h
@@ -34,17 +34,20 @@ private:
void configureConnection(const int fd, const char *const handshake, size_t size);
bool connectMali();
bool connectMve();
+ void connectFtrace();
sem_t mBufferSem;
Buffer mBuffer;
Monitor mMonitor;
OlyServerSocket mMveStartupUds;
OlyServerSocket mMaliStartupUds;
+ OlyServerSocket mUtgardStartupUds;
OlyServerSocket mAnnotate;
OlyServerSocket mAnnotateUds;
int mInterruptFd;
int mMaliUds;
int mMveUds;
+ int mFtraceFd;
// Intentionally unimplemented
ExternalSource(const ExternalSource &);
diff --git a/tools/gator/daemon/FtraceDriver.cpp b/tools/gator/daemon/FtraceDriver.cpp
index 98bd0a5d9eec..7c57cfcbb1e1 100644
--- a/tools/gator/daemon/FtraceDriver.cpp
+++ b/tools/gator/daemon/FtraceDriver.cpp
@@ -10,22 +10,25 @@
#include <regex.h>
#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "Config.h"
#include "DriverSource.h"
#include "Logging.h"
+#include "SessionData.h"
#include "Setup.h"
class FtraceCounter : public DriverCounter {
public:
- FtraceCounter(DriverCounter *next, char *name, const char *regex, const char *enable);
+ FtraceCounter(DriverCounter *next, char *name, const char *enable);
~FtraceCounter();
void prepare();
- int read(const char *const line, int64_t *values);
void stop();
private:
- regex_t mReg;
char *const mEnable;
int mWasEnabled;
@@ -34,18 +37,10 @@ private:
FtraceCounter &operator=(const FtraceCounter &);
};
-FtraceCounter::FtraceCounter(DriverCounter *next, char *name, const char *regex, const char *enable) : DriverCounter(next, name), mEnable(enable == NULL ? NULL : strdup(enable)) {
- int result = regcomp(&mReg, regex, REG_EXTENDED);
- if (result != 0) {
- char buf[128];
- regerror(result, &mReg, buf, sizeof(buf));
- logg->logError("Invalid regex '%s': %s", regex, buf);
- handleException();
- }
+FtraceCounter::FtraceCounter(DriverCounter *next, char *name, const char *enable) : DriverCounter(next, name), mEnable(enable == NULL ? NULL : strdup(enable)) {
}
FtraceCounter::~FtraceCounter() {
- regfree(&mReg);
if (mEnable != NULL) {
free(mEnable);
}
@@ -57,7 +52,7 @@ void FtraceCounter::prepare() {
}
char buf[1<<10];
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/enable", mEnable);
+ snprintf(buf, sizeof(buf), EVENTS_PATH "/%s/enable", mEnable);
if ((DriverSource::readIntDriver(buf, &mWasEnabled) != 0) ||
(DriverSource::writeDriver(buf, 1) != 0)) {
logg->logError("Unable to read or write to %s", buf);
@@ -65,43 +60,17 @@ void FtraceCounter::prepare() {
}
}
-int FtraceCounter::read(const char *const line, int64_t *values) {
- regmatch_t match[2];
- int result = regexec(&mReg, line, 2, match, 0);
- if (result != 0) {
- // No match
- return 0;
- }
-
- int64_t value;
- if (match[1].rm_so < 0) {
- value = 1;
- } else {
- errno = 0;
- value = strtoll(line + match[1].rm_so, NULL, 0);
- if (errno != 0) {
- logg->logError("Parsing %s failed: %s", getName(), strerror(errno));
- handleException();
- }
- }
-
- values[0] = getKey();
- values[1] = value;
-
- return 1;
-}
-
void FtraceCounter::stop() {
if (mEnable == NULL) {
return;
}
char buf[1<<10];
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/enable", mEnable);
+ snprintf(buf, sizeof(buf), EVENTS_PATH "/%s/enable", mEnable);
DriverSource::writeDriver(buf, mWasEnabled);
}
-FtraceDriver::FtraceDriver() : mValues(NULL) {
+FtraceDriver::FtraceDriver() : mValues(NULL), mSupported(false), mTracingOn(0) {
}
FtraceDriver::~FtraceDriver() {
@@ -118,10 +87,20 @@ void FtraceDriver::readEvents(mxml_node_t *const xml) {
// The perf clock was added in 3.10
if (KERNEL_VERSION(release[0], release[1], release[2]) < KERNEL_VERSION(3, 10, 0)) {
+ mSupported = false;
logg->logMessage("Unsupported kernel version, to use ftrace please upgrade to Linux 3.10 or later");
return;
}
+ // Is debugfs or tracefs available?
+ if (access(TRACING_PATH, R_OK) != 0) {
+ mSupported = false;
+ logg->logMessage("Unable to locate the tracing directory, disabling ftrace");
+ return;
+ }
+
+ mSupported = true;
+
mxml_node_t *node = xml;
int count = 0;
while (true) {
@@ -143,20 +122,28 @@ void FtraceDriver::readEvents(mxml_node_t *const xml) {
logg->logError("The regex counter %s is missing the required regex attribute", counter);
handleException();
}
- bool addCounter = true;
+
+ const char *tracepoint = mxmlElementGetAttr(node, "tracepoint");
const char *enable = mxmlElementGetAttr(node, "enable");
+ if (enable == NULL) {
+ enable = tracepoint;
+ }
+ if (gSessionData->mPerf.isSetup() && tracepoint != NULL) {
+ logg->logMessage("Not using ftrace for counter %s", counter);
+ continue;
+ }
if (enable != NULL) {
char buf[1<<10];
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/enable", enable);
+ snprintf(buf, sizeof(buf), EVENTS_PATH "/%s/enable", enable);
if (access(buf, W_OK) != 0) {
logg->logMessage("Disabling counter %s, %s not found", counter, buf);
- addCounter = false;
+ continue;
}
}
- if (addCounter) {
- setCounters(new FtraceCounter(getCounters(), strdup(counter), regex, enable));
- ++count;
- }
+
+ logg->logMessage("Using ftrace for %s", counter);
+ setCounters(new FtraceCounter(getCounters(), strdup(counter), enable));
+ ++count;
}
mValues = new int64_t[2*count];
@@ -169,23 +156,37 @@ void FtraceDriver::prepare() {
}
counter->prepare();
}
-}
-int FtraceDriver::read(const char *line, int64_t **buf) {
- int count = 0;
+ if (DriverSource::readIntDriver(TRACING_PATH "/tracing_on", &mTracingOn)) {
+ logg->logError("Unable to read if ftrace is enabled");
+ handleException();
+ }
- for (FtraceCounter *counter = static_cast<FtraceCounter *>(getCounters()); counter != NULL; counter = static_cast<FtraceCounter *>(counter->getNext())) {
- if (!counter->isEnabled()) {
- continue;
+ if (DriverSource::writeDriver(TRACING_PATH "/tracing_on", "0") != 0) {
+ logg->logError("Unable to turn ftrace off before truncating the buffer");
+ handleException();
+ }
+
+ {
+ int fd;
+ fd = open(TRACING_PATH "/trace", O_WRONLY | O_TRUNC | O_CLOEXEC, 0666);
+ if (fd < 0) {
+ logg->logError("Unable to truncate ftrace buffer: %s", strerror(errno));
+ handleException();
}
- count += counter->read(line, mValues + 2*count);
+ close(fd);
}
- *buf = mValues;
- return count;
+ if (DriverSource::writeDriver(TRACING_PATH "/trace_clock", "perf") != 0) {
+ logg->logError("Unable to switch ftrace to the perf clock, please ensure you are running Linux 3.10 or later");
+ handleException();
+ }
}
void FtraceDriver::stop() {
+ DriverSource::writeDriver(TRACING_PATH "/tracing_on", mTracingOn);
+ DriverSource::writeDriver(TRACING_PATH "/trace_clock", "local");
+
for (FtraceCounter *counter = static_cast<FtraceCounter *>(getCounters()); counter != NULL; counter = static_cast<FtraceCounter *>(counter->getNext())) {
if (!counter->isEnabled()) {
continue;
diff --git a/tools/gator/daemon/FtraceDriver.h b/tools/gator/daemon/FtraceDriver.h
index b79dc9149d6a..6e592d786617 100644
--- a/tools/gator/daemon/FtraceDriver.h
+++ b/tools/gator/daemon/FtraceDriver.h
@@ -19,11 +19,14 @@ public:
void readEvents(mxml_node_t *const xml);
void prepare();
- int read(const char *line, int64_t **buf);
void stop();
+ bool isSupported() const { return mSupported; }
+
private:
int64_t *mValues;
+ bool mSupported;
+ int mTracingOn;
// Intentionally unimplemented
FtraceDriver(const FtraceDriver &);
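
The FtraceDriver changes above drive ftrace entirely through its control files: tracing_on is saved and cleared, the buffer is truncated by opening trace with O_TRUNC, and trace_clock is switched to "perf" so ftrace timestamps share a timebase with perf samples. A small sketch of that sequence, assuming the tracing directory is mounted at /sys/kernel/debug/tracing (the daemon resolves this via its TRACING_PATH macro):

    // Sketch of the prepare()/start() control-file sequence used above.
    #include <fcntl.h>
    #include <unistd.h>
    #include <cstdio>
    #include <cstring>

    static bool writeFile(const char *path, const char *value) {
        int fd = open(path, O_WRONLY | O_CLOEXEC);
        if (fd < 0) {
            return false;
        }
        ssize_t len = (ssize_t)strlen(value);
        bool ok = write(fd, value, len) == len;
        close(fd);
        return ok;
    }

    int main() {
        const char *base = "/sys/kernel/debug/tracing";
        char path[256];

        // Stop tracing before truncating the buffer
        snprintf(path, sizeof(path), "%s/tracing_on", base);
        if (!writeFile(path, "0")) { perror("tracing_on"); return 1; }

        // Truncate the trace buffer
        snprintf(path, sizeof(path), "%s/trace", base);
        int fd = open(path, O_WRONLY | O_TRUNC | O_CLOEXEC);
        if (fd < 0) { perror("trace"); return 1; }
        close(fd);

        // Use the perf clock so ftrace and perf timestamps line up
        snprintf(path, sizeof(path), "%s/trace_clock", base);
        if (!writeFile(path, "perf")) { perror("trace_clock"); return 1; }

        // Start tracing; readers can now consume trace_pipe
        snprintf(path, sizeof(path), "%s/tracing_on", base);
        return writeFile(path, "1") ? 0 : 1;
    }
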
diff --git a/tools/gator/daemon/FtraceSource.cpp b/tools/gator/daemon/FtraceSource.cpp
deleted file mode 100644
index 14a48b3b870c..000000000000
--- a/tools/gator/daemon/FtraceSource.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2010-2015. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "FtraceSource.h"
-
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/prctl.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-#include "Child.h"
-#include "DriverSource.h"
-#include "Logging.h"
-#include "SessionData.h"
-
-extern Child *child;
-
-static void handler(int signum)
-{
- (void)signum;
-};
-
-FtraceSource::FtraceSource(sem_t *senderSem) : mFtraceFh(NULL), mBuffer(0, FRAME_BLOCK_COUNTER, 128*1024, senderSem), mTid(-1), mTracingOn(0) {
-}
-
-FtraceSource::~FtraceSource() {
-}
-
-bool FtraceSource::prepare() {
- {
- struct sigaction act;
- act.sa_handler = handler;
- act.sa_flags = (int)SA_RESETHAND;
- if (sigaction(SIGUSR1, &act, NULL) != 0) {
- logg->logError("sigaction failed: %s\n", strerror(errno));
- handleException();
- }
- }
-
- gSessionData->ftraceDriver.prepare();
-
- if (DriverSource::readIntDriver("/sys/kernel/debug/tracing/tracing_on", &mTracingOn)) {
- logg->logError("Unable to read if ftrace is enabled");
- handleException();
- }
-
- if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "0") != 0) {
- logg->logError("Unable to turn ftrace off before truncating the buffer");
- handleException();
- }
-
- {
- int fd;
- fd = open("/sys/kernel/debug/tracing/trace", O_WRONLY | O_TRUNC | O_CLOEXEC, 0666);
- if (fd < 0) {
- logg->logError("Unable truncate ftrace buffer: %s", strerror(errno));
- handleException();
- }
- close(fd);
- }
-
- if (DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "perf") != 0) {
- logg->logError("Unable to switch ftrace to the perf clock, please ensure you are running Linux 3.10 or later");
- handleException();
- }
-
- mFtraceFh = fopen_cloexec("/sys/kernel/debug/tracing/trace_pipe", "rb");
- if (mFtraceFh == NULL) {
- logg->logError("Unable to open trace_pipe");
- handleException();
- }
-
- return true;
-}
-
-void FtraceSource::run() {
- prctl(PR_SET_NAME, (unsigned long)&"gatord-ftrace", 0, 0, 0);
- mTid = syscall(__NR_gettid);
-
- if (DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", "1") != 0) {
- logg->logError("Unable to turn ftrace on");
- handleException();
- }
-
- // Wait until monotonicStarted is set before sending data
- int64_t monotonicStarted = 0;
- while (monotonicStarted <= 0 && gSessionData->mSessionIsActive) {
- usleep(10);
-
- if (gSessionData->perf.isSetup()) {
- monotonicStarted = gSessionData->mMonotonicStarted;
- } else {
- if (DriverSource::readInt64Driver("/dev/gator/started", &monotonicStarted) == -1) {
- logg->logError("Error reading gator driver start time");
- handleException();
- }
- }
- }
-
- while (gSessionData->mSessionIsActive) {
- char buf[1<<12];
-
- if (fgets(buf, sizeof(buf), mFtraceFh) == NULL) {
- if (errno == EINTR) {
- // Interrupted by interrupt - likely user request to terminate
- break;
- }
- logg->logError("Unable read trace data: %s", strerror(errno));
- handleException();
- }
-
- const uint64_t currTime = getTime() - gSessionData->mMonotonicStarted;
-
- char *const colon = strstr(buf, ": ");
- if (colon == NULL) {
- if (strstr(buf, " [LOST ") != NULL) {
- logg->logError("Ftrace events lost, aborting the capture. It is recommended to discard this report and collect a new capture. If this error occurs often, please reduce the number of ftrace counters selected or the amount of ftrace events generated.");
- } else {
- logg->logError("Unable to find colon: %s", buf);
- }
- handleException();
- }
- *colon = '\0';
-
- char *const space = strrchr(buf, ' ');
- if (space == NULL) {
- logg->logError("Unable to find space: %s", buf);
- handleException();
- }
- *colon = ':';
-
- int64_t *data = NULL;
- int count = gSessionData->ftraceDriver.read(colon + 2, &data);
- if (count > 0) {
- errno = 0;
- const long long time = strtod(space, NULL) * 1000000000;
- if (errno != 0) {
- logg->logError("Unable to parse time: %s", strerror(errno));
- handleException();
- }
- mBuffer.event64(-1, time);
-
- for (int i = 0; i < count; ++i) {
- mBuffer.event64(data[2*i + 0], data[2*i + 1]);
- }
-
- mBuffer.check(currTime);
-
- if (gSessionData->mOneShot && gSessionData->mSessionIsActive && (mBuffer.bytesAvailable() <= 0)) {
- logg->logMessage("One shot (ftrace)");
- child->endSession();
- }
- }
-
- }
-
- mBuffer.setDone();
-
- DriverSource::writeDriver("/sys/kernel/debug/tracing/tracing_on", mTracingOn);
- fclose(mFtraceFh);
- DriverSource::writeDriver("/sys/kernel/debug/tracing/trace_clock", "local");
- gSessionData->ftraceDriver.stop();
-}
-
-void FtraceSource::interrupt() {
- // Closing the underlying file handle does not result in the read on the ftrace file handle to return, so send a signal to the thread
- syscall(__NR_tgkill, getpid(), mTid, SIGUSR1);
-}
-
-bool FtraceSource::isDone() {
- return mBuffer.isDone();
-}
-
-void FtraceSource::write(Sender *sender) {
- // Don't send ftrace data until the summary packet is sent so that monotonic delta is available
- if (!gSessionData->mSentSummary) {
- return;
- }
- if (!mBuffer.isDone()) {
- mBuffer.write(sender);
- }
-}
diff --git a/tools/gator/daemon/FtraceSource.h b/tools/gator/daemon/FtraceSource.h
deleted file mode 100644
index bc068d266654..000000000000
--- a/tools/gator/daemon/FtraceSource.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Copyright (C) ARM Limited 2010-2015. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef FTRACESOURCE_H
-#define FTRACESOURCE_H
-
-#include <semaphore.h>
-#include <stdio.h>
-
-#include "Buffer.h"
-#include "Source.h"
-
-class FtraceSource : public Source {
-public:
- FtraceSource(sem_t *senderSem);
- ~FtraceSource();
-
- bool prepare();
- void run();
- void interrupt();
-
- bool isDone();
- void write(Sender *sender);
-
-private:
- void waitFor(const int bytes);
-
- FILE *mFtraceFh;
- Buffer mBuffer;
- int mTid;
- int mTracingOn;
-
- // Intentionally unimplemented
- FtraceSource(const FtraceSource &);
- FtraceSource &operator=(const FtraceSource &);
-};
-
-#endif // FTRACESOURCE_H
diff --git a/tools/gator/daemon/MaliVideoDriver.cpp b/tools/gator/daemon/MaliVideoDriver.cpp
index 2db332d3e2e4..209a2d3006db 100644
--- a/tools/gator/daemon/MaliVideoDriver.cpp
+++ b/tools/gator/daemon/MaliVideoDriver.cpp
@@ -58,6 +58,7 @@ MaliVideoDriver::~MaliVideoDriver() {
}
void MaliVideoDriver::readEvents(mxml_node_t *const xml) {
+ // Always create the counters as /dev/mv500 may show up after gatord starts
mxml_node_t *node = xml;
while (true) {
node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
@@ -82,12 +83,22 @@ void MaliVideoDriver::readEvents(mxml_node_t *const xml) {
int MaliVideoDriver::writeCounters(mxml_node_t *root) const {
if (access("/dev/mv500", F_OK) != 0) {
+ // Don't show the counters in counter configuration
return 0;
}
return super::writeCounters(root);
}
+bool MaliVideoDriver::claimCounter(const Counter &counter) const {
+ if (access("/dev/mv500", F_OK) != 0) {
+ // Don't add the counters to captured XML
+ return false;
+ }
+
+ return super::claimCounter(counter);
+}
+
void MaliVideoDriver::marshalEnable(const MaliVideoCounterType type, char *const buf, const size_t bufsize, int &pos) {
// size
int numEnabled = 0;
@@ -104,20 +115,6 @@ void MaliVideoDriver::marshalEnable(const MaliVideoCounterType type, char *const
}
}
-static bool writeAll(const int mveUds, const char *const buf, const int pos) {
- int written = 0;
- while (written < pos) {
- size_t bytes = ::write(mveUds, buf + written, pos - written);
- if (bytes <= 0) {
- logg->logMessage("write failed");
- return false;
- }
- written += bytes;
- }
-
- return true;
-}
-
bool MaliVideoDriver::start(const int mveUds) {
char buf[256];
int pos = 0;
@@ -146,7 +143,7 @@ bool MaliVideoDriver::start(const int mveUds) {
// data_protocol_version
Buffer::packInt(buf, sizeof(buf), pos, 1);
// sample_rate - convert samples/second to ms/sample
- Buffer::packInt(buf, sizeof(buf), pos, 1000/gSessionData->mSampleRate);
+ Buffer::packInt(buf, sizeof(buf), pos, gSessionData->mSampleRate/1000);
// live_rate - convert ns/flush to ms/flush
Buffer::packInt(buf, sizeof(buf), pos, gSessionData->mLiveRate/1000000);
@@ -183,7 +180,6 @@ void MaliVideoDriver::stop(const int mveUds) {
buf[pos++] = 'T';
buf[pos++] = 'O';
buf[pos++] = 'P';
- marshalEnable(MVCT_COUNTER, buf, sizeof(buf), pos);
writeAll(mveUds, buf, pos);
diff --git a/tools/gator/daemon/MaliVideoDriver.h b/tools/gator/daemon/MaliVideoDriver.h
index 35b0558e390b..fd01b1b2d85b 100644
--- a/tools/gator/daemon/MaliVideoDriver.h
+++ b/tools/gator/daemon/MaliVideoDriver.h
@@ -30,6 +30,7 @@ public:
void readEvents(mxml_node_t *const root);
int writeCounters(mxml_node_t *root) const;
+ bool claimCounter(const Counter &counter) const;
bool start(const int mveUds);
void stop(const int mveUds);
diff --git a/tools/gator/daemon/MemInfoDriver.cpp b/tools/gator/daemon/MemInfoDriver.cpp
index 6818b978dc5b..4c0b05131945 100644
--- a/tools/gator/daemon/MemInfoDriver.cpp
+++ b/tools/gator/daemon/MemInfoDriver.cpp
@@ -44,7 +44,7 @@ MemInfoDriver::~MemInfoDriver() {
void MemInfoDriver::readEvents(mxml_node_t *const) {
// Only for use with perf
- if (!gSessionData->perf.isSetup()) {
+ if (!gSessionData->mPerf.isSetup()) {
return;
}
diff --git a/tools/gator/daemon/NetDriver.cpp b/tools/gator/daemon/NetDriver.cpp
index 56b25e0cfa63..ab362115f29d 100644
--- a/tools/gator/daemon/NetDriver.cpp
+++ b/tools/gator/daemon/NetDriver.cpp
@@ -52,7 +52,7 @@ NetDriver::~NetDriver() {
void NetDriver::readEvents(mxml_node_t *const) {
// Only for use with perf
- if (!gSessionData->perf.isSetup()) {
+ if (!gSessionData->mPerf.isSetup()) {
return;
}
diff --git a/tools/gator/daemon/PerfDriver.cpp b/tools/gator/daemon/PerfDriver.cpp
index 796ee7564c66..e5f8aaa86c5a 100644
--- a/tools/gator/daemon/PerfDriver.cpp
+++ b/tools/gator/daemon/PerfDriver.cpp
@@ -56,9 +56,9 @@ static const struct gator_cpu gator_cpus[] = {
{ 0x51049, "KraitSIM", "Krait", 4 },
{ 0x5104d, "Krait", "Krait", 4 },
{ 0x5106f, "Krait S4 Pro", "Krait", 4 },
- { 0x41d03, "Cortex-A53", "ARM_Cortex-A53", 6 },
- { 0x41d07, "Cortex-A57", "ARM_Cortex-A57", 6 },
- { 0x41d08, "Cortex-A72", "ARM_Cortex-A72", 6 },
+ { 0x41d03, "Cortex-A53", "ARMv8_Cortex_A53", 6 },
+ { 0x41d07, "Cortex-A57", "ARMv8_Cortex_A57", 6 },
+ { 0x41d08, "Cortex-A72", "ARMv8_Cortex_A72", 6 },
};
static const char OLD_PMU_PREFIX[] = "ARMv7 Cortex-";
@@ -75,14 +75,14 @@ struct uncore_counter {
static const struct uncore_counter uncore_counters[] = {
{ "CCI_400", "CCI_400", 4, true },
- { "CCI_400-r1", "CCI_400-r1", 4, true },
+ { "CCI_400_r1", "CCI_400_r1", 4, true },
{ "CCI_500", "CCI_500", 8, false },
{ "ccn", "ARM_CCN_5XX", 8, true },
};
class PerfCounter : public DriverCounter {
public:
- PerfCounter(DriverCounter *next, const char *name, uint32_t type, uint64_t config, uint64_t sampleType, uint64_t flags) : DriverCounter(next, name), mType(type), mConfig(config), mSampleType(sampleType), mFlags(flags), mCount(0) {}
+ PerfCounter(DriverCounter *next, const char *name, uint32_t type, uint64_t config, uint64_t sampleType, uint64_t flags, const int count) : DriverCounter(next, name), mType(type), mConfig(config), mSampleType(sampleType), mFlags(flags), mCount(count) {}
~PerfCounter() {
}
@@ -93,13 +93,14 @@ public:
uint64_t getConfig() const { return mConfig; }
void setConfig(const uint64_t config) { mConfig = config; }
uint64_t getSampleType() const { return mSampleType; }
+ void setSampleType(uint64_t sampleType) { mSampleType = sampleType; }
uint64_t getFlags() const { return mFlags; }
virtual void read(Buffer *const, const int) {}
private:
const uint32_t mType;
uint64_t mConfig;
- const uint64_t mSampleType;
+ uint64_t mSampleType;
const uint64_t mFlags;
int mCount;
@@ -110,7 +111,7 @@ private:
class CPUFreqDriver : public PerfCounter {
public:
- CPUFreqDriver(DriverCounter *next, uint64_t id) : PerfCounter(next, "Linux_power_cpu_freq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_RAW, PERF_GROUP_LEADER | PERF_GROUP_PER_CPU) {}
+ CPUFreqDriver(DriverCounter *next, uint64_t id) : PerfCounter(next, "Linux_power_cpu_freq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_RAW, PERF_GROUP_LEADER | PERF_GROUP_PER_CPU, 1) {}
void read(Buffer *const buffer, const int cpu) {
char buf[64];
@@ -135,17 +136,35 @@ PerfDriver::PerfDriver() : mIsSetup(false), mLegacySupport(false) {
PerfDriver::~PerfDriver() {
}
+class PerfTracepoint {
+public:
+ PerfTracepoint(PerfTracepoint *const next, const DriverCounter *const counter, const char *const tracepoint) : mNext(next), mCounter(counter), mTracepoint(tracepoint) {}
+
+ PerfTracepoint *getNext() const { return mNext; }
+ const DriverCounter *getCounter() const { return mCounter; }
+ const char *getTracepoint() const { return mTracepoint; }
+
+private:
+ PerfTracepoint *const mNext;
+ const DriverCounter *const mCounter;
+ const char *const mTracepoint;
+
+ // Intentionally undefined
+ PerfTracepoint(const PerfTracepoint &);
+ PerfTracepoint &operator=(const PerfTracepoint &);
+};
+
void PerfDriver::addCpuCounters(const char *const counterName, const int type, const int numCounters) {
int len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
char *name = new char[len];
snprintf(name, len, "%s_ccnt", counterName);
- setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU));
+ setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU, 0));
for (int j = 0; j < numCounters; ++j) {
len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
name = new char[len];
snprintf(name, len, "%s_cnt%d", counterName, j);
- setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU));
+ setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU, 0));
}
}
@@ -157,14 +176,60 @@ void PerfDriver::addUncoreCounters(const char *const counterName, const int type
len = snprintf(NULL, 0, "%s_ccnt", counterName) + 1;
name = new char[len];
snprintf(name, len, "%s_ccnt", counterName);
- setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, 0));
+ setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, 0, 0));
}
for (int j = 0; j < numCounters; ++j) {
len = snprintf(NULL, 0, "%s_cnt%d", counterName, j) + 1;
name = new char[len];
snprintf(name, len, "%s_cnt%d", counterName, j);
- setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, 0));
+ setCounters(new PerfCounter(getCounters(), name, type, -1, PERF_SAMPLE_READ, 0, 0));
+ }
+}
+
+void PerfDriver::readEvents(mxml_node_t *const xml) {
+ mxml_node_t *node = xml;
+ DynBuf printb;
+
+ // Only for use with perf
+ if (!isSetup()) {
+ return;
+ }
+
+ while (true) {
+ node = mxmlFindElement(node, xml, "event", NULL, NULL, MXML_DESCEND);
+ if (node == NULL) {
+ break;
+ }
+ const char *counter = mxmlElementGetAttr(node, "counter");
+ if (counter == NULL) {
+ continue;
+ }
+
+ if (strncmp(counter, "ftrace_", 7) != 0) {
+ continue;
+ }
+
+ const char *tracepoint = mxmlElementGetAttr(node, "tracepoint");
+ if (tracepoint == NULL) {
+ const char *regex = mxmlElementGetAttr(node, "regex");
+ if (regex == NULL) {
+ logg->logError("The tracepoint counter %s is missing the required tracepoint attribute", counter);
+ handleException();
+ } else {
+ logg->logMessage("Not using perf for counter %s", counter);
+ continue;
+ }
+ }
+
+ const char *arg = mxmlElementGetAttr(node, "arg");
+
+ long long id = getTracepointId(tracepoint, &printb);
+ if (id >= 0) {
+ logg->logMessage("Using perf for %s", counter);
+ setCounters(new PerfCounter(getCounters(), strdup(counter), PERF_TYPE_TRACEPOINT, id, arg == NULL ? 0 : PERF_SAMPLE_RAW, PERF_GROUP_LEADER | PERF_GROUP_PER_CPU, 1));
+ mTracepoints = new PerfTracepoint(mTracepoints, getCounters(), strdup(tracepoint));
+ }
}
}
@@ -197,6 +262,7 @@ bool PerfDriver::setup() {
struct dirent *dirent;
while ((dirent = readdir(dir)) != NULL) {
+ logg->logMessage("perf pmu: %s", dirent->d_name);
for (int i = 0; i < ARRAY_LENGTH(gator_cpus); ++i) {
const struct gator_cpu *const gator_cpu = &gator_cpus[i];
@@ -217,7 +283,7 @@ bool PerfDriver::setup() {
}
foundCpu = true;
- logg->logMessage("Adding cpu counters for %s", gator_cpu->pmnc_name);
+ logg->logMessage("Adding cpu counters for %s with type %i", gator_cpu->pmnc_name, type);
addCpuCounters(gator_cpu->pmnc_name, type, gator_cpu->pmnc_counters);
}
@@ -233,7 +299,7 @@ bool PerfDriver::setup() {
continue;
}
- logg->logMessage("Adding uncore counters for %s", uncore_counters[i].gatorName);
+ logg->logMessage("Adding uncore counters for %s with type %i", uncore_counters[i].gatorName, type);
addUncoreCounters(uncore_counters[i].gatorName, type, uncore_counters[i].count, uncore_counters[i].hasCyclesCounter);
}
}
@@ -264,25 +330,25 @@ bool PerfDriver::setup() {
id = getTracepointId("irq/softirq_exit", &printb);
if (id >= 0) {
- setCounters(new PerfCounter(getCounters(), "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU));
+ setCounters(new PerfCounter(getCounters(), "Linux_irq_softirq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU, 0));
}
id = getTracepointId("irq/irq_handler_exit", &printb);
if (id >= 0) {
- setCounters(new PerfCounter(getCounters(), "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU));
+ setCounters(new PerfCounter(getCounters(), "Linux_irq_irq", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU, 0));
}
id = getTracepointId(SCHED_SWITCH, &printb);
if (id >= 0) {
- setCounters(new PerfCounter(getCounters(), "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU));
+ setCounters(new PerfCounter(getCounters(), "Linux_sched_switch", PERF_TYPE_TRACEPOINT, id, PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU, 0));
}
id = getTracepointId(CPU_FREQUENCY, &printb);
- if (id >= 0) {
+ if (id >= 0 && access("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq", R_OK) == 0) {
setCounters(new CPUFreqDriver(getCounters(), id));
}
- setCounters(new PerfCounter(getCounters(), "Linux_cpu_wait_contention", TYPE_DERIVED, -1, 0, 0));
+ setCounters(new PerfCounter(getCounters(), "Linux_cpu_wait_contention", TYPE_DERIVED, -1, 0, 0, 0));
//Linux_cpu_wait_io
@@ -300,6 +366,12 @@ bool PerfDriver::summary(Buffer *const buffer) {
char buf[512];
snprintf(buf, sizeof(buf), "%s %s %s %s %s GNU/Linux", utsname.sysname, utsname.nodename, utsname.release, utsname.version, utsname.machine);
+ long pageSize = sysconf(_SC_PAGESIZE);
+ if (pageSize < 0) {
+ logg->logMessage("sysconf _SC_PAGESIZE failed");
+ return false;
+ }
+
struct timespec ts;
if (clock_gettime(CLOCK_REALTIME, &ts) != 0) {
logg->logMessage("clock_gettime failed");
@@ -311,7 +383,7 @@ bool PerfDriver::summary(Buffer *const buffer) {
gSessionData->mMonotonicStarted = monotonicStarted;
const uint64_t currTime = 0;//getTime() - gSessionData->mMonotonicStarted;
- buffer->summary(currTime, timestamp, monotonicStarted, monotonicStarted, buf);
+ buffer->summary(currTime, timestamp, monotonicStarted, monotonicStarted, buf, pageSize);
for (int i = 0; i < gSessionData->mCores; ++i) {
coreName(currTime, buffer, i);
@@ -322,27 +394,28 @@ bool PerfDriver::summary(Buffer *const buffer) {
}
void PerfDriver::coreName(const uint64_t currTime, Buffer *const buffer, const int cpu) {
+ const SharedData *const sharedData = gSessionData->mSharedData;
// Don't send information on a cpu we know nothing about
- if (gSessionData->mCpuIds[cpu] == -1) {
+ if (sharedData->mCpuIds[cpu] == -1) {
return;
}
int j;
for (j = 0; j < ARRAY_LENGTH(gator_cpus); ++j) {
- if (gator_cpus[j].cpuid == gSessionData->mCpuIds[cpu]) {
+ if (gator_cpus[j].cpuid == sharedData->mCpuIds[cpu]) {
break;
}
}
- if (j < ARRAY_LENGTH(gator_cpus) && gator_cpus[j].cpuid == gSessionData->mCpuIds[cpu]) {
- buffer->coreName(currTime, cpu, gSessionData->mCpuIds[cpu], gator_cpus[j].core_name);
+ if (j < ARRAY_LENGTH(gator_cpus) && gator_cpus[j].cpuid == sharedData->mCpuIds[cpu]) {
+ buffer->coreName(currTime, cpu, sharedData->mCpuIds[cpu], gator_cpus[j].core_name);
} else {
char buf[32];
- if (gSessionData->mCpuIds[cpu] == -1) {
+ if (sharedData->mCpuIds[cpu] == -1) {
snprintf(buf, sizeof(buf), "Unknown");
} else {
- snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", gSessionData->mCpuIds[cpu]);
+ snprintf(buf, sizeof(buf), "Unknown (0x%.3x)", sharedData->mCpuIds[cpu]);
}
- buffer->coreName(currTime, cpu, gSessionData->mCpuIds[cpu], buf);
+ buffer->coreName(currTime, cpu, sharedData->mCpuIds[cpu], buf);
}
}
@@ -357,27 +430,22 @@ void PerfDriver::setupCounter(Counter &counter) {
if (counter.getEvent() != -1) {
perfCounter->setConfig(counter.getEvent());
}
- perfCounter->setCount(counter.getCount());
+ if (counter.getCount() > 0) {
+ // EBS
+ perfCounter->setCount(counter.getCount());
+ // Collect samples
+ perfCounter->setSampleType(perfCounter->getSampleType() | PERF_SAMPLE_TID | PERF_SAMPLE_IP);
+ }
perfCounter->setEnabled(true);
counter.setKey(perfCounter->getKey());
}
bool PerfDriver::enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const {
for (PerfCounter *counter = static_cast<PerfCounter *>(getCounters()); counter != NULL; counter = static_cast<PerfCounter *>(counter->getNext())) {
- if (counter->isEnabled() && (counter->getType() != TYPE_DERIVED)) {
- int count = counter->getCount();
- uint64_t sampleType = counter->getSampleType();
- if (sampleType & PERF_SAMPLE_RAW) {
- // If raw is enabled, every sample is needed
- count = 1;
- }
- if (!group->add(currTime, buffer, counter->getKey(), counter->getType(), counter->getConfig(), count,
- // use getCount instead of count as EBS counters need TID and IP but RAW tracepoints don't
- (counter->getCount() > 0 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP : 0) | sampleType,
- counter->getFlags())) {
- logg->logMessage("PerfGroup::add failed");
- return false;
- }
+ if (counter->isEnabled() && (counter->getType() != TYPE_DERIVED) &&
+ !group->add(currTime, buffer, counter->getKey(), counter->getType(), counter->getConfig(), counter->getCount(), counter->getSampleType(), counter->getFlags())) {
+ logg->logMessage("PerfGroup::add failed");
+ return false;
}
}
@@ -393,6 +461,38 @@ void PerfDriver::read(Buffer *const buffer, const int cpu) {
}
}
+static bool sendTracepointFormat(const uint64_t currTime, Buffer *const buffer, const char *const name, DynBuf *const printb, DynBuf *const b) {
+ if (!printb->printf(EVENTS_PATH "/%s/format", name)) {
+ logg->logMessage("DynBuf::printf failed");
+ return false;
+ }
+ if (!b->read(printb->getBuf())) {
+ logg->logMessage("DynBuf::read failed");
+ return false;
+ }
+ buffer->marshalFormat(currTime, b->getLength(), b->getBuf());
+
+ return true;
+}
+
+bool PerfDriver::sendTracepointFormats(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b) {
+ if (
+ !sendTracepointFormat(currTime, buffer, SCHED_SWITCH, printb, b) ||
+ !sendTracepointFormat(currTime, buffer, CPU_IDLE, printb, b) ||
+ !sendTracepointFormat(currTime, buffer, CPU_FREQUENCY, printb, b) ||
+ false) {
+ return false;
+ }
+
+ for (PerfTracepoint *tracepoint = mTracepoints; tracepoint != NULL; tracepoint = tracepoint->getNext()) {
+ if (tracepoint->getCounter()->isEnabled() && !sendTracepointFormat(currTime, buffer, tracepoint->getTracepoint(), printb, b)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
long long PerfDriver::getTracepointId(const char *const name, DynBuf *const printb) {
if (!printb->printf(EVENTS_PATH "/%s/id", name)) {
logg->logMessage("DynBuf::printf failed");
@@ -401,7 +501,7 @@ long long PerfDriver::getTracepointId(const char *const name, DynBuf *const prin
int64_t result;
if (DriverSource::readInt64Driver(printb->getBuf(), &result) != 0) {
- logg->logMessage("DriverSource::readInt64Driver failed");
+ logg->logMessage("Unable to read tracepoint id for %s", printb->getBuf());
return -1;
}
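
getTracepointId() above reads events/<name>/id from the tracing directory and treats -1 as "tracepoint unavailable". A standalone sketch of the same lookup, again assuming the common /sys/kernel/debug/tracing mount point rather than the daemon's EVENTS_PATH macro:

    // Sketch of a tracepoint id lookup equivalent to getTracepointId() above.
    #include <cstdio>

    static long long tracepointId(const char *name) {
        char path[512];
        snprintf(path, sizeof(path), "/sys/kernel/debug/tracing/events/%s/id", name);
        FILE *f = fopen(path, "r");
        if (f == NULL) {
            return -1; // tracepoint not available or tracing not mounted
        }
        long long id = -1;
        if (fscanf(f, "%lld", &id) != 1) {
            id = -1;
        }
        fclose(f);
        return id;
    }

    int main() {
        long long id = tracepointId("sched/sched_switch");
        printf("sched_switch id: %lld\n", id);
        return id >= 0 ? 0 : 1;
    }
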
diff --git a/tools/gator/daemon/PerfDriver.h b/tools/gator/daemon/PerfDriver.h
index 95b42bfa30c0..a38fcde06e89 100644
--- a/tools/gator/daemon/PerfDriver.h
+++ b/tools/gator/daemon/PerfDriver.h
@@ -13,10 +13,6 @@
#include "Driver.h"
-// If debugfs is not mounted at /sys/kernel/debug, update DEBUGFS_PATH
-#define DEBUGFS_PATH "/sys/kernel/debug"
-#define EVENTS_PATH DEBUGFS_PATH "/tracing/events"
-
#define SCHED_SWITCH "sched/sched_switch"
#define CPU_IDLE "power/cpu_idle"
#define CPU_FREQUENCY "power/cpu_frequency"
@@ -24,6 +20,7 @@
class Buffer;
class DynBuf;
class PerfGroup;
+class PerfTracepoint;
class PerfDriver : public SimpleDriver {
public:
@@ -32,6 +29,7 @@ public:
bool getLegacySupport() const { return mLegacySupport; }
+ void readEvents(mxml_node_t *const xml);
bool setup();
bool summary(Buffer *const buffer);
void coreName(const uint64_t currTime, Buffer *const buffer, const int cpu);
@@ -41,6 +39,7 @@ public:
bool enable(const uint64_t currTime, PerfGroup *const group, Buffer *const buffer) const;
void read(Buffer *const buffer, const int cpu);
+ bool sendTracepointFormats(const uint64_t currTime, Buffer *const buffer, DynBuf *const printb, DynBuf *const b);
static long long getTracepointId(const char *const name, DynBuf *const printb);
@@ -50,6 +49,7 @@ private:
bool mIsSetup;
bool mLegacySupport;
+ PerfTracepoint *mTracepoints;
// Intentionally undefined
PerfDriver(const PerfDriver &);
diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp
index cfc62e4cc77e..9b71531d2b1c 100644
--- a/tools/gator/daemon/PerfGroup.cpp
+++ b/tools/gator/daemon/PerfGroup.cpp
@@ -10,6 +10,7 @@
#include <errno.h>
#include <fcntl.h>
+#include <limits.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
@@ -23,12 +24,11 @@
#include "SessionData.h"
static const int schedSwitchKey = getEventKey();
-static const int clockKey = getEventKey();
#define DEFAULT_PEA_ARGS(pea, additionalSampleType) \
pea.size = sizeof(pea); \
/* Emit time, read_format below, group leader id, and raw tracepoint info */ \
- pea.sample_type = (gSessionData->perf.getLegacySupport() \
+ pea.sample_type = (gSessionData->mPerf.getLegacySupport() \
? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_ID \
: PERF_SAMPLE_IDENTIFIER ) | PERF_SAMPLE_TIME | additionalSampleType; \
/* Emit emit value in group format */ \
@@ -132,7 +132,7 @@ bool PerfGroup::createCpuGroup(const uint64_t currTime, Buffer *const buffer) {
return false;
}
- if (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU) < 0) {
+ if (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && doAdd(currTime, buffer, INT_MAX-PERF_TYPE_HARDWARE, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU) < 0) {
return false;
}
@@ -152,8 +152,7 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key
} else {
// Non-CPU PMUs are sampled every 100ms for Sample Rate: None and EBS, otherwise they would never be sampled
const uint64_t timeout = gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS ? 1000000000UL / gSessionData->mSampleRate : 100000000UL;
- // PERF_SAMPLE_TID | PERF_SAMPLE_IP aren't helpful on non-CPU or 'uncore' PMUs - which CPU is the right one to sample? But removing it causes problems, remove it later.
- mLeaders[effectiveType] = doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, timeout, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_LEADER);
+ mLeaders[effectiveType] = doAdd(currTime, buffer, INT_MAX-effectiveType, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, timeout, PERF_SAMPLE_READ, PERF_GROUP_LEADER);
if (mLeaders[effectiveType] < 0) {
return false;
}
@@ -194,16 +193,12 @@ int PerfGroup::prepareCPU(const int cpu, Monitor *const monitor) {
// The core is offline
return PG_CPU_OFFLINE;
}
-#ifndef USE_STRICTER_CHECK
- continue;
-#else
if (errno == ENOENT) {
// This event doesn't apply to this CPU but should apply to a different one, ex bL
continue;
}
logg->logMessage("perf_event_open failed");
return PG_FAILURE;
-#endif
}
if (!mPb->useFd(cpu, mFds[offset])) {
@@ -236,7 +231,7 @@ static bool readAndSend(const uint64_t currTime, Buffer *const buffer, const int
int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool enable, Buffer *const buffer) {
bool addedEvents = false;
- if (!gSessionData->perf.getLegacySupport()) {
+ if (!gSessionData->mPerf.getLegacySupport()) {
int idCount = 0;
int coreKeys[ARRAY_LENGTH(mKeys)];
__u64 ids[ARRAY_LENGTH(mKeys)];
diff --git a/tools/gator/daemon/PerfSource.cpp b/tools/gator/daemon/PerfSource.cpp
index 2c45de8e06e9..9f34bbd1e54f 100644
--- a/tools/gator/daemon/PerfSource.cpp
+++ b/tools/gator/daemon/PerfSource.cpp
@@ -33,20 +33,6 @@ extern Child *child;
static const int cpuIdleKey = getEventKey();
-static bool sendTracepointFormat(const uint64_t currTime, Buffer *const buffer, const char *const name, DynBuf *const printb, DynBuf *const b) {
- if (!printb->printf(EVENTS_PATH "/%s/format", name)) {
- logg->logMessage("DynBuf::printf failed");
- return false;
- }
- if (!b->read(printb->getBuf())) {
- logg->logMessage("DynBuf::read failed");
- return false;
- }
- buffer->marshalFormat(currTime, b->getLength(), b->getBuf());
-
- return true;
-}
-
static void *syncFunc(void *arg)
{
struct timespec ts;
@@ -160,17 +146,14 @@ bool PerfSource::prepare() {
|| !mUEvent.init()
|| !mMonitor.add(mUEvent.getFd())
- || !sendTracepointFormat(currTime, mBuffer, SCHED_SWITCH, &printb, &b1)
-
|| (cpuIdleId = PerfDriver::getTracepointId(CPU_IDLE, &printb)) < 0
- || !sendTracepointFormat(currTime, mBuffer, CPU_IDLE, &printb, &b1)
- || !sendTracepointFormat(currTime, mBuffer, CPU_FREQUENCY, &printb, &b1)
+ || !gSessionData->mPerf.sendTracepointFormats(currTime, mBuffer, &printb, &b1)
|| !mCountersGroup.createCpuGroup(currTime, mBuffer)
|| !mCountersGroup.add(currTime, mBuffer, cpuIdleKey, PERF_TYPE_TRACEPOINT, cpuIdleId, 1, PERF_SAMPLE_RAW, PERF_GROUP_LEADER | PERF_GROUP_PER_CPU)
- || !gSessionData->perf.enable(currTime, &mCountersGroup, mBuffer)
+ || !gSessionData->mPerf.enable(currTime, &mCountersGroup, mBuffer)
|| 0) {
logg->logMessage("perf setup failed, are you running Linux 3.4 or later?");
return false;
@@ -194,7 +177,7 @@ bool PerfSource::prepare() {
}
// Send the summary right before the start so that the monotonic delta is close to the start time
- if (!gSessionData->perf.summary(&mSummary)) {
+ if (!gSessionData->mPerf.summary(&mSummary)) {
logg->logError("PerfDriver::summary failed");
handleException();
}
@@ -283,7 +266,7 @@ void PerfSource::run() {
mBuffer->perfCounterHeader(currTime);
for (int cpu = 0; cpu < gSessionData->mCores; ++cpu) {
- gSessionData->perf.read(mBuffer, cpu);
+ gSessionData->mPerf.read(mBuffer, cpu);
}
mBuffer->perfCounterFooter(currTime);
@@ -396,7 +379,7 @@ bool PerfSource::handleUEvent(const uint64_t currTime) {
} else if (err == PG_SUCCESS) {
if (mCountersGroup.onlineCPU(currTime, cpu, true, mBuffer) > 0) {
mBuffer->perfCounterHeader(currTime);
- gSessionData->perf.read(mBuffer, cpu);
+ gSessionData->mPerf.read(mBuffer, cpu);
mBuffer->perfCounterFooter(currTime);
ret = true;
}
@@ -404,7 +387,7 @@ bool PerfSource::handleUEvent(const uint64_t currTime) {
mBuffer->commit(currTime);
gSessionData->readCpuInfo();
- gSessionData->perf.coreName(currTime, &mSummary, cpu);
+ gSessionData->mPerf.coreName(currTime, &mSummary, cpu);
mSummary.commit(currTime);
return ret;
} else if (strcmp(result.mAction, "offline") == 0) {
diff --git a/tools/gator/daemon/Proc.cpp b/tools/gator/daemon/Proc.cpp
index 4ba59b632836..ffe1a786b98b 100644
--- a/tools/gator/daemon/Proc.cpp
+++ b/tools/gator/daemon/Proc.cpp
@@ -72,12 +72,7 @@ static const char *readProcExe(DynBuf *const printb, const int pid, const int ti
const int err = b->readlink(printb->getBuf());
const char *image;
if (err == 0) {
- image = strrchr(b->getBuf(), '/');
- if (image == NULL) {
- image = b->getBuf();
- } else {
- ++image;
- }
+ image = b->getBuf();
} else if (err == -ENOENT) {
// readlink /proc/[pid]/exe returns ENOENT for kernel threads
image = "\0";
@@ -88,7 +83,7 @@ static const char *readProcExe(DynBuf *const printb, const int pid, const int ti
// Android apps are run by app_process but the cmdline is changed to reference the actual app name
// On 64-bit android app_process can be app_process32 or app_process64
- if (strncmp(image, APP_PROCESS, sizeof(APP_PROCESS) - 1) != 0) {
+ if (strstr(image, APP_PROCESS) == NULL) {
return image;
}
diff --git a/tools/gator/daemon/Sender.cpp b/tools/gator/daemon/Sender.cpp
index d7ad757165a2..71ddfced6890 100644
--- a/tools/gator/daemon/Sender.cpp
+++ b/tools/gator/daemon/Sender.cpp
@@ -44,7 +44,15 @@ Sender::Sender(OlySocket* socket) {
logg->logMessage("Completed magic sequence");
}
- pthread_mutex_init(&mSendMutex, NULL);
+ pthread_mutexattr_t attr;
+ if (pthread_mutexattr_init(&attr) != 0 ||
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK) != 0 ||
+ pthread_mutex_init(&mSendMutex, &attr) != 0 ||
+ pthread_mutexattr_destroy(&attr) != 0 ||
+ false) {
+ logg->logError("Unable to setup mutex");
+ handleException();
+ }
}
Sender::~Sender() {
@@ -78,7 +86,10 @@ void Sender::writeData(const char* data, int length, int type) {
}
// Multiple threads call writeData()
- pthread_mutex_lock(&mSendMutex);
+ if (pthread_mutex_lock(&mSendMutex) != 0) {
+ logg->logError("pthread_mutex_lock failed");
+ handleException();
+ }
// Send data over the socket connection
if (mDataSocket) {
@@ -100,7 +111,7 @@ void Sender::writeData(const char* data, int length, int type) {
const int chunkSize = 100*1000 * alarmDuration / 8;
int pos = 0;
while (true) {
- mDataSocket->send((const char*)data + pos, min(length - pos, chunkSize));
+ mDataSocket->send(data + pos, min(length - pos, chunkSize));
pos += chunkSize;
if (pos >= length) {
break;
@@ -125,5 +136,8 @@ void Sender::writeData(const char* data, int length, int type) {
}
}
- pthread_mutex_unlock(&mSendMutex);
+ if (pthread_mutex_unlock(&mSendMutex) != 0) {
+ logg->logError("pthread_mutex_unlock failed");
+ handleException();
+ }
}
diff --git a/tools/gator/daemon/SessionData.cpp b/tools/gator/daemon/SessionData.cpp
index 2b661bdf294c..42917c034c51 100644
--- a/tools/gator/daemon/SessionData.cpp
+++ b/tools/gator/daemon/SessionData.cpp
@@ -25,33 +25,40 @@
SessionData* gSessionData = NULL;
+SharedData::SharedData() : mMaliUtgardCountersSize(0) {
+ memset(mCpuIds, -1, sizeof(mCpuIds));
+}
+
SessionData::SessionData() {
- usDrivers[0] = new HwmonDriver();
- usDrivers[1] = new FSDriver();
- usDrivers[2] = new MemInfoDriver();
- usDrivers[3] = new NetDriver();
- usDrivers[4] = new DiskIODriver();
+ mUsDrivers[0] = new HwmonDriver();
+ mUsDrivers[1] = new FSDriver();
+ mUsDrivers[2] = new MemInfoDriver();
+ mUsDrivers[3] = new NetDriver();
+ mUsDrivers[4] = new DiskIODriver();
initialize();
}
SessionData::~SessionData() {
}
+// Needed to use placement new
+inline void *operator new(size_t, void *ptr) { return ptr; }
+
void SessionData::initialize() {
+ mSharedData = (SharedData *)mmap(NULL, sizeof(*mSharedData), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (mSharedData == MAP_FAILED) {
+ logg->logError("Unable to mmap shared memory for cpuids");
+ handleException();
+ }
+ // Use placement new to construct but not allocate the object
+ new ((char *)mSharedData) SharedData();
+
mWaitingOnCommand = false;
mSessionIsActive = false;
mLocalCapture = false;
mOneShot = false;
mSentSummary = false;
mAllowCommands = false;
- const size_t cpuIdSize = sizeof(int)*NR_CPUS;
- // Share mCpuIds across all instances of gatord
- mCpuIds = (int *)mmap(NULL, cpuIdSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- if (mCpuIds == MAP_FAILED) {
- logg->logError("Unable to mmap shared memory for cpuids");
- handleException();
- }
- memset(mCpuIds, -1, cpuIdSize);
strcpy(mCoreName, CORE_NAME_UNKNOWN);
readModel();
readCpuInfo();
@@ -83,11 +90,11 @@ void SessionData::parseSessionXML(char* xmlString) {
	// Set session data values - use prime numbers close to the desired value to reduce the chance of events firing at the same time
if (strcmp(session.parameters.sample_rate, "high") == 0) {
- mSampleRate = 9973; // 10000
+ mSampleRate = 10007; // 10000
} else if (strcmp(session.parameters.sample_rate, "normal") == 0) {
- mSampleRate = 997; // 1000
+ mSampleRate = 1009; // 1000
} else if (strcmp(session.parameters.sample_rate, "low") == 0) {
- mSampleRate = 97; // 100
+ mSampleRate = 101; // 100
} else if (strcmp(session.parameters.sample_rate, "none") == 0) {
mSampleRate = 0;
} else {
@@ -126,7 +133,7 @@ void SessionData::parseSessionXML(char* xmlString) {
}
void SessionData::readModel() {
- FILE *fh = fopen("/proc/device-tree/model", "rb");
+ FILE *fh = fopen_cloexec("/proc/device-tree/model", "rb");
if (fh == NULL) {
return;
}
@@ -157,7 +164,7 @@ void SessionData::readCpuInfo() {
char temp[256]; // arbitrarily large amount
mMaxCpuId = -1;
- FILE *f = fopen("/proc/cpuinfo", "r");
+ FILE *f = fopen_cloexec("/proc/cpuinfo", "r");
if (f == NULL) {
logg->logMessage("Error opening /proc/cpuinfo\n"
"The core name in the captured xml file will be 'unknown'.");
@@ -169,17 +176,19 @@ void SessionData::readCpuInfo() {
while (fgets(temp, sizeof(temp), f)) {
const size_t len = strlen(temp);
+ if (len > 0) {
+ // Replace the line feed with a null
+ temp[len - 1] = '\0';
+ }
+
+ logg->logMessage("cpuinfo: %s", temp);
+
if (len == 1) {
	// New section, clear the processor. If the pre-Linux 3.8 format of cpuinfo is encountered, Streamline will not know the cpus, but no incorrect information will be transmitted.
processor = -1;
continue;
}
- if (len > 0) {
- // Replace the line feed with a null
- temp[len - 1] = '\0';
- }
-
const bool foundHardware = !foundCoreName && strstr(temp, "Hardware") != 0;
const bool foundCPUImplementer = strstr(temp, "CPU implementer") != 0;
const bool foundCPUPart = strstr(temp, "CPU part") != 0;
@@ -204,7 +213,7 @@ void SessionData::readCpuInfo() {
if (processor >= NR_CPUS) {
logg->logMessage("Too many processors, please increase NR_CPUS");
} else if (processor >= 0) {
- setImplementer(mCpuIds[processor], implementer);
+ setImplementer(mSharedData->mCpuIds[processor], implementer);
} else {
setImplementer(mMaxCpuId, implementer);
}
@@ -215,7 +224,7 @@ void SessionData::readCpuInfo() {
if (processor >= NR_CPUS) {
logg->logMessage("Too many processors, please increase NR_CPUS");
} else if (processor >= 0) {
- setPart(mCpuIds[processor], cpuId);
+ setPart(mSharedData->mCpuIds[processor], cpuId);
} else {
setPart(mMaxCpuId, cpuId);
}
@@ -229,8 +238,8 @@ void SessionData::readCpuInfo() {
// If this does not have the full topology in /proc/cpuinfo, mCpuIds[0] may not have the 1 CPU part emitted - this guarantees it's in mMaxCpuId
for (int i = 0; i < NR_CPUS; ++i) {
- if (mCpuIds[i] > mMaxCpuId) {
- mMaxCpuId = mCpuIds[i];
+ if (mSharedData->mCpuIds[i] > mMaxCpuId) {
+ mMaxCpuId = mSharedData->mCpuIds[i];
}
}
@@ -290,3 +299,48 @@ FILE *fopen_cloexec(const char *path, const char *mode) {
}
return fh;
}
+
+bool setNonblock(const int fd) {
+ int flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags < 0) {
+ logg->logMessage("fcntl getfl failed");
+ return false;
+ }
+
+ if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != 0) {
+ logg->logMessage("fcntl setfl failed");
+ return false;
+ }
+
+ return true;
+}
+
+bool writeAll(const int fd, const void *const buf, const size_t pos) {
+ size_t written = 0;
+ while (written < pos) {
+ ssize_t bytes = write(fd, (const uint8_t *)buf + written, pos - written);
+ if (bytes <= 0) {
+ logg->logMessage("write failed");
+ return false;
+ }
+ written += bytes;
+ }
+
+ return true;
+}
+
+bool readAll(const int fd, void *const buf, const size_t count) {
+ size_t pos = 0;
+ while (pos < count) {
+ ssize_t bytes = read(fd, (uint8_t *)buf + pos, count - pos);
+ if (bytes <= 0) {
+ logg->logMessage("read failed");
+ return false;
+ }
+ pos += bytes;
+ }
+
+ return true;
+}
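For context: the helpers added above loop because write() and read() may transfer fewer bytes than requested, so a single call is not enough to move a whole buffer. A minimal standalone sketch of the same pattern over a pipe, with the logging and gator globals left out (illustrative only, not part of the patch):

    #include <cstdio>
    #include <unistd.h>

    // Simplified writeAll/readAll: retry until the whole buffer has been
    // transferred, or an error/EOF cuts the transfer short.
    static bool writeAllSimple(const int fd, const void *const buf, const size_t count) {
        size_t written = 0;
        while (written < count) {
            const ssize_t bytes = write(fd, (const char *)buf + written, count - written);
            if (bytes <= 0) {
                return false; // error, or nothing could be written
            }
            written += bytes;
        }
        return true;
    }

    static bool readAllSimple(const int fd, void *const buf, const size_t count) {
        size_t pos = 0;
        while (pos < count) {
            const ssize_t bytes = read(fd, (char *)buf + pos, count - pos);
            if (bytes <= 0) {
                return false; // error, or EOF before count bytes arrived
            }
            pos += bytes;
        }
        return true;
    }

    int main() {
        int fds[2];
        if (pipe(fds) != 0) {
            return 1;
        }
        const char msg[] = "hello";
        char out[sizeof(msg)] = {};
        // A few bytes fit comfortably in the pipe buffer, so using both ends
        // from one thread does not deadlock here.
        if (writeAllSimple(fds[1], msg, sizeof(msg)) && readAllSimple(fds[0], out, sizeof(out))) {
            printf("received: %s\n", out);
        }
        close(fds[0]);
        close(fds[1]);
        return 0;
    }

Note that, as written, these loops treat a -1 return as failure, so a descriptor switched to O_NONBLOCK with setNonblock() would bail out on EAGAIN rather than wait; they are best suited to blocking descriptors or to callers that tolerate the early return.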
diff --git a/tools/gator/daemon/SessionData.h b/tools/gator/daemon/SessionData.h
index d0c8900317a5..12432d02d851 100644
--- a/tools/gator/daemon/SessionData.h
+++ b/tools/gator/daemon/SessionData.h
@@ -11,14 +11,17 @@
#include <stdint.h>
+#include "AtraceDriver.h"
+#include "CCNDriver.h"
#include "Config.h"
#include "Counter.h"
+#include "ExternalDriver.h"
#include "FtraceDriver.h"
#include "KMod.h"
#include "MaliVideoDriver.h"
#include "PerfDriver.h"
-#define PROTOCOL_VERSION 21
+#define PROTOCOL_VERSION 22
// Differentiates development versions (timestamp) from release versions
#define PROTOCOL_DEV 1000
@@ -31,22 +34,41 @@ struct ImageLinkList {
struct ImageLinkList *next;
};
+class SharedData {
+public:
+ SharedData();
+
+ int mCpuIds[NR_CPUS];
+ size_t mMaliUtgardCountersSize;
+ char mMaliUtgardCounters[1<<12];
+
+private:
+ // Intentionally unimplemented
+ SharedData(const SharedData &);
+ SharedData &operator=(const SharedData &);
+};
+
class SessionData {
public:
static const size_t MAX_STRING_LEN = 80;
SessionData();
~SessionData();
- void initialize();
void parseSessionXML(char* xmlString);
void readModel();
void readCpuInfo();
- PolledDriver *usDrivers[5];
- KMod kmod;
- PerfDriver perf;
- MaliVideoDriver maliVideo;
- FtraceDriver ftraceDriver;
+ SharedData *mSharedData;
+
+ PolledDriver *mUsDrivers[5];
+ KMod mKmod;
+ PerfDriver mPerf;
+ MaliVideoDriver mMaliVideo;
+ // Intentionally above FtraceDriver as drivers are initialized in reverse order and AtraceDriver references FtraceDriver
+ AtraceDriver mAtraceDriver;
+ FtraceDriver mFtraceDriver;
+ ExternalDriver mExternalDriver;
+ CCNDriver mCcnDriver;
char mCoreName[MAX_STRING_LEN];
struct ImageLinkList *mImages;
@@ -78,15 +100,16 @@ public:
int mDuration;
int mCores;
int mPageSize;
- int *mCpuIds;
int mMaxCpuId;
int mAnnotateStart;
// PMU Counters
- int mCounterOverflow;
+ char *mCountersError;
Counter mCounters[MAX_PERFORMANCE_COUNTERS];
private:
+ void initialize();
+
// Intentionally unimplemented
SessionData(const SessionData &);
SessionData &operator=(const SessionData &);
@@ -99,5 +122,8 @@ uint64_t getTime();
int getEventKey();
int pipe_cloexec(int pipefd[2]);
FILE *fopen_cloexec(const char *path, const char *mode);
+bool setNonblock(const int fd);
+bool writeAll(const int fd, const void *const buf, const size_t pos);
+bool readAll(const int fd, void *const buf, const size_t count);
#endif // SESSION_DATA_H
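The header now routes the per-CPU IDs and the Mali Utgard counter blob through a SharedData object whose copy constructor and assignment are intentionally unimplemented. How the object is allocated is not visible in this hunk; a plausible reading is that it lives in anonymous shared memory so that every gatord process sees the same instance. A hypothetical sketch of that approach, using placement new into a MAP_SHARED mapping (names and layout are illustrative, not the daemon's actual code):

    #include <new>
    #include <sys/mman.h>

    class SharedDataSketch {
    public:
        SharedDataSketch() : mValue(0) {}
        int mValue;
    private:
        // Non-copyable: everyone must use the single shared instance.
        SharedDataSketch(const SharedDataSketch &);
        SharedDataSketch &operator=(const SharedDataSketch &);
    };

    int main() {
        // A MAP_SHARED | MAP_ANONYMOUS mapping created before fork() is shared
        // with the children, so writes from any process are visible to all.
        void *mem = mmap(NULL, sizeof(SharedDataSketch), PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED) {
            return 1;
        }
        SharedDataSketch *const shared = new (mem) SharedDataSketch();
        shared->mValue = 42; // e.g. a CPU id filled in by one process, read by another
        munmap(mem, sizeof(SharedDataSketch));
        return 0;
    }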
diff --git a/tools/gator/daemon/Setup.cpp b/tools/gator/daemon/Setup.cpp
index 7dd83ceafcce..524a70204e24 100644
--- a/tools/gator/daemon/Setup.cpp
+++ b/tools/gator/daemon/Setup.cpp
@@ -153,30 +153,34 @@ void update(const char *const gatorPath) {
handleException();
}
- if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(2, 6, 32)) {
- logg->logError(GATOR_ERROR "Streamline can't automatically setup gator as this kernel version is not supported. Please upgrade the kernel on your device.");
- handleException();
- }
-
if (KERNEL_VERSION(version[0], version[1], version[2]) < KERNEL_VERSION(3, 4, 0)) {
- logg->logError(GATOR_ERROR "Streamline can't automatically setup gator as gator.ko is required for this version of Linux. Please build gator.ko and gatord and install them on your device.");
+ logg->logError(GATOR_ERROR "Streamline can't automatically setup gator as this kernel version is not supported. Please upgrade the kernel on your device.");
handleException();
}
if (geteuid() != 0) {
- printf(GATOR_MSG "trying sudo\n");
- execlp("sudo", "sudo", gatorPath, "-u", NULL);
- // Streamline will provide the password if needed
-
- printf(GATOR_MSG "trying su\n");
char buf[1<<10];
+ snprintf(buf, sizeof(buf),
+ "which sudo &&"
+ "("
+ "sudo -n %1$s -u ||"
+ "("
+ "echo " GATOR_MSG "trying sudo;"
+ // Streamline will provide the password
+ "sudo %1$s -u"
+ ")"
+ ") || ("
+ "echo " GATOR_MSG "trying su;"
/*
* Different versions of su handle additional -c command line options differently and expect the
* arguments in different ways. Try both ways wrapped in a shell.
*
* Then invoke another shell after su as it avoids odd failures on some Android systems
*/
- snprintf(buf, sizeof(buf), "su -c \"sh -c '%s -u'\" || su -c sh -c '%s -u'", gatorPath, gatorPath);
+ "su -c \"sh -c '%1$s -u'\" ||"
+ "su -c sh -c '%1$s -u'"
+ ")",
+ gatorPath);
execlp("sh", "sh", "-c", buf, NULL);
// Streamline will provide the password if needed
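The rebuilt command string relies on the POSIX numbered-argument form of printf conversions: "%1$s" re-reads the first argument after the format string wherever it appears, so gatorPath only needs to be passed to snprintf once even though it is substituted several times. A small standalone illustration with a made-up path (not the gator command line):

    #include <cstdio>

    int main() {
        char buf[128];
        // "%1$s" always refers to the first variadic argument, so the same
        // string can be dropped into the command at several positions.
        snprintf(buf, sizeof(buf), "test -x %1$s && %1$s --version", "/usr/bin/example");
        puts(buf); // test -x /usr/bin/example && /usr/bin/example --version
        return 0;
    }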
@@ -228,6 +232,11 @@ void update(const char *const gatorPath) {
umount("/dev/gator");
syscall(__NR_delete_module, "gator", O_NONBLOCK);
+ if (access("/sys/module/gator", F_OK) == 0) {
+ logg->logError(GATOR_ERROR "Unable to unload gator.ko, the gator module may be built into the kernel or gator.ko cannot be unloaded. Rebooting the device may resolve the issue.");
+ handleException();
+ }
+
rename("gatord", "gatord.old");
rename("gator.ko", "gator.ko.old");
@@ -314,8 +323,7 @@ void update(const char *const gatorPath) {
close(pipefd[1]);
const ssize_t bytes = read(pipefd[0], buf, sizeof(buf));
if (bytes > 0) {
- logg->logError("%s", buf);
- handleException();
+ printf("%s\n", buf);
}
close(pipefd[0]);
diff --git a/tools/gator/daemon/StreamlineSetup.cpp b/tools/gator/daemon/StreamlineSetup.cpp
index e37f2712cd04..66d1f4e94a2a 100644
--- a/tools/gator/daemon/StreamlineSetup.cpp
+++ b/tools/gator/daemon/StreamlineSetup.cpp
@@ -75,8 +75,8 @@ StreamlineSetup::StreamlineSetup(OlySocket* s) {
free(data);
}
- if (gSessionData->mCounterOverflow > 0) {
- logg->logError("Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow);
+ if (gSessionData->mCountersError != NULL) {
+ logg->logError("%s", gSessionData->mCountersError);
handleException();
}
}
@@ -197,7 +197,7 @@ void StreamlineSetup::sendData(const char* data, uint32_t length, char type) {
header[0] = type;
Buffer::writeLEInt(header + 1, length);
mSocket->send((char*)&header, sizeof(header));
- mSocket->send((const char*)data, length);
+ mSocket->send(data, length);
}
void StreamlineSetup::sendEvents() {
@@ -265,8 +265,8 @@ void StreamlineSetup::writeConfiguration(char* xml) {
// Re-populate gSessionData with the configuration, as it has now changed
{ ConfigurationXML configuration; }
- if (gSessionData->mCounterOverflow > 0) {
- logg->logError("Only %i performance counters are permitted, %i are selected", MAX_PERFORMANCE_COUNTERS, gSessionData->mCounterOverflow);
+ if (gSessionData->mCountersError != NULL) {
+ logg->logError("%s", gSessionData->mCountersError);
handleException();
}
}
diff --git a/tools/gator/daemon/UserSpaceSource.cpp b/tools/gator/daemon/UserSpaceSource.cpp
index f58f828e6e2e..103631248bf3 100644
--- a/tools/gator/daemon/UserSpaceSource.cpp
+++ b/tools/gator/daemon/UserSpaceSource.cpp
@@ -34,15 +34,15 @@ bool UserSpaceSource::prepare() {
void UserSpaceSource::run() {
prctl(PR_SET_NAME, (unsigned long)&"gatord-counters", 0, 0, 0);
- for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
- gSessionData->usDrivers[i]->start();
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->mUsDrivers); ++i) {
+ gSessionData->mUsDrivers[i]->start();
}
int64_t monotonicStarted = 0;
while (monotonicStarted <= 0 && gSessionData->mSessionIsActive) {
usleep(10);
- if (gSessionData->perf.isSetup()) {
+ if (gSessionData->mPerf.isSetup()) {
monotonicStarted = gSessionData->mMonotonicStarted;
} else {
if (DriverSource::readInt64Driver("/dev/gator/started", &monotonicStarted) == -1) {
@@ -64,8 +64,8 @@ void UserSpaceSource::run() {
}
if (mBuffer.eventHeader(currTime)) {
- for (int i = 0; i < ARRAY_LENGTH(gSessionData->usDrivers); ++i) {
- gSessionData->usDrivers[i]->read(&mBuffer);
+ for (int i = 0; i < ARRAY_LENGTH(gSessionData->mUsDrivers); ++i) {
+ gSessionData->mUsDrivers[i]->read(&mBuffer);
}
// Only check after writing all counters so that time and corresponding counters appear in the same frame
mBuffer.check(currTime);
diff --git a/tools/gator/daemon/defaults.xml b/tools/gator/daemon/defaults.xml
index 31b127cdcfc6..9adde32667dc 100644
--- a/tools/gator/daemon/defaults.xml
+++ b/tools/gator/daemon/defaults.xml
@@ -34,21 +34,21 @@
<configuration counter="ARMv7_Cortex_A17_cnt1" event="0x16"/>
<configuration counter="ARMv7_Cortex_A17_cnt2" event="0x10"/>
<configuration counter="ARMv7_Cortex_A17_cnt3" event="0x19"/>
- <configuration counter="ARM_Cortex-A53_ccnt" event="0x11"/>
- <configuration counter="ARM_Cortex-A53_cnt0" event="0x8"/>
- <configuration counter="ARM_Cortex-A53_cnt1" event="0x16"/>
- <configuration counter="ARM_Cortex-A53_cnt2" event="0x10"/>
- <configuration counter="ARM_Cortex-A53_cnt3" event="0x19"/>
- <configuration counter="ARM_Cortex-A57_ccnt" event="0x11"/>
- <configuration counter="ARM_Cortex-A57_cnt0" event="0x8"/>
- <configuration counter="ARM_Cortex-A57_cnt1" event="0x16"/>
- <configuration counter="ARM_Cortex-A57_cnt2" event="0x10"/>
- <configuration counter="ARM_Cortex-A57_cnt3" event="0x19"/>
- <configuration counter="ARM_Cortex-A72_ccnt" event="0x11"/>
- <configuration counter="ARM_Cortex-A72_cnt0" event="0x8"/>
- <configuration counter="ARM_Cortex-A72_cnt1" event="0x16"/>
- <configuration counter="ARM_Cortex-A72_cnt2" event="0x10"/>
- <configuration counter="ARM_Cortex-A72_cnt3" event="0x19"/>
+ <configuration counter="ARMv8_Cortex_A53_ccnt" event="0x11"/>
+ <configuration counter="ARMv8_Cortex_A53_cnt0" event="0x8"/>
+ <configuration counter="ARMv8_Cortex_A53_cnt1" event="0x16"/>
+ <configuration counter="ARMv8_Cortex_A53_cnt2" event="0x10"/>
+ <configuration counter="ARMv8_Cortex_A53_cnt3" event="0x19"/>
+ <configuration counter="ARMv8_Cortex_A57_ccnt" event="0x11"/>
+ <configuration counter="ARMv8_Cortex_A57_cnt0" event="0x8"/>
+ <configuration counter="ARMv8_Cortex_A57_cnt1" event="0x16"/>
+ <configuration counter="ARMv8_Cortex_A57_cnt2" event="0x10"/>
+ <configuration counter="ARMv8_Cortex_A57_cnt3" event="0x19"/>
+ <configuration counter="ARMv8_Cortex_A72_ccnt" event="0x11"/>
+ <configuration counter="ARMv8_Cortex_A72_cnt0" event="0x8"/>
+ <configuration counter="ARMv8_Cortex_A72_cnt1" event="0x16"/>
+ <configuration counter="ARMv8_Cortex_A72_cnt2" event="0x10"/>
+ <configuration counter="ARMv8_Cortex_A72_cnt3" event="0x19"/>
<configuration counter="Scorpion_ccnt" event="0xff"/>
<configuration counter="Scorpion_cnt0" event="0x08"/>
<configuration counter="Scorpion_cnt1" event="0x10"/>
diff --git a/tools/gator/daemon/events-CCI-400.xml b/tools/gator/daemon/events-CCI-400.xml
index 40d91e582c19..0dd72c02ec5f 100644
--- a/tools/gator/daemon/events-CCI-400.xml
+++ b/tools/gator/daemon/events-CCI-400.xml
@@ -41,9 +41,9 @@
<event event="0x19" option_set="Master" title="CCI-400" name="Write stall: barrier hazard" description="Write request stall cycle because of a barrier hazard"/>
<event event="0x1a" option_set="Master" title="CCI-400" name="Write stall: tracker full" description="Write request stall cycle because the transaction tracker is full. Increase MIx_W_MAX to avoid this stall. See the CoreLink CCI-400 Cache Coherent Interconnect Integration Manual"/>
</category>
- <counter_set name="CCI_400-r1_cnt" count="4"/>
- <category name="CCI-400" counter_set="CCI_400-r1_cnt" per_cpu="no">
- <event counter="CCI_400-r1_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
+ <counter_set name="CCI_400_r1_cnt" count="4"/>
+ <category name="CCI-400" counter_set="CCI_400_r1_cnt" per_cpu="no">
+ <event counter="CCI_400_r1_ccnt" event="0xff" title="CCI-400 Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" description="The number of core clock cycles"/>
<option_set name="Slave">
<option event_delta="0x00" name="S0" description="Slave interface 0"/>
<option event_delta="0x20" name="S1" description="Slave interface 1"/>
diff --git a/tools/gator/daemon/events-Cortex-A53.xml b/tools/gator/daemon/events-Cortex-A53.xml
index acdfe4ecd242..c15cbd8cbad2 100644
--- a/tools/gator/daemon/events-Cortex-A53.xml
+++ b/tools/gator/daemon/events-Cortex-A53.xml
@@ -1,6 +1,6 @@
- <counter_set name="ARM_Cortex-A53_cnt" count="6"/>
- <category name="Cortex-A53" counter_set="ARM_Cortex-A53_cnt" per_cpu="yes" supports_event_based_sampling="yes">
- <event counter="ARM_Cortex-A53_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <counter_set name="ARMv8_Cortex_A53_cnt" count="6"/>
+ <category name="Cortex-A53" counter_set="ARMv8_Cortex_A53_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv8_Cortex_A53_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
<event event="0x00" title="Software" name="Increment" description="Software increment. The register is incremented only on writes to the Software Increment Register."/>
<event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
<event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
diff --git a/tools/gator/daemon/events-Cortex-A57.xml b/tools/gator/daemon/events-Cortex-A57.xml
index 1da23e7f93c2..d1e97c36bd40 100644
--- a/tools/gator/daemon/events-Cortex-A57.xml
+++ b/tools/gator/daemon/events-Cortex-A57.xml
@@ -1,6 +1,6 @@
- <counter_set name="ARM_Cortex-A57_cnt" count="6"/>
- <category name="Cortex-A57" counter_set="ARM_Cortex-A57_cnt" per_cpu="yes" supports_event_based_sampling="yes">
- <event counter="ARM_Cortex-A57_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <counter_set name="ARMv8_Cortex_A57_cnt" count="6"/>
+ <category name="Cortex-A57" counter_set="ARMv8_Cortex_A57_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv8_Cortex_A57_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
<event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
<event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
<event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
diff --git a/tools/gator/daemon/events-Cortex-A72.xml b/tools/gator/daemon/events-Cortex-A72.xml
index 31c9cf373a53..683d0ae84ce0 100644
--- a/tools/gator/daemon/events-Cortex-A72.xml
+++ b/tools/gator/daemon/events-Cortex-A72.xml
@@ -1,6 +1,6 @@
- <counter_set name="ARM_Cortex_A72_cnt" count="6"/>
- <category name="Cortex-A72" counter_set="ARM_Cortex_A72_cnt" per_cpu="yes" supports_event_based_sampling="yes">
- <event counter="ARM_Cortex_A72_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
+ <counter_set name="ARMv8_Cortex_A72_cnt" count="6"/>
+ <category name="Cortex-A72" counter_set="ARMv8_Cortex_A72_cnt" per_cpu="yes" supports_event_based_sampling="yes">
+ <event counter="ARMv8_Cortex_A72_ccnt" event="0x11" title="Clock" name="Cycles" display="hertz" units="Hz" average_selection="yes" average_cores="yes" description="The number of core clock cycles"/>
<event event="0x00" title="Software" name="Increment" description="Incremented only on writes to the Software Increment Register"/>
<event event="0x01" title="Cache" name="Instruction refill" description="Instruction fetch that causes a refill of at least the level of instruction or unified cache closest to the processor"/>
<event event="0x02" title="Cache" name="Inst TLB refill" description="Instruction fetch that causes a TLB refill of at least the level of TLB closest to the processor"/>
diff --git a/tools/gator/daemon/events-Mali-4xx.xml b/tools/gator/daemon/events-Mali-4xx.xml
index 801dd28903a8..e2f4405719c2 100644
--- a/tools/gator/daemon/events-Mali-4xx.xml
+++ b/tools/gator/daemon/events-Mali-4xx.xml
@@ -243,3 +243,14 @@
<event counter="ARM_Mali-4xx_SW_39" title="Geometry Statistics" name="Strip Lines" description="Number of lines passed to GLES using the mode GL_LINE_STRIP."/>
<event counter="ARM_Mali-4xx_SW_40" title="Geometry Statistics" name="Loop Lines" description="Number of lines passed to GLES using the mode GL_LINE_LOOP."/>
</category>
+ <category name="ARM_Mali-4xx_Total_alloc_pages" per_cpu="no">
+ <event counter="ARM_Mali-4xx_Total_alloc_pages" title="Mali GPU Alloc" name="pages" class="absolute" display="average" average_selection="yes" description="Total number of allocated pages"/>
+ </category>
+ <category name="Mali-4xx Session Memory usage" per_cpu="no">
+ <event counter="ARM_Mali-4xx_vertex_index_buffer" title="Mali Memory" name="Vertex and Index buffer" units="B" dclass="absolute" display="average" average_selection="yes" description="Vertex and index input"/>
+ <event counter="ARM_Mali-4xx_texture_buffer" title="Mali Memory" name="Texture buffer" units="B" dclass="absolute" display="average" average_selection="yes" description="Texture data"/>
+ <event counter="ARM_Mali-4xx_varying_buffer" title="Mali Memory" name="Varying buffer" units="B" dclass="absolute" display="average" average_selection="yes" description="Varying buffer"/>
+ <event counter="ARM_Mali-4xx_render_target" title="Mali Memory" name="Render target buffer" units="B" dclass="absolute" display="average" average_selection="yes" description="Render target buffer"/>
+ <event counter="ARM_Mali-4xx_plbu_heap" title="Mali Memory" name="Plbu heap" units="B" dclass="absolute" display="average" average_selection="yes" description="The memory to store commands from PLBU when Slave tilelist memory isn't enough"/>
+ <event counter="ARM_Mali-4xx_slave_tilelist" title="Mali Memory" name="Slave tilelist" units="B" dclass="absolute" display="average" average_selection="yes" description="Slave tilelist memory"/>
+ </category>
diff --git a/tools/gator/daemon/events-Mali-Midgard_hw.xml b/tools/gator/daemon/events-Mali-Midgard_hw.xml
index e8f0cb01c123..802db8b68a29 100644
--- a/tools/gator/daemon/events-Mali-Midgard_hw.xml
+++ b/tools/gator/daemon/events-Mali-Midgard_hw.xml
@@ -44,8 +44,8 @@
<event counter="ARM_Mali-Midgard_FRAG_THREADS" title="Mali Core Threads" name="Fragment threads" description="Number of fragment threads started"/>
<event counter="ARM_Mali-Midgard_FRAG_DUMMY_THREADS" title="Mali Core Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
- <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS quads" description="Number of threads doing late ZS test"/>
- <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS quads" description="Number of threads killed by late ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-Midgard_FRAG_QUADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
<event counter="ARM_Mali-Midgard_FRAG_THREADS_LZS_TEST" title="Mali Core Threads" name="Frag threads doing late ZS" description="Number of threads doing late ZS test"/>
<event counter="ARM_Mali-Midgard_FRAG_THREADS_LZS_KILLED" title="Mali Core Threads" name="Frag threads killed late ZS" description="Number of threads killed by late ZS test"/>
diff --git a/tools/gator/daemon/events-Mali-T82x_hw.xml b/tools/gator/daemon/events-Mali-T82x_hw.xml
index 5caa464a3078..994c8b154189 100644
--- a/tools/gator/daemon/events-Mali-T82x_hw.xml
+++ b/tools/gator/daemon/events-Mali-T82x_hw.xml
@@ -1,108 +1,108 @@
-
- <category name="Mali Job Manager" per_cpu="no">
-
- <event counter="ARM_Mali-T82x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
- <event counter="ARM_Mali-T82x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
- <event counter="ARM_Mali-T82x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
- <event counter="ARM_Mali-T82x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
- <event counter="ARM_Mali-T82x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
-
- <event counter="ARM_Mali-T82x_JS0_JOBS" title="Mali Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
- <event counter="ARM_Mali-T82x_JS1_JOBS" title="Mali Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
- <event counter="ARM_Mali-T82x_JS2_JOBS" title="Mali Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
-
- <event counter="ARM_Mali-T82x_JS0_TASKS" title="Mali Job Manager Tasks" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
- <event counter="ARM_Mali-T82x_JS1_TASKS" title="Mali Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
- <event counter="ARM_Mali-T82x_JS2_TASKS" title="Mali Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
-
- </category>
-
- <category name="Mali Tiler" per_cpu="no">
-
- <event counter="ARM_Mali-T82x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
-
- <event counter="ARM_Mali-T82x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
- <event counter="ARM_Mali-T82x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
- <event counter="ARM_Mali-T82x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
- <event counter="ARM_Mali-T82x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
- <event counter="ARM_Mali-T82x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
-
- <event counter="ARM_Mali-T82x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
- <event counter="ARM_Mali-T82x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
- <event counter="ARM_Mali-T82x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
- <event counter="ARM_Mali-T82x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
- <event counter="ARM_Mali-T82x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
-
- </category>
-
- <category name="Mali Shader Core" per_cpu="no">
-
- <event counter="ARM_Mali-T82x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
- <event counter="ARM_Mali-T82x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
- <event counter="ARM_Mali-T82x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
- <event counter="ARM_Mali-T82x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
- <event counter="ARM_Mali-T82x_FRAG_CYCLES_FPKQ_ACTIVE" title="Mali Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads"/>
-
- <event counter="ARM_Mali-T82x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
- <event counter="ARM_Mali-T82x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
- <event counter="ARM_Mali-T82x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
- <event counter="ARM_Mali-T82x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
-
- <event counter="ARM_Mali-T82x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
- <event counter="ARM_Mali-T82x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
-
- <event counter="ARM_Mali-T82x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
- <event counter="ARM_Mali-T82x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
-
- <event counter="ARM_Mali-T82x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
- <event counter="ARM_Mali-T82x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
- <event counter="ARM_Mali-T82x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
-
- <event counter="ARM_Mali-T82x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
- <event counter="ARM_Mali-T82x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
-
- <event counter="ARM_Mali-T82x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe (normalized per pipe)"/>
-
- <event counter="ARM_Mali-T82x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
- <event counter="ARM_Mali-T82x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
-
- <event counter="ARM_Mali-T82x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
- <event counter="ARM_Mali-T82x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
- <event counter="ARM_Mali-T82x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
-
- <event counter="ARM_Mali-T82x_LSC_READ_OP" title="Mali Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache"/>
- <event counter="ARM_Mali-T82x_LSC_READ_HITS" title="Mali Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache"/>
-
- <event counter="ARM_Mali-T82x_LSC_WRITE_OP" title="Mali Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache"/>
- <event counter="ARM_Mali-T82x_LSC_WRITE_HITS" title="Mali Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache"/>
-
- <event counter="ARM_Mali-T82x_LSC_ATOMIC_OP" title="Mali Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache"/>
- <event counter="ARM_Mali-T82x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
-
- <event counter="ARM_Mali-T82x_LSC_LINE_FETCHES" title="Mali Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
- <event counter="ARM_Mali-T82x_LSC_DIRTY_LINE" title="Mali Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
-
- <event counter="ARM_Mali-T82x_LSC_SNOOPS" title="Mali Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
-
- </category>
-
- <category name="Mali L2 Cache" per_cpu="no">
-
- <event counter="ARM_Mali-T82x_L2_READ_LOOKUP" title="Mali L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache"/>
- <event counter="ARM_Mali-T82x_L2_READ_SNOOP" title="Mali L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops"/>
- <event counter="ARM_Mali-T82x_L2_READ_HIT" title="Mali L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
-
- <event counter="ARM_Mali-T82x_L2_WRITE_SNOOP" title="Mali L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops"/>
- <event counter="ARM_Mali-T82x_L2_WRITE_HIT" title="Mali L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
- <event counter="ARM_Mali-T82x_L2_WRITE_LOOKUP" title="Mali L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache"/>
-
- <event counter="ARM_Mali-T82x_L2_EXT_READ_BEATS" title="Mali L2 Cache Ext Reads" name="External read beats" description="Number of external bus read beats"/>
- <event counter="ARM_Mali-T82x_L2_EXT_AR_STALL" title="Mali L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
- <event counter="ARM_Mali-T82x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
- <event counter="ARM_Mali-T82x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
-
- <event counter="ARM_Mali-T82x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache Ext Writes" name="External write beats" description="Number of external bus write beats"/>
- <event counter="ARM_Mali-T82x_L2_EXT_W_STALL" title="Mali L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
- <event counter="ARM_Mali-T82x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
-
- </category>
+
+ <category name="Mali Job Manager" per_cpu="no">
+
+ <event counter="ARM_Mali-T82x_GPU_ACTIVE" title="Mali Job Manager Cycles" name="GPU cycles" description="Number of cycles GPU active"/>
+ <event counter="ARM_Mali-T82x_IRQ_ACTIVE" title="Mali Job Manager Cycles" name="IRQ cycles" description="Number of cycles GPU interrupt pending"/>
+ <event counter="ARM_Mali-T82x_JS0_ACTIVE" title="Mali Job Manager Cycles" name="JS0 cycles" description="Number of cycles JS0 (fragment) active"/>
+ <event counter="ARM_Mali-T82x_JS1_ACTIVE" title="Mali Job Manager Cycles" name="JS1 cycles" description="Number of cycles JS1 (vertex/tiler/compute) active"/>
+ <event counter="ARM_Mali-T82x_JS2_ACTIVE" title="Mali Job Manager Cycles" name="JS2 cycles" description="Number of cycles JS2 (vertex/compute) active"/>
+
+ <event counter="ARM_Mali-T82x_JS0_JOBS" title="Mali Job Manager Jobs" name="JS0 jobs" description="Number of Jobs (fragment) completed in JS0"/>
+ <event counter="ARM_Mali-T82x_JS1_JOBS" title="Mali Job Manager Jobs" name="JS1 jobs" description="Number of Jobs (vertex/tiler/compute) completed in JS1"/>
+ <event counter="ARM_Mali-T82x_JS2_JOBS" title="Mali Job Manager Jobs" name="JS2 jobs" description="Number of Jobs (vertex/compute) completed in JS2"/>
+
+ <event counter="ARM_Mali-T82x_JS0_TASKS" title="Mali Job Manager Tasks" name="JS0 tasks" description="Number of Tasks completed in JS0"/>
+ <event counter="ARM_Mali-T82x_JS1_TASKS" title="Mali Job Manager Tasks" name="JS1 tasks" description="Number of Tasks completed in JS1"/>
+ <event counter="ARM_Mali-T82x_JS2_TASKS" title="Mali Job Manager Tasks" name="JS2 tasks" description="Number of Tasks completed in JS2"/>
+
+ </category>
+
+ <category name="Mali Tiler" per_cpu="no">
+
+ <event counter="ARM_Mali-T82x_TI_ACTIVE" title="Mali Tiler Cycles" name="Tiler cycles" description="Number of cycles Tiler active"/>
+
+ <event counter="ARM_Mali-T82x_TI_POLYGONS" title="Mali Tiler Primitives" name="Polygons" description="Number of polygons processed"/>
+ <event counter="ARM_Mali-T82x_TI_QUADS" title="Mali Tiler Primitives" name="Quads" description="Number of quads processed"/>
+ <event counter="ARM_Mali-T82x_TI_TRIANGLES" title="Mali Tiler Primitives" name="Triangles" description="Number of triangles processed"/>
+ <event counter="ARM_Mali-T82x_TI_LINES" title="Mali Tiler Primitives" name="Lines" description="Number of lines processed"/>
+ <event counter="ARM_Mali-T82x_TI_POINTS" title="Mali Tiler Primitives" name="Points" description="Number of points processed"/>
+
+ <event counter="ARM_Mali-T82x_TI_FRONT_FACING" title="Mali Tiler Culling" name="Front facing prims" description="Number of front facing primitives"/>
+ <event counter="ARM_Mali-T82x_TI_BACK_FACING" title="Mali Tiler Culling" name="Back facing prims" description="Number of back facing primitives"/>
+ <event counter="ARM_Mali-T82x_TI_PRIM_VISIBLE" title="Mali Tiler Culling" name="Visible prims" description="Number of visible primitives"/>
+ <event counter="ARM_Mali-T82x_TI_PRIM_CULLED" title="Mali Tiler Culling" name="Culled prims" description="Number of culled primitives"/>
+ <event counter="ARM_Mali-T82x_TI_PRIM_CLIPPED" title="Mali Tiler Culling" name="Clipped prims" description="Number of clipped primitives"/>
+
+ </category>
+
+ <category name="Mali Shader Core" per_cpu="no">
+
+ <event counter="ARM_Mali-T82x_TRIPIPE_ACTIVE" title="Mali Core Cycles" name="Tripipe cycles" description="Number of cycles tripipe was active"/>
+ <event counter="ARM_Mali-T82x_FRAG_ACTIVE" title="Mali Core Cycles" name="Fragment cycles" description="Number of cycles fragment processing was active"/>
+ <event counter="ARM_Mali-T82x_COMPUTE_ACTIVE" title="Mali Core Cycles" name="Compute cycles" description="Number of cycles vertex\compute processing was active"/>
+ <event counter="ARM_Mali-T82x_FRAG_CYCLES_NO_TILE" title="Mali Core Cycles" name="Fragment cycles waiting for tile" description="Number of cycles spent waiting for a physical tile buffer"/>
+ <event counter="ARM_Mali-T82x_FRAG_CYCLES_FPKQ_ACTIVE" title="Mali Core Cycles" name="Fragment cycles pre-pipe buffer not empty" description="Number of cycles the pre-pipe queue contains quads"/>
+
+ <event counter="ARM_Mali-T82x_FRAG_THREADS" title="Mali Fragment Threads" name="Fragment threads" description="Number of fragment threads started"/>
+ <event counter="ARM_Mali-T82x_FRAG_DUMMY_THREADS" title="Mali Fragment Threads" name="Dummy fragment threads" description="Number of dummy fragment threads started"/>
+ <event counter="ARM_Mali-T82x_FRAG_THREADS_LZS_TEST" title="Mali Fragment Threads" name="Fragment threads doing late ZS" description="Number of threads doing late ZS test"/>
+ <event counter="ARM_Mali-T82x_FRAG_THREADS_LZS_KILLED" title="Mali Fragment Threads" name="Fragment threads killed late ZS" description="Number of threads killed by late ZS test"/>
+
+ <event counter="ARM_Mali-T82x_COMPUTE_TASKS" title="Mali Compute Tasks" name="Compute tasks" description="Number of compute tasks"/>
+ <event counter="ARM_Mali-T82x_COMPUTE_THREADS" title="Mali Compute Threads" name="Compute threads" description="Number of compute threads started"/>
+
+ <event counter="ARM_Mali-T82x_FRAG_PRIMITIVES" title="Mali Fragment Primitives" name="Primitives loaded" description="Number of primitives loaded from tiler"/>
+ <event counter="ARM_Mali-T82x_FRAG_PRIMITIVES_DROPPED" title="Mali Fragment Primitives" name="Primitives dropped" description="Number of primitives dropped because out of tile"/>
+
+ <event counter="ARM_Mali-T82x_FRAG_QUADS_RAST" title="Mali Fragment Quads" name="Quads rasterized" description="Number of quads rasterized"/>
+ <event counter="ARM_Mali-T82x_FRAG_QUADS_EZS_TEST" title="Mali Fragment Quads" name="Quads doing early ZS" description="Number of quads doing early ZS test"/>
+ <event counter="ARM_Mali-T82x_FRAG_QUADS_EZS_KILLED" title="Mali Fragment Quads" name="Quads killed early Z" description="Number of quads killed by early ZS test"/>
+
+ <event counter="ARM_Mali-T82x_FRAG_NUM_TILES" title="Mali Fragment Tasks" name="Tiles rendered" description="Number of tiles rendered"/>
+ <event counter="ARM_Mali-T82x_FRAG_TRANS_ELIM" title="Mali Fragment Tasks" name="Tile writes killed by TE" description="Number of tile writes skipped by transaction elimination"/>
+
+ <event counter="ARM_Mali-T82x_ARITH_WORDS" title="Mali Arithmetic Pipe" name="A instructions" description="Number of batched instructions executed by the A-pipe (normalized per pipe)"/>
+
+ <event counter="ARM_Mali-T82x_LS_WORDS" title="Mali Load/Store Pipe" name="LS instructions" description="Number of instructions completed by the LS-pipe"/>
+ <event counter="ARM_Mali-T82x_LS_ISSUES" title="Mali Load/Store Pipe" name="LS instruction issues" description="Number of instructions issued to the LS-pipe, including restarts"/>
+
+ <event counter="ARM_Mali-T82x_TEX_WORDS" title="Mali Texture Pipe" name="T instructions" description="Number of instructions completed by the T-pipe"/>
+ <event counter="ARM_Mali-T82x_TEX_ISSUES" title="Mali Texture Pipe" name="T instruction issues" description="Number of threads through loop 2 address calculation"/>
+ <event counter="ARM_Mali-T82x_TEX_RECIRC_FMISS" title="Mali Texture Pipe" name="Cache misses" description="Number of instructions in the T-pipe, recirculated due to cache miss"/>
+
+ <event counter="ARM_Mali-T82x_LSC_READ_OP" title="Mali Load/Store Cache Reads" name="Read operations" description="Number of read operations in the Load/Store cache"/>
+ <event counter="ARM_Mali-T82x_LSC_READ_HITS" title="Mali Load/Store Cache Reads" name="Read hits" description="Number of read hits in the Load/Store cache"/>
+
+ <event counter="ARM_Mali-T82x_LSC_WRITE_OP" title="Mali Load/Store Cache Writes" name="Write operations" description="Number of write operations in the Load/Store cache"/>
+ <event counter="ARM_Mali-T82x_LSC_WRITE_HITS" title="Mali Load/Store Cache Writes" name="Write hits" description="Number of write hits in the Load/Store cache"/>
+
+ <event counter="ARM_Mali-T82x_LSC_ATOMIC_OP" title="Mali Load/Store Cache Atomics" name="Atomic operations" description="Number of atomic operations in the Load/Store cache"/>
+ <event counter="ARM_Mali-T82x_LSC_ATOMIC_HITS" title="Mali Load/Store Cache Atomics" name="Atomic hits" description="Number of atomic hits in the Load/Store cache"/>
+
+ <event counter="ARM_Mali-T82x_LSC_LINE_FETCHES" title="Mali Load/Store Cache Bus" name="Line fetches" description="Number of line fetches in the Load/Store cache"/>
+ <event counter="ARM_Mali-T82x_LSC_DIRTY_LINE" title="Mali Load/Store Cache Bus" name="Dirty line evictions" description="Number of dirty line evictions in the Load/Store cache"/>
+
+ <event counter="ARM_Mali-T82x_LSC_SNOOPS" title="Mali Load/Store Cache Bus" name="Snoops in to LSC" description="Number of coherent memory snoops in to the Load/Store cache"/>
+
+ </category>
+
+ <category name="Mali L2 Cache" per_cpu="no">
+
+ <event counter="ARM_Mali-T82x_L2_READ_LOOKUP" title="Mali L2 Cache Reads" name="L2 read lookups" description="Number of reads into the L2 cache"/>
+ <event counter="ARM_Mali-T82x_L2_READ_SNOOP" title="Mali L2 Cache Reads" name="Read snoops" description="Number of read transaction snoops"/>
+ <event counter="ARM_Mali-T82x_L2_READ_HIT" title="Mali L2 Cache Reads" name="L2 read hits" description="Number of reads hitting in the L2 cache"/>
+
+ <event counter="ARM_Mali-T82x_L2_WRITE_SNOOP" title="Mali L2 Cache Writes" name="Write snoops" description="Number of write transaction snoops"/>
+ <event counter="ARM_Mali-T82x_L2_WRITE_HIT" title="Mali L2 Cache Writes" name="L2 write hits" description="Number of writes hitting in the L2 cache"/>
+ <event counter="ARM_Mali-T82x_L2_WRITE_LOOKUP" title="Mali L2 Cache Writes" name="L2 write lookups" description="Number of writes into the L2 cache"/>
+
+ <event counter="ARM_Mali-T82x_L2_EXT_READ_BEATS" title="Mali L2 Cache Ext Reads" name="External read beats" description="Number of external bus read beats"/>
+ <event counter="ARM_Mali-T82x_L2_EXT_AR_STALL" title="Mali L2 Cache Ext Reads" name="External bus stalls (AR)" description="Number of cycles a valid read address (AR) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T82x_L2_EXT_R_BUF_FULL" title="Mali L2 Cache Ext Reads" name="External bus response buffer full" description="Number of cycles a valid request is blocked by a full response buffer"/>
+ <event counter="ARM_Mali-T82x_L2_EXT_RD_BUF_FULL" title="Mali L2 Cache Ext Reads" name="External bus read data buffer full" description="Number of cycles a valid request is blocked by a full read data buffer"/>
+
+ <event counter="ARM_Mali-T82x_L2_EXT_WRITE_BEATS" title="Mali L2 Cache Ext Writes" name="External write beats" description="Number of external bus write beats"/>
+ <event counter="ARM_Mali-T82x_L2_EXT_W_STALL" title="Mali L2 Cache Ext Writes" name="External bus stalls (W)" description="Number of cycles a valid write data (W channel) is stalled by the external interconnect"/>
+ <event counter="ARM_Mali-T82x_L2_EXT_W_BUF_FULL" title="Mali L2 Cache Ext Writes" name="External bus write buffer full" description="Number of cycles a valid request is blocked by a full write buffer"/>
+
+ </category>
diff --git a/tools/gator/daemon/events-Mali-V500.xml b/tools/gator/daemon/events-Mali-V500.xml
index 89bc7f4734df..bc2d6aa74afb 100644
--- a/tools/gator/daemon/events-Mali-V500.xml
+++ b/tools/gator/daemon/events-Mali-V500.xml
@@ -25,6 +25,6 @@
<event counter="ARM_Mali-V500_evn2" title="MVE-V500 Frames" name="Frame Processed" description="Generated when the MVE has finished processing a frame"/>
<event counter="ARM_Mali-V500_evn3" title="MVE-V500 Output" name="Output buffer received" description="Generated when an an output buffer is returned to us from the MVE"/>
<event counter="ARM_Mali-V500_evn4" title="MVE-V500 Input" name="Input buffer received" description="Generated when we an input buffer is returned to us from the MVE"/>
- <event counter="ARM_Mali-V500_act0" title="MVE-V500 Parsed" name="Activity" class="activity" activity1="activity" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/>
- <event counter="ARM_Mali-V500_act1" title="MVE-V500 Piped" name="Activity" class="activity" activity1="activity" activity_color1="0x0000ff00" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" cores="8" description="Mali-V500 Activity"/>
+ <event counter="ARM_Mali-V500_act0" title="MVE-V500 Parsed" name="Activity" class="activity" activity1="activity" activity_color1="0x000000ff" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" per_cpu="yes" cores="8" description="Mali-V500 Activity"/>
+ <event counter="ARM_Mali-V500_act1" title="MVE-V500 Piped" name="Activity" class="activity" activity1="activity" activity_color1="0x0000ff00" rendering_type="bar" average_selection="yes" average_cores="yes" percentage="yes" per_cpu="yes" cores="8" description="Mali-V500 Activity"/>
</category>
diff --git a/tools/gator/daemon/events-atrace.xml b/tools/gator/daemon/events-atrace.xml
new file mode 100644
index 000000000000..432b6af70f75
--- /dev/null
+++ b/tools/gator/daemon/events-atrace.xml
@@ -0,0 +1,19 @@
+ <category name="Atrace">
+ <event counter="atrace_graphics" flag="0x2" title="Atrace" name="Graphics" description="Graphics"/>
+ <event counter="atrace_input" flag="0x4" title="Atrace" name="Input" description="Input"/>
+ <event counter="atrace_view" flag="0x8" title="Atrace" name="View" description="View"/>
+ <event counter="atrace_webview" flag="0x10" title="Atrace" name="Webview" description="Webview"/>
+ <event counter="atrace_window_manager" flag="0x20" title="Atrace" name="Window manager" description="Window manager"/>
+ <event counter="atrace_activity_manager" flag="0x40" title="Atrace" name="Activity manager" description="Activity manager"/>
+ <event counter="atrace_sync_manager" flag="0x80" title="Atrace" name="Sync manager" description="Sync manager"/>
+ <event counter="atrace_audio" flag="0x100" title="Atrace" name="Audio" description="Audio"/>
+ <event counter="atrace_video" flag="0x200" title="Atrace" name="Video" description="Video"/>
+ <event counter="atrace_camera" flag="0x400" title="Atrace" name="Camera" description="Camera"/>
+ <event counter="atrace_hal" flag="0x800" title="Atrace" name="Hal" description="Hal"/>
+ <event counter="atrace_app" flag="0x1000" title="Atrace" name="App" description="App"/>
+ <event counter="atrace_resources" flag="0x2000" title="Atrace" name="Resources" description="Resources"/>
+ <event counter="atrace_dalvik" flag="0x4000" title="Atrace" name="Dalvik" description="Dalvik"/>
+ <event counter="atrace_rs" flag="0x8000" title="Atrace" name="Rs" description="Rs"/>
+ <event counter="atrace_bionic" flag="0x10000" title="Atrace" name="Bionic" description="Bionic"/>
+ <event counter="atrace_power" flag="0x20000" title="Atrace" name="Power" description="Power"/>
+ </category>
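Each entry in the new events-atrace.xml carries a single-bit flag alongside the counter name. Presumably the categories a user enables are combined by OR-ing these flags into one mask that is handed to atrace; the AtraceDriver itself is not part of this excerpt, so the following is only an illustration of how such flag values compose:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Flags taken from the XML above; combining categories is a bitwise OR.
        const uint64_t graphics = 0x2;
        const uint64_t view     = 0x8;
        const uint64_t app      = 0x1000;
        const uint64_t mask = graphics | view | app;
        printf("combined mask: 0x%llx\n", (unsigned long long)mask); // 0x100a
        return 0;
    }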
diff --git a/tools/gator/daemon/events-ftrace.xml b/tools/gator/daemon/events-ftrace.xml
index ae5529f2678d..17fad176b1ed 100644
--- a/tools/gator/daemon/events-ftrace.xml
+++ b/tools/gator/daemon/events-ftrace.xml
@@ -1,22 +1,24 @@
<category name="Ftrace">
<!--
- Ftrace counters require Linux 3.10 or later. If you do you see ftrace counters in counter configuration, please check your Linux version.
+ ftrace counters require Linux 3.10 or later; if you do not see ftrace counters in counter configuration, please check your Linux version
'counter' attribute must start with ftrace_ and be unique
the regex item in () is the value shown or, if the parentheses are missing, the number of regex matches is counted
'enable' (optional) is the ftrace event to enable associated with the gator event
+ 'tracepoint' (optional) same meaning as enable, but will use perf instead of ftrace when using user space gator
+ 'arg' (optional) used in conjunction with 'tracepoint' to specify the value to show; otherwise the number of tracepoint events is counted
-->
<!--
- <event counter="ftrace_trace_marker_numbers" title="ftrace" name="trace_marker" regex="^tracing_mark_write: ([0-9]+)\s$" class="absolute" description="Numbers written to /sys/kernel/debug/tracing/trace_marker, ex: echo 42 > /sys/kernel/debug/tracing/trace_marker"/>
+ <event counter="ftrace_trace_marker_numbers" title="ftrace" name="trace_marker" regex="^tracing_mark_write: ([0-9]+)$" class="absolute" description="Numbers written to /sys/kernel/debug/tracing/trace_marker, ex: echo 42 > /sys/kernel/debug/tracing/trace_marker"/>
-->
<!-- ftrace counters -->
- <event counter="ftrace_kmem_kmalloc" title="Kmem" name="kmalloc" regex="^kmalloc:.* bytes_alloc=([0-9]+) " enable="kmem/kmalloc" class="incident" description="Number of bytes allocated in the kernel using kmalloc"/>
- <event counter="ftrace_ext4_ext4_da_write" title="Ext4" name="ext4_da_write" regex="^ext4_da_write_end:.* len ([0-9]+) " enable="ext4/ext4_da_write_end" class="incident" description="Number of bytes written to an ext4 filesystem"/>
- <event counter="ftrace_f2fs_f2fs_write" title="F2FS" name="f2fs_write" regex="^f2fs_write_end:.* len ([0-9]+), " enable="f2fs/f2fs_write_end" class="incident" description="Number of bytes written to an f2fs filesystem"/>
- <event counter="ftrace_power_clock_set_rate" title="Power" name="clock_set_rate" regex="^clock_set_rate:.* state=([0-9]+) " enable="power/clock_set_rate" class="absolute" description="Clock rate state"/>
+ <event counter="ftrace_kmem_kmalloc" title="Kmem" name="kmalloc" regex="^kmalloc:.* bytes_alloc=([0-9]+) " tracepoint="kmem/kmalloc" arg="bytes_alloc" class="incident" description="Number of bytes allocated in the kernel using kmalloc"/>
+ <event counter="ftrace_ext4_ext4_da_write" title="Ext4" name="ext4_da_write" regex="^ext4_da_write_end:.* len ([0-9]+) " tracepoint="ext4/ext4_da_write_end" arg="len" class="incident" description="Number of bytes written to an ext4 filesystem"/>
+ <event counter="ftrace_f2fs_f2fs_write" title="F2FS" name="f2fs_write" regex="^f2fs_write_end:.* len ([0-9]+), " tracepoint="f2fs/f2fs_write_end" arg="len" class="incident" description="Number of bytes written to an f2fs filesystem"/>
+ <event counter="ftrace_power_clock_set_rate" title="Power" name="clock_set_rate" regex="^clock_set_rate:.* state=([0-9]+) " tracepoint="power/clock_set_rate" arg="state" class="absolute" description="Clock rate state"/>
<!-- counting ftrace counters -->
- <event counter="ftrace_block_block_rq_complete" title="Block" name="block_rq_complete" regex="^block_rq_complete: " enable="block/block_rq_complete" class="delta" description="Number of block IO operations completed by device driver"/>
- <event counter="ftrace_block_block_rq_issue" title="Block" name="block_rq_issue" regex="^block_rq_issue: " enable="block/block_rq_issue" class="delta" description="Number of block IO operations issued to device driver"/>
- <event counter="ftrace_power_cpu_idle" title="Power" name="cpu_idle" regex="^cpu_idle: " enable="power/cpu_idle" class="delta" description="Number of times cpu_idle is entered or exited"/>
+ <event counter="ftrace_block_block_rq_complete" title="Block" name="block_rq_complete" regex="^block_rq_complete: " tracepoint="block/block_rq_complete" class="delta" description="Number of block IO operations completed by device driver"/>
+ <event counter="ftrace_block_block_rq_issue" title="Block" name="block_rq_issue" regex="^block_rq_issue: " tracepoint="block/block_rq_issue" class="delta" description="Number of block IO operations issued to device driver"/>
+ <event counter="ftrace_power_cpu_idle" title="Power" name="cpu_idle" regex="^cpu_idle: " tracepoint="power/cpu_idle" class="delta" description="Number of times cpu_idle is entered or exited"/>
</category>
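The comment block above states that the parenthesised group in each counter's regex is the value reported (and that, without a group, matches are merely counted). A quick sketch of that convention against a kmalloc-style trace line, using std::regex purely for illustration (gatord's own matching code is not shown here):

    #include <cstdio>
    #include <regex>
    #include <string>

    int main() {
        // Same pattern as the ftrace_kmem_kmalloc counter above; the first
        // capture group holds the value to report.
        const std::regex pattern("^kmalloc:.* bytes_alloc=([0-9]+) ");
        const std::string line =
            "kmalloc: call_site=c01a0000 ptr=dead0000 bytes_req=96 bytes_alloc=128 gfp_flags=GFP_KERNEL ";
        std::smatch match;
        if (std::regex_search(line, match, pattern)) {
            printf("bytes_alloc = %s\n", match[1].str().c_str()); // 128
        }
        return 0;
    }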
diff --git a/tools/gator/daemon/k/perf_event.3.12.h b/tools/gator/daemon/k/perf_event.3.12.h
deleted file mode 100644
index e886c48cadf6..000000000000
--- a/tools/gator/daemon/k/perf_event.3.12.h
+++ /dev/null
@@ -1,792 +0,0 @@
-/*
- * Performance events:
- *
- * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
- *
- * Data type definitions, declarations, prototypes.
- *
- * Started by: Thomas Gleixner and Ingo Molnar
- *
- * For licencing details see kernel-base/COPYING
- */
-#ifndef _LINUX_PERF_EVENT_H
-#define _LINUX_PERF_EVENT_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <asm/byteorder.h>
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * attr.type
- */
-enum perf_type_id {
- PERF_TYPE_HARDWARE = 0,
- PERF_TYPE_SOFTWARE = 1,
- PERF_TYPE_TRACEPOINT = 2,
- PERF_TYPE_HW_CACHE = 3,
- PERF_TYPE_RAW = 4,
- PERF_TYPE_BREAKPOINT = 5,
-
- PERF_TYPE_MAX, /* non-ABI */
-};
-
-/*
- * Generalized performance event event_id types, used by the
- * attr.event_id parameter of the sys_perf_event_open()
- * syscall:
- */
-enum perf_hw_id {
- /*
- * Common hardware events, generalized by the kernel:
- */
- PERF_COUNT_HW_CPU_CYCLES = 0,
- PERF_COUNT_HW_INSTRUCTIONS = 1,
- PERF_COUNT_HW_CACHE_REFERENCES = 2,
- PERF_COUNT_HW_CACHE_MISSES = 3,
- PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
- PERF_COUNT_HW_BRANCH_MISSES = 5,
- PERF_COUNT_HW_BUS_CYCLES = 6,
- PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
- PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
- PERF_COUNT_HW_REF_CPU_CYCLES = 9,
-
- PERF_COUNT_HW_MAX, /* non-ABI */
-};
-
-/*
- * Generalized hardware cache events:
- *
- * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
- * { read, write, prefetch } x
- * { accesses, misses }
- */
-enum perf_hw_cache_id {
- PERF_COUNT_HW_CACHE_L1D = 0,
- PERF_COUNT_HW_CACHE_L1I = 1,
- PERF_COUNT_HW_CACHE_LL = 2,
- PERF_COUNT_HW_CACHE_DTLB = 3,
- PERF_COUNT_HW_CACHE_ITLB = 4,
- PERF_COUNT_HW_CACHE_BPU = 5,
- PERF_COUNT_HW_CACHE_NODE = 6,
-
- PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
-};
-
-enum perf_hw_cache_op_id {
- PERF_COUNT_HW_CACHE_OP_READ = 0,
- PERF_COUNT_HW_CACHE_OP_WRITE = 1,
- PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
-
- PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
-};
-
-enum perf_hw_cache_op_result_id {
- PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
- PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
-
- PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
-};
-
-/*
- * Special "software" events provided by the kernel, even if the hardware
- * does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
- * well):
- */
-enum perf_sw_ids {
- PERF_COUNT_SW_CPU_CLOCK = 0,
- PERF_COUNT_SW_TASK_CLOCK = 1,
- PERF_COUNT_SW_PAGE_FAULTS = 2,
- PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
- PERF_COUNT_SW_CPU_MIGRATIONS = 4,
- PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
- PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
- PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
- PERF_COUNT_SW_EMULATION_FAULTS = 8,
- PERF_COUNT_SW_DUMMY = 9,
-
- PERF_COUNT_SW_MAX, /* non-ABI */
-};
-
-/*
- * Bits that can be set in attr.sample_type to request information
- * in the overflow packets.
- */
-enum perf_event_sample_format {
- PERF_SAMPLE_IP = 1U << 0,
- PERF_SAMPLE_TID = 1U << 1,
- PERF_SAMPLE_TIME = 1U << 2,
- PERF_SAMPLE_ADDR = 1U << 3,
- PERF_SAMPLE_READ = 1U << 4,
- PERF_SAMPLE_CALLCHAIN = 1U << 5,
- PERF_SAMPLE_ID = 1U << 6,
- PERF_SAMPLE_CPU = 1U << 7,
- PERF_SAMPLE_PERIOD = 1U << 8,
- PERF_SAMPLE_STREAM_ID = 1U << 9,
- PERF_SAMPLE_RAW = 1U << 10,
- PERF_SAMPLE_BRANCH_STACK = 1U << 11,
- PERF_SAMPLE_REGS_USER = 1U << 12,
- PERF_SAMPLE_STACK_USER = 1U << 13,
- PERF_SAMPLE_WEIGHT = 1U << 14,
- PERF_SAMPLE_DATA_SRC = 1U << 15,
- PERF_SAMPLE_IDENTIFIER = 1U << 16,
-
- PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */
-};
-
-/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
- *
- * If the user does not pass priv level information via branch_sample_type,
- * the kernel uses the event's priv level. Branch and event priv levels do
- * not have to match. Branch priv level is checked for permissions.
- *
- * The branch types can be combined, however BRANCH_ANY covers all types
- * of branches and therefore it supersedes all the other types.
- */
-enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
-
- PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */
-};
-
-#define PERF_SAMPLE_BRANCH_PLM_ALL \
- (PERF_SAMPLE_BRANCH_USER|\
- PERF_SAMPLE_BRANCH_KERNEL|\
- PERF_SAMPLE_BRANCH_HV)
-
-/*
- * Values to determine ABI of the registers dump.
- */
-enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
-};
-
-/*
- * The format of the data returned by read() on a perf event fd,
- * as specified by attr.read_format:
- *
- * struct read_format {
- * { u64 value;
- * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
- * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
- * { u64 id; } && PERF_FORMAT_ID
- * } && !PERF_FORMAT_GROUP
- *
- * { u64 nr;
- * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
- * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
- * { u64 value;
- * { u64 id; } && PERF_FORMAT_ID
- * } cntr[nr];
- * } && PERF_FORMAT_GROUP
- * };
- */
-enum perf_event_read_format {
- PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
- PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
- PERF_FORMAT_ID = 1U << 2,
- PERF_FORMAT_GROUP = 1U << 3,
-
- PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
-};
-
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-
-/*
- * Hardware event_id to monitor via a performance monitoring event:
- */
-struct perf_event_attr {
-
- /*
- * Major type: hardware/software/tracepoint/etc.
- */
- __u32 type;
-
- /*
- * Size of the attr structure, for fwd/bwd compat.
- */
- __u32 size;
-
- /*
- * Type specific configuration information.
- */
- __u64 config;
-
- union {
- __u64 sample_period;
- __u64 sample_freq;
- };
-
- __u64 sample_type;
- __u64 read_format;
-
- __u64 disabled : 1, /* off by default */
- inherit : 1, /* children inherit it */
- pinned : 1, /* must always be on PMU */
- exclusive : 1, /* only group on PMU */
- exclude_user : 1, /* don't count user */
- exclude_kernel : 1, /* ditto kernel */
- exclude_hv : 1, /* ditto hypervisor */
- exclude_idle : 1, /* don't count when idle */
- mmap : 1, /* include mmap data */
- comm : 1, /* include comm data */
- freq : 1, /* use freq, not period */
- inherit_stat : 1, /* per task counts */
- enable_on_exec : 1, /* next exec enables */
- task : 1, /* trace fork/exit */
- watermark : 1, /* wakeup_watermark */
- /*
- * precise_ip:
- *
- * 0 - SAMPLE_IP can have arbitrary skid
- * 1 - SAMPLE_IP must have constant skid
- * 2 - SAMPLE_IP requested to have 0 skid
- * 3 - SAMPLE_IP must have 0 skid
- *
- * See also PERF_RECORD_MISC_EXACT_IP
- */
- precise_ip : 2, /* skid constraint */
- mmap_data : 1, /* non-exec mmap data */
- sample_id_all : 1, /* sample_type all events */
-
- exclude_host : 1, /* don't count in host */
- exclude_guest : 1, /* don't count in guest */
-
- exclude_callchain_kernel : 1, /* exclude kernel callchains */
- exclude_callchain_user : 1, /* exclude user callchains */
- mmap2 : 1, /* include mmap with inode data */
-
- __reserved_1 : 40;
-
- union {
- __u32 wakeup_events; /* wakeup every n events */
- __u32 wakeup_watermark; /* bytes before wakeup */
- };
-
- __u32 bp_type;
- union {
- __u64 bp_addr;
- __u64 config1; /* extension of config */
- };
- union {
- __u64 bp_len;
- __u64 config2; /* extension of config1 */
- };
- __u64 branch_sample_type; /* enum perf_branch_sample_type */
-
- /*
- * Defines set of user regs to dump on samples.
- * See asm/perf_regs.h for details.
- */
- __u64 sample_regs_user;
-
- /*
- * Defines size of the user stack to dump on samples.
- */
- __u32 sample_stack_user;
-
- /* Align to u64. */
- __u32 __reserved_2;
-};
-
-#define perf_flags(attr) (*(&(attr)->read_format + 1))
-
-/*
- * Ioctls that can be done on a perf event fd:
- */
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-
-enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
-};
-
-/*
- * Structure of the page that can be mapped via mmap
- */
-struct perf_event_mmap_page {
- __u32 version; /* version number of this structure */
- __u32 compat_version; /* lowest version this is compat with */
-
- /*
- * Bits needed to read the hw events in user-space.
- *
- * u32 seq, time_mult, time_shift, idx, width;
- * u64 count, enabled, running;
- * u64 cyc, time_offset;
- * s64 pmc = 0;
- *
- * do {
- * seq = pc->lock;
- * barrier()
- *
- * enabled = pc->time_enabled;
- * running = pc->time_running;
- *
- * if (pc->cap_usr_time && enabled != running) {
- * cyc = rdtsc();
- * time_offset = pc->time_offset;
- * time_mult = pc->time_mult;
- * time_shift = pc->time_shift;
- * }
- *
- * idx = pc->index;
- * count = pc->offset;
- * if (pc->cap_usr_rdpmc && idx) {
- * width = pc->pmc_width;
- * pmc = rdpmc(idx - 1);
- * }
- *
- * barrier();
- * } while (pc->lock != seq);
- *
- * NOTE: for obvious reason this only works on self-monitoring
- * processes.
- */
- __u32 lock; /* seqlock for synchronization */
- __u32 index; /* hardware event identifier */
- __s64 offset; /* add to hardware event value */
- __u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
- union {
- __u64 capabilities;
- struct {
- __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
- cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
-
- cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
- cap_user_time : 1, /* The time_* fields are used */
- cap_user_time_zero : 1, /* The time_zero field is used */
- cap_____res : 59;
- };
- };
-
- /*
- * If cap_usr_rdpmc this field provides the bit-width of the value
- * read using the rdpmc() or equivalent instruction. This can be used
- * to sign extend the result like:
- *
- * pmc <<= 64 - width;
- * pmc >>= 64 - width; // signed shift right
- * count += pmc;
- */
- __u16 pmc_width;
-
- /*
- * If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
- *
- * u64 quot, rem;
- * u64 delta;
- *
- * quot = (cyc >> time_shift);
- * rem = cyc & ((1 << time_shift) - 1);
- * delta = time_offset + quot * time_mult +
- * ((rem * time_mult) >> time_shift);
- *
- * Where time_offset,time_mult,time_shift and cyc are read in the
- * seqcount loop described above. This delta can then be added to
- * enabled and possible running (if idx), improving the scaling:
- *
- * enabled += delta;
- * if (idx)
- * running += delta;
- *
- * quot = count / running;
- * rem = count % running;
- * count = quot * enabled + (rem * enabled) / running;
- */
- __u16 time_shift;
- __u32 time_mult;
- __u64 time_offset;
- /*
- * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
- * from sample timestamps.
- *
- * time = timestamp - time_zero;
- * quot = time / time_mult;
- * rem = time % time_mult;
- * cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
- *
- * And vice versa:
- *
- * quot = cyc >> time_shift;
- * rem = cyc & ((1 << time_shift) - 1);
- * timestamp = time_zero + quot * time_mult +
- * ((rem * time_mult) >> time_shift);
- */
- __u64 time_zero;
- __u32 size; /* Header size up to __reserved[] fields. */
-
- /*
- * Hole for extension of the self monitor capabilities
- */
-
- __u8 __reserved[118*8+4]; /* align to 1k. */
-
- /*
- * Control data for the mmap() data buffer.
- *
- * User-space reading the @data_head value should issue an smp_rmb(),
- * after reading this value.
- *
- * When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
- * an smp_mb() to separate the data read from the ->data_tail store.
- * In this case the kernel will not over-write unread data.
- *
- * See perf_output_put_handle() for the data ordering.
- */
- __u64 data_head; /* head in the data section */
- __u64 data_tail; /* user-space written tail */
-};
-
-#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
-#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
-#define PERF_RECORD_MISC_KERNEL (1 << 0)
-#define PERF_RECORD_MISC_USER (2 << 0)
-#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
-#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
-#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
-
-#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
-/*
- * Indicates that the content of PERF_SAMPLE_IP points to
- * the actual instruction that triggered the event. See also
- * perf_event_attr::precise_ip.
- */
-#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
-/*
- * Reserve the last bit to indicate some extended misc field
- */
-#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
-
-struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
-};
-
-enum perf_event_type {
-
- /*
- * If perf_event_attr.sample_id_all is set then all event types will
- * have the sample_type selected fields related to where/when
- * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
- * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed
- * just after the perf_event_header and the fields already present for
- * the existing fields, i.e. at the end of the payload. That way a newer
- * perf.data file will be supported by older perf tools, with these new
- * optional fields being ignored.
- *
- * struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
- * { u64 id; } && PERF_SAMPLE_IDENTIFIER
- * } && perf_event_attr::sample_id_all
- *
- * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
- * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
- * relative to header.size.
- */
-
- /*
- * The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
- *
- * struct {
- * struct perf_event_header header;
- *
- * u32 pid, tid;
- * u64 addr;
- * u64 len;
- * u64 pgoff;
- * char filename[];
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_MMAP = 1,
-
- /*
- * struct {
- * struct perf_event_header header;
- * u64 id;
- * u64 lost;
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_LOST = 2,
-
- /*
- * struct {
- * struct perf_event_header header;
- *
- * u32 pid, tid;
- * char comm[];
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_COMM = 3,
-
- /*
- * struct {
- * struct perf_event_header header;
- * u32 pid, ppid;
- * u32 tid, ptid;
- * u64 time;
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_EXIT = 4,
-
- /*
- * struct {
- * struct perf_event_header header;
- * u64 time;
- * u64 id;
- * u64 stream_id;
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_THROTTLE = 5,
- PERF_RECORD_UNTHROTTLE = 6,
-
- /*
- * struct {
- * struct perf_event_header header;
- * u32 pid, ppid;
- * u32 tid, ptid;
- * u64 time;
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_FORK = 7,
-
- /*
- * struct {
- * struct perf_event_header header;
- * u32 pid, tid;
- *
- * struct read_format values;
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_READ = 8,
-
- /*
- * struct {
- * struct perf_event_header header;
- *
- * #
- * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
- * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
- * # is fixed relative to header.
- * #
- *
- * { u64 id; } && PERF_SAMPLE_IDENTIFIER
- * { u64 ip; } && PERF_SAMPLE_IP
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 addr; } && PERF_SAMPLE_ADDR
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
- * { u64 period; } && PERF_SAMPLE_PERIOD
- *
- * { struct read_format values; } && PERF_SAMPLE_READ
- *
- * { u64 nr,
- * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
- *
- * #
- * # The RAW record below is opaque data wrt the ABI
- * #
- * # That is, the ABI doesn't make any promises wrt to
- * # the stability of its content, it may vary depending
- * # on event, hardware, kernel version and phase of
- * # the moon.
- * #
- * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
- * #
- *
- * { u32 size;
- * char data[size];}&& PERF_SAMPLE_RAW
- *
- * { u64 nr;
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
- *
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
- *
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
- *
- * { u64 weight; } && PERF_SAMPLE_WEIGHT
- * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
- * };
- */
- PERF_RECORD_SAMPLE = 9,
-
- /*
- * The MMAP2 records are an augmented version of MMAP, they add
- * maj, min, ino numbers to be used to uniquely identify each mapping
- *
- * struct {
- * struct perf_event_header header;
- *
- * u32 pid, tid;
- * u64 addr;
- * u64 len;
- * u64 pgoff;
- * u32 maj;
- * u32 min;
- * u64 ino;
- * u64 ino_generation;
- * char filename[];
- * struct sample_id sample_id;
- * };
- */
- PERF_RECORD_MMAP2 = 10,
-
- PERF_RECORD_MAX, /* non-ABI */
-};
-
-#define PERF_MAX_STACK_DEPTH 127
-
-enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
-
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
-
- PERF_CONTEXT_MAX = (__u64)-4095,
-};
-
-#define PERF_FLAG_FD_NO_GROUP (1U << 0)
-#define PERF_FLAG_FD_OUTPUT (1U << 1)
-#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
-
-union perf_mem_data_src {
- __u64 val;
- struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_rsvd:31;
- };
-};
-
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
-
-/* memory hierarchy (memory level, hit or miss) */
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
-
-/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
-
-#define PERF_MEM_S(a, s) \
- (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
-
-/*
- * single taken branch record layout:
- *
- * from: source instruction (may not always be a branch insn)
- * to: branch target
- * mispred: branch target was mispredicted
- * predicted: branch target was predicted
- *
- * support for mispred, predicted is optional. In case it
- * is not supported mispred = predicted = 0.
- *
- * in_tx: running in a hardware transaction
- * abort: aborting a hardware transaction
- */
-struct perf_branch_entry {
- __u64 from;
- __u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- reserved:60;
-};
-
-#endif /* _LINUX_PERF_EVENT_H */
diff --git a/tools/gator/daemon/k/perf_event.h b/tools/gator/daemon/k/perf_event.h
index e5dff8c21ef4..e886c48cadf6 120000..100644
--- a/tools/gator/daemon/k/perf_event.h
+++ b/tools/gator/daemon/k/perf_event.h
@@ -1 +1,792 @@
-perf_event.3.12.h \ No newline at end of file
+/*
+ * Performance events:
+ *
+ * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_EVENT_H
+#define _LINUX_PERF_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
+
+ PERF_TYPE_MAX, /* non-ABI */
+};
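
As an illustration only (not part of this patch), the sketch below shows how user space pairs attr.type with a matching config value; perf_event_open() has no libc wrapper, so it is reached via syscall(2). The workload placeholder is hypothetical.

/* Illustrative sketch, assuming a Linux host with this header installed. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;           /* class of PMU */
        attr.size = sizeof(attr);                 /* fwd/bwd compatibility */
        attr.config = PERF_COUNT_HW_INSTRUCTIONS; /* event within that class */
        attr.exclude_kernel = 1;
        attr.exclude_hv = 1;

        /* Counting starts immediately because attr.disabled is 0. */
        fd = perf_event_open(&attr, 0 /* self */, -1 /* any cpu */, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        /* ... workload under measurement ... */
        if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
                printf("instructions: %llu\n", (unsigned long long)count);
        close(fd);
        return 0;
}
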
+
+/*
+ * Generalized performance event event_id types, used by the
+ * attr.event_id parameter of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
+ PERF_COUNT_HW_REF_CPU_CYCLES = 9,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized hardware cache events:
+ *
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
+ * { read, write, prefetch } x
+ * { accesses, misses }
+ */
+enum perf_hw_cache_id {
+ PERF_COUNT_HW_CACHE_L1D = 0,
+ PERF_COUNT_HW_CACHE_L1I = 1,
+ PERF_COUNT_HW_CACHE_LL = 2,
+ PERF_COUNT_HW_CACHE_DTLB = 3,
+ PERF_COUNT_HW_CACHE_ITLB = 4,
+ PERF_COUNT_HW_CACHE_BPU = 5,
+ PERF_COUNT_HW_CACHE_NODE = 6,
+
+ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+ PERF_COUNT_HW_CACHE_OP_READ = 0,
+ PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+ PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
+};
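
The three cache enums above are combined into attr.config for PERF_TYPE_HW_CACHE events; the packing (cache id in bits 0-7, op in bits 8-15, result in bits 16-23) is the one documented in perf_event_open(2). A minimal helper, shown here only as an illustration:

/* Illustrative helper: pack cache id / op / result into attr.config. */
#include <stdint.h>

static uint64_t hw_cache_config(uint64_t id, uint64_t op, uint64_t result)
{
        return id | (op << 8) | (result << 16);
}

/* Example: L1 data-cache read misses.
 *   attr.type   = PERF_TYPE_HW_CACHE;
 *   attr.config = hw_cache_config(PERF_COUNT_HW_CACHE_L1D,
 *                                 PERF_COUNT_HW_CACHE_OP_READ,
 *                                 PERF_COUNT_HW_CACHE_RESULT_MISS);
 */
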
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_DUMMY = 9,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_sample_format {
+ PERF_SAMPLE_IP = 1U << 0,
+ PERF_SAMPLE_TID = 1U << 1,
+ PERF_SAMPLE_TIME = 1U << 2,
+ PERF_SAMPLE_ADDR = 1U << 3,
+ PERF_SAMPLE_READ = 1U << 4,
+ PERF_SAMPLE_CALLCHAIN = 1U << 5,
+ PERF_SAMPLE_ID = 1U << 6,
+ PERF_SAMPLE_CPU = 1U << 7,
+ PERF_SAMPLE_PERIOD = 1U << 8,
+ PERF_SAMPLE_STREAM_ID = 1U << 9,
+ PERF_SAMPLE_RAW = 1U << 10,
+ PERF_SAMPLE_BRANCH_STACK = 1U << 11,
+ PERF_SAMPLE_REGS_USER = 1U << 12,
+ PERF_SAMPLE_STACK_USER = 1U << 13,
+ PERF_SAMPLE_WEIGHT = 1U << 14,
+ PERF_SAMPLE_DATA_SRC = 1U << 15,
+ PERF_SAMPLE_IDENTIFIER = 1U << 16,
+
+ PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */
+};
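
These bits only matter for sampling events. As a hedged illustration (not part of the patch, helper name hypothetical), a function that configures a cycle counter to produce one overflow record per 100000 cycles, each carrying the IP, TID, TIME and PERIOD fields selected above:

/* Illustrative only: configure a sampling event. */
#include <linux/perf_event.h>
#include <string.h>

static void fill_sampling_attr(struct perf_event_attr *attr)
{
        memset(attr, 0, sizeof(*attr));
        attr->type          = PERF_TYPE_HARDWARE;
        attr->size          = sizeof(*attr);
        attr->config        = PERF_COUNT_HW_CPU_CYCLES;
        attr->sample_period = 100000;           /* overflow every 100k cycles */
        attr->sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                              PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD;
        attr->disabled      = 1;                /* enable later via ioctl/exec */
}
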
+
+/*
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set.
+ *
+ * If the user does not pass priv level information via branch_sample_type,
+ * the kernel uses the event's priv level. Branch and event priv levels do
+ * not have to match. Branch priv level is checked for permissions.
+ *
+ * The branch types can be combined; however, BRANCH_ANY covers all types
+ * of branches and therefore supersedes all the other types.
+ */
+enum perf_branch_sample_type {
+ PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */
+
+ PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */
+};
+
+#define PERF_SAMPLE_BRANCH_PLM_ALL \
+ (PERF_SAMPLE_BRANCH_USER|\
+ PERF_SAMPLE_BRANCH_KERNEL|\
+ PERF_SAMPLE_BRANCH_HV)
+
+/*
+ * Values to determine ABI of the registers dump.
+ */
+enum perf_sample_regs_abi {
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
+};
+
+/*
+ * The format of the data returned by read() on a perf event fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_event_read_format {
+ PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
+ PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
+ PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
+
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+};
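
The commented layout above translates directly into a C struct when PERF_FORMAT_GROUP is combined with the time and id bits; the enabled/running times let a reader scale counts that were multiplexed on the PMU. A sketch, illustrative only (struct and helper names are hypothetical):

/* Illustrative only: parse read() data for read_format =
 * GROUP | ID | TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING. */
#include <stdint.h>

struct group_read {
        uint64_t nr;             /* number of counters in the group */
        uint64_t time_enabled;
        uint64_t time_running;
        struct {
                uint64_t value;
                uint64_t id;
        } cntr[];                /* nr entries follow */
};

static uint64_t scaled_count(const struct group_read *r, uint64_t i)
{
        /* Compensate for PMU multiplexing; a real tool would also guard
         * against overflow in the multiplication. */
        if (r->time_running == 0)
                return 0;
        return r->cntr[i].value * r->time_enabled / r->time_running;
}
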
+
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
+ /* add: sample_stack_user */
+
+/*
+ * Hardware event_id to monitor via a performance monitoring event:
+ */
+struct perf_event_attr {
+
+ /*
+ * Major type: hardware/software/tracepoint/etc.
+ */
+ __u32 type;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+
+ /*
+ * Type specific configuration information.
+ */
+ __u64 config;
+
+ union {
+ __u64 sample_period;
+ __u64 sample_freq;
+ };
+
+ __u64 sample_type;
+ __u64 read_format;
+
+ __u64 disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ comm : 1, /* include comm data */
+ freq : 1, /* use freq, not period */
+ inherit_stat : 1, /* per task counts */
+ enable_on_exec : 1, /* next exec enables */
+ task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */
+ /*
+ * precise_ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ precise_ip : 2, /* skid constraint */
+ mmap_data : 1, /* non-exec mmap data */
+ sample_id_all : 1, /* sample_type all events */
+
+ exclude_host : 1, /* don't count in host */
+ exclude_guest : 1, /* don't count in guest */
+
+ exclude_callchain_kernel : 1, /* exclude kernel callchains */
+ exclude_callchain_user : 1, /* exclude user callchains */
+ mmap2 : 1, /* include mmap with inode data */
+
+ __reserved_1 : 40;
+
+ union {
+ __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_watermark; /* bytes before wakeup */
+ };
+
+ __u32 bp_type;
+ union {
+ __u64 bp_addr;
+ __u64 config1; /* extension of config */
+ };
+ union {
+ __u64 bp_len;
+ __u64 config2; /* extension of config1 */
+ };
+ __u64 branch_sample_type; /* enum perf_branch_sample_type */
+
+ /*
+ * Defines set of user regs to dump on samples.
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_user;
+
+ /*
+ * Defines size of the user stack to dump on samples.
+ */
+ __u32 sample_stack_user;
+
+ /* Align to u64. */
+ __u32 __reserved_2;
+};
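
Several of the attribute bits above only make sense together. As an illustration only (values are hypothetical, and this is not how gatord configures its events), a profiler that wants to follow a command across exec typically combines disabled, enable_on_exec, inherit and the mmap/comm side-band events:

/* Illustrative only: attributes for profiling a forked child across exec. */
#include <linux/perf_event.h>
#include <string.h>

static void fill_profile_child_attr(struct perf_event_attr *attr)
{
        memset(attr, 0, sizeof(*attr));
        attr->type           = PERF_TYPE_HARDWARE;
        attr->size           = sizeof(*attr);    /* fwd/bwd compat negotiation */
        attr->config         = PERF_COUNT_HW_CPU_CYCLES;
        attr->freq           = 1;                /* union holds sample_freq    */
        attr->sample_freq    = 1000;             /* ~1000 samples per second   */
        attr->sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
        attr->disabled       = 1;                /* stays off ...              */
        attr->enable_on_exec = 1;                /* ... until the child execs  */
        attr->inherit        = 1;                /* follow the child's children */
        attr->mmap           = 1;                /* emit PERF_RECORD_MMAP      */
        attr->comm           = 1;                /* emit PERF_RECORD_COMM      */
        attr->exclude_hv     = 1;
}

The caller would pass the forked child's pid to perf_event_open() before the child calls execvp(); the enable_on_exec bit then arms the event exactly at exec time.
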
+
+#define perf_flags(attr) (*(&(attr)->read_format + 1))
+
+/*
+ * Ioctls that can be done on a perf event fd:
+ */
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
+
+enum perf_event_ioc_flags {
+ PERF_IOC_FLAG_GROUP = 1U << 0,
+};
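
A typical counting workflow brackets the measured region with these ioctls. Illustrative fragment only; fd, group_leader_fd and workload are hypothetical names:

/* Illustrative only: bracket a measured region with the ioctls above. */
#include <linux/perf_event.h>
#include <sys/ioctl.h>

static void measure_region(int fd, void (*workload)(void))
{
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);      /* zero the counter */
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);     /* start counting   */
        workload();
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);    /* stop counting    */
}

/* With PERF_IOC_FLAG_GROUP the same ioctls act on every member of the
 * event group led by the fd, e.g.:
 *   ioctl(group_leader_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
 */
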
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+ __u32 version; /* version number of this structure */
+ __u32 compat_version; /* lowest version this is compat with */
+
+ /*
+ * Bits needed to read the hw events in user-space.
+ *
+ * u32 seq, time_mult, time_shift, idx, width;
+ * u64 count, enabled, running;
+ * u64 cyc, time_offset;
+ * s64 pmc = 0;
+ *
+ * do {
+ * seq = pc->lock;
+ * barrier()
+ *
+ * enabled = pc->time_enabled;
+ * running = pc->time_running;
+ *
+ * if (pc->cap_usr_time && enabled != running) {
+ * cyc = rdtsc();
+ * time_offset = pc->time_offset;
+ * time_mult = pc->time_mult;
+ * time_shift = pc->time_shift;
+ * }
+ *
+ * idx = pc->index;
+ * count = pc->offset;
+ * if (pc->cap_usr_rdpmc && idx) {
+ * width = pc->pmc_width;
+ * pmc = rdpmc(idx - 1);
+ * }
+ *
+ * barrier();
+ * } while (pc->lock != seq);
+ *
+ * NOTE: for obvious reasons this only works on self-monitoring
+ * processes.
+ */
+ __u32 lock; /* seqlock for synchronization */
+ __u32 index; /* hardware event identifier */
+ __s64 offset; /* add to hardware event value */
+ __u64 time_enabled; /* time event active */
+ __u64 time_running; /* time event on cpu */
+ union {
+ __u64 capabilities;
+ struct {
+ __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+ cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
+
+ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
+ cap_user_time : 1, /* The time_* fields are used */
+ cap_user_time_zero : 1, /* The time_zero field is used */
+ cap_____res : 59;
+ };
+ };
+
+ /*
+ * If cap_user_rdpmc is set, this field provides the bit-width of the value
+ * read using the rdpmc() or equivalent instruction. This can be used
+ * to sign extend the result like:
+ *
+ * pmc <<= 64 - width;
+ * pmc >>= 64 - width; // signed shift right
+ * count += pmc;
+ */
+ __u16 pmc_width;
+
+ /*
+ * If cap_user_time is set, the fields below can be used to compute the
+ * time delta since time_enabled (in ns) using rdtsc or similar.
+ *
+ * u64 quot, rem;
+ * u64 delta;
+ *
+ * quot = (cyc >> time_shift);
+ * rem = cyc & ((1 << time_shift) - 1);
+ * delta = time_offset + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ *
+ * Where time_offset, time_mult, time_shift and cyc are read in the
+ * seqcount loop described above. This delta can then be added to
+ * enabled and, if idx is set, to running, improving the scaling:
+ *
+ * enabled += delta;
+ * if (idx)
+ * running += delta;
+ *
+ * quot = count / running;
+ * rem = count % running;
+ * count = quot * enabled + (rem * enabled) / running;
+ */
+ __u16 time_shift;
+ __u32 time_mult;
+ __u64 time_offset;
+ /*
+ * If cap_user_time_zero is set, the hardware clock (e.g. TSC) can be calculated
+ * from sample timestamps.
+ *
+ * time = timestamp - time_zero;
+ * quot = time / time_mult;
+ * rem = time % time_mult;
+ * cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+ *
+ * And vice versa:
+ *
+ * quot = cyc >> time_shift;
+ * rem = cyc & ((1 << time_shift) - 1);
+ * timestamp = time_zero + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ */
+ __u64 time_zero;
+ __u32 size; /* Header size up to __reserved[] fields. */
+
+ /*
+ * Hole for extension of the self monitor capabilities
+ */
+
+ __u8 __reserved[118*8+4]; /* align to 1k. */
+
+ /*
+ * Control data for the mmap() data buffer.
+ *
+ * User-space reading the @data_head value should issue an smp_rmb(),
+ * after reading this value.
+ *
+ * When the mapping is PROT_WRITE the @data_tail value should be
+ * written by userspace to reflect the last read data, after issuing
+ * an smp_mb() to separate the data read from the ->data_tail store.
+ * In this case the kernel will not overwrite unread data.
+ *
+ * See perf_output_put_handle() for the data ordering.
+ */
+ __u64 data_head; /* head in the data section */
+ __u64 data_tail; /* user-space written tail */
+};
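
The data_head/data_tail contract in the comment above is what a user-space reader implements after mapping one metadata page (this struct) plus a power-of-two number of data pages. A hedged sketch of the consumer side; the acquire/release atomics stand in for the smp_rmb()/smp_mb() the comment asks for, and record-wrap handling is deliberately left as a comment:

/* Illustrative only: drain the mmap()ed ring buffer. */
#include <linux/perf_event.h>
#include <stdint.h>

#define DATA_PAGES 8    /* must be a power of two; the mapped size is
                         * (DATA_PAGES + 1) * page_size at offset 0 of fd */

static void drain(struct perf_event_mmap_page *mp, long page_size,
                  void (*handle)(const struct perf_event_header *))
{
        char *data = (char *)mp + page_size;     /* data area follows metadata */
        uint64_t mask = (uint64_t)DATA_PAGES * page_size - 1;
        uint64_t head = __atomic_load_n(&mp->data_head, __ATOMIC_ACQUIRE);
        uint64_t tail = mp->data_tail;

        while (tail < head) {
                struct perf_event_header *hdr =
                        (struct perf_event_header *)(data + (tail & mask));

                /* A real reader copies records that wrap the buffer end
                 * into a linear scratch buffer before using them. */
                handle(hdr);
                tail += hdr->size;
        }
        /* Tell the kernel how far we got so it can reuse the space. */
        __atomic_store_n(&mp->data_tail, tail, __ATOMIC_RELEASE);
}
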
+
+#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_RECORD_MISC_KERNEL (1 << 0)
+#define PERF_RECORD_MISC_USER (2 << 0)
+#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
+
+#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
+/*
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ */
+#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
+
+struct perf_event_header {
+ __u32 type;
+ __u16 misc;
+ __u16 size;
+};
+
+enum perf_event_type {
+
+ /*
+ * If perf_event_attr.sample_id_all is set then all event types will
+ * carry the sample_type-selected fields related to where/when
+ * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
+ * IDENTIFIER) described in PERF_RECORD_SAMPLE below. They are stashed
+ * just after the perf_event_header and the fields already present for
+ * the event type, i.e. at the end of the payload. That way a newer
+ * perf.data file will be supported by older perf tools, with these new
+ * optional fields being ignored.
+ *
+ * struct sample_id {
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * } && perf_event_attr::sample_id_all
+ *
+ * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
+ * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
+ * relative to header.size.
+ */
+
+ /*
+ * The MMAP events record the PROT_EXEC mappings so that we can
+ * correlate userspace IPs to code. They have the following structure:
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP = 1,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 id;
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST = 2,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * char comm[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_COMM = 3,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_EXIT = 4,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 time;
+ * u64 id;
+ * u64 stream_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_THROTTLE = 5,
+ PERF_RECORD_UNTHROTTLE = 6,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_FORK = 7,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, tid;
+ *
+ * struct read_format values;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_READ = 8,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * #
+ * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+ * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+ * # is fixed relative to header.
+ * #
+ *
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * { u64 ip; } && PERF_SAMPLE_IP
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 addr; } && PERF_SAMPLE_ADDR
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 period; } && PERF_SAMPLE_PERIOD
+ *
+ * { struct read_format values; } && PERF_SAMPLE_READ
+ *
+ * { u64 nr,
+ * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises with regard to
+ * # the stability of its content; it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
+ * { u32 size;
+ * char data[size];}&& PERF_SAMPLE_RAW
+ *
+ * { u64 nr;
+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ *
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ *
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ *
+ * { u64 weight; } && PERF_SAMPLE_WEIGHT
+ * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
+ * };
+ */
+ PERF_RECORD_SAMPLE = 9,
+
+ /*
+ * The MMAP2 records are an augmented version of MMAP; they add
+ * maj, min and ino numbers that can be used to uniquely identify each mapping.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP2 = 10,
+
+ PERF_RECORD_MAX, /* non-ABI */
+};
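
On the reader side each record starts with the perf_event_header defined earlier, and header.size is what advances the cursor. The sketch below is illustrative only and assumes the event was opened with sample_type equal to PERF_SAMPLE_IP | PERF_SAMPLE_TID, so the PERF_RECORD_SAMPLE body is exactly { u64 ip; u32 pid, tid; }:

/* Illustrative only: dispatch on record type from the ring buffer. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void handle_record(const struct perf_event_header *hdr)
{
        const unsigned char *body = (const unsigned char *)(hdr + 1);

        switch (hdr->type) {
        case PERF_RECORD_SAMPLE: {
                uint64_t ip;
                uint32_t pid, tid;

                memcpy(&ip,  body,      sizeof(ip));
                memcpy(&pid, body + 8,  sizeof(pid));
                memcpy(&tid, body + 12, sizeof(tid));
                printf("sample ip=%#llx pid=%u tid=%u\n",
                       (unsigned long long)ip, pid, tid);
                break;
        }
        case PERF_RECORD_LOST: {
                uint64_t id, lost;

                memcpy(&id,   body,     sizeof(id));
                memcpy(&lost, body + 8, sizeof(lost));
                fprintf(stderr, "lost %llu records (id %llu)\n",
                        (unsigned long long)lost, (unsigned long long)id);
                break;
        }
        default:
                /* MMAP, COMM, FORK, EXIT, ... ignored in this sketch. */
                break;
        }
}
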
+
+#define PERF_MAX_STACK_DEPTH 127
+
+enum perf_callchain_context {
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+
+ PERF_CONTEXT_MAX = (__u64)-4095,
+};
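
When PERF_SAMPLE_CALLCHAIN is requested, the ips[] array of a sample interleaves these context markers with real addresses; any value at or above PERF_CONTEXT_MAX, interpreted as an unsigned 64-bit number, is a marker announcing the context of the frames that follow. Illustrative sketch only:

/* Illustrative only: walk a callchain, tracking context markers. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

static void print_callchain(const uint64_t *ips, uint64_t nr)
{
        const char *ctx = "unknown";
        uint64_t i;

        for (i = 0; i < nr; i++) {
                if (ips[i] >= (uint64_t)PERF_CONTEXT_MAX) {
                        switch (ips[i]) {
                        case (uint64_t)PERF_CONTEXT_KERNEL: ctx = "kernel"; break;
                        case (uint64_t)PERF_CONTEXT_USER:   ctx = "user";   break;
                        case (uint64_t)PERF_CONTEXT_HV:     ctx = "hv";     break;
                        default:                            ctx = "guest";  break;
                        }
                        continue;       /* marker, not an instruction address */
                }
                printf("  [%s] %#llx\n", ctx, (unsigned long long)ips[i]);
        }
}
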
+
+#define PERF_FLAG_FD_NO_GROUP (1U << 0)
+#define PERF_FLAG_FD_OUTPUT (1U << 1)
+#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
+
+union perf_mem_data_src {
+ __u64 val;
+ struct {
+ __u64 mem_op:5, /* type of opcode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_snoop:5, /* snoop mode */
+ mem_lock:2, /* lock instr */
+ mem_dtlb:7, /* tlb access */
+ mem_rsvd:31;
+ };
+};
+
+/* type of opcode (load/store/prefetch,code) */
+#define PERF_MEM_OP_NA 0x01 /* not available */
+#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
+#define PERF_MEM_OP_STORE 0x04 /* store instruction */
+#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
+#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
+#define PERF_MEM_OP_SHIFT 0
+
+/* memory hierarchy (memory level, hit or miss) */
+#define PERF_MEM_LVL_NA 0x01 /* not available */
+#define PERF_MEM_LVL_HIT 0x02 /* hit level */
+#define PERF_MEM_LVL_MISS 0x04 /* miss level */
+#define PERF_MEM_LVL_L1 0x08 /* L1 */
+#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x20 /* L2 */
+#define PERF_MEM_LVL_L3 0x40 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+/* snoop mode */
+#define PERF_MEM_SNOOP_NA 0x01 /* not available */
+#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
+#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+/* locked instruction */
+#define PERF_MEM_LOCK_NA 0x01 /* not available */
+#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
+
+/* TLB access */
+#define PERF_MEM_TLB_NA 0x01 /* not available */
+#define PERF_MEM_TLB_HIT 0x02 /* hit level */
+#define PERF_MEM_TLB_MISS 0x04 /* miss level */
+#define PERF_MEM_TLB_L1 0x08 /* L1 */
+#define PERF_MEM_TLB_L2 0x10 /* L2 */
+#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker */
+#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
+
+#define PERF_MEM_S(a, s) \
+ (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
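
PERF_MEM_S composes these fields when producing a data_src value; the same layout comes back to user space in union perf_mem_data_src via PERF_SAMPLE_DATA_SRC, where the bit-fields decode it again. Illustrative sketch only; the helper name is hypothetical:

/* Illustrative only: decode a data_src value delivered with a sample. */
#include <linux/perf_event.h>
#include <stdint.h>

static int is_l1_load_miss(uint64_t val)
{
        union perf_mem_data_src src = { .val = val };

        return (src.mem_op  & PERF_MEM_OP_LOAD) &&
               (src.mem_lvl & PERF_MEM_LVL_L1)  &&
               (src.mem_lvl & PERF_MEM_LVL_MISS);
}

/* The composing direction uses the macro above, e.g. a synthetic
 * "L1 load hit":
 *   PERF_MEM_S(OP, LOAD) | PERF_MEM_S(LVL, L1) | PERF_MEM_S(LVL, HIT)
 */
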
+
+/*
+ * single taken branch record layout:
+ *
+ * from: source instruction (may not always be a branch insn)
+ * to: branch target
+ * mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * support for mispred, predicted is optional. In case it
+ * is not supported mispred = predicted = 0.
+ *
+ * in_tx: running in a hardware transaction
+ * abort: aborting a hardware transaction
+ */
+struct perf_branch_entry {
+ __u64 from;
+ __u64 to;
+ __u64 mispred:1, /* target mispredicted */
+ predicted:1,/* target predicted */
+ in_tx:1, /* in transaction */
+ abort:1, /* transaction abort */
+ reserved:60;
+};
+
+#endif /* _LINUX_PERF_EVENT_H */
diff --git a/tools/gator/daemon/main.cpp b/tools/gator/daemon/main.cpp
index c68a892e74fc..95e179ef4459 100644
--- a/tools/gator/daemon/main.cpp
+++ b/tools/gator/daemon/main.cpp
@@ -20,8 +20,8 @@
#include <unistd.h>
#include "AnnotateListener.h"
-#include "CCNDriver.h"
#include "Child.h"
+#include "DriverSource.h"
#include "EventsXML.h"
#include "Logging.h"
#include "Monitor.h"
@@ -297,15 +297,10 @@ static bool setupFilesystem(char* module) {
}
// Load driver
- bool success = init_module(location);
- if (!success) {
- logg->logMessage("init_module failed, trying insmod");
- snprintf(command, sizeof(command), "insmod %s >/dev/null 2>&1", location);
- if (system(command) != 0) {
- logg->logMessage("Unable to load gator.ko driver with command: %s", command);
- logg->logError("Unable to load (insmod) gator.ko driver:\n >>> gator.ko must be built against the current kernel version & configuration\n >>> See dmesg for more details");
- handleException();
- }
+ if (!init_module(location)) {
+ logg->logMessage("Unable to load gator.ko driver with command: %s", command);
+ logg->logError("Unable to load (insmod) gator.ko driver:\n >>> gator.ko must be built against the current kernel version & configuration\n >>> See dmesg for more details");
+ handleException();
}
if (mountGatorFS() == -1) {
@@ -323,10 +318,7 @@ static int shutdownFilesystem() {
}
if (driverRunningAtStart == false) {
if (syscall(__NR_delete_module, "gator", O_NONBLOCK) != 0) {
- logg->logMessage("delete_module failed, trying rmmod");
- if (system("rmmod gator >/dev/null 2>&1") != 0) {
- return -1;
- }
+ return -1;
}
}
@@ -496,16 +488,13 @@ int main(int argc, char** argv) {
// e.g. it may not be the group leader when launched as 'sudo gatord'
setsid();
- // Set up global thread-safe logging
+ // Set up global thread-safe logging
logg = new Logging(hasDebugFlag(argc, argv));
// Global data class
gSessionData = new SessionData();
// Set up global utility class
util = new OlyUtility();
- // Initialize drivers
- new CCNDriver();
-
prctl(PR_SET_NAME, (unsigned long)&"gatord-main", 0, 0, 0);
pthread_mutex_init(&numSessions_mutex, NULL);
@@ -535,9 +524,11 @@ int main(int argc, char** argv) {
}
// Call before setting up the SIGCHLD handler, as system() spawns child processes
- if (!setupFilesystem(cmdline.module)) {
+ if (setupFilesystem(cmdline.module)) {
+ DriverSource::checkVersion();
+ } else {
logg->logMessage("Unable to set up gatorfs, trying perf");
- if (!gSessionData->perf.setup()) {
+ if (!gSessionData->mPerf.setup()) {
logg->logError(
"Unable to locate gator.ko driver:\n"
" >>> gator.ko should be co-located with gatord in the same directory\n"
diff --git a/tools/gator/daemon/notify/COPYING b/tools/gator/daemon/notify/COPYING
new file mode 100644
index 000000000000..d159169d1050
--- /dev/null
+++ b/tools/gator/daemon/notify/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/tools/gator/daemon/notify/Makefile b/tools/gator/daemon/notify/Makefile
new file mode 100644
index 000000000000..0d8f0415f0e2
--- /dev/null
+++ b/tools/gator/daemon/notify/Makefile
@@ -0,0 +1,24 @@
+ifneq ($(SDKDIR),)
+
+# Find the oldest SDK Platform installed >= 16
+SDK_PLATFORM := $(shell ls -d $(SDKDIR)/platforms/android-1[6-9] $(SDKDIR)/platforms/android-[2-9][0-9] | head -1)
+# Find the newest SDK Build-tools
+SDK_BUILD_TOOLS := $(shell ls -d $(SDKDIR)/build-tools/* | tail -1)
+
+all: notify.dex
+
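+# Compile against the platform's layoutlib.jar (presumably so the hidden
+# android.os.ServiceManager API used by Notify.java resolves; it is not part
+# of the public android.jar stubs), then convert the class file to dex with dx.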
+notify.dex: Notify.java
+ javac -cp $(SDK_PLATFORM)/data/layoutlib.jar -source 1.6 -target 1.6 $^
+ $(SDK_BUILD_TOOLS)/dx --dex --output=$@ $(^:%.java=%.class)
+
+else
+
+all:
+	$(error Please specify SDKDIR as the location of the Android SDK)
+
+endif
+
+.PHONY: clean
+
+clean:
+ rm -f *~ *.class *.dex
diff --git a/tools/gator/daemon/notify/Notify.java b/tools/gator/daemon/notify/Notify.java
new file mode 100644
index 000000000000..970245114880
--- /dev/null
+++ b/tools/gator/daemon/notify/Notify.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) ARM Limited 2015. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+import android.os.IBinder;
+import android.os.Parcel;
+import android.os.RemoteException;
+import android.os.ServiceManager;
+
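+/**
+ * Iterates over every registered Android service and sends it the standard
+ * IBinder.SYSPROPS_TRANSACTION poke, which signals that system properties
+ * may have changed and should be re-read.
+ */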
+public class Notify {
+ public static void main(String[] args) throws RemoteException {
+ for (String service : ServiceManager.listServices()) {
+ IBinder b = ServiceManager.checkService(service);
+ if (b != null) {
+ Parcel p = null;
+ try {
+ p = Parcel.obtain();
+ b.transact(IBinder.SYSPROPS_TRANSACTION, p, null, 0);
+ } finally {
+ if (p != null) {
+ p.recycle();
+ }
+ }
+ }
+ }
+ }
+}