diff --git a/CREDITS b/CREDITS index 8207cc62ee9d6079bb55032090ef518ef08f6b04..af67a84517d700526327c7a60543500029abb1ca 100644 --- a/CREDITS +++ b/CREDITS @@ -1507,6 +1507,14 @@ S: 312/107 Canberra Avenue S: Griffith, ACT 2603 S: Australia +N: Andreas Herrmann +E: herrmann.der.user@gmail.com +E: herrmann.der.user@googlemail.com +D: Key developer of x86/AMD64 +D: Author of AMD family 15h processor power monitoring driver +D: Maintainer of AMD Athlon 64 and Opteron processor frequency driver +S: Germany + N: Sebastian Hetze E: she@lunetix.de D: German Linux Documentation, diff --git a/Documentation/ABI/testing/configfs-iio b/Documentation/ABI/testing/configfs-iio new file mode 100644 index 0000000000000000000000000000000000000000..2483756fccf58ecbee5ed51a4d1c4af669b29411 --- /dev/null +++ b/Documentation/ABI/testing/configfs-iio @@ -0,0 +1,21 @@ +What: /config/iio +Date: October 2015 +KernelVersion: 4.4 +Contact: linux-iio@vger.kernel.org +Description: + This represents Industrial IO configuration entry point + directory. It contains sub-groups corresponding to IIO + objects. + +What: /config/iio/triggers +Date: October 2015 +KernelVersion: 4.4 +Description: + Industrial IO software triggers directory. + +What: /config/iio/triggers/hrtimers +Date: October 2015 +KernelVersion: 4.4 +Description: + High resolution timers directory. Creating a directory here + will result in creating a hrtimer trigger in the IIO subsystem. diff --git a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink index bc7ff731aa0cf839fdc88e60e4934526387adc6a..f56335af2d88f7d8faf5ad0109e0da75aafe1e2b 100644 --- a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink +++ b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink @@ -10,3 +10,5 @@ Description: isoc_mult - 0..2 (hs/ss only) isoc_maxburst - 0..15 (ss only) buflen - buffer length + bulk_qlen - depth of queue for bulk + iso_qlen - depth of queue for iso diff --git a/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc b/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc new file mode 100644 index 0000000000000000000000000000000000000000..8916f7ec6507406dcef3f7cbfbcde5bc5209b189 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-ina2xx-adc @@ -0,0 +1,24 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_allow_async_readout +Date: December 2015 +KernelVersion: 4.4 +Contact: linux-iio@vger.kernel.org +Description: + By default (value '0'), the capture thread checks for the Conversion + Ready Flag to being set prior to committing a new value to the sample + buffer. This synchronizes the in-chip conversion rate with the + in-driver readout rate at the cost of an additional register read. + + Writing '1' will remove the polling for the Conversion Ready Flags to + save the additional i2c transaction, which will improve the bandwidth + available for reading data. However, samples can be occasionally skipped + or repeated, depending on the beat between the capture and conversion + rates. + +What: /sys/bus/iio/devices/iio:deviceX/in_shunt_resistor +Date: December 2015 +KernelVersion: 4.4 +Contact: linux-iio@vger.kernel.org +Description: + The value of the shunt resistor may be known only at runtime fom an + eeprom content read by a client application. This attribute allows to + set its value in ohms. diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb index 3a4abfc44f5e0d8a9e358626c1ca8251d756003d..0bd731cbb50ccdb1a1aadaa06b1977eb2331900f 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb +++ b/Documentation/ABI/testing/sysfs-bus-usb @@ -134,19 +134,21 @@ Description: enabled for the device. Developer can write y/Y/1 or n/N/0 to the file to enable/disable the feature. -What: /sys/bus/usb/devices/.../power/usb3_hardware_lpm -Date: June 2015 +What: /sys/bus/usb/devices/.../power/usb3_hardware_lpm_u1 + /sys/bus/usb/devices/.../power/usb3_hardware_lpm_u2 +Date: November 2015 Contact: Kevin Strasser + Lu Baolu Description: If CONFIG_PM is set and a USB 3.0 lpm-capable device is plugged in to a xHCI host which supports link PM, it will check if U1 and U2 exit latencies have been set in the BOS descriptor; if - the check is is passed and the host supports USB3 hardware LPM, + the check is passed and the host supports USB3 hardware LPM, USB3 hardware LPM will be enabled for the device and the USB - device directory will contain a file named - power/usb3_hardware_lpm. The file holds a string value (enable - or disable) indicating whether or not USB3 hardware LPM is - enabled for the device. + device directory will contain two files named + power/usb3_hardware_lpm_u1 and power/usb3_hardware_lpm_u2. These + files hold a string value (enable or disable) indicating whether + or not USB3 hardware LPM U1 or U2 is enabled for the device. What: /sys/bus/usb/devices/.../removable Date: February 2012 @@ -187,6 +189,17 @@ Description: The file will read "hotplug", "wired" and "not used" if the information is available, and "unknown" otherwise. +What: /sys/bus/usb/devices/.../(hub interface)/portX/usb3_lpm_permit +Date: November 2015 +Contact: Lu Baolu +Description: + Some USB3.0 devices are not friendly to USB3 LPM. usb3_lpm_permit + attribute allows enabling/disabling usb3 lpm of a port. It takes + effect both before and after a usb device is enumerated. Supported + values are "0" if both u1 and u2 are NOT permitted, "u1" if only u1 + is permitted, "u2" if only u2 is permitted, "u1_u2" if both u1 and + u2 are permitted. + What: /sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout Date: May 2013 Contact: Mathias Nyman diff --git a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm index 5cedf72df3584c89ebe10745b61c4343807704eb..f7be0e88b139d0483bca5624aa0cffb9243309b0 100644 --- a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm +++ b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm @@ -19,6 +19,25 @@ Description: Set to 0 to pad all frames. Set greater than tx_max to disable all padding. +What: /sys/class/net//cdc_ncm/ndp_to_end +Date: Dec 2015 +KernelVersion: 4.5 +Contact: Bjørn Mork +Description: + Boolean attribute showing the status of the "NDP to + end" quirk. Defaults to 'N', except for devices + already known to need it enabled. + + The "NDP to end" quirk makes the driver place the NDP + (the packet index table) after the payload. The NCM + specification does not mandate this, but some devices + are known to be more restrictive. Write 'Y' to this + attribute for temporary testing of a suspect device + failing to work with the default driver settings. + + A device entry should be added to the driver if this + quirk is found to be required. + What: /sys/class/net//cdc_ncm/rx_max Date: May 2014 KernelVersion: 3.16 diff --git a/Documentation/ABI/testing/sysfs-class-net-mesh b/Documentation/ABI/testing/sysfs-class-net-mesh index c464062966317b71838454a5de532095f4027851..c2b956d44a9520d276b7f13ee3d2617991127a5a 100644 --- a/Documentation/ABI/testing/sysfs-class-net-mesh +++ b/Documentation/ABI/testing/sysfs-class-net-mesh @@ -8,7 +8,7 @@ Description: What: /sys/class/net//mesh//ap_isolation Date: May 2011 -Contact: Antonio Quartulli +Contact: Antonio Quartulli Description: Indicates whether the data traffic going from a wireless client to another wireless client will be @@ -70,7 +70,7 @@ Description: What: /sys/class/net//mesh/isolation_mark Date: Nov 2013 -Contact: Antonio Quartulli +Contact: Antonio Quartulli Description: Defines the isolation mark (and its bitmask) which is used to classify clients as "isolated" by the diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi new file mode 100644 index 0000000000000000000000000000000000000000..fa5a00bb114372bfbc67008e8d0ebe8d1566ed6b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-qmi @@ -0,0 +1,23 @@ +What: /sys/class/net//qmi/raw_ip +Date: Dec 2015 +KernelVersion: 4.4 +Contact: Bjørn Mork +Description: + Boolean. Default: 'N' + + Set this to 'Y' to change the network device link + framing from '802.3' to 'raw-ip'. + + The netdev will change to reflect the link framing + mode. The netdev is an ordinary ethernet device in + '802.3' mode, and the driver expects to exchange + frames with an ethernet header over the USB link. The + netdev is a headerless p-t-p device in 'raw-ip' mode, + and the driver expects to echange IPv4 or IPv6 packets + without any L2 header over the USB link. + + Userspace is in full control of firmware configuration + through the delegation of the QMI protocol. Userspace + is responsible for coordination of driver and firmware + link framing mode, changing this setting to 'Y' if the + firmware is configured for 'raw-ip' mode. diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 0345f2d1c7278ffebb1406c2b6ecf10fb7f430ff..e5200f354abfe933d3fbe69f919da2dcc0e585a6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -87,6 +87,12 @@ Contact: "Jaegeuk Kim" Description: Controls the checkpoint timing. +What: /sys/fs/f2fs//idle_interval +Date: January 2016 +Contact: "Jaegeuk Kim" +Description: + Controls the idle timing. + What: /sys/fs/f2fs//ra_nid_pages Date: October 2015 Contact: "Chao Yu" diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch index 5bf42a840b22028280d5b997c5e8e523d06d4ab1..da87f43aec584de74945714247c6849bebf0cfa7 100644 --- a/Documentation/ABI/testing/sysfs-kernel-livepatch +++ b/Documentation/ABI/testing/sysfs-kernel-livepatch @@ -33,7 +33,7 @@ Description: The object directory contains subdirectories for each function that is patched within the object. -What: /sys/kernel/livepatch/// +What: /sys/kernel/livepatch/// Date: Nov 2014 KernelVersion: 3.19.0 Contact: live-patching@vger.kernel.org @@ -41,4 +41,8 @@ Description: The function directory contains attributes regarding the properties and state of the patched function. + The directory name contains the patched function name and a + sympos number corresponding to the nth occurrence of the symbol + name in kallsyms for the patched object. + There are currently no such attributes. diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp index 44806a678f12a21a3ff149bc1b7838a4c14da602..a17f817a93095b22f12b9aff657c0dd50c845fc7 100644 --- a/Documentation/ABI/testing/sysfs-ptp +++ b/Documentation/ABI/testing/sysfs-ptp @@ -74,7 +74,7 @@ Description: assignment may be changed by two writing numbers into the file. -What: /sys/class/ptp/ptpN/pps_avaiable +What: /sys/class/ptp/ptpN/pps_available Date: September 2010 Contact: Richard Cochran Description: diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 42a2d8593e39cd15acbab6ace23ed08e5c33a313..cdd8b24db68d3efc7f9a8d85130c8b851b2f963b 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -238,83 +238,32 @@ X!Isound/sound_firmware.c !Iinclude/media/videobuf2-memops.h Digital TV (DVB) devices -!Idrivers/media/dvb-core/dvb_ca_en50221.h -!Idrivers/media/dvb-core/dvb_frontend.h + Digital TV Common functions !Idrivers/media/dvb-core/dvb_math.h !Idrivers/media/dvb-core/dvb_ringbuffer.h !Idrivers/media/dvb-core/dvbdev.h - Digital TV Demux API - The kernel demux API defines a driver-internal interface for - registering low-level, hardware specific driver to a hardware - independent demux layer. It is only of interest for Digital TV - device driver writers. The header file for this API is named - demux.h and located in - drivers/media/dvb-core. - - The demux API should be implemented for each demux in the - system. It is used to select the TS source of a demux and to manage - the demux resources. When the demux client allocates a resource via - the demux API, it receives a pointer to the API of that - resource. - Each demux receives its TS input from a DVB front-end or from - memory, as set via this demux API. In a system with more than one - front-end, the API can be used to select one of the DVB front-ends - as a TS source for a demux, unless this is fixed in the HW platform. - The demux API only controls front-ends regarding to their connections - with demuxes; the APIs used to set the other front-end parameters, - such as tuning, are not defined in this document. - The functions that implement the abstract interface demux should - be defined static or module private and registered to the Demux - core for external access. It is not necessary to implement every - function in the struct dmx_demux. For example, - a demux interface might support Section filtering, but not PES - filtering. The API client is expected to check the value of any - function pointer before calling the function: the value of NULL means - that the “function is not available”. - Whenever the functions of the demux API modify shared data, - the possibilities of lost update and race condition problems should - be addressed, e.g. by protecting parts of code with mutexes. - Note that functions called from a bottom half context must not - sleep. Even a simple memory allocation without using GFP_ATOMIC can - result in a kernel thread being put to sleep if swapping is needed. - For example, the Linux kernel calls the functions of a network device - interface from a bottom half context. Thus, if a demux API function - is called from network device code, the function must not sleep. - - - -
- Demux Callback API - This kernel-space API comprises the callback functions that - deliver filtered data to the demux client. Unlike the other DVB - kABIs, these functions are provided by the client and called from - the demux code. - The function pointers of this abstract interface are not - packed into a structure as in the other demux APIs, because the - callback functions are registered and used independent of each - other. As an example, it is possible for the API client to provide - several callback functions for receiving TS packets and no - callbacks for PES packets or sections. - The functions that implement the callback API need not be - re-entrant: when a demux driver calls one of these functions, - the driver is not allowed to call the function again before - the original call returns. If a callback is triggered by a - hardware interrupt, it is recommended to use the Linux - “bottom half” mechanism or start a tasklet instead of - making the callback function call directly from a hardware - interrupt. - This mechanism is implemented by - dmx_ts_cb() and - dmx_section_cb(). -
- +
+ Digital TV Frontend kABI +!Pdrivers/media/dvb-core/dvb_frontend.h Digital TV Frontend +!Idrivers/media/dvb-core/dvb_frontend.h + + Digital TV Demux kABI +!Pdrivers/media/dvb-core/demux.h Digital TV Demux + Demux Callback API +!Pdrivers/media/dvb-core/demux.h Demux Callback + !Idrivers/media/dvb-core/demux.h - +
+ Digital TV Conditional Access kABI +!Idrivers/media/dvb-core/dvb_ca_en50221.h + + Remote Controller devices !Iinclude/media/rc-core.h !Iinclude/media/lirc_dev.h Media Controller devices +!Pinclude/media/media-device.h Media Controller !Iinclude/media/media-device.h !Iinclude/media/media-devnode.h !Iinclude/media/media-entity.h diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index 08527e7ea4d0ff009e55ddfb11f5efc716210e5d..2840ff483d5a204db2e5e1c183ca143295f7ab20 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -199,8 +199,10 @@ DVB_DOCUMENTED = \ # install_media_images = \ - $(Q)-mkdir $(MEDIA_OBJ_DIR)/media_api; \ - cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/*.svg $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api + $(Q)if [ "x$(findstring media_api.xml,$(DOCBOOKS))" != "x" ]; then \ + mkdir -p $(MEDIA_OBJ_DIR)/media_api; \ + cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/*.svg $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api; \ + fi $(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64 $(Q)base64 -d $< >$@ diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 08227d4e915044985b2a6fd7b7966f95dac6e7a0..e579ae5088aefa21023f77a58c5ea16ab2198f52 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -76,7 +76,7 @@ int main(void) NOTE: While it is possible to directly call the Kernel code like the above example, it is strongly recommended to use - libdvbv5, + libdvbv5, as it provides abstraction to work with the supported digital TV standards and provides methods for usual operations like program scanning and to read/write channel descriptor files. diff --git a/Documentation/DocBook/media/dvb/examples.xml b/Documentation/DocBook/media/dvb/examples.xml index c9f68c7183cca21ddd4ef7c9a6549fe56cfa953d..837fb3b64b72959a9fbb59331d51bade071e4d61 100644 --- a/Documentation/DocBook/media/dvb/examples.xml +++ b/Documentation/DocBook/media/dvb/examples.xml @@ -3,7 +3,7 @@ NOTE: This section is out of date, and the code below won't even compile. Please refer to the - libdvbv5 + libdvbv5 for updated/recommended examples. diff --git a/Documentation/DocBook/media/dvb/intro.xml b/Documentation/DocBook/media/dvb/intro.xml index 51db1564809976dc14e3b66f3c18b13024bb4c54..b5b701f5d8c297416ab734f182496b0fdaf06464 100644 --- a/Documentation/DocBook/media/dvb/intro.xml +++ b/Documentation/DocBook/media/dvb/intro.xml @@ -32,7 +32,7 @@ and filtering several section and PES data streams at the same time. new standard Linux DVB API. As a commitment to the development of terminals based on open standards, Nokia and Convergence made it available to all Linux developers and published it on - in September 2000. + in September 2000. Convergence is the maintainer of the Linux DVB API. Together with the LinuxTV community (i.e. you, the reader of this document), the Linux DVB API will be constantly reviewed and improved. With the Linux driver for diff --git a/Documentation/DocBook/media/v4l/capture.c.xml b/Documentation/DocBook/media/v4l/capture.c.xml index 1c5c49a2de59322174c9d89af61feb34585e8fe6..22126a991b3498c82c9b5ab43d3c37b0873cdb2d 100644 --- a/Documentation/DocBook/media/v4l/capture.c.xml +++ b/Documentation/DocBook/media/v4l/capture.c.xml @@ -5,7 +5,7 @@ * This program can be used and distributed without restrictions. * * This program is provided with the V4L2 API - * see http://linuxtv.org/docs.php for more information + * see https://linuxtv.org/docs.php for more information */ #include <stdio.h> diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml index 5701a08ed792dae2cf3fdbd5a8a8bda4fe29cc2d..5399e8904715da19fe3eec6ae40e97588dae9e8e 100644 --- a/Documentation/DocBook/media/v4l/compat.xml +++ b/Documentation/DocBook/media/v4l/compat.xml @@ -2666,7 +2666,7 @@ is useful to display images captured with V4L2 devices. V4L2 does not support digital terrestrial, cable or satellite broadcast. A separate project aiming at digital receivers exists. You can find its homepage at http://linuxtv.org. The Linux DVB API +url="https://linuxtv.org">https://linuxtv.org. The Linux DVB API has no connection to the V4L2 API except that drivers for hybrid hardware may support both. diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml index da654031ef3fd82d5ec73c56ad65af38e131e0f3..144158b3a5acad6e64f4e7be75bd82c54df1532a 100644 --- a/Documentation/DocBook/media/v4l/io.xml +++ b/Documentation/DocBook/media/v4l/io.xml @@ -699,7 +699,7 @@ linkend="v4l2-buf-type" /> buffer. It depends on the negotiated data format and may change with each buffer for compressed variable size data like JPEG images. Drivers must set this field when type -refers to an input stream, applications when it refers to an output stream. +refers to a capture stream, applications when it refers to an output stream. If the application sets this to 0 for an output stream, then bytesused will be set to the size of the buffer (see the length field of this struct) by @@ -720,14 +720,14 @@ linkend="buffer-flags" />. Indicates the field order of the image in the buffer, see . This field is not used when the buffer contains VBI data. Drivers must set it when -type refers to an input stream, +type refers to a capture stream, applications when it refers to an output stream. struct timeval timestamp - For input streams this is time when the first data + For capture streams this is time when the first data byte was captured, as returned by the clock_gettime() function for the relevant clock id; see V4L2_BUF_FLAG_TIMESTAMP_* in @@ -866,7 +866,7 @@ must set this to 0. The number of bytes occupied by data in the plane (its payload). Drivers must set this field when type - refers to an input stream, applications when it refers to an output stream. + refers to a capture stream, applications when it refers to an output stream. If the application sets this to 0 for an output stream, then bytesused will be set to the size of the plane (see the length field of this struct) @@ -919,7 +919,7 @@ must set this to 0. Offset in bytes to video data in the plane. Drivers must set this field when type - refers to an input stream, applications when it refers to an output stream. + refers to a capture stream, applications when it refers to an output stream. Note that data_offset is included in bytesused. So the size of the image in the plane is bytesused-data_offset at diff --git a/Documentation/DocBook/media/v4l/media-controller.xml b/Documentation/DocBook/media/v4l/media-controller.xml index 873ac3a621f013a36148bcdf7f878e6a0bf419c2..5f2fc07a93d72a0f45411cd27c6a0e6fd7125740 100644 --- a/Documentation/DocBook/media/v4l/media-controller.xml +++ b/Documentation/DocBook/media/v4l/media-controller.xml @@ -58,21 +58,36 @@ Media device model Discovering a device internal topology, and configuring it at runtime, is one of the goals of the media controller API. To achieve this, hardware - devices are modelled as an oriented graph of building blocks called entities - connected through pads. - An entity is a basic media hardware or software building block. It can - correspond to a large variety of logical blocks such as physical hardware - devices (CMOS sensor for instance), logical hardware devices (a building - block in a System-on-Chip image processing pipeline), DMA channels or - physical connectors. - A pad is a connection endpoint through which an entity can interact - with other entities. Data (not restricted to video) produced by an entity - flows from the entity's output to one or more entity inputs. Pads should not - be confused with physical pins at chip boundaries. - A link is a point-to-point oriented connection between two pads, - either on the same entity or on different entities. Data flows from a source - pad to a sink pad. + devices and Linux Kernel interfaces are modelled as graph objects on + an oriented graph. The object types that constitute the graph are: + + An entity + is a basic media hardware or software building block. It can correspond to + a large variety of logical blocks such as physical hardware devices + (CMOS sensor for instance), logical hardware devices (a building block in + a System-on-Chip image processing pipeline), DMA channels or physical + connectors. + An interface + is a graph representation of a Linux Kernel userspace API interface, + like a device node or a sysfs file that controls one or more entities + in the graph. + A pad + is a data connection endpoint through which an entity can interact with + other entities. Data (not restricted to video) produced by an entity + flows from the entity's output to one or more entity inputs. Pads should + not be confused with physical pins at chip boundaries. + A data link + is a point-to-point oriented connection between two pads, either on the + same entity or on different entities. Data flows from a source pad to a + sink pad. + An interface link + is a point-to-point bidirectional control connection between a Linux + Kernel interface and an entity.m + + + + &sub-media-types; @@ -83,6 +98,7 @@ &sub-media-func-ioctl; &sub-media-ioc-device-info; + &sub-media-ioc-g-topology; &sub-media-ioc-enum-entities; &sub-media-ioc-enum-links; &sub-media-ioc-setup-link; diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml index 5872f8bbf7747efb848daf103b3d70271b18e1c6..0c4f96bfc2defe8c6b0dedb55b6194f6f06cd17b 100644 --- a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml +++ b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml @@ -59,15 +59,6 @@ Entity IDs can be non-contiguous. Applications must not try to enumerate entities by calling MEDIA_IOC_ENUM_ENTITIES with increasing id's until they get an error. - Two or more entities that share a common non-zero - group_id value are considered as logically - grouped. Groups are used to report - - ALSA, VBI and video nodes that carry the same media - stream - lens and flash controllers associated with a sensor - - struct <structname>media_entity_desc</structname> @@ -106,7 +97,7 @@ revision - Entity revision in a driver/hardware specific format. + Entity revision. Always zero (obsolete) __u32 @@ -120,7 +111,7 @@ group_id - Entity group ID + Entity group ID. Always zero (obsolete) __u16 @@ -171,97 +162,6 @@
- - - Media entity types - - - - - - MEDIA_ENT_T_DEVNODE - Unknown device node - - - MEDIA_ENT_T_DEVNODE_V4L - V4L video, radio or vbi device node - - - MEDIA_ENT_T_DEVNODE_FB - Frame buffer device node - - - MEDIA_ENT_T_DEVNODE_ALSA - ALSA card - - - MEDIA_ENT_T_DEVNODE_DVB_FE - DVB frontend devnode - - - MEDIA_ENT_T_DEVNODE_DVB_DEMUX - DVB demux devnode - - - MEDIA_ENT_T_DEVNODE_DVB_DVR - DVB DVR devnode - - - MEDIA_ENT_T_DEVNODE_DVB_CA - DVB CAM devnode - - - MEDIA_ENT_T_DEVNODE_DVB_NET - DVB network devnode - - - MEDIA_ENT_T_V4L2_SUBDEV - Unknown V4L sub-device - - - MEDIA_ENT_T_V4L2_SUBDEV_SENSOR - Video sensor - - - MEDIA_ENT_T_V4L2_SUBDEV_FLASH - Flash controller - - - MEDIA_ENT_T_V4L2_SUBDEV_LENS - Lens controller - - - MEDIA_ENT_T_V4L2_SUBDEV_DECODER - Video decoder, the basic function of the video decoder is to - accept analogue video from a wide variety of sources such as - broadcast, DVD players, cameras and video cassette recorders, in - either NTSC, PAL or HD format and still occasionally SECAM, separate - it into its component parts, luminance and chrominance, and output - it in some digital video standard, with appropriate embedded timing - signals. - - - MEDIA_ENT_T_V4L2_SUBDEV_TUNER - TV and/or radio tuner - - - -
- - - Media entity flags - - - - - - MEDIA_ENT_FL_DEFAULT - Default entity for its type. Used to discover the default - audio, VBI and video devices, the default camera sensor, ... - - - -
diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml index 74fb394ec6676295fbf5e46813aa0bec2b4fa108..2bbeea9f3e18c80ad8206b1bfed3d37705188c78 100644 --- a/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml +++ b/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml @@ -118,35 +118,6 @@ - - Media pad flags - - - - - - MEDIA_PAD_FL_SINK - Input pad, relative to the entity. Input pads sink data and - are targets of links. - - - MEDIA_PAD_FL_SOURCE - Output pad, relative to the entity. Output pads source data - and are origins of links. - - - MEDIA_PAD_FL_MUST_CONNECT - If this flag is set and the pad is linked to any other - pad, then at least one of those links must be enabled for the - entity to be able to stream. There could be temporary reasons - (e.g. device configuration dependent) for the pad to need - enabled links even when this flag isn't set; the absence of the - flag doesn't imply there is none. - - - -
- struct <structname>media_link_desc</structname> @@ -171,33 +142,6 @@ - - Media link flags - - - - - - MEDIA_LNK_FL_ENABLED - The link is enabled and can be used to transfer media data. - When two or more links target a sink pad, only one of them can be - enabled at a time. - - - MEDIA_LNK_FL_IMMUTABLE - The link enabled state can't be modified at runtime. An - immutable link is always enabled. - - - MEDIA_LNK_FL_DYNAMIC - The link enabled state can be modified during streaming. This - flag is set by drivers and is read-only for applications. - - - - - One and only one of MEDIA_PAD_FL_SINK and - MEDIA_PAD_FL_SOURCE must be set for every pad.
diff --git a/Documentation/DocBook/media/v4l/media-ioc-g-topology.xml b/Documentation/DocBook/media/v4l/media-ioc-g-topology.xml new file mode 100644 index 0000000000000000000000000000000000000000..63152ab9efbad0b10c59105effc55c49cc02d488 --- /dev/null +++ b/Documentation/DocBook/media/v4l/media-ioc-g-topology.xml @@ -0,0 +1,394 @@ + + + ioctl MEDIA_IOC_G_TOPOLOGY + &manvol; + + + + MEDIA_IOC_G_TOPOLOGY + Enumerate the graph topology and graph element properties + + + + + + int ioctl + int fd + int request + struct media_v2_topology *argp + + + + + + Arguments + + + + fd + + File descriptor returned by + open(). + + + + request + + MEDIA_IOC_G_TOPOLOGY + + + + argp + + + + + + + + + Description + + NOTE: This new ioctl is programmed to be added on Kernel 4.6. Its definition/arguments may change until its final version. + + The typical usage of this ioctl is to call it twice. + On the first call, the structure defined at &media-v2-topology; should + be zeroed. At return, if no errors happen, this ioctl will return the + topology_version and the total number of entities, + interfaces, pads and links. + Before the second call, the userspace should allocate arrays to + store the graph elements that are desired, putting the pointers to them + at the ptr_entities, ptr_interfaces, ptr_links and/or ptr_pads, keeping + the other values untouched. + If the topology_version remains the same, the + ioctl should fill the desired arrays with the media graph elements. + + + struct <structname>media_v2_topology</structname> + + + + + + + + + __u64 + topology_version + + + Version of the media graph topology. When the graph is + created, this field starts with zero. Every time a graph + element is added or removed, this field is + incremented. + + + __u64 + num_entities + + + Number of entities in the graph + + + __u64 + ptr_entities + + + A pointer to a memory area where the entities array + will be stored, converted to a 64-bits integer. + It can be zero. if zero, the ioctl won't store the + entities. It will just update + num_entities + + + __u64 + num_interfaces + + + Number of interfaces in the graph + + + __u64 + ptr_interfaces + + + A pointer to a memory area where the interfaces array + will be stored, converted to a 64-bits integer. + It can be zero. if zero, the ioctl won't store the + interfaces. It will just update + num_interfaces + + + __u64 + num_pads + + + Total number of pads in the graph + + + __u64 + ptr_pads + + + A pointer to a memory area where the pads array + will be stored, converted to a 64-bits integer. + It can be zero. if zero, the ioctl won't store the + pads. It will just update + num_pads + + + __u64 + num_links + + + Total number of data and interface links in the graph + + + __u64 + ptr_links + + + A pointer to a memory area where the links array + will be stored, converted to a 64-bits integer. + It can be zero. if zero, the ioctl won't store the + links. It will just update + num_links + + + +
+ + + struct <structname>media_v2_entity</structname> + + + + + + + + + __u32 + id + + + Unique ID for the entity. + + + char + name[64] + + + Entity name as an UTF-8 NULL-terminated string. + + + __u32 + function + + + Entity main function, see for details. + + + __u32 + reserved[12] + Reserved for future extensions. Drivers and applications must + set this array to zero. + + + +
+ + + struct <structname>media_v2_interface</structname> + + + + + + + + + __u32 + id + + + Unique ID for the interface. + + + __u32 + intf_type + + + Interface type, see for details. + + + __u32 + flags + + + Interface flags. Currently unused. + + + __u32 + reserved[9] + + + Reserved for future extensions. Drivers and applications must + set this array to zero. + + + struct media_v2_intf_devnode + devnode + + + Used only for device node interfaces. See for details.. + + + +
+ + + struct <structname>media_v2_interface</structname> + + + + + + + + + __u32 + major + + + Device node major number. + + + __u32 + minor + + + Device node minor number. + + + +
+ + + struct <structname>media_v2_pad</structname> + + + + + + + + + __u32 + id + + + Unique ID for the pad. + + + __u32 + entity_id + + + Unique ID for the entity where this pad belongs. + + + __u32 + flags + + + Pad flags, see for more details. + + + __u32 + reserved[9] + + + Reserved for future extensions. Drivers and applications must + set this array to zero. + + + +
+ + + struct <structname>media_v2_pad</structname> + + + + + + + + + __u32 + id + + + Unique ID for the pad. + + + __u32 + source_id + + + + On pad to pad links: unique ID for the source pad. + On interface to entity links: unique ID for the interface. + + + + __u32 + sink_id + + + + On pad to pad links: unique ID for the sink pad. + On interface to entity links: unique ID for the entity. + + + + __u32 + flags + + + Link flags, see for more details. + + + __u32 + reserved[5] + + + Reserved for future extensions. Drivers and applications must + set this array to zero. + + + + + +
+ + + &return-value; + + + + ENOSPC + + This is returned when either one or more of the num_entities, + num_interfaces, num_links or num_pads are non-zero and are smaller + than the actual number of elements inside the graph. This may happen + if the topology_version changed when compared + to the last time this ioctl was called. Userspace should usually + free the area for the pointers, zero the struct elements and call + this ioctl again. + + + + +
diff --git a/Documentation/DocBook/media/v4l/media-types.xml b/Documentation/DocBook/media/v4l/media-types.xml new file mode 100644 index 0000000000000000000000000000000000000000..1af3842509105ea45fd815ad953781d901b03bfe --- /dev/null +++ b/Documentation/DocBook/media/v4l/media-types.xml @@ -0,0 +1,240 @@ +
+Types and flags used to represent the media graph elements + + + Media entity types + + + + + + MEDIA_ENT_F_UNKNOWN and MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN + Unknown entity. That generally indicates that + a driver didn't initialize properly the entity, with is a Kernel bug + + + MEDIA_ENT_F_IO_V4L + Data streaming input and/or output entity. + + + MEDIA_ENT_F_IO_VBI + V4L VBI streaming input or output entity + + + MEDIA_ENT_F_IO_SWRADIO + V4L Software Digital Radio (SDR) streaming input or output entity + + + MEDIA_ENT_F_IO_DTV + DVB Digital TV streaming input or output entity + + + MEDIA_ENT_F_DTV_DEMOD + Digital TV demodulator entity. + + + MEDIA_ENT_F_TS_DEMUX + MPEG Transport stream demux entity. Could be implemented on hardware or in Kernelspace by the Linux DVB subsystem. + + + MEDIA_ENT_F_DTV_CA + Digital TV Conditional Access module (CAM) entity + + + MEDIA_ENT_F_DTV_NET_DECAP + Digital TV network ULE/MLE desencapsulation entity. Could be implemented on hardware or in Kernelspace + + + MEDIA_ENT_F_CONN_RF + Connector for a Radio Frequency (RF) signal. + + + MEDIA_ENT_F_CONN_SVIDEO + Connector for a S-Video signal. + + + MEDIA_ENT_F_CONN_COMPOSITE + Connector for a RGB composite signal. + + + MEDIA_ENT_F_CONN_TEST + Connector for a test generator. + + + MEDIA_ENT_F_CAM_SENSOR + Camera video sensor entity. + + + MEDIA_ENT_F_FLASH + Flash controller entity. + + + MEDIA_ENT_F_LENS + Lens controller entity. + + + MEDIA_ENT_F_ATV_DECODER + Analog video decoder, the basic function of the video decoder + is to accept analogue video from a wide variety of sources such as + broadcast, DVD players, cameras and video cassette recorders, in + either NTSC, PAL, SECAM or HD format, separating the stream + into its component parts, luminance and chrominance, and output + it in some digital video standard, with appropriate timing + signals. + + + MEDIA_ENT_F_TUNER + Digital TV, analog TV, radio and/or software radio tuner. + + + +
+ + + Media entity flags + + + + + + MEDIA_ENT_FL_DEFAULT + Default entity for its type. Used to discover the default + audio, VBI and video devices, the default camera sensor, ... + + + MEDIA_ENT_FL_CONNECTOR + The entity represents a data conector + + + +
+ + + Media interface types + + + + + + + MEDIA_INTF_T_DVB_FE + Device node interface for the Digital TV frontend + typically, /dev/dvb/adapter?/frontend? + + + MEDIA_INTF_T_DVB_DEMUX + Device node interface for the Digital TV demux + typically, /dev/dvb/adapter?/demux? + + + MEDIA_INTF_T_DVB_DVR + Device node interface for the Digital TV DVR + typically, /dev/dvb/adapter?/dvr? + + + MEDIA_INTF_T_DVB_CA + Device node interface for the Digital TV Conditional Access + typically, /dev/dvb/adapter?/ca? + + + MEDIA_INTF_T_DVB_FE + Device node interface for the Digital TV network control + typically, /dev/dvb/adapter?/net? + + + MEDIA_INTF_T_V4L_VIDEO + Device node interface for video (V4L) + typically, /dev/video? + + + MEDIA_INTF_T_V4L_VBI + Device node interface for VBI (V4L) + typically, /dev/vbi? + + + MEDIA_INTF_T_V4L_RADIO + Device node interface for radio (V4L) + typically, /dev/vbi? + + + MEDIA_INTF_T_V4L_SUBDEV + Device node interface for a V4L subdevice + typically, /dev/v4l-subdev? + + + MEDIA_INTF_T_V4L_SWRADIO + Device node interface for Software Defined Radio (V4L) + typically, /dev/swradio? + + + +
+ + + Media pad flags + + + + + + MEDIA_PAD_FL_SINK + Input pad, relative to the entity. Input pads sink data and + are targets of links. + + + MEDIA_PAD_FL_SOURCE + Output pad, relative to the entity. Output pads source data + and are origins of links. + + + MEDIA_PAD_FL_MUST_CONNECT + If this flag is set and the pad is linked to any other + pad, then at least one of those links must be enabled for the + entity to be able to stream. There could be temporary reasons + (e.g. device configuration dependent) for the pad to need + enabled links even when this flag isn't set; the absence of the + flag doesn't imply there is none. + + + +
+ + One and only one of MEDIA_PAD_FL_SINK and + MEDIA_PAD_FL_SOURCE must be set for every pad. + + + Media link flags + + + + + + MEDIA_LNK_FL_ENABLED + The link is enabled and can be used to transfer media data. + When two or more links target a sink pad, only one of them can be + enabled at a time. + + + MEDIA_LNK_FL_IMMUTABLE + The link enabled state can't be modified at runtime. An + immutable link is always enabled. + + + MEDIA_LNK_FL_DYNAMIC + The link enabled state can be modified during streaming. This + flag is set by drivers and is read-only for applications. + + + MEDIA_LNK_FL_LINK_TYPE + This is a bitmask that defines the type of the link. + Currently, two types of links are supported: + MEDIA_LNK_FL_DATA_LINK + if the link is between two pads + MEDIA_LNK_FL_INTERFACE_LINK + if the link is between an interface and an entity + + + + + +
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml index 7e61643358de034df628a0106e84e1e222315566..42e626d6c936234ad469b5b51dfef401b232d3d3 100644 --- a/Documentation/DocBook/media/v4l/v4l2.xml +++ b/Documentation/DocBook/media/v4l/v4l2.xml @@ -151,6 +151,16 @@ Rubli, Andy Walls, Muralidharan Karicheri, Mauro Carvalho Chehab, structs, ioctls) must be noted in more detail in the history chapter (compat.xml), along with the possible impact on existing drivers and applications. --> + + 4.5 + 2015-10-29 + rr + Extend vidioc-g-ext-ctrls;. Replace ctrl_class with a new +union with ctrl_class and which. Which is used to select the current value of +the control or the default value. + + + 4.4 2015-05-26 diff --git a/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml b/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml index 8ffe74f84af1f40209cf028e0a217e0b81dc3bb5..d81fa0d4016b15208135babd76175da8982342db 100644 --- a/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml +++ b/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml @@ -58,7 +58,7 @@ This ioctl is used to create buffers for memory mapped or user pointer or DMA buffer I/O. It can be used as an alternative or in -addition to the VIDIOC_REQBUFS ioctl, when a tighter +addition to the &VIDIOC-REQBUFS; ioctl, when a tighter control over buffers is required. This ioctl can be called multiple times to create buffers of different sizes. @@ -71,30 +71,28 @@ zeroed. The format field specifies the image format that the buffers must be able to handle. The application has to fill in this -&v4l2-format;. Usually this will be done using the -VIDIOC_TRY_FMT or VIDIOC_G_FMT ioctl() -to ensure that the requested format is supported by the driver. Unsupported -formats will result in an error. +&v4l2-format;. Usually this will be done using the &VIDIOC-TRY-FMT; or &VIDIOC-G-FMT; ioctls +to ensure that the requested format is supported by the driver. +Based on the format's type field the requested buffer +size (for single-planar) or plane sizes (for multi-planar formats) will be +used for the allocated buffers. The driver may return an error if the size(s) +are not supported by the hardware (usually because they are too small). The buffers created by this ioctl will have as minimum size the size -defined by the format.pix.sizeimage field. If the +defined by the format.pix.sizeimage field (or the +corresponding fields for other format types). Usually if the format.pix.sizeimage field is less than the minimum -required for the given format, then sizeimage will be -increased by the driver to that minimum to allocate the buffers. If it is -larger, then the value will be used as-is. The same applies to the -sizeimage field of the -v4l2_plane_pix_format structure in the case of -multiplanar formats. +required for the given format, then an error will be returned since drivers will +typically not allow this. If it is larger, then the value will be used as-is. +In other words, the driver may reject the requested size, but if it is accepted +the driver will use it unchanged. When the ioctl is called with a pointer to this structure the driver will attempt to allocate up to the requested number of buffers and store the actual number allocated and the starting index in the count and the index fields respectively. On return count can be smaller than -the number requested. The driver may also increase buffer sizes if required, -however, it will not update sizeimage field values. -The user has to use VIDIOC_QUERYBUF to retrieve that -information. +the number requested. struct <structname>v4l2_create_buffers</structname> diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml index 4c4603c135fe2b6843e82b542fd4e94a11cfbdfb..f14a3bb1afaa6fdd2b9eff83546edfdc7325a723 100644 --- a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml +++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml @@ -99,7 +99,7 @@ if the driver supports writing registers to the device.We recommended the v4l2-dbg utility over calling this ioctl directly. It is available from the LinuxTV v4l-dvb repository; see http://linuxtv.org/repo/ for +url="https://linuxtv.org/repo/">https://linuxtv.org/repo/ for access instructions. -http://www.linuxtv.org/lists.php"> +https://linuxtv.org/lists.php"> -http://linuxtv.org/repo/"> +https://linuxtv.org/repo/"> --------"> ----------"> ------------"> @@ -91,7 +91,7 @@ components, like mixers, PCM capture, PCM playback, etc, which are controlled via ALSA API.For additional information and for the latest development code, - see: http://linuxtv.org. + see: https://linuxtv.org.For discussing improvements, reporting troubles, sending new drivers, etc, please mail to: Linux Media Mailing List (LMML).. diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 7da8f0402af50b880b27055122c8128bdc6fb8c7..b442921bca540a7e8833be39446822dc48ca1628 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -162,12 +162,15 @@ Basic defines - At least you have to provide a mtd structure and - a storage for the ioremap'ed chip address. - You can allocate the mtd structure using kmalloc - or you can allocate it statically. - In case of static allocation you have to allocate - a nand_chip structure too. + At least you have to provide a nand_chip structure + and a storage for the ioremap'ed chip address. + You can allocate the nand_chip structure using + kmalloc or you can allocate it statically. + The NAND chip structure embeds an mtd structure + which will be registered to the MTD subsystem. + You can extract a pointer to the mtd structure + from a nand_chip pointer using the nand_to_mtd() + helper. Kmalloc based example @@ -180,7 +183,6 @@ static void __iomem *baseaddr; Static example -static struct mtd_info board_mtd; static struct nand_chip board_chip; static void __iomem *baseaddr; @@ -235,7 +237,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd) static void board_hwcontrol(struct mtd_info *mtd, int cmd) { - struct nand_chip *this = (struct nand_chip *) mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); switch(cmd){ case NAND_CTL_SETCLE: this->IO_ADDR_W |= CLE_ADRR_BIT; break; case NAND_CTL_CLRCLE: this->IO_ADDR_W &= ~CLE_ADRR_BIT; break; @@ -274,13 +276,15 @@ static int __init board_init (void) int err = 0; /* Allocate memory for MTD device structure and private data */ - board_mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL); - if (!board_mtd) { + this = kzalloc(sizeof(struct nand_chip), GFP_KERNEL); + if (!this) { printk ("Unable to allocate NAND MTD device structure.\n"); err = -ENOMEM; goto out; } + board_mtd = nand_to_mtd(this); + /* map physical address */ baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024); if (!baseaddr) { @@ -289,11 +293,6 @@ static int __init board_init (void) goto out_mtd; } - /* Get pointer to private data */ - this = (struct nand_chip *) (); - /* Link the private data with the MTD structure */ - board_mtd->priv = this; - /* Set address of NAND IO lines */ this->IO_ADDR_R = baseaddr; this->IO_ADDR_W = baseaddr; @@ -317,7 +316,7 @@ static int __init board_init (void) out_ior: iounmap(baseaddr); out_mtd: - kfree (board_mtd); + kfree (this); out: return err; } @@ -343,7 +342,7 @@ static void __exit board_cleanup (void) iounmap(baseaddr); /* Free the MTD device structure */ - kfree (board_mtd); + kfree (mtd_to_nand(board_mtd)); } module_exit(board_cleanup); #endif @@ -399,7 +398,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) static void board_select_chip (struct mtd_info *mtd, int chip) { - struct nand_chip *this = (struct nand_chip *) mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); /* Deselect all chips */ this->IO_ADDR_R &= ~BOARD_NAND_ADDR_MASK; diff --git a/Documentation/Makefile b/Documentation/Makefile index bc0548201755e1a8d29614bccbd78fcbbe5a34ae..1207d7907650028927809c57c274d3cb962337c4 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -1,4 +1,4 @@ subdir-y := accounting auxdisplay blackfin connector \ filesystems filesystems ia64 laptops mic misc-devices \ - networking pcmcia prctl ptp spi timers vDSO video4linux \ + networking pcmcia prctl ptp timers vDSO video4linux \ watchdog diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png new file mode 100644 index 0000000000000000000000000000000000000000..7496a55e4e7b41becdb658a2bd34765fbe80013f Binary files /dev/null and b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png differ diff --git a/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg new file mode 100644 index 0000000000000000000000000000000000000000..4b4014fda7703ddae1b7de1b9304ad32b40071af --- /dev/null +++ b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg @@ -0,0 +1,374 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + synchronize_rcu() + + + + + + + WRITE_ONCE(a, 1); + WRITE_ONCE(b, 1); + r1 = READ_ONCE(a); + WRITE_ONCE(c, 1); + r2 = READ_ONCE(b); + r3 = READ_ONCE(c); + thread0() + thread1() + thread2() + + + + rcu_read_lock(); + rcu_read_lock(); + rcu_read_unlock(); + rcu_read_unlock(); + + QS + + QS + + + QS + + diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg new file mode 100644 index 0000000000000000000000000000000000000000..ebcbeee391ed7babdce76130c60977f76ca10832 --- /dev/null +++ b/Documentation/RCU/Design/Requirements/RCUApplicability.svg @@ -0,0 +1,237 @@ + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + Read-Mostly, Stale & + + Inconsistent Data OK + + (RCU Works Great!!!) + + (RCU Works Well) + + Read-Mostly, Need Consistent Data + + Read-Write, Need Consistent Data + + Update-Mostly, Need Consistent Data + + (RCU Might Be OK...) + + (1) Provide Existence Guarantees For Update-Friendly Mechanisms + + (2) Provide Wait-Free Read-Side Primitives for Real-Time Use) + + (RCU is Very Unlikely to be the Right Tool For The Job, But it Can: + + diff --git a/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg new file mode 100644 index 0000000000000000000000000000000000000000..48cd1623d4d49c48024af9d30f8d6d0f6114f1ae --- /dev/null +++ b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg @@ -0,0 +1,639 @@ + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + synchronize_rcu() + + + + + + + WRITE_ONCE(a, 1); + WRITE_ONCE(b, 1); + r1 = READ_ONCE(a); + WRITE_ONCE(c, 1); + WRITE_ONCE(d, 1); + r2 = READ_ONCE(c); + thread0() + thread1() + thread2() + + + + rcu_read_lock(); + rcu_read_lock(); + rcu_read_unlock(); + rcu_read_unlock(); + + QS + + QS + + + QS + + + + synchronize_rcu() + + + + + + + r3 = READ_ONCE(d); + WRITE_ONCE(e, 1); + + QS + r4 = READ_ONCE(b); + r5 = READ_ONCE(e); + rcu_read_lock(); + rcu_read_unlock(); + QS + + QS + + QS + + thread3() + thread4() + + diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html new file mode 100644 index 0000000000000000000000000000000000000000..a725f9900ec8962a43dfb888f71f1a2b05efabf0 --- /dev/null +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -0,0 +1,2897 @@ + + + + + A Tour Through RCU's Requirements [LWN.net] + + +

A Tour Through RCU's Requirements

+ +

Copyright IBM Corporation, 2015

+

Author: Paul E. McKenney

+

The initial version of this document appeared in the +LWN articles +here, +here, and +here.

+ +

Introduction

+ +

+Read-copy update (RCU) is a synchronization mechanism that is often +used as a replacement for reader-writer locking. +RCU is unusual in that updaters do not block readers, +which means that RCU's read-side primitives can be exceedingly fast +and scalable. +In addition, updaters can make useful forward progress concurrently +with readers. +However, all this concurrency between RCU readers and updaters does raise +the question of exactly what RCU readers are doing, which in turn +raises the question of exactly what RCU's requirements are. + +

+This document therefore summarizes RCU's requirements, and can be thought +of as an informal, high-level specification for RCU. +It is important to understand that RCU's specification is primarily +empirical in nature; +in fact, I learned about many of these requirements the hard way. +This situation might cause some consternation, however, not only +has this learning process been a lot of fun, but it has also been +a great privilege to work with so many people willing to apply +technologies in interesting new ways. + +

+All that aside, here are the categories of currently known RCU requirements: +

+ +
    +
  1. + Fundamental Requirements +
  2. Fundamental Non-Requirements +
  3. + Parallelism Facts of Life +
  4. + Quality-of-Implementation Requirements +
  5. + Linux Kernel Complications +
  6. + Software-Engineering Requirements +
  7. + Other RCU Flavors +
  8. + Possible Future Changes +
+ +

+This is followed by a summary, +which is in turn followed by the inevitable +answers to the quick quizzes. + +

Fundamental Requirements

+ +

+RCU's fundamental requirements are the closest thing RCU has to hard +mathematical requirements. +These are: + +

    +
  1. + Grace-Period Guarantee +
  2. + Publish-Subscribe Guarantee +
  3. + Memory-Barrier Guarantees +
  4. + RCU Primitives Guaranteed to Execute Unconditionally +
  5. + Guaranteed Read-to-Write Upgrade +
+ +

Grace-Period Guarantee

+ +

+RCU's grace-period guarantee is unusual in being premeditated: +Jack Slingwine and I had this guarantee firmly in mind when we started +work on RCU (then called “rclock”) in the early 1990s. +That said, the past two decades of experience with RCU have produced +a much more detailed understanding of this guarantee. + +

+RCU's grace-period guarantee allows updaters to wait for the completion +of all pre-existing RCU read-side critical sections. +An RCU read-side critical section +begins with the marker rcu_read_lock() and ends with +the marker rcu_read_unlock(). +These markers may be nested, and RCU treats a nested set as one +big RCU read-side critical section. +Production-quality implementations of rcu_read_lock() and +rcu_read_unlock() are extremely lightweight, and in +fact have exactly zero overhead in Linux kernels built for production +use with CONFIG_PREEMPT=n. + +

+This guarantee allows ordering to be enforced with extremely low +overhead to readers, for example: + +

+
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5   rcu_read_lock();
+ 6   r1 = READ_ONCE(x);
+ 7   r2 = READ_ONCE(y);
+ 8   rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13   WRITE_ONCE(x, 1);
+14   synchronize_rcu();
+15   WRITE_ONCE(y, 1);
+16 }
+
+
+ +

+Because the synchronize_rcu() on line 14 waits for +all pre-existing readers, any instance of thread0() that +loads a value of zero from x must complete before +thread1() stores to y, so that instance must +also load a value of zero from y. +Similarly, any instance of thread0() that loads a value of +one from y must have started after the +synchronize_rcu() started, and must therefore also load +a value of one from x. +Therefore, the outcome: +

+
+(r1 == 0 && r2 == 1)
+
+
+cannot happen. + +

Quick Quiz 1: +Wait a minute! +You said that updaters can make useful forward progress concurrently +with readers, but pre-existing readers will block +synchronize_rcu()!!! +Just who are you trying to fool??? +
Answer + +

+This scenario resembles one of the first uses of RCU in +DYNIX/ptx, +which managed a distributed lock manager's transition into +a state suitable for handling recovery from node failure, +more or less as follows: + +

+
+ 1 #define STATE_NORMAL        0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING    2
+ 4 #define STATE_WANT_NORMAL   3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+10   int state_snap;
+11
+12   rcu_read_lock();
+13   state_snap = READ_ONCE(state);
+14   if (state_snap == STATE_NORMAL)
+15     do_something();
+16   else
+17     do_something_carefully();
+18   rcu_read_unlock();
+19 }
+20
+21 void start_recovery(void)
+22 {
+23   WRITE_ONCE(state, STATE_WANT_RECOVERY);
+24   synchronize_rcu();
+25   WRITE_ONCE(state, STATE_RECOVERING);
+26   recovery();
+27   WRITE_ONCE(state, STATE_WANT_NORMAL);
+28   synchronize_rcu();
+29   WRITE_ONCE(state, STATE_NORMAL);
+30 }
+
+
+ +

+The RCU read-side critical section in do_something_dlm() +works with the synchronize_rcu() in start_recovery() +to guarantee that do_something() never runs concurrently +with recovery(), but with little or no synchronization +overhead in do_something_dlm(). + +

Quick Quiz 2: +Why is the synchronize_rcu() on line 28 needed? +
Answer + +

+In order to avoid fatal problems such as deadlocks, +an RCU read-side critical section must not contain calls to +synchronize_rcu(). +Similarly, an RCU read-side critical section must not +contain anything that waits, directly or indirectly, on completion of +an invocation of synchronize_rcu(). + +

+Although RCU's grace-period guarantee is useful in and of itself, with +quite a few use cases, +it would be good to be able to use RCU to coordinate read-side +access to linked data structures. +For this, the grace-period guarantee is not sufficient, as can +be seen in function add_gp_buggy() below. +We will look at the reader's code later, but in the meantime, just think of +the reader as locklessly picking up the gp pointer, +and, if the value loaded is non-NULL, locklessly accessing the +->a and ->b fields. + +

+
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4   if (!p)
+ 5     return -ENOMEM;
+ 6   spin_lock(&gp_lock);
+ 7   if (rcu_access_pointer(gp)) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   p->a = a;
+12   p->b = a;
+13   gp = p; /* ORDERING BUG */
+14   spin_unlock(&gp_lock);
+15   return true;
+16 }
+
+
+ +

+The problem is that both the compiler and weakly ordered CPUs are within +their rights to reorder this code as follows: + +

+
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4   if (!p)
+ 5     return -ENOMEM;
+ 6   spin_lock(&gp_lock);
+ 7   if (rcu_access_pointer(gp)) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   gp = p; /* ORDERING BUG */
+12   p->a = a;
+13   p->b = a;
+14   spin_unlock(&gp_lock);
+15   return true;
+16 }
+
+
+ +

+If an RCU reader fetches gp just after +add_gp_buggy_optimized executes line 11, +it will see garbage in the ->a and ->b +fields. +And this is but one of many ways in which compiler and hardware optimizations +could cause trouble. +Therefore, we clearly need some way to prevent the compiler and the CPU from +reordering in this manner, which brings us to the publish-subscribe +guarantee discussed in the next section. + +

Publish/Subscribe Guarantee

+ +

+RCU's publish-subscribe guarantee allows data to be inserted +into a linked data structure without disrupting RCU readers. +The updater uses rcu_assign_pointer() to insert the +new data, and readers use rcu_dereference() to +access data, whether new or old. +The following shows an example of insertion: + +

+
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4   if (!p)
+ 5     return -ENOMEM;
+ 6   spin_lock(&gp_lock);
+ 7   if (rcu_access_pointer(gp)) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   p->a = a;
+12   p->b = a;
+13   rcu_assign_pointer(gp, p);
+14   spin_unlock(&gp_lock);
+15   return true;
+16 }
+
+
+ +

+The rcu_assign_pointer() on line 13 is conceptually +equivalent to a simple assignment statement, but also guarantees +that its assignment will +happen after the two assignments in lines 11 and 12, +similar to the C11 memory_order_release store operation. +It also prevents any number of “interesting” compiler +optimizations, for example, the use of gp as a scratch +location immediately preceding the assignment. + +

Quick Quiz 3: +But rcu_assign_pointer() does nothing to prevent the +two assignments to p->a and p->b +from being reordered. +Can't that also cause problems? +
Answer + +

+It is tempting to assume that the reader need not do anything special +to control its accesses to the RCU-protected data, +as shown in do_something_gp_buggy() below: + +

+
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   p = gp;  /* OPTIMIZATIONS GALORE!!! */
+ 5   if (p) {
+ 6     do_something(p->a, p->b);
+ 7     rcu_read_unlock();
+ 8     return true;
+ 9   }
+10   rcu_read_unlock();
+11   return false;
+12 }
+
+
+ +

+However, this temptation must be resisted because there are a +surprisingly large number of ways that the compiler +(to say nothing of +DEC Alpha CPUs) +can trip this code up. +For but one example, if the compiler were short of registers, it +might choose to refetch from gp rather than keeping +a separate copy in p as follows: + +

+
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   if (gp) { /* OPTIMIZATIONS GALORE!!! */
+ 5     do_something(gp->a, gp->b);
+ 6     rcu_read_unlock();
+ 7     return true;
+ 8   }
+ 9   rcu_read_unlock();
+10   return false;
+11 }
+
+
+ +

+If this function ran concurrently with a series of updates that +replaced the current structure with a new one, +the fetches of gp->a +and gp->b might well come from two different structures, +which could cause serious confusion. +To prevent this (and much else besides), do_something_gp() uses +rcu_dereference() to fetch from gp: + +

+
+ 1 bool do_something_gp(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   p = rcu_dereference(gp);
+ 5   if (p) {
+ 6     do_something(p->a, p->b);
+ 7     rcu_read_unlock();
+ 8     return true;
+ 9   }
+10   rcu_read_unlock();
+11   return false;
+12 }
+
+
+ +

+The rcu_dereference() uses volatile casts and (for DEC Alpha) +memory barriers in the Linux kernel. +Should a +high-quality implementation of C11 memory_order_consume [PDF] +ever appear, then rcu_dereference() could be implemented +as a memory_order_consume load. +Regardless of the exact implementation, a pointer fetched by +rcu_dereference() may not be used outside of the +outermost RCU read-side critical section containing that +rcu_dereference(), unless protection of +the corresponding data element has been passed from RCU to some +other synchronization mechanism, most commonly locking or +reference counting. + +

+In short, updaters use rcu_assign_pointer() and readers +use rcu_dereference(), and these two RCU API elements +work together to ensure that readers have a consistent view of +newly added data elements. + +

+Of course, it is also necessary to remove elements from RCU-protected +data structures, for example, using the following process: + +

    +
  1. Remove the data element from the enclosing structure. +
  2. Wait for all pre-existing RCU read-side critical sections + to complete (because only pre-existing readers can possibly have + a reference to the newly removed data element). +
  3. At this point, only the updater has a reference to the + newly removed data element, so it can safely reclaim + the data element, for example, by passing it to kfree(). +
+ +This process is implemented by remove_gp_synchronous(): + +
+
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3   struct foo *p;
+ 4
+ 5   spin_lock(&gp_lock);
+ 6   p = rcu_access_pointer(gp);
+ 7   if (!p) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   rcu_assign_pointer(gp, NULL);
+12   spin_unlock(&gp_lock);
+13   synchronize_rcu();
+14   kfree(p);
+15   return true;
+16 }
+
+
+ +

+This function is straightforward, with line 13 waiting for a grace +period before line 14 frees the old data element. +This waiting ensures that readers will reach line 7 of +do_something_gp() before the data element referenced by +p is freed. +The rcu_access_pointer() on line 6 is similar to +rcu_dereference(), except that: + +

    +
  1. The value returned by rcu_access_pointer() + cannot be dereferenced. + If you want to access the value pointed to as well as + the pointer itself, use rcu_dereference() + instead of rcu_access_pointer(). +
  2. The call to rcu_access_pointer() need not be + protected. + In contrast, rcu_dereference() must either be + within an RCU read-side critical section or in a code + segment where the pointer cannot change, for example, in + code protected by the corresponding update-side lock. +
+ +

Quick Quiz 4: +Without the rcu_dereference() or the +rcu_access_pointer(), what destructive optimizations +might the compiler make use of? +
Answer + +

+In short, RCU's publish-subscribe guarantee is provided by the combination +of rcu_assign_pointer() and rcu_dereference(). +This guarantee allows data elements to be safely added to RCU-protected +linked data structures without disrupting RCU readers. +This guarantee can be used in combination with the grace-period +guarantee to also allow data elements to be removed from RCU-protected +linked data structures, again without disrupting RCU readers. + +

+This guarantee was only partially premeditated. +DYNIX/ptx used an explicit memory barrier for publication, but had nothing +resembling rcu_dereference() for subscription, nor did it +have anything resembling the smp_read_barrier_depends() +that was later subsumed into rcu_dereference(). +The need for these operations made itself known quite suddenly at a +late-1990s meeting with the DEC Alpha architects, back in the days when +DEC was still a free-standing company. +It took the Alpha architects a good hour to convince me that any sort +of barrier would ever be needed, and it then took me a good two hours +to convince them that their documentation did not make this point clear. +More recent work with the C and C++ standards committees have provided +much education on tricks and traps from the compiler. +In short, compilers were much less tricky in the early 1990s, but in +2015, don't even think about omitting rcu_dereference()! + +

Memory-Barrier Guarantees

+ +

+The previous section's simple linked-data-structure scenario clearly +demonstrates the need for RCU's stringent memory-ordering guarantees on +systems with more than one CPU: + +

    +
  1. Each CPU that has an RCU read-side critical section that + begins before synchronize_rcu() starts is + guaranteed to execute a full memory barrier between the time + that the RCU read-side critical section ends and the time that + synchronize_rcu() returns. + Without this guarantee, a pre-existing RCU read-side critical section + might hold a reference to the newly removed struct foo + after the kfree() on line 14 of + remove_gp_synchronous(). +
  2. Each CPU that has an RCU read-side critical section that ends + after synchronize_rcu() returns is guaranteed + to execute a full memory barrier between the time that + synchronize_rcu() begins and the time that the RCU + read-side critical section begins. + Without this guarantee, a later RCU read-side critical section + running after the kfree() on line 14 of + remove_gp_synchronous() might + later run do_something_gp() and find the + newly deleted struct foo. +
  3. If the task invoking synchronize_rcu() remains + on a given CPU, then that CPU is guaranteed to execute a full + memory barrier sometime during the execution of + synchronize_rcu(). + This guarantee ensures that the kfree() on + line 14 of remove_gp_synchronous() really does + execute after the removal on line 11. +
  4. If the task invoking synchronize_rcu() migrates + among a group of CPUs during that invocation, then each of the + CPUs in that group is guaranteed to execute a full memory barrier + sometime during the execution of synchronize_rcu(). + This guarantee also ensures that the kfree() on + line 14 of remove_gp_synchronous() really does + execute after the removal on + line 11, but also in the case where the thread executing the + synchronize_rcu() migrates in the meantime. +
+ +

Quick Quiz 5: +Given that multiple CPUs can start RCU read-side critical sections +at any time without any ordering whatsoever, how can RCU possibly tell whether +or not a given RCU read-side critical section starts before a +given instance of synchronize_rcu()? +
Answer + +

Quick Quiz 6: +The first and second guarantees require unbelievably strict ordering! +Are all these memory barriers really required? +
Answer + +

+Note that these memory-barrier requirements do not replace the fundamental +RCU requirement that a grace period wait for all pre-existing readers. +On the contrary, the memory barriers called out in this section must operate in +such a way as to enforce this fundamental requirement. +Of course, different implementations enforce this requirement in different +ways, but enforce it they must. + +

RCU Primitives Guaranteed to Execute Unconditionally

+ +

+The common-case RCU primitives are unconditional. +They are invoked, they do their job, and they return, with no possibility +of error, and no need to retry. +This is a key RCU design philosophy. + +

+However, this philosophy is pragmatic rather than pigheaded. +If someone comes up with a good justification for a particular conditional +RCU primitive, it might well be implemented and added. +After all, this guarantee was reverse-engineered, not premeditated. +The unconditional nature of the RCU primitives was initially an +accident of implementation, and later experience with synchronization +primitives with conditional primitives caused me to elevate this +accident to a guarantee. +Therefore, the justification for adding a conditional primitive to +RCU would need to be based on detailed and compelling use cases. + +

Guaranteed Read-to-Write Upgrade

+ +

+As far as RCU is concerned, it is always possible to carry out an +update within an RCU read-side critical section. +For example, that RCU read-side critical section might search for +a given data element, and then might acquire the update-side +spinlock in order to update that element, all while remaining +in that RCU read-side critical section. +Of course, it is necessary to exit the RCU read-side critical section +before invoking synchronize_rcu(), however, this +inconvenience can be avoided through use of the +call_rcu() and kfree_rcu() API members +described later in this document. + +

Quick Quiz 7: +But how does the upgrade-to-write operation exclude other readers? +
Answer + +

+This guarantee allows lookup code to be shared between read-side +and update-side code, and was premeditated, appearing in the earliest +DYNIX/ptx RCU documentation. + +

Fundamental Non-Requirements

+ +

+RCU provides extremely lightweight readers, and its read-side guarantees, +though quite useful, are correspondingly lightweight. +It is therefore all too easy to assume that RCU is guaranteeing more +than it really is. +Of course, the list of things that RCU does not guarantee is infinitely +long, however, the following sections list a few non-guarantees that +have caused confusion. +Except where otherwise noted, these non-guarantees were premeditated. + +

    +
  1. + Readers Impose Minimal Ordering +
  2. + Readers Do Not Exclude Updaters +
  3. + Updaters Only Wait For Old Readers +
  4. + Grace Periods Don't Partition Read-Side Critical Sections +
  5. + Read-Side Critical Sections Don't Partition Grace Periods +
  6. + Disabling Preemption Does Not Block Grace Periods +
+ +

Readers Impose Minimal Ordering

+ +

+Reader-side markers such as rcu_read_lock() and +rcu_read_unlock() provide absolutely no ordering guarantees +except through their interaction with the grace-period APIs such as +synchronize_rcu(). +To see this, consider the following pair of threads: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(x, 1);
+ 5   rcu_read_unlock();
+ 6   rcu_read_lock();
+ 7   WRITE_ONCE(y, 1);
+ 8   rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13   rcu_read_lock();
+14   r1 = READ_ONCE(y);
+15   rcu_read_unlock();
+16   rcu_read_lock();
+17   r2 = READ_ONCE(x);
+18   rcu_read_unlock();
+19 }
+
+
+ +

+After thread0() and thread1() execute +concurrently, it is quite possible to have + +

+
+(r1 == 1 && r2 == 0)
+
+
+ +(that is, y appears to have been assigned before x), +which would not be possible if rcu_read_lock() and +rcu_read_unlock() had much in the way of ordering +properties. +But they do not, so the CPU is within its rights +to do significant reordering. +This is by design: Any significant ordering constraints would slow down +these fast-path APIs. + +

Quick Quiz 8: +Can't the compiler also reorder this code? +
Answer + +

Readers Do Not Exclude Updaters

+ +

+Neither rcu_read_lock() nor rcu_read_unlock() +exclude updates. +All they do is to prevent grace periods from ending. +The following example illustrates this: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   r1 = READ_ONCE(y);
+ 5   if (r1) {
+ 6     do_something_with_nonzero_x();
+ 7     r2 = READ_ONCE(x);
+ 8     WARN_ON(!r2); /* BUG!!! */
+ 9   }
+10   rcu_read_unlock();
+11 }
+12
+13 void thread1(void)
+14 {
+15   spin_lock(&my_lock);
+16   WRITE_ONCE(x, 1);
+17   WRITE_ONCE(y, 1);
+18   spin_unlock(&my_lock);
+19 }
+
+
+ +

+If the thread0() function's rcu_read_lock() +excluded the thread1() function's update, +the WARN_ON() could never fire. +But the fact is that rcu_read_lock() does not exclude +much of anything aside from subsequent grace periods, of which +thread1() has none, so the +WARN_ON() can and does fire. + +

Updaters Only Wait For Old Readers

+ +

+It might be tempting to assume that after synchronize_rcu() +completes, there are no readers executing. +This temptation must be avoided because +new readers can start immediately after synchronize_rcu() +starts, and synchronize_rcu() is under no +obligation to wait for these new readers. + +

Quick Quiz 9: +Suppose that synchronize_rcu() did wait until all readers had completed. +Would the updater be able to rely on this? +
Answer + +

+Grace Periods Don't Partition Read-Side Critical Sections

+ +

+It is tempting to assume that if any part of one RCU read-side critical +section precedes a given grace period, and if any part of another RCU +read-side critical section follows that same grace period, then all of +the first RCU read-side critical section must precede all of the second. +However, this just isn't the case: A single grace period does not +partition the set of RCU read-side critical sections. +An example of this situation can be illustrated as follows, where +x, y, and z are initially all zero: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(a, 1);
+ 5   WRITE_ONCE(b, 1);
+ 6   rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11   r1 = READ_ONCE(a);
+12   synchronize_rcu();
+13   WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18   rcu_read_lock();
+19   r2 = READ_ONCE(b);
+20   r3 = READ_ONCE(c);
+21   rcu_read_unlock();
+22 }
+
+
+ +

+It turns out that the outcome: + +

+
+(r1 == 1 && r2 == 0 && r3 == 1)
+
+
+ +is entirely possible. +The following figure show how this can happen, with each circled +QS indicating the point at which RCU recorded a +quiescent state for each thread, that is, a state in which +RCU knows that the thread cannot be in the midst of an RCU read-side +critical section that started before the current grace period: + +

GPpartitionReaders1.svg

+ +

+If it is necessary to partition RCU read-side critical sections in this +manner, it is necessary to use two grace periods, where the first +grace period is known to end before the second grace period starts: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(a, 1);
+ 5   WRITE_ONCE(b, 1);
+ 6   rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11   r1 = READ_ONCE(a);
+12   synchronize_rcu();
+13   WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18   r2 = READ_ONCE(c);
+19   synchronize_rcu();
+20   WRITE_ONCE(d, 1);
+21 }
+22
+23 void thread3(void)
+24 {
+25   rcu_read_lock();
+26   r3 = READ_ONCE(b);
+27   r4 = READ_ONCE(d);
+28   rcu_read_unlock();
+29 }
+
+
+ +

+Here, if (r1 == 1), then +thread0()'s write to b must happen +before the end of thread1()'s grace period. +If in addition (r4 == 1), then +thread3()'s read from b must happen +after the beginning of thread2()'s grace period. +If it is also the case that (r2 == 1), then the +end of thread1()'s grace period must precede the +beginning of thread2()'s grace period. +This mean that the two RCU read-side critical sections cannot overlap, +guaranteeing that (r3 == 1). +As a result, the outcome: + +

+
+(r1 == 1 && r2 == 1 && r3 == 0 && r4 == 1)
+
+
+ +cannot happen. + +

+This non-requirement was also non-premeditated, but became apparent +when studying RCU's interaction with memory ordering. + +

+Read-Side Critical Sections Don't Partition Grace Periods

+ +

+It is also tempting to assume that if an RCU read-side critical section +happens between a pair of grace periods, then those grace periods cannot +overlap. +However, this temptation leads nowhere good, as can be illustrated by +the following, with all variables initially zero: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(a, 1);
+ 5   WRITE_ONCE(b, 1);
+ 6   rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11   r1 = READ_ONCE(a);
+12   synchronize_rcu();
+13   WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18   rcu_read_lock();
+19   WRITE_ONCE(d, 1);
+20   r2 = READ_ONCE(c);
+21   rcu_read_unlock();
+22 }
+23
+24 void thread3(void)
+25 {
+26   r3 = READ_ONCE(d);
+27   synchronize_rcu();
+28   WRITE_ONCE(e, 1);
+29 }
+30
+31 void thread4(void)
+32 {
+33   rcu_read_lock();
+34   r4 = READ_ONCE(b);
+35   r5 = READ_ONCE(e);
+36   rcu_read_unlock();
+37 }
+
+
+ +

+In this case, the outcome: + +

+
+(r1 == 1 && r2 == 1 && r3 == 1 && r4 == 0 && r5 == 1)
+
+
+ +is entirely possible, as illustrated below: + +

ReadersPartitionGP1.svg

+ +

+Again, an RCU read-side critical section can overlap almost all of a +given grace period, just so long as it does not overlap the entire +grace period. +As a result, an RCU read-side critical section cannot partition a pair +of RCU grace periods. + +

Quick Quiz 10: +How long a sequence of grace periods, each separated by an RCU read-side +critical section, would be required to partition the RCU read-side +critical sections at the beginning and end of the chain? +
Answer + +

+Disabling Preemption Does Not Block Grace Periods

+ +

+There was a time when disabling preemption on any given CPU would block +subsequent grace periods. +However, this was an accident of implementation and is not a requirement. +And in the current Linux-kernel implementation, disabling preemption +on a given CPU in fact does not block grace periods, as Oleg Nesterov +demonstrated. + +

+If you need a preempt-disable region to block grace periods, you need to add +rcu_read_lock() and rcu_read_unlock(), for example +as follows: + +

+
+ 1 preempt_disable();
+ 2 rcu_read_lock();
+ 3 do_something();
+ 4 rcu_read_unlock();
+ 5 preempt_enable();
+ 6
+ 7 /* Spinlocks implicitly disable preemption. */
+ 8 spin_lock(&mylock);
+ 9 rcu_read_lock();
+10 do_something();
+11 rcu_read_unlock();
+12 spin_unlock(&mylock);
+
+
+ +

+In theory, you could enter the RCU read-side critical section first, +but it is more efficient to keep the entire RCU read-side critical +section contained in the preempt-disable region as shown above. +Of course, RCU read-side critical sections that extend outside of +preempt-disable regions will work correctly, but such critical sections +can be preempted, which forces rcu_read_unlock() to do +more work. +And no, this is not an invitation to enclose all of your RCU +read-side critical sections within preempt-disable regions, because +doing so would degrade real-time response. + +

+This non-requirement appeared with preemptible RCU. +If you need a grace period that waits on non-preemptible code regions, use +RCU-sched. + +

Parallelism Facts of Life

+ +

+These parallelism facts of life are by no means specific to RCU, but +the RCU implementation must abide by them. +They therefore bear repeating: + +

    +
  1. Any CPU or task may be delayed at any time, + and any attempts to avoid these delays by disabling + preemption, interrupts, or whatever are completely futile. + This is most obvious in preemptible user-level + environments and in virtualized environments (where + a given guest OS's VCPUs can be preempted at any time by + the underlying hypervisor), but can also happen in bare-metal + environments due to ECC errors, NMIs, and other hardware + events. + Although a delay of more than about 20 seconds can result + in splats, the RCU implementation is obligated to use + algorithms that can tolerate extremely long delays, but where + “extremely long” is not long enough to allow + wrap-around when incrementing a 64-bit counter. +
  2. Both the compiler and the CPU can reorder memory accesses. + Where it matters, RCU must use compiler directives and + memory-barrier instructions to preserve ordering. +
  3. Conflicting writes to memory locations in any given cache line + will result in expensive cache misses. + Greater numbers of concurrent writes and more-frequent + concurrent writes will result in more dramatic slowdowns. + RCU is therefore obligated to use algorithms that have + sufficient locality to avoid significant performance and + scalability problems. +
  4. As a rough rule of thumb, only one CPU's worth of processing + may be carried out under the protection of any given exclusive + lock. + RCU must therefore use scalable locking designs. +
  5. Counters are finite, especially on 32-bit systems. + RCU's use of counters must therefore tolerate counter wrap, + or be designed such that counter wrap would take way more + time than a single system is likely to run. + An uptime of ten years is quite possible, a runtime + of a century much less so. + As an example of the latter, RCU's dyntick-idle nesting counter + allows 54 bits for interrupt nesting level (this counter + is 64 bits even on a 32-bit system). + Overflowing this counter requires 254 + half-interrupts on a given CPU without that CPU ever going idle. + If a half-interrupt happened every microsecond, it would take + 570 years of runtime to overflow this counter, which is currently + believed to be an acceptably long time. +
  6. Linux systems can have thousands of CPUs running a single + Linux kernel in a single shared-memory environment. + RCU must therefore pay close attention to high-end scalability. +
+ +

+This last parallelism fact of life means that RCU must pay special +attention to the preceding facts of life. +The idea that Linux might scale to systems with thousands of CPUs would +have been met with some skepticism in the 1990s, but these requirements +would have otherwise have been unsurprising, even in the early 1990s. + +

Quality-of-Implementation Requirements

+ +

+These sections list quality-of-implementation requirements. +Although an RCU implementation that ignores these requirements could +still be used, it would likely be subject to limitations that would +make it inappropriate for industrial-strength production use. +Classes of quality-of-implementation requirements are as follows: + +

    +
  1. Specialization +
  2. Performance and Scalability +
  3. Composability +
  4. Corner Cases +
+ +

+These classes is covered in the following sections. + +

Specialization

+ +

+RCU is and always has been intended primarily for read-mostly situations, as +illustrated by the following figure. +This means that RCU's read-side primitives are optimized, often at the +expense of its update-side primitives. + +

RCUApplicability.svg

+ +

+This focus on read-mostly situations means that RCU must interoperate +with other synchronization primitives. +For example, the add_gp() and remove_gp_synchronous() +examples discussed earlier use RCU to protect readers and locking to +coordinate updaters. +However, the need extends much farther, requiring that a variety of +synchronization primitives be legal within RCU read-side critical sections, +including spinlocks, sequence locks, atomic operations, reference +counters, and memory barriers. + +

Quick Quiz 11: +What about sleeping locks? +
Answer + +

+It often comes as a surprise that many algorithms do not require a +consistent view of data, but many can function in that mode, +with network routing being the poster child. +Internet routing algorithms take significant time to propagate +updates, so that by the time an update arrives at a given system, +that system has been sending network traffic the wrong way for +a considerable length of time. +Having a few threads continue to send traffic the wrong way for a +few more milliseconds is clearly not a problem: In the worst case, +TCP retransmissions will eventually get the data where it needs to go. +In general, when tracking the state of the universe outside of the +computer, some level of inconsistency must be tolerated due to +speed-of-light delays if nothing else. + +

+Furthermore, uncertainty about external state is inherent in many cases. +For example, a pair of veternarians might use heartbeat to determine +whether or not a given cat was alive. +But how long should they wait after the last heartbeat to decide that +the cat is in fact dead? +Waiting less than 400 milliseconds makes no sense because this would +mean that a relaxed cat would be considered to cycle between death +and life more than 100 times per minute. +Moreover, just as with human beings, a cat's heart might stop for +some period of time, so the exact wait period is a judgment call. +One of our pair of veternarians might wait 30 seconds before pronouncing +the cat dead, while the other might insist on waiting a full minute. +The two veternarians would then disagree on the state of the cat during +the final 30 seconds of the minute following the last heartbeat, as +fancifully illustrated below: + +

2013-08-is-it-dead.png

+ +

+Interestingly enough, this same situation applies to hardware. +When push comes to shove, how do we tell whether or not some +external server has failed? +We send messages to it periodically, and declare it failed if we +don't receive a response within a given period of time. +Policy decisions can usually tolerate short +periods of inconsistency. +The policy was decided some time ago, and is only now being put into +effect, so a few milliseconds of delay is normally inconsequential. + +

+However, there are algorithms that absolutely must see consistent data. +For example, the translation between a user-level SystemV semaphore +ID to the corresponding in-kernel data structure is protected by RCU, +but it is absolutely forbidden to update a semaphore that has just been +removed. +In the Linux kernel, this need for consistency is accommodated by acquiring +spinlocks located in the in-kernel data structure from within +the RCU read-side critical section, and this is indicated by the +green box in the figure above. +Many other techniques may be used, and are in fact used within the +Linux kernel. + +

+In short, RCU is not required to maintain consistency, and other +mechanisms may be used in concert with RCU when consistency is required. +RCU's specialization allows it to do its job extremely well, and its +ability to interoperate with other synchronization mechanisms allows +the right mix of synchronization tools to be used for a given job. + +

Performance and Scalability

+ +

+Energy efficiency is a critical component of performance today, +and Linux-kernel RCU implementations must therefore avoid unnecessarily +awakening idle CPUs. +I cannot claim that this requirement was premeditated. +In fact, I learned of it during a telephone conversation in which I +was given “frank and open” feedback on the importance +of energy efficiency in battery-powered systems and on specific +energy-efficiency shortcomings of the Linux-kernel RCU implementation. +In my experience, the battery-powered embedded community will consider +any unnecessary wakeups to be extremely unfriendly acts. +So much so that mere Linux-kernel-mailing-list posts are +insufficient to vent their ire. + +

+Memory consumption is not particularly important for in most +situations, and has become decreasingly +so as memory sizes have expanded and memory +costs have plummeted. +However, as I learned from Matt Mackall's +bloatwatch +efforts, memory footprint is critically important on single-CPU systems with +non-preemptible (CONFIG_PREEMPT=n) kernels, and thus +tiny RCU +was born. +Josh Triplett has since taken over the small-memory banner with his +Linux kernel tinification +project, which resulted in +SRCU +becoming optional for those kernels not needing it. + +

+The remaining performance requirements are, for the most part, +unsurprising. +For example, in keeping with RCU's read-side specialization, +rcu_dereference() should have negligible overhead (for +example, suppression of a few minor compiler optimizations). +Similarly, in non-preemptible environments, rcu_read_lock() and +rcu_read_unlock() should have exactly zero overhead. + +

+In preemptible environments, in the case where the RCU read-side +critical section was not preempted (as will be the case for the +highest-priority real-time process), rcu_read_lock() and +rcu_read_unlock() should have minimal overhead. +In particular, they should not contain atomic read-modify-write +operations, memory-barrier instructions, preemption disabling, +interrupt disabling, or backwards branches. +However, in the case where the RCU read-side critical section was preempted, +rcu_read_unlock() may acquire spinlocks and disable interrupts. +This is why it is better to nest an RCU read-side critical section +within a preempt-disable region than vice versa, at least in cases +where that critical section is short enough to avoid unduly degrading +real-time latencies. + +

+The synchronize_rcu() grace-period-wait primitive is +optimized for throughput. +It may therefore incur several milliseconds of latency in addition to +the duration of the longest RCU read-side critical section. +On the other hand, multiple concurrent invocations of +synchronize_rcu() are required to use batching optimizations +so that they can be satisfied by a single underlying grace-period-wait +operation. +For example, in the Linux kernel, it is not unusual for a single +grace-period-wait operation to serve more than +1,000 separate invocations +of synchronize_rcu(), thus amortizing the per-invocation +overhead down to nearly zero. +However, the grace-period optimization is also required to avoid +measurable degradation of real-time scheduling and interrupt latencies. + +

+In some cases, the multi-millisecond synchronize_rcu() +latencies are unacceptable. +In these cases, synchronize_rcu_expedited() may be used +instead, reducing the grace-period latency down to a few tens of +microseconds on small systems, at least in cases where the RCU read-side +critical sections are short. +There are currently no special latency requirements for +synchronize_rcu_expedited() on large systems, but, +consistent with the empirical nature of the RCU specification, +that is subject to change. +However, there most definitely are scalability requirements: +A storm of synchronize_rcu_expedited() invocations on 4096 +CPUs should at least make reasonable forward progress. +In return for its shorter latencies, synchronize_rcu_expedited() +is permitted to impose modest degradation of real-time latency +on non-idle online CPUs. +That said, it will likely be necessary to take further steps to reduce this +degradation, hopefully to roughly that of a scheduling-clock interrupt. + +

+There are a number of situations where even +synchronize_rcu_expedited()'s reduced grace-period +latency is unacceptable. +In these situations, the asynchronous call_rcu() can be +used in place of synchronize_rcu() as follows: + +

+
+ 1 struct foo {
+ 2   int a;
+ 3   int b;
+ 4   struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9   struct foo *p = container_of(rhp, struct foo, rh);
+10
+11   kfree(p);
+12 }
+13
+14 bool remove_gp_asynchronous(void)
+15 {
+16   struct foo *p;
+17
+18   spin_lock(&gp_lock);
+19   p = rcu_dereference(gp);
+20   if (!p) {
+21     spin_unlock(&gp_lock);
+22     return false;
+23   }
+24   rcu_assign_pointer(gp, NULL);
+25   call_rcu(&p->rh, remove_gp_cb);
+26   spin_unlock(&gp_lock);
+27   return true;
+28 }
+
+
+ +

+A definition of struct foo is finally needed, and appears +on lines 1-5. +The function remove_gp_cb() is passed to call_rcu() +on line 25, and will be invoked after the end of a subsequent +grace period. +This gets the same effect as remove_gp_synchronous(), +but without forcing the updater to wait for a grace period to elapse. +The call_rcu() function may be used in a number of +situations where neither synchronize_rcu() nor +synchronize_rcu_expedited() would be legal, +including within preempt-disable code, local_bh_disable() code, +interrupt-disable code, and interrupt handlers. +However, even call_rcu() is illegal within NMI handlers. +The callback function (remove_gp_cb() in this case) will be +executed within softirq (software interrupt) environment within the +Linux kernel, +either within a real softirq handler or under the protection +of local_bh_disable(). +In both the Linux kernel and in userspace, it is bad practice to +write an RCU callback function that takes too long. +Long-running operations should be relegated to separate threads or +(in the Linux kernel) workqueues. + +

Quick Quiz 12: +Why does line 19 use rcu_access_pointer()? +After all, call_rcu() on line 25 stores into the +structure, which would interact badly with concurrent insertions. +Doesn't this mean that rcu_dereference() is required? +
Answer + +

+However, all that remove_gp_cb() is doing is +invoking kfree() on the data element. +This is a common idiom, and is supported by kfree_rcu(), +which allows “fire and forget” operation as shown below: + +

+
+ 1 struct foo {
+ 2   int a;
+ 3   int b;
+ 4   struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9   struct foo *p;
+10
+11   spin_lock(&gp_lock);
+12   p = rcu_dereference(gp);
+13   if (!p) {
+14     spin_unlock(&gp_lock);
+15     return false;
+16   }
+17   rcu_assign_pointer(gp, NULL);
+18   kfree_rcu(p, rh);
+19   spin_unlock(&gp_lock);
+20   return true;
+21 }
+
+
+ +

+Note that remove_gp_faf() simply invokes +kfree_rcu() and proceeds, without any need to pay any +further attention to the subsequent grace period and kfree(). +It is permissible to invoke kfree_rcu() from the same +environments as for call_rcu(). +Interestingly enough, DYNIX/ptx had the equivalents of +call_rcu() and kfree_rcu(), but not +synchronize_rcu(). +This was due to the fact that RCU was not heavily used within DYNIX/ptx, +so the very few places that needed something like +synchronize_rcu() simply open-coded it. + +

Quick Quiz 13: +Earlier it was claimed that call_rcu() and +kfree_rcu() allowed updaters to avoid being blocked +by readers. +But how can that be correct, given that the invocation of the callback +and the freeing of the memory (respectively) must still wait for +a grace period to elapse? +
Answer + +

+But what if the updater must wait for the completion of code to be +executed after the end of the grace period, but has other tasks +that can be carried out in the meantime? +The polling-style get_state_synchronize_rcu() and +cond_synchronize_rcu() functions may be used for this +purpose, as shown below: + +

+
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3   struct foo *p;
+ 4   unsigned long s;
+ 5
+ 6   spin_lock(&gp_lock);
+ 7   p = rcu_access_pointer(gp);
+ 8   if (!p) {
+ 9     spin_unlock(&gp_lock);
+10     return false;
+11   }
+12   rcu_assign_pointer(gp, NULL);
+13   spin_unlock(&gp_lock);
+14   s = get_state_synchronize_rcu();
+15   do_something_while_waiting();
+16   cond_synchronize_rcu(s);
+17   kfree(p);
+18   return true;
+19 }
+
+
+ +

+On line 14, get_state_synchronize_rcu() obtains a +“cookie” from RCU, +then line 15 carries out other tasks, +and finally, line 16 returns immediately if a grace period has +elapsed in the meantime, but otherwise waits as required. +The need for get_state_synchronize_rcu and +cond_synchronize_rcu() has appeared quite recently, +so it is too early to tell whether they will stand the test of time. + +

+RCU thus provides a range of tools to allow updaters to strike the +required tradeoff between latency, flexibility and CPU overhead. + +

Composability

+ +

+Composability has received much attention in recent years, perhaps in part +due to the collision of multicore hardware with object-oriented techniques +designed in single-threaded environments for single-threaded use. +And in theory, RCU read-side critical sections may be composed, and in +fact may be nested arbitrarily deeply. +In practice, as with all real-world implementations of composable +constructs, there are limitations. + +

+Implementations of RCU for which rcu_read_lock() +and rcu_read_unlock() generate no code, such as +Linux-kernel RCU when CONFIG_PREEMPT=n, can be +nested arbitrarily deeply. +After all, there is no overhead. +Except that if all these instances of rcu_read_lock() +and rcu_read_unlock() are visible to the compiler, +compilation will eventually fail due to exhausting memory, +mass storage, or user patience, whichever comes first. +If the nesting is not visible to the compiler, as is the case with +mutually recursive functions each in its own translation unit, +stack overflow will result. +If the nesting takes the form of loops, either the control variable +will overflow or (in the Linux kernel) you will get an RCU CPU stall warning. +Nevertheless, this class of RCU implementations is one +of the most composable constructs in existence. + +

+RCU implementations that explicitly track nesting depth +are limited by the nesting-depth counter. +For example, the Linux kernel's preemptible RCU limits nesting to +INT_MAX. +This should suffice for almost all practical purposes. +That said, a consecutive pair of RCU read-side critical sections +between which there is an operation that waits for a grace period +cannot be enclosed in another RCU read-side critical section. +This is because it is not legal to wait for a grace period within +an RCU read-side critical section: To do so would result either +in deadlock or +in RCU implicitly splitting the enclosing RCU read-side critical +section, neither of which is conducive to a long-lived and prosperous +kernel. + +

+It is worth noting that RCU is not alone in limiting composability. +For example, many transactional-memory implementations prohibit +composing a pair of transactions separated by an irrevocable +operation (for example, a network receive operation). +For another example, lock-based critical sections can be composed +surprisingly freely, but only if deadlock is avoided. + +

+In short, although RCU read-side critical sections are highly composable, +care is required in some situations, just as is the case for any other +composable synchronization mechanism. + +

Corner Cases

+ +

+A given RCU workload might have an endless and intense stream of +RCU read-side critical sections, perhaps even so intense that there +was never a point in time during which there was not at least one +RCU read-side critical section in flight. +RCU cannot allow this situation to block grace periods: As long as +all the RCU read-side critical sections are finite, grace periods +must also be finite. + +

+That said, preemptible RCU implementations could potentially result +in RCU read-side critical sections being preempted for long durations, +which has the effect of creating a long-duration RCU read-side +critical section. +This situation can arise only in heavily loaded systems, but systems using +real-time priorities are of course more vulnerable. +Therefore, RCU priority boosting is provided to help deal with this +case. +That said, the exact requirements on RCU priority boosting will likely +evolve as more experience accumulates. + +

+Other workloads might have very high update rates. +Although one can argue that such workloads should instead use +something other than RCU, the fact remains that RCU must +handle such workloads gracefully. +This requirement is another factor driving batching of grace periods, +but it is also the driving force behind the checks for large numbers +of queued RCU callbacks in the call_rcu() code path. +Finally, high update rates should not delay RCU read-side critical +sections, although some read-side delays can occur when using +synchronize_rcu_expedited(), courtesy of this function's use +of try_stop_cpus(). +(In the future, synchronize_rcu_expedited() will be +converted to use lighter-weight inter-processor interrupts (IPIs), +but this will still disturb readers, though to a much smaller degree.) + +

+Although all three of these corner cases were understood in the early +1990s, a simple user-level test consisting of close(open(path)) +in a tight loop +in the early 2000s suddenly provided a much deeper appreciation of the +high-update-rate corner case. +This test also motivated addition of some RCU code to react to high update +rates, for example, if a given CPU finds itself with more than 10,000 +RCU callbacks queued, it will cause RCU to take evasive action by +more aggressively starting grace periods and more aggressively forcing +completion of grace-period processing. +This evasive action causes the grace period to complete more quickly, +but at the cost of restricting RCU's batching optimizations, thus +increasing the CPU overhead incurred by that grace period. + +

+Software-Engineering Requirements

+ +

+Between Murphy's Law and “To err is human”, it is necessary to +guard against mishaps and misuse: + +

    +
  1. It is all too easy to forget to use rcu_read_lock() + everywhere that it is needed, so kernels built with + CONFIG_PROVE_RCU=y will spat if + rcu_dereference() is used outside of an + RCU read-side critical section. + Update-side code can use rcu_dereference_protected(), + which takes a + lockdep expression + to indicate what is providing the protection. + If the indicated protection is not provided, a lockdep splat + is emitted. + +

    + Code shared between readers and updaters can use + rcu_dereference_check(), which also takes a + lockdep expression, and emits a lockdep splat if neither + rcu_read_lock() nor the indicated protection + is in place. + In addition, rcu_dereference_raw() is used in those + (hopefully rare) cases where the required protection cannot + be easily described. + Finally, rcu_read_lock_held() is provided to + allow a function to verify that it has been invoked within + an RCU read-side critical section. + I was made aware of this set of requirements shortly after Thomas + Gleixner audited a number of RCU uses. +

  2. A given function might wish to check for RCU-related preconditions + upon entry, before using any other RCU API. + The rcu_lockdep_assert() does this job, + asserting the expression in kernels having lockdep enabled + and doing nothing otherwise. +
  3. It is also easy to forget to use rcu_assign_pointer() + and rcu_dereference(), perhaps (incorrectly) + substituting a simple assignment. + To catch this sort of error, a given RCU-protected pointer may be + tagged with __rcu, after which running sparse + with CONFIG_SPARSE_RCU_POINTER=y will complain + about simple-assignment accesses to that pointer. + Arnd Bergmann made me aware of this requirement, and also + supplied the needed + patch series. +
  4. Kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y + will splat if a data element is passed to call_rcu() + twice in a row, without a grace period in between. + (This error is similar to a double free.) + The corresponding rcu_head structures that are + dynamically allocated are automatically tracked, but + rcu_head structures allocated on the stack + must be initialized with init_rcu_head_on_stack() + and cleaned up with destroy_rcu_head_on_stack(). + Similarly, statically allocated non-stack rcu_head + structures must be initialized with init_rcu_head() + and cleaned up with destroy_rcu_head(). + Mathieu Desnoyers made me aware of this requirement, and also + supplied the needed + patch. +
  5. An infinite loop in an RCU read-side critical section will + eventually trigger an RCU CPU stall warning splat, with + the duration of “eventually” being controlled by the + RCU_CPU_STALL_TIMEOUT Kconfig option, or, + alternatively, by the + rcupdate.rcu_cpu_stall_timeout boot/sysfs + parameter. + However, RCU is not obligated to produce this splat + unless there is a grace period waiting on that particular + RCU read-side critical section. +

    + Some extreme workloads might intentionally delay + RCU grace periods, and systems running those workloads can + be booted with rcupdate.rcu_cpu_stall_suppress + to suppress the splats. + This kernel parameter may also be set via sysfs. + Furthermore, RCU CPU stall warnings are counter-productive + during sysrq dumps and during panics. + RCU therefore supplies the rcu_sysrq_start() and + rcu_sysrq_end() API members to be called before + and after long sysrq dumps. + RCU also supplies the rcu_panic() notifier that is + automatically invoked at the beginning of a panic to suppress + further RCU CPU stall warnings. + +

    + This requirement made itself known in the early 1990s, pretty + much the first time that it was necessary to debug a CPU stall. + That said, the initial implementation in DYNIX/ptx was quite + generic in comparison with that of Linux. +

  6. Although it would be very good to detect pointers leaking out + of RCU read-side critical sections, there is currently no + good way of doing this. + One complication is the need to distinguish between pointers + leaking and pointers that have been handed off from RCU to + some other synchronization mechanism, for example, reference + counting. +
  7. In kernels built with CONFIG_RCU_TRACE=y, RCU-related + information is provided via both debugfs and event tracing. +
  8. Open-coded use of rcu_assign_pointer() and + rcu_dereference() to create typical linked + data structures can be surprisingly error-prone. + Therefore, RCU-protected + linked lists + and, more recently, RCU-protected + hash tables + are available. + Many other special-purpose RCU-protected data structures are + available in the Linux kernel and the userspace RCU library. +
  9. Some linked structures are created at compile time, but still + require __rcu checking. + The RCU_POINTER_INITIALIZER() macro serves this + purpose. +
  10. It is not necessary to use rcu_assign_pointer() + when creating linked structures that are to be published via + a single external pointer. + The RCU_INIT_POINTER() macro is provided for + this task and also for assigning NULL pointers + at runtime. +
+ +

+This not a hard-and-fast list: RCU's diagnostic capabilities will +continue to be guided by the number and type of usage bugs found +in real-world RCU usage. + +

Linux Kernel Complications

+ +

+The Linux kernel provides an interesting environment for all kinds of +software, including RCU. +Some of the relevant points of interest are as follows: + +

    +
  1. Configuration. +
  2. Firmware Interface. +
  3. Early Boot. +
  4. + Interrupts and non-maskable interrupts (NMIs). +
  5. Loadable Modules. +
  6. Hotplug CPU. +
  7. Scheduler and RCU. +
  8. Tracing and RCU. +
  9. Energy Efficiency. +
  10. Memory Efficiency. +
  11. + Performance, Scalability, Response Time, and Reliability. +
+ +

+This list is probably incomplete, but it does give a feel for the +most notable Linux-kernel complications. +Each of the following sections covers one of the above topics. + +

Configuration

+ +

+RCU's goal is automatic configuration, so that almost nobody +needs to worry about RCU's Kconfig options. +And for almost all users, RCU does in fact work well +“out of the box.” + +

+However, there are specialized use cases that are handled by +kernel boot parameters and Kconfig options. +Unfortunately, the Kconfig system will explicitly ask users +about new Kconfig options, which requires almost all of them +be hidden behind a CONFIG_RCU_EXPERT Kconfig option. + +

+This all should be quite obvious, but the fact remains that +Linus Torvalds recently had to +remind +me of this requirement. + +

Firmware Interface

+ +

+In many cases, kernel obtains information about the system from the +firmware, and sometimes things are lost in translation. +Or the translation is accurate, but the original message is bogus. + +

+For example, some systems' firmware overreports the number of CPUs, +sometimes by a large factor. +If RCU naively believed the firmware, as it used to do, +it would create too many per-CPU kthreads. +Although the resulting system will still run correctly, the extra +kthreads needlessly consume memory and can cause confusion +when they show up in ps listings. + +

+RCU must therefore wait for a given CPU to actually come online before +it can allow itself to believe that the CPU actually exists. +The resulting “ghost CPUs” (which are never going to +come online) cause a number of +interesting complications. + +

Early Boot

+ +

+The Linux kernel's boot sequence is an interesting process, +and RCU is used early, even before rcu_init() +is invoked. +In fact, a number of RCU's primitives can be used as soon as the +initial task's task_struct is available and the +boot CPU's per-CPU variables are set up. +The read-side primitives (rcu_read_lock(), +rcu_read_unlock(), rcu_dereference(), +and rcu_access_pointer()) will operate normally very early on, +as will rcu_assign_pointer(). + +

+Although call_rcu() may be invoked at any +time during boot, callbacks are not guaranteed to be invoked until after +the scheduler is fully up and running. +This delay in callback invocation is due to the fact that RCU does not +invoke callbacks until it is fully initialized, and this full initialization +cannot occur until after the scheduler has initialized itself to the +point where RCU can spawn and run its kthreads. +In theory, it would be possible to invoke callbacks earlier, +however, this is not a panacea because there would be severe restrictions +on what operations those callbacks could invoke. + +

+Perhaps surprisingly, synchronize_rcu(), +synchronize_rcu_bh() +(discussed below), +and +synchronize_sched() +will all operate normally +during very early boot, the reason being that there is only one CPU +and preemption is disabled. +This means that the call synchronize_rcu() (or friends) +itself is a quiescent +state and thus a grace period, so the early-boot implementation can +be a no-op. + +

+Both synchronize_rcu_bh() and synchronize_sched() +continue to operate normally through the remainder of boot, courtesy +of the fact that preemption is disabled across their RCU read-side +critical sections and also courtesy of the fact that there is still +only one CPU. +However, once the scheduler starts initializing, preemption is enabled. +There is still only a single CPU, but the fact that preemption is enabled +means that the no-op implementation of synchronize_rcu() no +longer works in CONFIG_PREEMPT=y kernels. +Therefore, as soon as the scheduler starts initializing, the early-boot +fastpath is disabled. +This means that synchronize_rcu() switches to its runtime +mode of operation where it posts callbacks, which in turn means that +any call to synchronize_rcu() will block until the corresponding +callback is invoked. +Unfortunately, the callback cannot be invoked until RCU's runtime +grace-period machinery is up and running, which cannot happen until +the scheduler has initialized itself sufficiently to allow RCU's +kthreads to be spawned. +Therefore, invoking synchronize_rcu() during scheduler +initialization can result in deadlock. + +

Quick Quiz 14: +So what happens with synchronize_rcu() during +scheduler initialization for CONFIG_PREEMPT=n +kernels? +
Answer + +

+I learned of these boot-time requirements as a result of a series of +system hangs. + +

Interrupts and NMIs

+ +

+The Linux kernel has interrupts, and RCU read-side critical sections are +legal within interrupt handlers and within interrupt-disabled regions +of code, as are invocations of call_rcu(). + +

+Some Linux-kernel architectures can enter an interrupt handler from +non-idle process context, and then just never leave it, instead stealthily +transitioning back to process context. +This trick is sometimes used to invoke system calls from inside the kernel. +These “half-interrupts” mean that RCU has to be very careful +about how it counts interrupt nesting levels. +I learned of this requirement the hard way during a rewrite +of RCU's dyntick-idle code. + +

+The Linux kernel has non-maskable interrupts (NMIs), and +RCU read-side critical sections are legal within NMI handlers. +Thankfully, RCU update-side primitives, including +call_rcu(), are prohibited within NMI handlers. + +

+The name notwithstanding, some Linux-kernel architectures +can have nested NMIs, which RCU must handle correctly. +Andy Lutomirski +surprised me +with this requirement; +he also kindly surprised me with +an algorithm +that meets this requirement. + +

Loadable Modules

+ +

+The Linux kernel has loadable modules, and these modules can +also be unloaded. +After a given module has been unloaded, any attempt to call +one of its functions results in a segmentation fault. +The module-unload functions must therefore cancel any +delayed calls to loadable-module functions, for example, +any outstanding mod_timer() must be dealt with +via del_timer_sync() or similar. + +

+Unfortunately, there is no way to cancel an RCU callback; +once you invoke call_rcu(), the callback function is +going to eventually be invoked, unless the system goes down first. +Because it is normally considered socially irresponsible to crash the system +in response to a module unload request, we need some other way +to deal with in-flight RCU callbacks. + +

+RCU therefore provides +rcu_barrier(), +which waits until all in-flight RCU callbacks have been invoked. +If a module uses call_rcu(), its exit function should therefore +prevent any future invocation of call_rcu(), then invoke +rcu_barrier(). +In theory, the underlying module-unload code could invoke +rcu_barrier() unconditionally, but in practice this would +incur unacceptable latencies. + +

+Nikita Danilov noted this requirement for an analogous filesystem-unmount +situation, and Dipankar Sarma incorporated rcu_barrier() into RCU. +The need for rcu_barrier() for module unloading became +apparent later. + +

Hotplug CPU

+ +

+The Linux kernel supports CPU hotplug, which means that CPUs +can come and go. +It is of course illegal to use any RCU API member from an offline CPU. +This requirement was present from day one in DYNIX/ptx, but +on the other hand, the Linux kernel's CPU-hotplug implementation +is “interesting.” + +

+The Linux-kernel CPU-hotplug implementation has notifiers that +are used to allow the various kernel subsystems (including RCU) +to respond appropriately to a given CPU-hotplug operation. +Most RCU operations may be invoked from CPU-hotplug notifiers, +including even normal synchronous grace-period operations +such as synchronize_rcu(). +However, expedited grace-period operations such as +synchronize_rcu_expedited() are not supported, +due to the fact that current implementations block CPU-hotplug +operations, which could result in deadlock. + +

+In addition, all-callback-wait operations such as +rcu_barrier() are also not supported, due to the +fact that there are phases of CPU-hotplug operations where +the outgoing CPU's callbacks will not be invoked until after +the CPU-hotplug operation ends, which could also result in deadlock. + +

Scheduler and RCU

+ +

+RCU depends on the scheduler, and the scheduler uses RCU to +protect some of its data structures. +This means the scheduler is forbidden from acquiring +the runqueue locks and the priority-inheritance locks +in the middle of an outermost RCU read-side critical section unless either +(1) it releases them before exiting that same +RCU read-side critical section, or +(2) interrupts are disabled across +that entire RCU read-side critical section. +This same prohibition also applies (recursively!) to any lock that is acquired +while holding any lock to which this prohibition applies. +Adhering to this rule prevents preemptible RCU from invoking +rcu_read_unlock_special() while either runqueue or +priority-inheritance locks are held, thus avoiding deadlock. + +

+Prior to v4.4, it was only necessary to disable preemption across +RCU read-side critical sections that acquired scheduler locks. +In v4.4, expedited grace periods started using IPIs, and these +IPIs could force a rcu_read_unlock() to take the slowpath. +Therefore, this expedited-grace-period change required disabling of +interrupts, not just preemption. + +

+For RCU's part, the preemptible-RCU rcu_read_unlock() +implementation must be written carefully to avoid similar deadlocks. +In particular, rcu_read_unlock() must tolerate an +interrupt where the interrupt handler invokes both +rcu_read_lock() and rcu_read_unlock(). +This possibility requires rcu_read_unlock() to use +negative nesting levels to avoid destructive recursion via +interrupt handler's use of RCU. + +

+This pair of mutual scheduler-RCU requirements came as a +complete surprise. + +

+As noted above, RCU makes use of kthreads, and it is necessary to +avoid excessive CPU-time accumulation by these kthreads. +This requirement was no surprise, but RCU's violation of it +when running context-switch-heavy workloads when built with +CONFIG_NO_HZ_FULL=y +did come as a surprise [PDF]. +RCU has made good progress towards meeting this requirement, even +for context-switch-have CONFIG_NO_HZ_FULL=y workloads, +but there is room for further improvement. + +

Tracing and RCU

+ +

+It is possible to use tracing on RCU code, but tracing itself +uses RCU. +For this reason, rcu_dereference_raw_notrace() +is provided for use by tracing, which avoids the destructive +recursion that could otherwise ensue. +This API is also used by virtualization in some architectures, +where RCU readers execute in environments in which tracing +cannot be used. +The tracing folks both located the requirement and provided the +needed fix, so this surprise requirement was relatively painless. + +

Energy Efficiency

+ +

+Interrupting idle CPUs is considered socially unacceptable, +especially by people with battery-powered embedded systems. +RCU therefore conserves energy by detecting which CPUs are +idle, including tracking CPUs that have been interrupted from idle. +This is a large part of the energy-efficiency requirement, +so I learned of this via an irate phone call. + +

+Because RCU avoids interrupting idle CPUs, it is illegal to +execute an RCU read-side critical section on an idle CPU. +(Kernels built with CONFIG_PROVE_RCU=y will splat +if you try it.) +The RCU_NONIDLE() macro and _rcuidle +event tracing is provided to work around this restriction. +In addition, rcu_is_watching() may be used to +test whether or not it is currently legal to run RCU read-side +critical sections on this CPU. +I learned of the need for diagnostics on the one hand +and RCU_NONIDLE() on the other while inspecting +idle-loop code. +Steven Rostedt supplied _rcuidle event tracing, +which is used quite heavily in the idle loop. + +

+It is similarly socially unacceptable to interrupt an +nohz_full CPU running in userspace. +RCU must therefore track nohz_full userspace +execution. +And in +CONFIG_NO_HZ_FULL_SYSIDLE=y +kernels, RCU must separately track idle CPUs on the one hand and +CPUs that are either idle or executing in userspace on the other. +In both cases, RCU must be able to sample state at two points in +time, and be able to determine whether or not some other CPU spent +any time idle and/or executing in userspace. + +

+These energy-efficiency requirements have proven quite difficult to +understand and to meet, for example, there have been more than five +clean-sheet rewrites of RCU's energy-efficiency code, the last of +which was finally able to demonstrate +real energy savings running on real hardware [PDF]. +As noted earlier, +I learned of many of these requirements via angry phone calls: +Flaming me on the Linux-kernel mailing list was apparently not +sufficient to fully vent their ire at RCU's energy-efficiency bugs! + +

Memory Efficiency

+ +

+Although small-memory non-realtime systems can simply use Tiny RCU, +code size is only one aspect of memory efficiency. +Another aspect is the size of the rcu_head structure +used by call_rcu() and kfree_rcu(). +Although this structure contains nothing more than a pair of pointers, +it does appear in many RCU-protected data structures, including +some that are size critical. +The page structure is a case in point, as evidenced by +the many occurrences of the union keyword within that structure. + +

+This need for memory efficiency is one reason that RCU uses hand-crafted +singly linked lists to track the rcu_head structures that +are waiting for a grace period to elapse. +It is also the reason why rcu_head structures do not contain +debug information, such as fields tracking the file and line of the +call_rcu() or kfree_rcu() that posted them. +Although this information might appear in debug-only kernel builds at some +point, in the meantime, the ->func field will often provide +the needed debug information. + +

+However, in some cases, the need for memory efficiency leads to even +more extreme measures. +Returning to the page structure, the rcu_head field +shares storage with a great many other structures that are used at +various points in the corresponding page's lifetime. +In order to correctly resolve certain +race conditions, +the Linux kernel's memory-management subsystem needs a particular bit +to remain zero during all phases of grace-period processing, +and that bit happens to map to the bottom bit of the +rcu_head structure's ->next field. +RCU makes this guarantee as long as call_rcu() +is used to post the callback, as opposed to kfree_rcu() +or some future “lazy” +variant of call_rcu() that might one day be created for +energy-efficiency purposes. + +

+Performance, Scalability, Response Time, and Reliability

+ +

+Expanding on the +earlier discussion, +RCU is used heavily by hot code paths in performance-critical +portions of the Linux kernel's networking, security, virtualization, +and scheduling code paths. +RCU must therefore use efficient implementations, especially in its +read-side primitives. +To that end, it would be good if preemptible RCU's implementation +of rcu_read_lock() could be inlined, however, doing +this requires resolving #include issues with the +task_struct structure. + +

+The Linux kernel supports hardware configurations with up to +4096 CPUs, which means that RCU must be extremely scalable. +Algorithms that involve frequent acquisitions of global locks or +frequent atomic operations on global variables simply cannot be +tolerated within the RCU implementation. +RCU therefore makes heavy use of a combining tree based on the +rcu_node structure. +RCU is required to tolerate all CPUs continuously invoking any +combination of RCU's runtime primitives with minimal per-operation +overhead. +In fact, in many cases, increasing load must decrease the +per-operation overhead, witness the batching optimizations for +synchronize_rcu(), call_rcu(), +synchronize_rcu_expedited(), and rcu_barrier(). +As a general rule, RCU must cheerfully accept whatever the +rest of the Linux kernel decides to throw at it. + +

+The Linux kernel is used for real-time workloads, especially +in conjunction with the +-rt patchset. +The real-time-latency response requirements are such that the +traditional approach of disabling preemption across RCU +read-side critical sections is inappropriate. +Kernels built with CONFIG_PREEMPT=y therefore +use an RCU implementation that allows RCU read-side critical +sections to be preempted. +This requirement made its presence known after users made it +clear that an earlier +real-time patch +did not meet their needs, in conjunction with some +RCU issues +encountered by a very early version of the -rt patchset. + +

+In addition, RCU must make do with a sub-100-microsecond real-time latency +budget. +In fact, on smaller systems with the -rt patchset, the Linux kernel +provides sub-20-microsecond real-time latencies for the whole kernel, +including RCU. +RCU's scalability and latency must therefore be sufficient for +these sorts of configurations. +To my surprise, the sub-100-microsecond real-time latency budget + +applies to even the largest systems [PDF], +up to and including systems with 4096 CPUs. +This real-time requirement motivated the grace-period kthread, which +also simplified handling of a number of race conditions. + +

+Finally, RCU's status as a synchronization primitive means that +any RCU failure can result in arbitrary memory corruption that can be +extremely difficult to debug. +This means that RCU must be extremely reliable, which in +practice also means that RCU must have an aggressive stress-test +suite. +This stress-test suite is called rcutorture. + +

+Although the need for rcutorture was no surprise, +the current immense popularity of the Linux kernel is posing +interesting—and perhaps unprecedented—validation +challenges. +To see this, keep in mind that there are well over one billion +instances of the Linux kernel running today, given Android +smartphones, Linux-powered televisions, and servers. +This number can be expected to increase sharply with the advent of +the celebrated Internet of Things. + +

+Suppose that RCU contains a race condition that manifests on average +once per million years of runtime. +This bug will be occurring about three times per day across +the installed base. +RCU could simply hide behind hardware error rates, given that no one +should really expect their smartphone to last for a million years. +However, anyone taking too much comfort from this thought should +consider the fact that in most jurisdictions, a successful multi-year +test of a given mechanism, which might include a Linux kernel, +suffices for a number of types of safety-critical certifications. +In fact, rumor has it that the Linux kernel is already being used +in production for safety-critical applications. +I don't know about you, but I would feel quite bad if a bug in RCU +killed someone. +Which might explain my recent focus on validation and verification. + +

Other RCU Flavors

+ +

+One of the more surprising things about RCU is that there are now +no fewer than five flavors, or API families. +In addition, the primary flavor that has been the sole focus up to +this point has two different implementations, non-preemptible and +preemptible. +The other four flavors are listed below, with requirements for each +described in a separate section. + +

    +
  1. Bottom-Half Flavor +
  2. Sched Flavor +
  3. Sleepable RCU +
  4. Tasks RCU +
+ +

Bottom-Half Flavor

+ +

+The softirq-disable (AKA “bottom-half”, +hence the “_bh” abbreviations) +flavor of RCU, or RCU-bh, was developed by +Dipankar Sarma to provide a flavor of RCU that could withstand the +network-based denial-of-service attacks researched by Robert +Olsson. +These attacks placed so much networking load on the system +that some of the CPUs never exited softirq execution, +which in turn prevented those CPUs from ever executing a context switch, +which, in the RCU implementation of that time, prevented grace periods +from ever ending. +The result was an out-of-memory condition and a system hang. + +

+The solution was the creation of RCU-bh, which does +local_bh_disable() +across its read-side critical sections, and which uses the transition +from one type of softirq processing to another as a quiescent state +in addition to context switch, idle, user mode, and offline. +This means that RCU-bh grace periods can complete even when some of +the CPUs execute in softirq indefinitely, thus allowing algorithms +based on RCU-bh to withstand network-based denial-of-service attacks. + +

+Because +rcu_read_lock_bh() and rcu_read_unlock_bh() +disable and re-enable softirq handlers, any attempt to start a softirq +handlers during the +RCU-bh read-side critical section will be deferred. +In this case, rcu_read_unlock_bh() +will invoke softirq processing, which can take considerable time. +One can of course argue that this softirq overhead should be associated +with the code following the RCU-bh read-side critical section rather +than rcu_read_unlock_bh(), but the fact +is that most profiling tools cannot be expected to make this sort +of fine distinction. +For example, suppose that a three-millisecond-long RCU-bh read-side +critical section executes during a time of heavy networking load. +There will very likely be an attempt to invoke at least one softirq +handler during that three milliseconds, but any such invocation will +be delayed until the time of the rcu_read_unlock_bh(). +This can of course make it appear at first glance as if +rcu_read_unlock_bh() was executing very slowly. + +

+The +RCU-bh API +includes +rcu_read_lock_bh(), +rcu_read_unlock_bh(), +rcu_dereference_bh(), +rcu_dereference_bh_check(), +synchronize_rcu_bh(), +synchronize_rcu_bh_expedited(), +call_rcu_bh(), +rcu_barrier_bh(), and +rcu_read_lock_bh_held(). + +

Sched Flavor

+ +

+Before preemptible RCU, waiting for an RCU grace period had the +side effect of also waiting for all pre-existing interrupt +and NMI handlers. +However, there are legitimate preemptible-RCU implementations that +do not have this property, given that any point in the code outside +of an RCU read-side critical section can be a quiescent state. +Therefore, RCU-sched was created, which follows “classic” +RCU in that an RCU-sched grace period waits for for pre-existing +interrupt and NMI handlers. +In kernels built with CONFIG_PREEMPT=n, the RCU and RCU-sched +APIs have identical implementations, while kernels built with +CONFIG_PREEMPT=y provide a separate implementation for each. + +

+Note well that in CONFIG_PREEMPT=y kernels, +rcu_read_lock_sched() and rcu_read_unlock_sched() +disable and re-enable preemption, respectively. +This means that if there was a preemption attempt during the +RCU-sched read-side critical section, rcu_read_unlock_sched() +will enter the scheduler, with all the latency and overhead entailed. +Just as with rcu_read_unlock_bh(), this can make it look +as if rcu_read_unlock_sched() was executing very slowly. +However, the highest-priority task won't be preempted, so that task +will enjoy low-overhead rcu_read_unlock_sched() invocations. + +

+The +RCU-sched API +includes +rcu_read_lock_sched(), +rcu_read_unlock_sched(), +rcu_read_lock_sched_notrace(), +rcu_read_unlock_sched_notrace(), +rcu_dereference_sched(), +rcu_dereference_sched_check(), +synchronize_sched(), +synchronize_rcu_sched_expedited(), +call_rcu_sched(), +rcu_barrier_sched(), and +rcu_read_lock_sched_held(). +However, anything that disables preemption also marks an RCU-sched +read-side critical section, including +preempt_disable() and preempt_enable(), +local_irq_save() and local_irq_restore(), +and so on. + +

Sleepable RCU

+ +

+For well over a decade, someone saying “I need to block within +an RCU read-side critical section” was a reliable indication +that this someone did not understand RCU. +After all, if you are always blocking in an RCU read-side critical +section, you can probably afford to use a higher-overhead synchronization +mechanism. +However, that changed with the advent of the Linux kernel's notifiers, +whose RCU read-side critical +sections almost never sleep, but sometimes need to. +This resulted in the introduction of +sleepable RCU, +or SRCU. + +

+SRCU allows different domains to be defined, with each such domain +defined by an instance of an srcu_struct structure. +A pointer to this structure must be passed in to each SRCU function, +for example, synchronize_srcu(&ss), where +ss is the srcu_struct structure. +The key benefit of these domains is that a slow SRCU reader in one +domain does not delay an SRCU grace period in some other domain. +That said, one consequence of these domains is that read-side code +must pass a “cookie” from srcu_read_lock() +to srcu_read_unlock(), for example, as follows: + +

+
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&ss, idx);
+
+
+ +

+As noted above, it is legal to block within SRCU read-side critical sections, +however, with great power comes great responsibility. +If you block forever in one of a given domain's SRCU read-side critical +sections, then that domain's grace periods will also be blocked forever. +Of course, one good way to block forever is to deadlock, which can +happen if any operation in a given domain's SRCU read-side critical +section can block waiting, either directly or indirectly, for that domain's +grace period to elapse. +For example, this results in a self-deadlock: + +

+
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&ss);
+ 4 do_something();
+ 5 synchronize_srcu(&ss);
+ 6 srcu_read_unlock(&ss, idx);
+
+
+ +

+However, if line 5 acquired a mutex that was held across +a synchronize_srcu() for domain ss, +deadlock would still be possible. +Furthermore, if line 5 acquired a mutex that was held across +a synchronize_srcu() for some other domain ss1, +and if an ss1-domain SRCU read-side critical section +acquired another mutex that was held across as ss-domain +synchronize_srcu(), +deadlock would again be possible. +Such a deadlock cycle could extend across an arbitrarily large number +of different SRCU domains. +Again, with great power comes great responsibility. + +

+Unlike the other RCU flavors, SRCU read-side critical sections can +run on idle and even offline CPUs. +This ability requires that srcu_read_lock() and +srcu_read_unlock() contain memory barriers, which means +that SRCU readers will run a bit slower than would RCU readers. +It also motivates the smp_mb__after_srcu_read_unlock() +API, which, in combination with srcu_read_unlock(), +guarantees a full memory barrier. + +

+The +SRCU API +includes +srcu_read_lock(), +srcu_read_unlock(), +srcu_dereference(), +srcu_dereference_check(), +synchronize_srcu(), +synchronize_srcu_expedited(), +call_srcu(), +srcu_barrier(), and +srcu_read_lock_held(). +It also includes +DEFINE_SRCU(), +DEFINE_STATIC_SRCU(), and +init_srcu_struct() +APIs for defining and initializing srcu_struct structures. + +

Tasks RCU

+ +

+Some forms of tracing use “tramopolines” to handle the +binary rewriting required to install different types of probes. +It would be good to be able to free old trampolines, which sounds +like a job for some form of RCU. +However, because it is necessary to be able to install a trace +anywhere in the code, it is not possible to use read-side markers +such as rcu_read_lock() and rcu_read_unlock(). +In addition, it does not work to have these markers in the trampoline +itself, because there would need to be instructions following +rcu_read_unlock(). +Although synchronize_rcu() would guarantee that execution +reached the rcu_read_unlock(), it would not be able to +guarantee that execution had completely left the trampoline. + +

+The solution, in the form of +Tasks RCU, +is to have implicit +read-side critical sections that are delimited by voluntary context +switches, that is, calls to schedule(), +cond_resched_rcu_qs(), and +synchronize_rcu_tasks(). +In addition, transitions to and from userspace execution also delimit +tasks-RCU read-side critical sections. + +

+The tasks-RCU API is quite compact, consisting only of +call_rcu_tasks(), +synchronize_rcu_tasks(), and +rcu_barrier_tasks(). + +

Possible Future Changes

+ +

+One of the tricks that RCU uses to attain update-side scalability is +to increase grace-period latency with increasing numbers of CPUs. +If this becomes a serious problem, it will be necessary to rework the +grace-period state machine so as to avoid the need for the additional +latency. + +

+Expedited grace periods scan the CPUs, so their latency and overhead +increases with increasing numbers of CPUs. +If this becomes a serious problem on large systems, it will be necessary +to do some redesign to avoid this scalability problem. + +

+RCU disables CPU hotplug in a few places, perhaps most notably in the +expedited grace-period and rcu_barrier() operations. +If there is a strong reason to use expedited grace periods in CPU-hotplug +notifiers, it will be necessary to avoid disabling CPU hotplug. +This would introduce some complexity, so there had better be a very +good reason. + +

+The tradeoff between grace-period latency on the one hand and interruptions +of other CPUs on the other hand may need to be re-examined. +The desire is of course for zero grace-period latency as well as zero +interprocessor interrupts undertaken during an expedited grace period +operation. +While this ideal is unlikely to be achievable, it is quite possible that +further improvements can be made. + +

+The multiprocessor implementations of RCU use a combining tree that +groups CPUs so as to reduce lock contention and increase cache locality. +However, this combining tree does not spread its memory across NUMA +nodes nor does it align the CPU groups with hardware features such +as sockets or cores. +Such spreading and alignment is currently believed to be unnecessary +because the hotpath read-side primitives do not access the combining +tree, nor does call_rcu() in the common case. +If you believe that your architecture needs such spreading and alignment, +then your architecture should also benefit from the +rcutree.rcu_fanout_leaf boot parameter, which can be set +to the number of CPUs in a socket, NUMA node, or whatever. +If the number of CPUs is too large, use a fraction of the number of +CPUs. +If the number of CPUs is a large prime number, well, that certainly +is an “interesting” architectural choice! +More flexible arrangements might be considered, but only if +rcutree.rcu_fanout_leaf has proven inadequate, and only +if the inadequacy has been demonstrated by a carefully run and +realistic system-level workload. + +

+Please note that arrangements that require RCU to remap CPU numbers will +require extremely good demonstration of need and full exploration of +alternatives. + +

+There is an embarrassingly large number of flavors of RCU, and this +number has been increasing over time. +Perhaps it will be possible to combine some at some future date. + +

+RCU's various kthreads are reasonably recent additions. +It is quite likely that adjustments will be required to more gracefully +handle extreme loads. +It might also be necessary to be able to relate CPU utilization by +RCU's kthreads and softirq handlers to the code that instigated this +CPU utilization. +For example, RCU callback overhead might be charged back to the +originating call_rcu() instance, though probably not +in production kernels. + +

Summary

+ +

+This document has presented more than two decade's worth of RCU +requirements. +Given that the requirements keep changing, this will not be the last +word on this subject, but at least it serves to get an important +subset of the requirements set forth. + +

Acknowledgments

+ +I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar, +Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and +Andy Lutomirski for their help in rendering +this article human readable, and to Michelle Rankin for her support +of this effort. +Other contributions are acknowledged in the Linux kernel's git archive. +The cartoon is copyright (c) 2013 by Melissa Broussard, +and is provided +under the terms of the Creative Commons Attribution-Share Alike 3.0 +United States license. + +

+Answers to Quick Quizzes

+ + +

Quick Quiz 1: +Wait a minute! +You said that updaters can make useful forward progress concurrently +with readers, but pre-existing readers will block +synchronize_rcu()!!! +Just who are you trying to fool??? + + +

Answer: +First, if updaters do not wish to be blocked by readers, they can use +call_rcu() or kfree_rcu(), which will +be discussed later. +Second, even when using synchronize_rcu(), the other +update-side code does run concurrently with readers, whether pre-existing +or not. + + +

Back to Quick Quiz 1. + + +

Quick Quiz 2: +Why is the synchronize_rcu() on line 28 needed? + + +

Answer: +Without that extra grace period, memory reordering could result in +do_something_dlm() executing do_something() +concurrently with the last bits of recovery(). + + +

Back to Quick Quiz 2. + + +

Quick Quiz 3: +But rcu_assign_pointer() does nothing to prevent the +two assignments to p->a and p->b +from being reordered. +Can't that also cause problems? + + +

Answer: +No, it cannot. +The readers cannot see either of these two fields until +the assignment to gp, by which time both fields are +fully initialized. +So reordering the assignments +to p->a and p->b cannot possibly +cause any problems. + + +

Back to Quick Quiz 3. + + +

Quick Quiz 4: +Without the rcu_dereference() or the +rcu_access_pointer(), what destructive optimizations +might the compiler make use of? + + +

Answer: +Let's start with what happens to do_something_gp() +if it fails to use rcu_dereference(). +It could reuse a value formerly fetched from this same pointer. +It could also fetch the pointer from gp in a byte-at-a-time +manner, resulting in load tearing, in turn resulting a bytewise +mash-up of two distince pointer values. +It might even use value-speculation optimizations, where it makes a wrong +guess, but by the time it gets around to checking the value, an update +has changed the pointer to match the wrong guess. +Too bad about any dereferences that returned pre-initialization garbage +in the meantime! + +

+For remove_gp_synchronous(), as long as all modifications +to gp are carried out while holding gp_lock, +the above optimizations are harmless. +However, +with CONFIG_SPARSE_RCU_POINTER=y, +sparse will complain if you +define gp with __rcu and then +access it without using +either rcu_access_pointer() or rcu_dereference(). + + +

Back to Quick Quiz 4. + + +

Quick Quiz 5: +Given that multiple CPUs can start RCU read-side critical sections +at any time without any ordering whatsoever, how can RCU possibly tell whether +or not a given RCU read-side critical section starts before a +given instance of synchronize_rcu()? + + +

Answer: +If RCU cannot tell whether or not a given +RCU read-side critical section starts before a +given instance of synchronize_rcu(), +then it must assume that the RCU read-side critical section +started first. +In other words, a given instance of synchronize_rcu() +can avoid waiting on a given RCU read-side critical section only +if it can prove that synchronize_rcu() started first. + + +

Back to Quick Quiz 5. + + +

Quick Quiz 6: +The first and second guarantees require unbelievably strict ordering! +Are all these memory barriers really required? + + +

Answer: +Yes, they really are required. +To see why the first guarantee is required, consider the following +sequence of events: + +

    +
  1. CPU 1: rcu_read_lock() +
  2. CPU 1: q = rcu_dereference(gp); + /* Very likely to return p. */ +
  3. CPU 0: list_del_rcu(p); +
  4. CPU 0: synchronize_rcu() starts. +
  5. CPU 1: do_something_with(q->a); + /* No smp_mb(), so might happen after kfree(). */ +
  6. CPU 1: rcu_read_unlock() +
  7. CPU 0: synchronize_rcu() returns. +
  8. CPU 0: kfree(p); +
+ +

+Therefore, there absolutely must be a full memory barrier between the +end of the RCU read-side critical section and the end of the +grace period. + +

+The sequence of events demonstrating the necessity of the second rule +is roughly similar: + +

    +
  1. CPU 0: list_del_rcu(p); +
  2. CPU 0: synchronize_rcu() starts. +
  3. CPU 1: rcu_read_lock() +
  4. CPU 1: q = rcu_dereference(gp); + /* Might return p if no memory barrier. */ +
  5. CPU 0: synchronize_rcu() returns. +
  6. CPU 0: kfree(p); +
  7. CPU 1: do_something_with(q->a); /* Boom!!! */ +
  8. CPU 1: rcu_read_unlock() +
+ +

+And similarly, without a memory barrier between the beginning of the +grace period and the beginning of the RCU read-side critical section, +CPU 1 might end up accessing the freelist. + +

+The “as if” rule of course applies, so that any implementation +that acts as if the appropriate memory barriers were in place is a +correct implementation. +That said, it is much easier to fool yourself into believing that you have +adhered to the as-if rule than it is to actually adhere to it! + + +

Back to Quick Quiz 6. + + +

Quick Quiz 7: +But how does the upgrade-to-write operation exclude other readers? + + +

Answer: +It doesn't, just like normal RCU updates, which also do not exclude +RCU readers. + + +

Back to Quick Quiz 7. + + +

Quick Quiz 8: +Can't the compiler also reorder this code? + + +

Answer: +No, the volatile casts in READ_ONCE() and +WRITE_ONCE() prevent the compiler from reordering in +this particular case. + + +

Back to Quick Quiz 8. + + +

Quick Quiz 9: +Suppose that synchronize_rcu() did wait until all readers had completed. +Would the updater be able to rely on this? + + +

Answer: +No. +Even if synchronize_rcu() were to wait until +all readers had completed, a new reader might start immediately after +synchronize_rcu() completed. +Therefore, the code following +synchronize_rcu() cannot rely on there being no readers +in any case. + + +

Back to Quick Quiz 9. + + +

Quick Quiz 10: +How long a sequence of grace periods, each separated by an RCU read-side +critical section, would be required to partition the RCU read-side +critical sections at the beginning and end of the chain? + + +

Answer: +In theory, an infinite number. +In practice, an unknown number that is sensitive to both implementation +details and timing considerations. +Therefore, even in practice, RCU users must abide by the theoretical rather +than the practical answer. + + +

Back to Quick Quiz 10. + + +

Quick Quiz 11: +What about sleeping locks? + + +

Answer: +These are forbidden within Linux-kernel RCU read-side critical sections +because it is not legal to place a quiescent state (in this case, +voluntary context switch) within an RCU read-side critical section. +However, sleeping locks may be used within userspace RCU read-side critical +sections, and also within Linux-kernel sleepable RCU +(SRCU) +read-side critical sections. +In addition, the -rt patchset turns spinlocks into a sleeping locks so +that the corresponding critical sections can be preempted, which +also means that these sleeplockified spinlocks (but not other sleeping locks!) +may be acquire within -rt-Linux-kernel RCU read-side critical sections. + +

+Note that it is legal for a normal RCU read-side critical section +to conditionally acquire a sleeping locks (as in mutex_trylock()), +but only as long as it does not loop indefinitely attempting to +conditionally acquire that sleeping locks. +The key point is that things like mutex_trylock() +either return with the mutex held, or return an error indication if +the mutex was not immediately available. +Either way, mutex_trylock() returns immediately without sleeping. + + +

Back to Quick Quiz 11. + + +

Quick Quiz 12: +Why does line 19 use rcu_access_pointer()? +After all, call_rcu() on line 25 stores into the +structure, which would interact badly with concurrent insertions. +Doesn't this mean that rcu_dereference() is required? + + +

Answer: +Presumably the ->gp_lock acquired on line 18 excludes +any changes, including any insertions that rcu_dereference() +would protect against. +Therefore, any insertions will be delayed until after ->gp_lock +is released on line 25, which in turn means that +rcu_access_pointer() suffices. + + +

Back to Quick Quiz 12. + + +

Quick Quiz 13: +Earlier it was claimed that call_rcu() and +kfree_rcu() allowed updaters to avoid being blocked +by readers. +But how can that be correct, given that the invocation of the callback +and the freeing of the memory (respectively) must still wait for +a grace period to elapse? + + +

Answer: +We could define things this way, but keep in mind that this sort of +definition would say that updates in garbage-collected languages +cannot complete until the next time the garbage collector runs, +which does not seem at all reasonable. +The key point is that in most cases, an updater using either +call_rcu() or kfree_rcu() can proceed to the +next update as soon as it has invoked call_rcu() or +kfree_rcu(), without having to wait for a subsequent +grace period. + + +

Back to Quick Quiz 13. + + +

Quick Quiz 14: +So what happens with synchronize_rcu() during +scheduler initialization for CONFIG_PREEMPT=n +kernels? + + +

Answer: +In CONFIG_PREEMPT=n kernel, synchronize_rcu() +maps directly to synchronize_sched(). +Therefore, synchronize_rcu() works normally throughout +boot in CONFIG_PREEMPT=n kernels. +However, your code must also work in CONFIG_PREEMPT=y kernels, +so it is still necessary to avoid invoking synchronize_rcu() +during scheduler initialization. + + +

Back to Quick Quiz 14. + + + diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx new file mode 100644 index 0000000000000000000000000000000000000000..3a97ba490c42b03d606c5fe36e5afe1a9b213e28 --- /dev/null +++ b/Documentation/RCU/Design/Requirements/Requirements.htmlx @@ -0,0 +1,2741 @@ + + + A Tour Through RCU's Requirements [LWN.net] + + +

A Tour Through RCU's Requirements

+ +

Copyright IBM Corporation, 2015

+

Author: Paul E. McKenney

+

The initial version of this document appeared in the +LWN articles +here, +here, and +here.

+ +

Introduction

+ +

+Read-copy update (RCU) is a synchronization mechanism that is often +used as a replacement for reader-writer locking. +RCU is unusual in that updaters do not block readers, +which means that RCU's read-side primitives can be exceedingly fast +and scalable. +In addition, updaters can make useful forward progress concurrently +with readers. +However, all this concurrency between RCU readers and updaters does raise +the question of exactly what RCU readers are doing, which in turn +raises the question of exactly what RCU's requirements are. + +

+This document therefore summarizes RCU's requirements, and can be thought +of as an informal, high-level specification for RCU. +It is important to understand that RCU's specification is primarily +empirical in nature; +in fact, I learned about many of these requirements the hard way. +This situation might cause some consternation, however, not only +has this learning process been a lot of fun, but it has also been +a great privilege to work with so many people willing to apply +technologies in interesting new ways. + +

+All that aside, here are the categories of currently known RCU requirements: +

+ +
    +
  1. + Fundamental Requirements +
  2. Fundamental Non-Requirements +
  3. + Parallelism Facts of Life +
  4. + Quality-of-Implementation Requirements +
  5. + Linux Kernel Complications +
  6. + Software-Engineering Requirements +
  7. + Other RCU Flavors +
  8. + Possible Future Changes +
+ +

+This is followed by a summary, +which is in turn followed by the inevitable +answers to the quick quizzes. + +

Fundamental Requirements

+ +

+RCU's fundamental requirements are the closest thing RCU has to hard +mathematical requirements. +These are: + +

    +
  1. + Grace-Period Guarantee +
  2. + Publish-Subscribe Guarantee +
  3. + Memory-Barrier Guarantees +
  4. + RCU Primitives Guaranteed to Execute Unconditionally +
  5. + Guaranteed Read-to-Write Upgrade +
+ +

Grace-Period Guarantee

+ +

+RCU's grace-period guarantee is unusual in being premeditated: +Jack Slingwine and I had this guarantee firmly in mind when we started +work on RCU (then called “rclock”) in the early 1990s. +That said, the past two decades of experience with RCU have produced +a much more detailed understanding of this guarantee. + +

+RCU's grace-period guarantee allows updaters to wait for the completion +of all pre-existing RCU read-side critical sections. +An RCU read-side critical section +begins with the marker rcu_read_lock() and ends with +the marker rcu_read_unlock(). +These markers may be nested, and RCU treats a nested set as one +big RCU read-side critical section. +Production-quality implementations of rcu_read_lock() and +rcu_read_unlock() are extremely lightweight, and in +fact have exactly zero overhead in Linux kernels built for production +use with CONFIG_PREEMPT=n. + +

+This guarantee allows ordering to be enforced with extremely low +overhead to readers, for example: + +

+
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5   rcu_read_lock();
+ 6   r1 = READ_ONCE(x);
+ 7   r2 = READ_ONCE(y);
+ 8   rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13   WRITE_ONCE(x, 1);
+14   synchronize_rcu();
+15   WRITE_ONCE(y, 1);
+16 }
+
+
+ +

+Because the synchronize_rcu() on line 14 waits for +all pre-existing readers, any instance of thread0() that +loads a value of zero from x must complete before +thread1() stores to y, so that instance must +also load a value of zero from y. +Similarly, any instance of thread0() that loads a value of +one from y must have started after the +synchronize_rcu() started, and must therefore also load +a value of one from x. +Therefore, the outcome: +

+
+(r1 == 0 && r2 == 1)
+
+
+cannot happen. + +

@@QQ@@ +Wait a minute! +You said that updaters can make useful forward progress concurrently +with readers, but pre-existing readers will block +synchronize_rcu()!!! +Just who are you trying to fool??? +

@@QQA@@ +First, if updaters do not wish to be blocked by readers, they can use +call_rcu() or kfree_rcu(), which will +be discussed later. +Second, even when using synchronize_rcu(), the other +update-side code does run concurrently with readers, whether pre-existing +or not. +

@@QQE@@ + +

+This scenario resembles one of the first uses of RCU in +DYNIX/ptx, +which managed a distributed lock manager's transition into +a state suitable for handling recovery from node failure, +more or less as follows: + +

+
+ 1 #define STATE_NORMAL        0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING    2
+ 4 #define STATE_WANT_NORMAL   3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+10   int state_snap;
+11
+12   rcu_read_lock();
+13   state_snap = READ_ONCE(state);
+14   if (state_snap == STATE_NORMAL)
+15     do_something();
+16   else
+17     do_something_carefully();
+18   rcu_read_unlock();
+19 }
+20
+21 void start_recovery(void)
+22 {
+23   WRITE_ONCE(state, STATE_WANT_RECOVERY);
+24   synchronize_rcu();
+25   WRITE_ONCE(state, STATE_RECOVERING);
+26   recovery();
+27   WRITE_ONCE(state, STATE_WANT_NORMAL);
+28   synchronize_rcu();
+29   WRITE_ONCE(state, STATE_NORMAL);
+30 }
+
+
+ +

+The RCU read-side critical section in do_something_dlm() +works with the synchronize_rcu() in start_recovery() +to guarantee that do_something() never runs concurrently +with recovery(), but with little or no synchronization +overhead in do_something_dlm(). + +

@@QQ@@ +Why is the synchronize_rcu() on line 28 needed? +

@@QQA@@ +Without that extra grace period, memory reordering could result in +do_something_dlm() executing do_something() +concurrently with the last bits of recovery(). +

@@QQE@@ + +

+In order to avoid fatal problems such as deadlocks, +an RCU read-side critical section must not contain calls to +synchronize_rcu(). +Similarly, an RCU read-side critical section must not +contain anything that waits, directly or indirectly, on completion of +an invocation of synchronize_rcu(). + +

+Although RCU's grace-period guarantee is useful in and of itself, with +quite a few use cases, +it would be good to be able to use RCU to coordinate read-side +access to linked data structures. +For this, the grace-period guarantee is not sufficient, as can +be seen in function add_gp_buggy() below. +We will look at the reader's code later, but in the meantime, just think of +the reader as locklessly picking up the gp pointer, +and, if the value loaded is non-NULL, locklessly accessing the +->a and ->b fields. + +

+
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4   if (!p)
+ 5     return -ENOMEM;
+ 6   spin_lock(&gp_lock);
+ 7   if (rcu_access_pointer(gp)) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   p->a = a;
+12   p->b = a;
+13   gp = p; /* ORDERING BUG */
+14   spin_unlock(&gp_lock);
+15   return true;
+16 }
+
+
+ +

+The problem is that both the compiler and weakly ordered CPUs are within +their rights to reorder this code as follows: + +

+
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4   if (!p)
+ 5     return -ENOMEM;
+ 6   spin_lock(&gp_lock);
+ 7   if (rcu_access_pointer(gp)) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   gp = p; /* ORDERING BUG */
+12   p->a = a;
+13   p->b = a;
+14   spin_unlock(&gp_lock);
+15   return true;
+16 }
+
+
+ +

+If an RCU reader fetches gp just after +add_gp_buggy_optimized executes line 11, +it will see garbage in the ->a and ->b +fields. +And this is but one of many ways in which compiler and hardware optimizations +could cause trouble. +Therefore, we clearly need some way to prevent the compiler and the CPU from +reordering in this manner, which brings us to the publish-subscribe +guarantee discussed in the next section. + +

Publish/Subscribe Guarantee

+ +

+RCU's publish-subscribe guarantee allows data to be inserted +into a linked data structure without disrupting RCU readers. +The updater uses rcu_assign_pointer() to insert the +new data, and readers use rcu_dereference() to +access data, whether new or old. +The following shows an example of insertion: + +

+
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4   if (!p)
+ 5     return -ENOMEM;
+ 6   spin_lock(&gp_lock);
+ 7   if (rcu_access_pointer(gp)) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   p->a = a;
+12   p->b = a;
+13   rcu_assign_pointer(gp, p);
+14   spin_unlock(&gp_lock);
+15   return true;
+16 }
+
+
+ +

+The rcu_assign_pointer() on line 13 is conceptually +equivalent to a simple assignment statement, but also guarantees +that its assignment will +happen after the two assignments in lines 11 and 12, +similar to the C11 memory_order_release store operation. +It also prevents any number of “interesting” compiler +optimizations, for example, the use of gp as a scratch +location immediately preceding the assignment. + +

@@QQ@@ +But rcu_assign_pointer() does nothing to prevent the +two assignments to p->a and p->b +from being reordered. +Can't that also cause problems? +

@@QQA@@ +No, it cannot. +The readers cannot see either of these two fields until +the assignment to gp, by which time both fields are +fully initialized. +So reordering the assignments +to p->a and p->b cannot possibly +cause any problems. +

@@QQE@@ + +

+It is tempting to assume that the reader need not do anything special +to control its accesses to the RCU-protected data, +as shown in do_something_gp_buggy() below: + +

+
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   p = gp;  /* OPTIMIZATIONS GALORE!!! */
+ 5   if (p) {
+ 6     do_something(p->a, p->b);
+ 7     rcu_read_unlock();
+ 8     return true;
+ 9   }
+10   rcu_read_unlock();
+11   return false;
+12 }
+
+
+ +

+However, this temptation must be resisted because there are a +surprisingly large number of ways that the compiler +(to say nothing of +DEC Alpha CPUs) +can trip this code up. +For but one example, if the compiler were short of registers, it +might choose to refetch from gp rather than keeping +a separate copy in p as follows: + +

+
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   if (gp) { /* OPTIMIZATIONS GALORE!!! */
+ 5     do_something(gp->a, gp->b);
+ 6     rcu_read_unlock();
+ 7     return true;
+ 8   }
+ 9   rcu_read_unlock();
+10   return false;
+11 }
+
+
+ +

+If this function ran concurrently with a series of updates that +replaced the current structure with a new one, +the fetches of gp->a +and gp->b might well come from two different structures, +which could cause serious confusion. +To prevent this (and much else besides), do_something_gp() uses +rcu_dereference() to fetch from gp: + +

+
+ 1 bool do_something_gp(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   p = rcu_dereference(gp);
+ 5   if (p) {
+ 6     do_something(p->a, p->b);
+ 7     rcu_read_unlock();
+ 8     return true;
+ 9   }
+10   rcu_read_unlock();
+11   return false;
+12 }
+
+
+ +

+The rcu_dereference() uses volatile casts and (for DEC Alpha) +memory barriers in the Linux kernel. +Should a +high-quality implementation of C11 memory_order_consume [PDF] +ever appear, then rcu_dereference() could be implemented +as a memory_order_consume load. +Regardless of the exact implementation, a pointer fetched by +rcu_dereference() may not be used outside of the +outermost RCU read-side critical section containing that +rcu_dereference(), unless protection of +the corresponding data element has been passed from RCU to some +other synchronization mechanism, most commonly locking or +reference counting. + +

+In short, updaters use rcu_assign_pointer() and readers +use rcu_dereference(), and these two RCU API elements +work together to ensure that readers have a consistent view of +newly added data elements. + +

+Of course, it is also necessary to remove elements from RCU-protected +data structures, for example, using the following process: + +

    +
  1. Remove the data element from the enclosing structure. +
  2. Wait for all pre-existing RCU read-side critical sections + to complete (because only pre-existing readers can possibly have + a reference to the newly removed data element). +
  3. At this point, only the updater has a reference to the + newly removed data element, so it can safely reclaim + the data element, for example, by passing it to kfree(). +
+ +This process is implemented by remove_gp_synchronous(): + +
+
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3   struct foo *p;
+ 4
+ 5   spin_lock(&gp_lock);
+ 6   p = rcu_access_pointer(gp);
+ 7   if (!p) {
+ 8     spin_unlock(&gp_lock);
+ 9     return false;
+10   }
+11   rcu_assign_pointer(gp, NULL);
+12   spin_unlock(&gp_lock);
+13   synchronize_rcu();
+14   kfree(p);
+15   return true;
+16 }
+
+
+ +

+This function is straightforward, with line 13 waiting for a grace +period before line 14 frees the old data element. +This waiting ensures that readers will reach line 7 of +do_something_gp() before the data element referenced by +p is freed. +The rcu_access_pointer() on line 6 is similar to +rcu_dereference(), except that: + +

    +
  1. The value returned by rcu_access_pointer() + cannot be dereferenced. + If you want to access the value pointed to as well as + the pointer itself, use rcu_dereference() + instead of rcu_access_pointer(). +
  2. The call to rcu_access_pointer() need not be + protected. + In contrast, rcu_dereference() must either be + within an RCU read-side critical section or in a code + segment where the pointer cannot change, for example, in + code protected by the corresponding update-side lock. +
+ +

@@QQ@@ +Without the rcu_dereference() or the +rcu_access_pointer(), what destructive optimizations +might the compiler make use of? +

@@QQA@@ +Let's start with what happens to do_something_gp() +if it fails to use rcu_dereference(). +It could reuse a value formerly fetched from this same pointer. +It could also fetch the pointer from gp in a byte-at-a-time +manner, resulting in load tearing, in turn resulting a bytewise +mash-up of two distince pointer values. +It might even use value-speculation optimizations, where it makes a wrong +guess, but by the time it gets around to checking the value, an update +has changed the pointer to match the wrong guess. +Too bad about any dereferences that returned pre-initialization garbage +in the meantime! + +

+For remove_gp_synchronous(), as long as all modifications +to gp are carried out while holding gp_lock, +the above optimizations are harmless. +However, +with CONFIG_SPARSE_RCU_POINTER=y, +sparse will complain if you +define gp with __rcu and then +access it without using +either rcu_access_pointer() or rcu_dereference(). +

@@QQE@@ + +

+In short, RCU's publish-subscribe guarantee is provided by the combination +of rcu_assign_pointer() and rcu_dereference(). +This guarantee allows data elements to be safely added to RCU-protected +linked data structures without disrupting RCU readers. +This guarantee can be used in combination with the grace-period +guarantee to also allow data elements to be removed from RCU-protected +linked data structures, again without disrupting RCU readers. + +

+This guarantee was only partially premeditated. +DYNIX/ptx used an explicit memory barrier for publication, but had nothing +resembling rcu_dereference() for subscription, nor did it +have anything resembling the smp_read_barrier_depends() +that was later subsumed into rcu_dereference(). +The need for these operations made itself known quite suddenly at a +late-1990s meeting with the DEC Alpha architects, back in the days when +DEC was still a free-standing company. +It took the Alpha architects a good hour to convince me that any sort +of barrier would ever be needed, and it then took me a good two hours +to convince them that their documentation did not make this point clear. +More recent work with the C and C++ standards committees have provided +much education on tricks and traps from the compiler. +In short, compilers were much less tricky in the early 1990s, but in +2015, don't even think about omitting rcu_dereference()! + +

Memory-Barrier Guarantees

+ +

+The previous section's simple linked-data-structure scenario clearly +demonstrates the need for RCU's stringent memory-ordering guarantees on +systems with more than one CPU: + +

    +
  1. Each CPU that has an RCU read-side critical section that + begins before synchronize_rcu() starts is + guaranteed to execute a full memory barrier between the time + that the RCU read-side critical section ends and the time that + synchronize_rcu() returns. + Without this guarantee, a pre-existing RCU read-side critical section + might hold a reference to the newly removed struct foo + after the kfree() on line 14 of + remove_gp_synchronous(). +
  2. Each CPU that has an RCU read-side critical section that ends + after synchronize_rcu() returns is guaranteed + to execute a full memory barrier between the time that + synchronize_rcu() begins and the time that the RCU + read-side critical section begins. + Without this guarantee, a later RCU read-side critical section + running after the kfree() on line 14 of + remove_gp_synchronous() might + later run do_something_gp() and find the + newly deleted struct foo. +
  3. If the task invoking synchronize_rcu() remains + on a given CPU, then that CPU is guaranteed to execute a full + memory barrier sometime during the execution of + synchronize_rcu(). + This guarantee ensures that the kfree() on + line 14 of remove_gp_synchronous() really does + execute after the removal on line 11. +
  4. If the task invoking synchronize_rcu() migrates + among a group of CPUs during that invocation, then each of the + CPUs in that group is guaranteed to execute a full memory barrier + sometime during the execution of synchronize_rcu(). + This guarantee also ensures that the kfree() on + line 14 of remove_gp_synchronous() really does + execute after the removal on + line 11, but also in the case where the thread executing the + synchronize_rcu() migrates in the meantime. +
+ +

@@QQ@@ +Given that multiple CPUs can start RCU read-side critical sections +at any time without any ordering whatsoever, how can RCU possibly tell whether +or not a given RCU read-side critical section starts before a +given instance of synchronize_rcu()? +

@@QQA@@ +If RCU cannot tell whether or not a given +RCU read-side critical section starts before a +given instance of synchronize_rcu(), +then it must assume that the RCU read-side critical section +started first. +In other words, a given instance of synchronize_rcu() +can avoid waiting on a given RCU read-side critical section only +if it can prove that synchronize_rcu() started first. +

@@QQE@@ + +

@@QQ@@ +The first and second guarantees require unbelievably strict ordering! +Are all these memory barriers really required? +

@@QQA@@ +Yes, they really are required. +To see why the first guarantee is required, consider the following +sequence of events: + +

    +
  1. CPU 1: rcu_read_lock() +
  2. CPU 1: q = rcu_dereference(gp); + /* Very likely to return p. */ +
  3. CPU 0: list_del_rcu(p); +
  4. CPU 0: synchronize_rcu() starts. +
  5. CPU 1: do_something_with(q->a); + /* No smp_mb(), so might happen after kfree(). */ +
  6. CPU 1: rcu_read_unlock() +
  7. CPU 0: synchronize_rcu() returns. +
  8. CPU 0: kfree(p); +
+ +

+Therefore, there absolutely must be a full memory barrier between the +end of the RCU read-side critical section and the end of the +grace period. + +

+The sequence of events demonstrating the necessity of the second rule +is roughly similar: + +

    +
  1. CPU 0: list_del_rcu(p); +
  2. CPU 0: synchronize_rcu() starts. +
  3. CPU 1: rcu_read_lock() +
  4. CPU 1: q = rcu_dereference(gp); + /* Might return p if no memory barrier. */ +
  5. CPU 0: synchronize_rcu() returns. +
  6. CPU 0: kfree(p); +
  7. CPU 1: do_something_with(q->a); /* Boom!!! */ +
  8. CPU 1: rcu_read_unlock() +
+ +

+And similarly, without a memory barrier between the beginning of the +grace period and the beginning of the RCU read-side critical section, +CPU 1 might end up accessing the freelist. + +

+The “as if” rule of course applies, so that any implementation +that acts as if the appropriate memory barriers were in place is a +correct implementation. +That said, it is much easier to fool yourself into believing that you have +adhered to the as-if rule than it is to actually adhere to it! +

@@QQE@@ + +

+Note that these memory-barrier requirements do not replace the fundamental +RCU requirement that a grace period wait for all pre-existing readers. +On the contrary, the memory barriers called out in this section must operate in +such a way as to enforce this fundamental requirement. +Of course, different implementations enforce this requirement in different +ways, but enforce it they must. + +

RCU Primitives Guaranteed to Execute Unconditionally

+ +

+The common-case RCU primitives are unconditional. +They are invoked, they do their job, and they return, with no possibility +of error, and no need to retry. +This is a key RCU design philosophy. + +

+However, this philosophy is pragmatic rather than pigheaded. +If someone comes up with a good justification for a particular conditional +RCU primitive, it might well be implemented and added. +After all, this guarantee was reverse-engineered, not premeditated. +The unconditional nature of the RCU primitives was initially an +accident of implementation, and later experience with synchronization +primitives with conditional primitives caused me to elevate this +accident to a guarantee. +Therefore, the justification for adding a conditional primitive to +RCU would need to be based on detailed and compelling use cases. + +

Guaranteed Read-to-Write Upgrade

+ +

+As far as RCU is concerned, it is always possible to carry out an +update within an RCU read-side critical section. +For example, that RCU read-side critical section might search for +a given data element, and then might acquire the update-side +spinlock in order to update that element, all while remaining +in that RCU read-side critical section. +Of course, it is necessary to exit the RCU read-side critical section +before invoking synchronize_rcu(), however, this +inconvenience can be avoided through use of the +call_rcu() and kfree_rcu() API members +described later in this document. + +

@@QQ@@ +But how does the upgrade-to-write operation exclude other readers? +

@@QQA@@ +It doesn't, just like normal RCU updates, which also do not exclude +RCU readers. +

@@QQE@@ + +

+This guarantee allows lookup code to be shared between read-side +and update-side code, and was premeditated, appearing in the earliest +DYNIX/ptx RCU documentation. + +

Fundamental Non-Requirements

+ +

+RCU provides extremely lightweight readers, and its read-side guarantees, +though quite useful, are correspondingly lightweight. +It is therefore all too easy to assume that RCU is guaranteeing more +than it really is. +Of course, the list of things that RCU does not guarantee is infinitely +long, however, the following sections list a few non-guarantees that +have caused confusion. +Except where otherwise noted, these non-guarantees were premeditated. + +

    +
  1. + Readers Impose Minimal Ordering +
  2. + Readers Do Not Exclude Updaters +
  3. + Updaters Only Wait For Old Readers +
  4. + Grace Periods Don't Partition Read-Side Critical Sections +
  5. + Read-Side Critical Sections Don't Partition Grace Periods +
  6. + Disabling Preemption Does Not Block Grace Periods +
+ +

Readers Impose Minimal Ordering

+ +

+Reader-side markers such as rcu_read_lock() and +rcu_read_unlock() provide absolutely no ordering guarantees +except through their interaction with the grace-period APIs such as +synchronize_rcu(). +To see this, consider the following pair of threads: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(x, 1);
+ 5   rcu_read_unlock();
+ 6   rcu_read_lock();
+ 7   WRITE_ONCE(y, 1);
+ 8   rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13   rcu_read_lock();
+14   r1 = READ_ONCE(y);
+15   rcu_read_unlock();
+16   rcu_read_lock();
+17   r2 = READ_ONCE(x);
+18   rcu_read_unlock();
+19 }
+
+
+ +

+After thread0() and thread1() execute +concurrently, it is quite possible to have + +

+
+(r1 == 1 && r2 == 0)
+
+
+ +(that is, y appears to have been assigned before x), +which would not be possible if rcu_read_lock() and +rcu_read_unlock() had much in the way of ordering +properties. +But they do not, so the CPU is within its rights +to do significant reordering. +This is by design: Any significant ordering constraints would slow down +these fast-path APIs. + +

@@QQ@@ +Can't the compiler also reorder this code? +

@@QQA@@ +No, the volatile casts in READ_ONCE() and +WRITE_ONCE() prevent the compiler from reordering in +this particular case. +

@@QQE@@ + +

Readers Do Not Exclude Updaters

+ +

+Neither rcu_read_lock() nor rcu_read_unlock() +exclude updates. +All they do is to prevent grace periods from ending. +The following example illustrates this: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   r1 = READ_ONCE(y);
+ 5   if (r1) {
+ 6     do_something_with_nonzero_x();
+ 7     r2 = READ_ONCE(x);
+ 8     WARN_ON(!r2); /* BUG!!! */
+ 9   }
+10   rcu_read_unlock();
+11 }
+12
+13 void thread1(void)
+14 {
+15   spin_lock(&my_lock);
+16   WRITE_ONCE(x, 1);
+17   WRITE_ONCE(y, 1);
+18   spin_unlock(&my_lock);
+19 }
+
+
+ +

+If the thread0() function's rcu_read_lock() +excluded the thread1() function's update, +the WARN_ON() could never fire. +But the fact is that rcu_read_lock() does not exclude +much of anything aside from subsequent grace periods, of which +thread1() has none, so the +WARN_ON() can and does fire. + +

Updaters Only Wait For Old Readers

+ +

+It might be tempting to assume that after synchronize_rcu() +completes, there are no readers executing. +This temptation must be avoided because +new readers can start immediately after synchronize_rcu() +starts, and synchronize_rcu() is under no +obligation to wait for these new readers. + +

@@QQ@@ +Suppose that synchronize_rcu() did wait until all readers had completed. +Would the updater be able to rely on this? +

@@QQA@@ +No. +Even if synchronize_rcu() were to wait until +all readers had completed, a new reader might start immediately after +synchronize_rcu() completed. +Therefore, the code following +synchronize_rcu() cannot rely on there being no readers +in any case. +

@@QQE@@ + +

+Grace Periods Don't Partition Read-Side Critical Sections

+ +

+It is tempting to assume that if any part of one RCU read-side critical +section precedes a given grace period, and if any part of another RCU +read-side critical section follows that same grace period, then all of +the first RCU read-side critical section must precede all of the second. +However, this just isn't the case: A single grace period does not +partition the set of RCU read-side critical sections. +An example of this situation can be illustrated as follows, where +x, y, and z are initially all zero: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(a, 1);
+ 5   WRITE_ONCE(b, 1);
+ 6   rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11   r1 = READ_ONCE(a);
+12   synchronize_rcu();
+13   WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18   rcu_read_lock();
+19   r2 = READ_ONCE(b);
+20   r3 = READ_ONCE(c);
+21   rcu_read_unlock();
+22 }
+
+
+ +

+It turns out that the outcome: + +

+
+(r1 == 1 && r2 == 0 && r3 == 1)
+
+
+ +is entirely possible. +The following figure show how this can happen, with each circled +QS indicating the point at which RCU recorded a +quiescent state for each thread, that is, a state in which +RCU knows that the thread cannot be in the midst of an RCU read-side +critical section that started before the current grace period: + +

GPpartitionReaders1.svg

+ +

+If it is necessary to partition RCU read-side critical sections in this +manner, it is necessary to use two grace periods, where the first +grace period is known to end before the second grace period starts: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(a, 1);
+ 5   WRITE_ONCE(b, 1);
+ 6   rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11   r1 = READ_ONCE(a);
+12   synchronize_rcu();
+13   WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18   r2 = READ_ONCE(c);
+19   synchronize_rcu();
+20   WRITE_ONCE(d, 1);
+21 }
+22
+23 void thread3(void)
+24 {
+25   rcu_read_lock();
+26   r3 = READ_ONCE(b);
+27   r4 = READ_ONCE(d);
+28   rcu_read_unlock();
+29 }
+
+
+ +

+Here, if (r1 == 1), then +thread0()'s write to b must happen +before the end of thread1()'s grace period. +If in addition (r4 == 1), then +thread3()'s read from b must happen +after the beginning of thread2()'s grace period. +If it is also the case that (r2 == 1), then the +end of thread1()'s grace period must precede the +beginning of thread2()'s grace period. +This mean that the two RCU read-side critical sections cannot overlap, +guaranteeing that (r3 == 1). +As a result, the outcome: + +

+
+(r1 == 1 && r2 == 1 && r3 == 0 && r4 == 1)
+
+
+ +cannot happen. + +

+This non-requirement was also non-premeditated, but became apparent +when studying RCU's interaction with memory ordering. + +

+Read-Side Critical Sections Don't Partition Grace Periods

+ +

+It is also tempting to assume that if an RCU read-side critical section +happens between a pair of grace periods, then those grace periods cannot +overlap. +However, this temptation leads nowhere good, as can be illustrated by +the following, with all variables initially zero: + +

+
+ 1 void thread0(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   WRITE_ONCE(a, 1);
+ 5   WRITE_ONCE(b, 1);
+ 6   rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11   r1 = READ_ONCE(a);
+12   synchronize_rcu();
+13   WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18   rcu_read_lock();
+19   WRITE_ONCE(d, 1);
+20   r2 = READ_ONCE(c);
+21   rcu_read_unlock();
+22 }
+23
+24 void thread3(void)
+25 {
+26   r3 = READ_ONCE(d);
+27   synchronize_rcu();
+28   WRITE_ONCE(e, 1);
+29 }
+30
+31 void thread4(void)
+32 {
+33   rcu_read_lock();
+34   r4 = READ_ONCE(b);
+35   r5 = READ_ONCE(e);
+36   rcu_read_unlock();
+37 }
+
+
+ +

+In this case, the outcome: + +

+
+(r1 == 1 && r2 == 1 && r3 == 1 && r4 == 0 && r5 == 1)
+
+
+ +is entirely possible, as illustrated below: + +

ReadersPartitionGP1.svg

+ +

+Again, an RCU read-side critical section can overlap almost all of a +given grace period, just so long as it does not overlap the entire +grace period. +As a result, an RCU read-side critical section cannot partition a pair +of RCU grace periods. + +

@@QQ@@ +How long a sequence of grace periods, each separated by an RCU read-side +critical section, would be required to partition the RCU read-side +critical sections at the beginning and end of the chain? +

@@QQA@@ +In theory, an infinite number. +In practice, an unknown number that is sensitive to both implementation +details and timing considerations. +Therefore, even in practice, RCU users must abide by the theoretical rather +than the practical answer. +

@@QQE@@ + +

+Disabling Preemption Does Not Block Grace Periods

+ +

+There was a time when disabling preemption on any given CPU would block +subsequent grace periods. +However, this was an accident of implementation and is not a requirement. +And in the current Linux-kernel implementation, disabling preemption +on a given CPU in fact does not block grace periods, as Oleg Nesterov +demonstrated. + +

+If you need a preempt-disable region to block grace periods, you need to add +rcu_read_lock() and rcu_read_unlock(), for example +as follows: + +

+
+ 1 preempt_disable();
+ 2 rcu_read_lock();
+ 3 do_something();
+ 4 rcu_read_unlock();
+ 5 preempt_enable();
+ 6
+ 7 /* Spinlocks implicitly disable preemption. */
+ 8 spin_lock(&mylock);
+ 9 rcu_read_lock();
+10 do_something();
+11 rcu_read_unlock();
+12 spin_unlock(&mylock);
+
+
+ +

+In theory, you could enter the RCU read-side critical section first, +but it is more efficient to keep the entire RCU read-side critical +section contained in the preempt-disable region as shown above. +Of course, RCU read-side critical sections that extend outside of +preempt-disable regions will work correctly, but such critical sections +can be preempted, which forces rcu_read_unlock() to do +more work. +And no, this is not an invitation to enclose all of your RCU +read-side critical sections within preempt-disable regions, because +doing so would degrade real-time response. + +

+This non-requirement appeared with preemptible RCU. +If you need a grace period that waits on non-preemptible code regions, use +RCU-sched. + +

Parallelism Facts of Life

+ +

+These parallelism facts of life are by no means specific to RCU, but +the RCU implementation must abide by them. +They therefore bear repeating: + +

    +
  1. Any CPU or task may be delayed at any time, + and any attempts to avoid these delays by disabling + preemption, interrupts, or whatever are completely futile. + This is most obvious in preemptible user-level + environments and in virtualized environments (where + a given guest OS's VCPUs can be preempted at any time by + the underlying hypervisor), but can also happen in bare-metal + environments due to ECC errors, NMIs, and other hardware + events. + Although a delay of more than about 20 seconds can result + in splats, the RCU implementation is obligated to use + algorithms that can tolerate extremely long delays, but where + “extremely long” is not long enough to allow + wrap-around when incrementing a 64-bit counter. +
  2. Both the compiler and the CPU can reorder memory accesses. + Where it matters, RCU must use compiler directives and + memory-barrier instructions to preserve ordering. +
  3. Conflicting writes to memory locations in any given cache line + will result in expensive cache misses. + Greater numbers of concurrent writes and more-frequent + concurrent writes will result in more dramatic slowdowns. + RCU is therefore obligated to use algorithms that have + sufficient locality to avoid significant performance and + scalability problems. +
  4. As a rough rule of thumb, only one CPU's worth of processing + may be carried out under the protection of any given exclusive + lock. + RCU must therefore use scalable locking designs. +
  5. Counters are finite, especially on 32-bit systems. + RCU's use of counters must therefore tolerate counter wrap, + or be designed such that counter wrap would take way more + time than a single system is likely to run. + An uptime of ten years is quite possible, a runtime + of a century much less so. + As an example of the latter, RCU's dyntick-idle nesting counter + allows 54 bits for interrupt nesting level (this counter + is 64 bits even on a 32-bit system). + Overflowing this counter requires 254 + half-interrupts on a given CPU without that CPU ever going idle. + If a half-interrupt happened every microsecond, it would take + 570 years of runtime to overflow this counter, which is currently + believed to be an acceptably long time. +
  6. Linux systems can have thousands of CPUs running a single + Linux kernel in a single shared-memory environment. + RCU must therefore pay close attention to high-end scalability. +
+ +

+This last parallelism fact of life means that RCU must pay special +attention to the preceding facts of life. +The idea that Linux might scale to systems with thousands of CPUs would +have been met with some skepticism in the 1990s, but these requirements +would have otherwise have been unsurprising, even in the early 1990s. + +

Quality-of-Implementation Requirements

+ +

+These sections list quality-of-implementation requirements. +Although an RCU implementation that ignores these requirements could +still be used, it would likely be subject to limitations that would +make it inappropriate for industrial-strength production use. +Classes of quality-of-implementation requirements are as follows: + +

    +
  1. Specialization +
  2. Performance and Scalability +
  3. Composability +
  4. Corner Cases +
+ +

+These classes is covered in the following sections. + +

Specialization

+ +

+RCU is and always has been intended primarily for read-mostly situations, as +illustrated by the following figure. +This means that RCU's read-side primitives are optimized, often at the +expense of its update-side primitives. + +

RCUApplicability.svg

+ +

+This focus on read-mostly situations means that RCU must interoperate +with other synchronization primitives. +For example, the add_gp() and remove_gp_synchronous() +examples discussed earlier use RCU to protect readers and locking to +coordinate updaters. +However, the need extends much farther, requiring that a variety of +synchronization primitives be legal within RCU read-side critical sections, +including spinlocks, sequence locks, atomic operations, reference +counters, and memory barriers. + +

@@QQ@@ +What about sleeping locks? +

@@QQA@@ +These are forbidden within Linux-kernel RCU read-side critical sections +because it is not legal to place a quiescent state (in this case, +voluntary context switch) within an RCU read-side critical section. +However, sleeping locks may be used within userspace RCU read-side critical +sections, and also within Linux-kernel sleepable RCU +(SRCU) +read-side critical sections. +In addition, the -rt patchset turns spinlocks into a sleeping locks so +that the corresponding critical sections can be preempted, which +also means that these sleeplockified spinlocks (but not other sleeping locks!) +may be acquire within -rt-Linux-kernel RCU read-side critical sections. + +

+Note that it is legal for a normal RCU read-side critical section +to conditionally acquire a sleeping locks (as in mutex_trylock()), +but only as long as it does not loop indefinitely attempting to +conditionally acquire that sleeping locks. +The key point is that things like mutex_trylock() +either return with the mutex held, or return an error indication if +the mutex was not immediately available. +Either way, mutex_trylock() returns immediately without sleeping. +

@@QQE@@ + +

+It often comes as a surprise that many algorithms do not require a +consistent view of data, but many can function in that mode, +with network routing being the poster child. +Internet routing algorithms take significant time to propagate +updates, so that by the time an update arrives at a given system, +that system has been sending network traffic the wrong way for +a considerable length of time. +Having a few threads continue to send traffic the wrong way for a +few more milliseconds is clearly not a problem: In the worst case, +TCP retransmissions will eventually get the data where it needs to go. +In general, when tracking the state of the universe outside of the +computer, some level of inconsistency must be tolerated due to +speed-of-light delays if nothing else. + +

+Furthermore, uncertainty about external state is inherent in many cases. +For example, a pair of veternarians might use heartbeat to determine +whether or not a given cat was alive. +But how long should they wait after the last heartbeat to decide that +the cat is in fact dead? +Waiting less than 400 milliseconds makes no sense because this would +mean that a relaxed cat would be considered to cycle between death +and life more than 100 times per minute. +Moreover, just as with human beings, a cat's heart might stop for +some period of time, so the exact wait period is a judgment call. +One of our pair of veternarians might wait 30 seconds before pronouncing +the cat dead, while the other might insist on waiting a full minute. +The two veternarians would then disagree on the state of the cat during +the final 30 seconds of the minute following the last heartbeat, as +fancifully illustrated below: + +

2013-08-is-it-dead.png

+ +

+Interestingly enough, this same situation applies to hardware. +When push comes to shove, how do we tell whether or not some +external server has failed? +We send messages to it periodically, and declare it failed if we +don't receive a response within a given period of time. +Policy decisions can usually tolerate short +periods of inconsistency. +The policy was decided some time ago, and is only now being put into +effect, so a few milliseconds of delay is normally inconsequential. + +

+However, there are algorithms that absolutely must see consistent data. +For example, the translation between a user-level SystemV semaphore +ID to the corresponding in-kernel data structure is protected by RCU, +but it is absolutely forbidden to update a semaphore that has just been +removed. +In the Linux kernel, this need for consistency is accommodated by acquiring +spinlocks located in the in-kernel data structure from within +the RCU read-side critical section, and this is indicated by the +green box in the figure above. +Many other techniques may be used, and are in fact used within the +Linux kernel. + +

+In short, RCU is not required to maintain consistency, and other +mechanisms may be used in concert with RCU when consistency is required. +RCU's specialization allows it to do its job extremely well, and its +ability to interoperate with other synchronization mechanisms allows +the right mix of synchronization tools to be used for a given job. + +

Performance and Scalability

+ +

+Energy efficiency is a critical component of performance today, +and Linux-kernel RCU implementations must therefore avoid unnecessarily +awakening idle CPUs. +I cannot claim that this requirement was premeditated. +In fact, I learned of it during a telephone conversation in which I +was given “frank and open” feedback on the importance +of energy efficiency in battery-powered systems and on specific +energy-efficiency shortcomings of the Linux-kernel RCU implementation. +In my experience, the battery-powered embedded community will consider +any unnecessary wakeups to be extremely unfriendly acts. +So much so that mere Linux-kernel-mailing-list posts are +insufficient to vent their ire. + +

+Memory consumption is not particularly important for in most +situations, and has become decreasingly +so as memory sizes have expanded and memory +costs have plummeted. +However, as I learned from Matt Mackall's +bloatwatch +efforts, memory footprint is critically important on single-CPU systems with +non-preemptible (CONFIG_PREEMPT=n) kernels, and thus +tiny RCU +was born. +Josh Triplett has since taken over the small-memory banner with his +Linux kernel tinification +project, which resulted in +SRCU +becoming optional for those kernels not needing it. + +

+The remaining performance requirements are, for the most part, +unsurprising. +For example, in keeping with RCU's read-side specialization, +rcu_dereference() should have negligible overhead (for +example, suppression of a few minor compiler optimizations). +Similarly, in non-preemptible environments, rcu_read_lock() and +rcu_read_unlock() should have exactly zero overhead. + +

+In preemptible environments, in the case where the RCU read-side +critical section was not preempted (as will be the case for the +highest-priority real-time process), rcu_read_lock() and +rcu_read_unlock() should have minimal overhead. +In particular, they should not contain atomic read-modify-write +operations, memory-barrier instructions, preemption disabling, +interrupt disabling, or backwards branches. +However, in the case where the RCU read-side critical section was preempted, +rcu_read_unlock() may acquire spinlocks and disable interrupts. +This is why it is better to nest an RCU read-side critical section +within a preempt-disable region than vice versa, at least in cases +where that critical section is short enough to avoid unduly degrading +real-time latencies. + +

+The synchronize_rcu() grace-period-wait primitive is +optimized for throughput. +It may therefore incur several milliseconds of latency in addition to +the duration of the longest RCU read-side critical section. +On the other hand, multiple concurrent invocations of +synchronize_rcu() are required to use batching optimizations +so that they can be satisfied by a single underlying grace-period-wait +operation. +For example, in the Linux kernel, it is not unusual for a single +grace-period-wait operation to serve more than +1,000 separate invocations +of synchronize_rcu(), thus amortizing the per-invocation +overhead down to nearly zero. +However, the grace-period optimization is also required to avoid +measurable degradation of real-time scheduling and interrupt latencies. + +

+In some cases, the multi-millisecond synchronize_rcu() +latencies are unacceptable. +In these cases, synchronize_rcu_expedited() may be used +instead, reducing the grace-period latency down to a few tens of +microseconds on small systems, at least in cases where the RCU read-side +critical sections are short. +There are currently no special latency requirements for +synchronize_rcu_expedited() on large systems, but, +consistent with the empirical nature of the RCU specification, +that is subject to change. +However, there most definitely are scalability requirements: +A storm of synchronize_rcu_expedited() invocations on 4096 +CPUs should at least make reasonable forward progress. +In return for its shorter latencies, synchronize_rcu_expedited() +is permitted to impose modest degradation of real-time latency +on non-idle online CPUs. +That said, it will likely be necessary to take further steps to reduce this +degradation, hopefully to roughly that of a scheduling-clock interrupt. + +

+There are a number of situations where even +synchronize_rcu_expedited()'s reduced grace-period +latency is unacceptable. +In these situations, the asynchronous call_rcu() can be +used in place of synchronize_rcu() as follows: + +

+
+ 1 struct foo {
+ 2   int a;
+ 3   int b;
+ 4   struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9   struct foo *p = container_of(rhp, struct foo, rh);
+10
+11   kfree(p);
+12 }
+13
+14 bool remove_gp_asynchronous(void)
+15 {
+16   struct foo *p;
+17
+18   spin_lock(&gp_lock);
+19   p = rcu_dereference(gp);
+20   if (!p) {
+21     spin_unlock(&gp_lock);
+22     return false;
+23   }
+24   rcu_assign_pointer(gp, NULL);
+25   call_rcu(&p->rh, remove_gp_cb);
+26   spin_unlock(&gp_lock);
+27   return true;
+28 }
+
+
+ +

+A definition of struct foo is finally needed, and appears +on lines 1-5. +The function remove_gp_cb() is passed to call_rcu() +on line 25, and will be invoked after the end of a subsequent +grace period. +This gets the same effect as remove_gp_synchronous(), +but without forcing the updater to wait for a grace period to elapse. +The call_rcu() function may be used in a number of +situations where neither synchronize_rcu() nor +synchronize_rcu_expedited() would be legal, +including within preempt-disable code, local_bh_disable() code, +interrupt-disable code, and interrupt handlers. +However, even call_rcu() is illegal within NMI handlers. +The callback function (remove_gp_cb() in this case) will be +executed within softirq (software interrupt) environment within the +Linux kernel, +either within a real softirq handler or under the protection +of local_bh_disable(). +In both the Linux kernel and in userspace, it is bad practice to +write an RCU callback function that takes too long. +Long-running operations should be relegated to separate threads or +(in the Linux kernel) workqueues. + +

@@QQ@@ +Why does line 19 use rcu_access_pointer()? +After all, call_rcu() on line 25 stores into the +structure, which would interact badly with concurrent insertions. +Doesn't this mean that rcu_dereference() is required? +

@@QQA@@ +Presumably the ->gp_lock acquired on line 18 excludes +any changes, including any insertions that rcu_dereference() +would protect against. +Therefore, any insertions will be delayed until after ->gp_lock +is released on line 25, which in turn means that +rcu_access_pointer() suffices. +

@@QQE@@ + +

+However, all that remove_gp_cb() is doing is +invoking kfree() on the data element. +This is a common idiom, and is supported by kfree_rcu(), +which allows “fire and forget” operation as shown below: + +

+
+ 1 struct foo {
+ 2   int a;
+ 3   int b;
+ 4   struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9   struct foo *p;
+10
+11   spin_lock(&gp_lock);
+12   p = rcu_dereference(gp);
+13   if (!p) {
+14     spin_unlock(&gp_lock);
+15     return false;
+16   }
+17   rcu_assign_pointer(gp, NULL);
+18   kfree_rcu(p, rh);
+19   spin_unlock(&gp_lock);
+20   return true;
+21 }
+
+
+ +

+Note that remove_gp_faf() simply invokes +kfree_rcu() and proceeds, without any need to pay any +further attention to the subsequent grace period and kfree(). +It is permissible to invoke kfree_rcu() from the same +environments as for call_rcu(). +Interestingly enough, DYNIX/ptx had the equivalents of +call_rcu() and kfree_rcu(), but not +synchronize_rcu(). +This was due to the fact that RCU was not heavily used within DYNIX/ptx, +so the very few places that needed something like +synchronize_rcu() simply open-coded it. + +

@@QQ@@ +Earlier it was claimed that call_rcu() and +kfree_rcu() allowed updaters to avoid being blocked +by readers. +But how can that be correct, given that the invocation of the callback +and the freeing of the memory (respectively) must still wait for +a grace period to elapse? +

@@QQA@@ +We could define things this way, but keep in mind that this sort of +definition would say that updates in garbage-collected languages +cannot complete until the next time the garbage collector runs, +which does not seem at all reasonable. +The key point is that in most cases, an updater using either +call_rcu() or kfree_rcu() can proceed to the +next update as soon as it has invoked call_rcu() or +kfree_rcu(), without having to wait for a subsequent +grace period. +

@@QQE@@ + +

+But what if the updater must wait for the completion of code to be +executed after the end of the grace period, but has other tasks +that can be carried out in the meantime? +The polling-style get_state_synchronize_rcu() and +cond_synchronize_rcu() functions may be used for this +purpose, as shown below: + +

+
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3   struct foo *p;
+ 4   unsigned long s;
+ 5
+ 6   spin_lock(&gp_lock);
+ 7   p = rcu_access_pointer(gp);
+ 8   if (!p) {
+ 9     spin_unlock(&gp_lock);
+10     return false;
+11   }
+12   rcu_assign_pointer(gp, NULL);
+13   spin_unlock(&gp_lock);
+14   s = get_state_synchronize_rcu();
+15   do_something_while_waiting();
+16   cond_synchronize_rcu(s);
+17   kfree(p);
+18   return true;
+19 }
+
+
+ +

+On line 14, get_state_synchronize_rcu() obtains a +“cookie” from RCU, +then line 15 carries out other tasks, +and finally, line 16 returns immediately if a grace period has +elapsed in the meantime, but otherwise waits as required. +The need for get_state_synchronize_rcu and +cond_synchronize_rcu() has appeared quite recently, +so it is too early to tell whether they will stand the test of time. + +

+RCU thus provides a range of tools to allow updaters to strike the +required tradeoff between latency, flexibility and CPU overhead. + +

Composability

+ +

+Composability has received much attention in recent years, perhaps in part +due to the collision of multicore hardware with object-oriented techniques +designed in single-threaded environments for single-threaded use. +And in theory, RCU read-side critical sections may be composed, and in +fact may be nested arbitrarily deeply. +In practice, as with all real-world implementations of composable +constructs, there are limitations. + +

+Implementations of RCU for which rcu_read_lock() +and rcu_read_unlock() generate no code, such as +Linux-kernel RCU when CONFIG_PREEMPT=n, can be +nested arbitrarily deeply. +After all, there is no overhead. +Except that if all these instances of rcu_read_lock() +and rcu_read_unlock() are visible to the compiler, +compilation will eventually fail due to exhausting memory, +mass storage, or user patience, whichever comes first. +If the nesting is not visible to the compiler, as is the case with +mutually recursive functions each in its own translation unit, +stack overflow will result. +If the nesting takes the form of loops, either the control variable +will overflow or (in the Linux kernel) you will get an RCU CPU stall warning. +Nevertheless, this class of RCU implementations is one +of the most composable constructs in existence. + +

+RCU implementations that explicitly track nesting depth +are limited by the nesting-depth counter. +For example, the Linux kernel's preemptible RCU limits nesting to +INT_MAX. +This should suffice for almost all practical purposes. +That said, a consecutive pair of RCU read-side critical sections +between which there is an operation that waits for a grace period +cannot be enclosed in another RCU read-side critical section. +This is because it is not legal to wait for a grace period within +an RCU read-side critical section: To do so would result either +in deadlock or +in RCU implicitly splitting the enclosing RCU read-side critical +section, neither of which is conducive to a long-lived and prosperous +kernel. + +

+It is worth noting that RCU is not alone in limiting composability. +For example, many transactional-memory implementations prohibit +composing a pair of transactions separated by an irrevocable +operation (for example, a network receive operation). +For another example, lock-based critical sections can be composed +surprisingly freely, but only if deadlock is avoided. + +

+In short, although RCU read-side critical sections are highly composable, +care is required in some situations, just as is the case for any other +composable synchronization mechanism. + +

Corner Cases

+ +

+A given RCU workload might have an endless and intense stream of +RCU read-side critical sections, perhaps even so intense that there +was never a point in time during which there was not at least one +RCU read-side critical section in flight. +RCU cannot allow this situation to block grace periods: As long as +all the RCU read-side critical sections are finite, grace periods +must also be finite. + +

+That said, preemptible RCU implementations could potentially result +in RCU read-side critical sections being preempted for long durations, +which has the effect of creating a long-duration RCU read-side +critical section. +This situation can arise only in heavily loaded systems, but systems using +real-time priorities are of course more vulnerable. +Therefore, RCU priority boosting is provided to help deal with this +case. +That said, the exact requirements on RCU priority boosting will likely +evolve as more experience accumulates. + +

+Other workloads might have very high update rates. +Although one can argue that such workloads should instead use +something other than RCU, the fact remains that RCU must +handle such workloads gracefully. +This requirement is another factor driving batching of grace periods, +but it is also the driving force behind the checks for large numbers +of queued RCU callbacks in the call_rcu() code path. +Finally, high update rates should not delay RCU read-side critical +sections, although some read-side delays can occur when using +synchronize_rcu_expedited(), courtesy of this function's use +of try_stop_cpus(). +(In the future, synchronize_rcu_expedited() will be +converted to use lighter-weight inter-processor interrupts (IPIs), +but this will still disturb readers, though to a much smaller degree.) + +

+Although all three of these corner cases were understood in the early +1990s, a simple user-level test consisting of close(open(path)) +in a tight loop +in the early 2000s suddenly provided a much deeper appreciation of the +high-update-rate corner case. +This test also motivated addition of some RCU code to react to high update +rates, for example, if a given CPU finds itself with more than 10,000 +RCU callbacks queued, it will cause RCU to take evasive action by +more aggressively starting grace periods and more aggressively forcing +completion of grace-period processing. +This evasive action causes the grace period to complete more quickly, +but at the cost of restricting RCU's batching optimizations, thus +increasing the CPU overhead incurred by that grace period. + +

+Software-Engineering Requirements

+ +

+Between Murphy's Law and “To err is human”, it is necessary to +guard against mishaps and misuse: + +

    +
  1. It is all too easy to forget to use rcu_read_lock() + everywhere that it is needed, so kernels built with + CONFIG_PROVE_RCU=y will spat if + rcu_dereference() is used outside of an + RCU read-side critical section. + Update-side code can use rcu_dereference_protected(), + which takes a + lockdep expression + to indicate what is providing the protection. + If the indicated protection is not provided, a lockdep splat + is emitted. + +

    + Code shared between readers and updaters can use + rcu_dereference_check(), which also takes a + lockdep expression, and emits a lockdep splat if neither + rcu_read_lock() nor the indicated protection + is in place. + In addition, rcu_dereference_raw() is used in those + (hopefully rare) cases where the required protection cannot + be easily described. + Finally, rcu_read_lock_held() is provided to + allow a function to verify that it has been invoked within + an RCU read-side critical section. + I was made aware of this set of requirements shortly after Thomas + Gleixner audited a number of RCU uses. +

  2. A given function might wish to check for RCU-related preconditions + upon entry, before using any other RCU API. + The rcu_lockdep_assert() does this job, + asserting the expression in kernels having lockdep enabled + and doing nothing otherwise. +
  3. It is also easy to forget to use rcu_assign_pointer() + and rcu_dereference(), perhaps (incorrectly) + substituting a simple assignment. + To catch this sort of error, a given RCU-protected pointer may be + tagged with __rcu, after which running sparse + with CONFIG_SPARSE_RCU_POINTER=y will complain + about simple-assignment accesses to that pointer. + Arnd Bergmann made me aware of this requirement, and also + supplied the needed + patch series. +
  4. Kernels built with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y + will splat if a data element is passed to call_rcu() + twice in a row, without a grace period in between. + (This error is similar to a double free.) + The corresponding rcu_head structures that are + dynamically allocated are automatically tracked, but + rcu_head structures allocated on the stack + must be initialized with init_rcu_head_on_stack() + and cleaned up with destroy_rcu_head_on_stack(). + Similarly, statically allocated non-stack rcu_head + structures must be initialized with init_rcu_head() + and cleaned up with destroy_rcu_head(). + Mathieu Desnoyers made me aware of this requirement, and also + supplied the needed + patch. +
  5. An infinite loop in an RCU read-side critical section will + eventually trigger an RCU CPU stall warning splat, with + the duration of “eventually” being controlled by the + RCU_CPU_STALL_TIMEOUT Kconfig option, or, + alternatively, by the + rcupdate.rcu_cpu_stall_timeout boot/sysfs + parameter. + However, RCU is not obligated to produce this splat + unless there is a grace period waiting on that particular + RCU read-side critical section. +

    + Some extreme workloads might intentionally delay + RCU grace periods, and systems running those workloads can + be booted with rcupdate.rcu_cpu_stall_suppress + to suppress the splats. + This kernel parameter may also be set via sysfs. + Furthermore, RCU CPU stall warnings are counter-productive + during sysrq dumps and during panics. + RCU therefore supplies the rcu_sysrq_start() and + rcu_sysrq_end() API members to be called before + and after long sysrq dumps. + RCU also supplies the rcu_panic() notifier that is + automatically invoked at the beginning of a panic to suppress + further RCU CPU stall warnings. + +

    + This requirement made itself known in the early 1990s, pretty + much the first time that it was necessary to debug a CPU stall. + That said, the initial implementation in DYNIX/ptx was quite + generic in comparison with that of Linux. +

  6. Although it would be very good to detect pointers leaking out + of RCU read-side critical sections, there is currently no + good way of doing this. + One complication is the need to distinguish between pointers + leaking and pointers that have been handed off from RCU to + some other synchronization mechanism, for example, reference + counting. +
  7. In kernels built with CONFIG_RCU_TRACE=y, RCU-related + information is provided via both debugfs and event tracing. +
  8. Open-coded use of rcu_assign_pointer() and + rcu_dereference() to create typical linked + data structures can be surprisingly error-prone. + Therefore, RCU-protected + linked lists + and, more recently, RCU-protected + hash tables + are available. + Many other special-purpose RCU-protected data structures are + available in the Linux kernel and the userspace RCU library. +
  9. Some linked structures are created at compile time, but still + require __rcu checking. + The RCU_POINTER_INITIALIZER() macro serves this + purpose. +
  10. It is not necessary to use rcu_assign_pointer() + when creating linked structures that are to be published via + a single external pointer. + The RCU_INIT_POINTER() macro is provided for + this task and also for assigning NULL pointers + at runtime. +
+ +

+This not a hard-and-fast list: RCU's diagnostic capabilities will +continue to be guided by the number and type of usage bugs found +in real-world RCU usage. + +

Linux Kernel Complications

+ +

+The Linux kernel provides an interesting environment for all kinds of +software, including RCU. +Some of the relevant points of interest are as follows: + +

    +
  1. Configuration. +
  2. Firmware Interface. +
  3. Early Boot. +
  4. + Interrupts and non-maskable interrupts (NMIs). +
  5. Loadable Modules. +
  6. Hotplug CPU. +
  7. Scheduler and RCU. +
  8. Tracing and RCU. +
  9. Energy Efficiency. +
  10. Memory Efficiency. +
  11. + Performance, Scalability, Response Time, and Reliability. +
+ +

+This list is probably incomplete, but it does give a feel for the +most notable Linux-kernel complications. +Each of the following sections covers one of the above topics. + +

Configuration

+ +

+RCU's goal is automatic configuration, so that almost nobody +needs to worry about RCU's Kconfig options. +And for almost all users, RCU does in fact work well +“out of the box.” + +

+However, there are specialized use cases that are handled by +kernel boot parameters and Kconfig options. +Unfortunately, the Kconfig system will explicitly ask users +about new Kconfig options, which requires almost all of them +be hidden behind a CONFIG_RCU_EXPERT Kconfig option. + +

+This all should be quite obvious, but the fact remains that +Linus Torvalds recently had to +remind +me of this requirement. + +

Firmware Interface

+ +

+In many cases, kernel obtains information about the system from the +firmware, and sometimes things are lost in translation. +Or the translation is accurate, but the original message is bogus. + +

+For example, some systems' firmware overreports the number of CPUs, +sometimes by a large factor. +If RCU naively believed the firmware, as it used to do, +it would create too many per-CPU kthreads. +Although the resulting system will still run correctly, the extra +kthreads needlessly consume memory and can cause confusion +when they show up in ps listings. + +

+RCU must therefore wait for a given CPU to actually come online before +it can allow itself to believe that the CPU actually exists. +The resulting “ghost CPUs” (which are never going to +come online) cause a number of +interesting complications. + +

Early Boot

+ +

+The Linux kernel's boot sequence is an interesting process, +and RCU is used early, even before rcu_init() +is invoked. +In fact, a number of RCU's primitives can be used as soon as the +initial task's task_struct is available and the +boot CPU's per-CPU variables are set up. +The read-side primitives (rcu_read_lock(), +rcu_read_unlock(), rcu_dereference(), +and rcu_access_pointer()) will operate normally very early on, +as will rcu_assign_pointer(). + +

+Although call_rcu() may be invoked at any +time during boot, callbacks are not guaranteed to be invoked until after +the scheduler is fully up and running. +This delay in callback invocation is due to the fact that RCU does not +invoke callbacks until it is fully initialized, and this full initialization +cannot occur until after the scheduler has initialized itself to the +point where RCU can spawn and run its kthreads. +In theory, it would be possible to invoke callbacks earlier, +however, this is not a panacea because there would be severe restrictions +on what operations those callbacks could invoke. + +

+Perhaps surprisingly, synchronize_rcu(), +synchronize_rcu_bh() +(discussed below), +and +synchronize_sched() +will all operate normally +during very early boot, the reason being that there is only one CPU +and preemption is disabled. +This means that the call synchronize_rcu() (or friends) +itself is a quiescent +state and thus a grace period, so the early-boot implementation can +be a no-op. + +

+Both synchronize_rcu_bh() and synchronize_sched() +continue to operate normally through the remainder of boot, courtesy +of the fact that preemption is disabled across their RCU read-side +critical sections and also courtesy of the fact that there is still +only one CPU. +However, once the scheduler starts initializing, preemption is enabled. +There is still only a single CPU, but the fact that preemption is enabled +means that the no-op implementation of synchronize_rcu() no +longer works in CONFIG_PREEMPT=y kernels. +Therefore, as soon as the scheduler starts initializing, the early-boot +fastpath is disabled. +This means that synchronize_rcu() switches to its runtime +mode of operation where it posts callbacks, which in turn means that +any call to synchronize_rcu() will block until the corresponding +callback is invoked. +Unfortunately, the callback cannot be invoked until RCU's runtime +grace-period machinery is up and running, which cannot happen until +the scheduler has initialized itself sufficiently to allow RCU's +kthreads to be spawned. +Therefore, invoking synchronize_rcu() during scheduler +initialization can result in deadlock. + +

@@QQ@@ +So what happens with synchronize_rcu() during +scheduler initialization for CONFIG_PREEMPT=n +kernels? +

@@QQA@@ +In CONFIG_PREEMPT=n kernel, synchronize_rcu() +maps directly to synchronize_sched(). +Therefore, synchronize_rcu() works normally throughout +boot in CONFIG_PREEMPT=n kernels. +However, your code must also work in CONFIG_PREEMPT=y kernels, +so it is still necessary to avoid invoking synchronize_rcu() +during scheduler initialization. +

@@QQE@@ + +

+I learned of these boot-time requirements as a result of a series of +system hangs. + +

Interrupts and NMIs

+ +

+The Linux kernel has interrupts, and RCU read-side critical sections are +legal within interrupt handlers and within interrupt-disabled regions +of code, as are invocations of call_rcu(). + +

+Some Linux-kernel architectures can enter an interrupt handler from +non-idle process context, and then just never leave it, instead stealthily +transitioning back to process context. +This trick is sometimes used to invoke system calls from inside the kernel. +These “half-interrupts” mean that RCU has to be very careful +about how it counts interrupt nesting levels. +I learned of this requirement the hard way during a rewrite +of RCU's dyntick-idle code. + +

+The Linux kernel has non-maskable interrupts (NMIs), and +RCU read-side critical sections are legal within NMI handlers. +Thankfully, RCU update-side primitives, including +call_rcu(), are prohibited within NMI handlers. + +

+The name notwithstanding, some Linux-kernel architectures +can have nested NMIs, which RCU must handle correctly. +Andy Lutomirski +surprised me +with this requirement; +he also kindly surprised me with +an algorithm +that meets this requirement. + +

Loadable Modules

+ +

+The Linux kernel has loadable modules, and these modules can +also be unloaded. +After a given module has been unloaded, any attempt to call +one of its functions results in a segmentation fault. +The module-unload functions must therefore cancel any +delayed calls to loadable-module functions, for example, +any outstanding mod_timer() must be dealt with +via del_timer_sync() or similar. + +

+Unfortunately, there is no way to cancel an RCU callback; +once you invoke call_rcu(), the callback function is +going to eventually be invoked, unless the system goes down first. +Because it is normally considered socially irresponsible to crash the system +in response to a module unload request, we need some other way +to deal with in-flight RCU callbacks. + +

+RCU therefore provides +rcu_barrier(), +which waits until all in-flight RCU callbacks have been invoked. +If a module uses call_rcu(), its exit function should therefore +prevent any future invocation of call_rcu(), then invoke +rcu_barrier(). +In theory, the underlying module-unload code could invoke +rcu_barrier() unconditionally, but in practice this would +incur unacceptable latencies. + +

+Nikita Danilov noted this requirement for an analogous filesystem-unmount +situation, and Dipankar Sarma incorporated rcu_barrier() into RCU. +The need for rcu_barrier() for module unloading became +apparent later. + +

Hotplug CPU

+ +

+The Linux kernel supports CPU hotplug, which means that CPUs +can come and go. +It is of course illegal to use any RCU API member from an offline CPU. +This requirement was present from day one in DYNIX/ptx, but +on the other hand, the Linux kernel's CPU-hotplug implementation +is “interesting.” + +

+The Linux-kernel CPU-hotplug implementation has notifiers that +are used to allow the various kernel subsystems (including RCU) +to respond appropriately to a given CPU-hotplug operation. +Most RCU operations may be invoked from CPU-hotplug notifiers, +including even normal synchronous grace-period operations +such as synchronize_rcu(). +However, expedited grace-period operations such as +synchronize_rcu_expedited() are not supported, +due to the fact that current implementations block CPU-hotplug +operations, which could result in deadlock. + +

+In addition, all-callback-wait operations such as +rcu_barrier() are also not supported, due to the +fact that there are phases of CPU-hotplug operations where +the outgoing CPU's callbacks will not be invoked until after +the CPU-hotplug operation ends, which could also result in deadlock. + +

Scheduler and RCU

+ +

+RCU depends on the scheduler, and the scheduler uses RCU to +protect some of its data structures. +This means the scheduler is forbidden from acquiring +the runqueue locks and the priority-inheritance locks +in the middle of an outermost RCU read-side critical section unless either +(1) it releases them before exiting that same +RCU read-side critical section, or +(2) interrupts are disabled across +that entire RCU read-side critical section. +This same prohibition also applies (recursively!) to any lock that is acquired +while holding any lock to which this prohibition applies. +Adhering to this rule prevents preemptible RCU from invoking +rcu_read_unlock_special() while either runqueue or +priority-inheritance locks are held, thus avoiding deadlock. + +

+Prior to v4.4, it was only necessary to disable preemption across +RCU read-side critical sections that acquired scheduler locks. +In v4.4, expedited grace periods started using IPIs, and these +IPIs could force a rcu_read_unlock() to take the slowpath. +Therefore, this expedited-grace-period change required disabling of +interrupts, not just preemption. + +

+For RCU's part, the preemptible-RCU rcu_read_unlock() +implementation must be written carefully to avoid similar deadlocks. +In particular, rcu_read_unlock() must tolerate an +interrupt where the interrupt handler invokes both +rcu_read_lock() and rcu_read_unlock(). +This possibility requires rcu_read_unlock() to use +negative nesting levels to avoid destructive recursion via +interrupt handler's use of RCU. + +

+This pair of mutual scheduler-RCU requirements came as a +complete surprise. + +

+As noted above, RCU makes use of kthreads, and it is necessary to +avoid excessive CPU-time accumulation by these kthreads. +This requirement was no surprise, but RCU's violation of it +when running context-switch-heavy workloads when built with +CONFIG_NO_HZ_FULL=y +did come as a surprise [PDF]. +RCU has made good progress towards meeting this requirement, even +for context-switch-have CONFIG_NO_HZ_FULL=y workloads, +but there is room for further improvement. + +

Tracing and RCU

+ +

+It is possible to use tracing on RCU code, but tracing itself +uses RCU. +For this reason, rcu_dereference_raw_notrace() +is provided for use by tracing, which avoids the destructive +recursion that could otherwise ensue. +This API is also used by virtualization in some architectures, +where RCU readers execute in environments in which tracing +cannot be used. +The tracing folks both located the requirement and provided the +needed fix, so this surprise requirement was relatively painless. + +

Energy Efficiency

+ +

+Interrupting idle CPUs is considered socially unacceptable, +especially by people with battery-powered embedded systems. +RCU therefore conserves energy by detecting which CPUs are +idle, including tracking CPUs that have been interrupted from idle. +This is a large part of the energy-efficiency requirement, +so I learned of this via an irate phone call. + +

+Because RCU avoids interrupting idle CPUs, it is illegal to +execute an RCU read-side critical section on an idle CPU. +(Kernels built with CONFIG_PROVE_RCU=y will splat +if you try it.) +The RCU_NONIDLE() macro and _rcuidle +event tracing is provided to work around this restriction. +In addition, rcu_is_watching() may be used to +test whether or not it is currently legal to run RCU read-side +critical sections on this CPU. +I learned of the need for diagnostics on the one hand +and RCU_NONIDLE() on the other while inspecting +idle-loop code. +Steven Rostedt supplied _rcuidle event tracing, +which is used quite heavily in the idle loop. + +

+It is similarly socially unacceptable to interrupt an +nohz_full CPU running in userspace. +RCU must therefore track nohz_full userspace +execution. +And in +CONFIG_NO_HZ_FULL_SYSIDLE=y +kernels, RCU must separately track idle CPUs on the one hand and +CPUs that are either idle or executing in userspace on the other. +In both cases, RCU must be able to sample state at two points in +time, and be able to determine whether or not some other CPU spent +any time idle and/or executing in userspace. + +

+These energy-efficiency requirements have proven quite difficult to +understand and to meet, for example, there have been more than five +clean-sheet rewrites of RCU's energy-efficiency code, the last of +which was finally able to demonstrate +real energy savings running on real hardware [PDF]. +As noted earlier, +I learned of many of these requirements via angry phone calls: +Flaming me on the Linux-kernel mailing list was apparently not +sufficient to fully vent their ire at RCU's energy-efficiency bugs! + +

Memory Efficiency

+ +

+Although small-memory non-realtime systems can simply use Tiny RCU, +code size is only one aspect of memory efficiency. +Another aspect is the size of the rcu_head structure +used by call_rcu() and kfree_rcu(). +Although this structure contains nothing more than a pair of pointers, +it does appear in many RCU-protected data structures, including +some that are size critical. +The page structure is a case in point, as evidenced by +the many occurrences of the union keyword within that structure. + +

+This need for memory efficiency is one reason that RCU uses hand-crafted +singly linked lists to track the rcu_head structures that +are waiting for a grace period to elapse. +It is also the reason why rcu_head structures do not contain +debug information, such as fields tracking the file and line of the +call_rcu() or kfree_rcu() that posted them. +Although this information might appear in debug-only kernel builds at some +point, in the meantime, the ->func field will often provide +the needed debug information. + +

+However, in some cases, the need for memory efficiency leads to even +more extreme measures. +Returning to the page structure, the rcu_head field +shares storage with a great many other structures that are used at +various points in the corresponding page's lifetime. +In order to correctly resolve certain +race conditions, +the Linux kernel's memory-management subsystem needs a particular bit +to remain zero during all phases of grace-period processing, +and that bit happens to map to the bottom bit of the +rcu_head structure's ->next field. +RCU makes this guarantee as long as call_rcu() +is used to post the callback, as opposed to kfree_rcu() +or some future “lazy” +variant of call_rcu() that might one day be created for +energy-efficiency purposes. + +

+Performance, Scalability, Response Time, and Reliability

+ +

+Expanding on the +earlier discussion, +RCU is used heavily by hot code paths in performance-critical +portions of the Linux kernel's networking, security, virtualization, +and scheduling code paths. +RCU must therefore use efficient implementations, especially in its +read-side primitives. +To that end, it would be good if preemptible RCU's implementation +of rcu_read_lock() could be inlined, however, doing +this requires resolving #include issues with the +task_struct structure. + +

+The Linux kernel supports hardware configurations with up to +4096 CPUs, which means that RCU must be extremely scalable. +Algorithms that involve frequent acquisitions of global locks or +frequent atomic operations on global variables simply cannot be +tolerated within the RCU implementation. +RCU therefore makes heavy use of a combining tree based on the +rcu_node structure. +RCU is required to tolerate all CPUs continuously invoking any +combination of RCU's runtime primitives with minimal per-operation +overhead. +In fact, in many cases, increasing load must decrease the +per-operation overhead, witness the batching optimizations for +synchronize_rcu(), call_rcu(), +synchronize_rcu_expedited(), and rcu_barrier(). +As a general rule, RCU must cheerfully accept whatever the +rest of the Linux kernel decides to throw at it. + +

+The Linux kernel is used for real-time workloads, especially +in conjunction with the +-rt patchset. +The real-time-latency response requirements are such that the +traditional approach of disabling preemption across RCU +read-side critical sections is inappropriate. +Kernels built with CONFIG_PREEMPT=y therefore +use an RCU implementation that allows RCU read-side critical +sections to be preempted. +This requirement made its presence known after users made it +clear that an earlier +real-time patch +did not meet their needs, in conjunction with some +RCU issues +encountered by a very early version of the -rt patchset. + +

+In addition, RCU must make do with a sub-100-microsecond real-time latency +budget. +In fact, on smaller systems with the -rt patchset, the Linux kernel +provides sub-20-microsecond real-time latencies for the whole kernel, +including RCU. +RCU's scalability and latency must therefore be sufficient for +these sorts of configurations. +To my surprise, the sub-100-microsecond real-time latency budget + +applies to even the largest systems [PDF], +up to and including systems with 4096 CPUs. +This real-time requirement motivated the grace-period kthread, which +also simplified handling of a number of race conditions. + +

+Finally, RCU's status as a synchronization primitive means that +any RCU failure can result in arbitrary memory corruption that can be +extremely difficult to debug. +This means that RCU must be extremely reliable, which in +practice also means that RCU must have an aggressive stress-test +suite. +This stress-test suite is called rcutorture. + +

+Although the need for rcutorture was no surprise, +the current immense popularity of the Linux kernel is posing +interesting—and perhaps unprecedented—validation +challenges. +To see this, keep in mind that there are well over one billion +instances of the Linux kernel running today, given Android +smartphones, Linux-powered televisions, and servers. +This number can be expected to increase sharply with the advent of +the celebrated Internet of Things. + +

+Suppose that RCU contains a race condition that manifests on average +once per million years of runtime. +This bug will be occurring about three times per day across +the installed base. +RCU could simply hide behind hardware error rates, given that no one +should really expect their smartphone to last for a million years. +However, anyone taking too much comfort from this thought should +consider the fact that in most jurisdictions, a successful multi-year +test of a given mechanism, which might include a Linux kernel, +suffices for a number of types of safety-critical certifications. +In fact, rumor has it that the Linux kernel is already being used +in production for safety-critical applications. +I don't know about you, but I would feel quite bad if a bug in RCU +killed someone. +Which might explain my recent focus on validation and verification. + +

Other RCU Flavors

+ +

+One of the more surprising things about RCU is that there are now +no fewer than five flavors, or API families. +In addition, the primary flavor that has been the sole focus up to +this point has two different implementations, non-preemptible and +preemptible. +The other four flavors are listed below, with requirements for each +described in a separate section. + +

    +
  1. Bottom-Half Flavor +
  2. Sched Flavor +
  3. Sleepable RCU +
  4. Tasks RCU +
+ +

Bottom-Half Flavor

+ +

+The softirq-disable (AKA “bottom-half”, +hence the “_bh” abbreviations) +flavor of RCU, or RCU-bh, was developed by +Dipankar Sarma to provide a flavor of RCU that could withstand the +network-based denial-of-service attacks researched by Robert +Olsson. +These attacks placed so much networking load on the system +that some of the CPUs never exited softirq execution, +which in turn prevented those CPUs from ever executing a context switch, +which, in the RCU implementation of that time, prevented grace periods +from ever ending. +The result was an out-of-memory condition and a system hang. + +

+The solution was the creation of RCU-bh, which does +local_bh_disable() +across its read-side critical sections, and which uses the transition +from one type of softirq processing to another as a quiescent state +in addition to context switch, idle, user mode, and offline. +This means that RCU-bh grace periods can complete even when some of +the CPUs execute in softirq indefinitely, thus allowing algorithms +based on RCU-bh to withstand network-based denial-of-service attacks. + +

+Because +rcu_read_lock_bh() and rcu_read_unlock_bh() +disable and re-enable softirq handlers, any attempt to start a softirq +handlers during the +RCU-bh read-side critical section will be deferred. +In this case, rcu_read_unlock_bh() +will invoke softirq processing, which can take considerable time. +One can of course argue that this softirq overhead should be associated +with the code following the RCU-bh read-side critical section rather +than rcu_read_unlock_bh(), but the fact +is that most profiling tools cannot be expected to make this sort +of fine distinction. +For example, suppose that a three-millisecond-long RCU-bh read-side +critical section executes during a time of heavy networking load. +There will very likely be an attempt to invoke at least one softirq +handler during that three milliseconds, but any such invocation will +be delayed until the time of the rcu_read_unlock_bh(). +This can of course make it appear at first glance as if +rcu_read_unlock_bh() was executing very slowly. + +

+The +RCU-bh API +includes +rcu_read_lock_bh(), +rcu_read_unlock_bh(), +rcu_dereference_bh(), +rcu_dereference_bh_check(), +synchronize_rcu_bh(), +synchronize_rcu_bh_expedited(), +call_rcu_bh(), +rcu_barrier_bh(), and +rcu_read_lock_bh_held(). + +

Sched Flavor

+ +

+Before preemptible RCU, waiting for an RCU grace period had the +side effect of also waiting for all pre-existing interrupt +and NMI handlers. +However, there are legitimate preemptible-RCU implementations that +do not have this property, given that any point in the code outside +of an RCU read-side critical section can be a quiescent state. +Therefore, RCU-sched was created, which follows “classic” +RCU in that an RCU-sched grace period waits for for pre-existing +interrupt and NMI handlers. +In kernels built with CONFIG_PREEMPT=n, the RCU and RCU-sched +APIs have identical implementations, while kernels built with +CONFIG_PREEMPT=y provide a separate implementation for each. + +

+Note well that in CONFIG_PREEMPT=y kernels, +rcu_read_lock_sched() and rcu_read_unlock_sched() +disable and re-enable preemption, respectively. +This means that if there was a preemption attempt during the +RCU-sched read-side critical section, rcu_read_unlock_sched() +will enter the scheduler, with all the latency and overhead entailed. +Just as with rcu_read_unlock_bh(), this can make it look +as if rcu_read_unlock_sched() was executing very slowly. +However, the highest-priority task won't be preempted, so that task +will enjoy low-overhead rcu_read_unlock_sched() invocations. + +

+The +RCU-sched API +includes +rcu_read_lock_sched(), +rcu_read_unlock_sched(), +rcu_read_lock_sched_notrace(), +rcu_read_unlock_sched_notrace(), +rcu_dereference_sched(), +rcu_dereference_sched_check(), +synchronize_sched(), +synchronize_rcu_sched_expedited(), +call_rcu_sched(), +rcu_barrier_sched(), and +rcu_read_lock_sched_held(). +However, anything that disables preemption also marks an RCU-sched +read-side critical section, including +preempt_disable() and preempt_enable(), +local_irq_save() and local_irq_restore(), +and so on. + +

Sleepable RCU

+ +

+For well over a decade, someone saying “I need to block within +an RCU read-side critical section” was a reliable indication +that this someone did not understand RCU. +After all, if you are always blocking in an RCU read-side critical +section, you can probably afford to use a higher-overhead synchronization +mechanism. +However, that changed with the advent of the Linux kernel's notifiers, +whose RCU read-side critical +sections almost never sleep, but sometimes need to. +This resulted in the introduction of +sleepable RCU, +or SRCU. + +

+SRCU allows different domains to be defined, with each such domain +defined by an instance of an srcu_struct structure. +A pointer to this structure must be passed in to each SRCU function, +for example, synchronize_srcu(&ss), where +ss is the srcu_struct structure. +The key benefit of these domains is that a slow SRCU reader in one +domain does not delay an SRCU grace period in some other domain. +That said, one consequence of these domains is that read-side code +must pass a “cookie” from srcu_read_lock() +to srcu_read_unlock(), for example, as follows: + +

+
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&ss, idx);
+
+
+ +

+As noted above, it is legal to block within SRCU read-side critical sections, +however, with great power comes great responsibility. +If you block forever in one of a given domain's SRCU read-side critical +sections, then that domain's grace periods will also be blocked forever. +Of course, one good way to block forever is to deadlock, which can +happen if any operation in a given domain's SRCU read-side critical +section can block waiting, either directly or indirectly, for that domain's +grace period to elapse. +For example, this results in a self-deadlock: + +

+
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&ss);
+ 4 do_something();
+ 5 synchronize_srcu(&ss);
+ 6 srcu_read_unlock(&ss, idx);
+
+
+ +

+However, if line 5 acquired a mutex that was held across +a synchronize_srcu() for domain ss, +deadlock would still be possible. +Furthermore, if line 5 acquired a mutex that was held across +a synchronize_srcu() for some other domain ss1, +and if an ss1-domain SRCU read-side critical section +acquired another mutex that was held across as ss-domain +synchronize_srcu(), +deadlock would again be possible. +Such a deadlock cycle could extend across an arbitrarily large number +of different SRCU domains. +Again, with great power comes great responsibility. + +

+Unlike the other RCU flavors, SRCU read-side critical sections can +run on idle and even offline CPUs. +This ability requires that srcu_read_lock() and +srcu_read_unlock() contain memory barriers, which means +that SRCU readers will run a bit slower than would RCU readers. +It also motivates the smp_mb__after_srcu_read_unlock() +API, which, in combination with srcu_read_unlock(), +guarantees a full memory barrier. + +

+The +SRCU API +includes +srcu_read_lock(), +srcu_read_unlock(), +srcu_dereference(), +srcu_dereference_check(), +synchronize_srcu(), +synchronize_srcu_expedited(), +call_srcu(), +srcu_barrier(), and +srcu_read_lock_held(). +It also includes +DEFINE_SRCU(), +DEFINE_STATIC_SRCU(), and +init_srcu_struct() +APIs for defining and initializing srcu_struct structures. + +

Tasks RCU

+ +

+Some forms of tracing use “tramopolines” to handle the +binary rewriting required to install different types of probes. +It would be good to be able to free old trampolines, which sounds +like a job for some form of RCU. +However, because it is necessary to be able to install a trace +anywhere in the code, it is not possible to use read-side markers +such as rcu_read_lock() and rcu_read_unlock(). +In addition, it does not work to have these markers in the trampoline +itself, because there would need to be instructions following +rcu_read_unlock(). +Although synchronize_rcu() would guarantee that execution +reached the rcu_read_unlock(), it would not be able to +guarantee that execution had completely left the trampoline. + +

+The solution, in the form of +Tasks RCU, +is to have implicit +read-side critical sections that are delimited by voluntary context +switches, that is, calls to schedule(), +cond_resched_rcu_qs(), and +synchronize_rcu_tasks(). +In addition, transitions to and from userspace execution also delimit +tasks-RCU read-side critical sections. + +

+The tasks-RCU API is quite compact, consisting only of +call_rcu_tasks(), +synchronize_rcu_tasks(), and +rcu_barrier_tasks(). + +

Possible Future Changes

+ +

+One of the tricks that RCU uses to attain update-side scalability is +to increase grace-period latency with increasing numbers of CPUs. +If this becomes a serious problem, it will be necessary to rework the +grace-period state machine so as to avoid the need for the additional +latency. + +

+Expedited grace periods scan the CPUs, so their latency and overhead +increases with increasing numbers of CPUs. +If this becomes a serious problem on large systems, it will be necessary +to do some redesign to avoid this scalability problem. + +

+RCU disables CPU hotplug in a few places, perhaps most notably in the +expedited grace-period and rcu_barrier() operations. +If there is a strong reason to use expedited grace periods in CPU-hotplug +notifiers, it will be necessary to avoid disabling CPU hotplug. +This would introduce some complexity, so there had better be a very +good reason. + +

+The tradeoff between grace-period latency on the one hand and interruptions +of other CPUs on the other hand may need to be re-examined. +The desire is of course for zero grace-period latency as well as zero +interprocessor interrupts undertaken during an expedited grace period +operation. +While this ideal is unlikely to be achievable, it is quite possible that +further improvements can be made. + +

+The multiprocessor implementations of RCU use a combining tree that +groups CPUs so as to reduce lock contention and increase cache locality. +However, this combining tree does not spread its memory across NUMA +nodes nor does it align the CPU groups with hardware features such +as sockets or cores. +Such spreading and alignment is currently believed to be unnecessary +because the hotpath read-side primitives do not access the combining +tree, nor does call_rcu() in the common case. +If you believe that your architecture needs such spreading and alignment, +then your architecture should also benefit from the +rcutree.rcu_fanout_leaf boot parameter, which can be set +to the number of CPUs in a socket, NUMA node, or whatever. +If the number of CPUs is too large, use a fraction of the number of +CPUs. +If the number of CPUs is a large prime number, well, that certainly +is an “interesting” architectural choice! +More flexible arrangements might be considered, but only if +rcutree.rcu_fanout_leaf has proven inadequate, and only +if the inadequacy has been demonstrated by a carefully run and +realistic system-level workload. + +

+Please note that arrangements that require RCU to remap CPU numbers will +require extremely good demonstration of need and full exploration of +alternatives. + +

+There is an embarrassingly large number of flavors of RCU, and this +number has been increasing over time. +Perhaps it will be possible to combine some at some future date. + +

+RCU's various kthreads are reasonably recent additions. +It is quite likely that adjustments will be required to more gracefully +handle extreme loads. +It might also be necessary to be able to relate CPU utilization by +RCU's kthreads and softirq handlers to the code that instigated this +CPU utilization. +For example, RCU callback overhead might be charged back to the +originating call_rcu() instance, though probably not +in production kernels. + +

Summary

+ +

+This document has presented more than two decade's worth of RCU +requirements. +Given that the requirements keep changing, this will not be the last +word on this subject, but at least it serves to get an important +subset of the requirements set forth. + +

Acknowledgments

+ +I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar, +Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and +Andy Lutomirski for their help in rendering +this article human readable, and to Michelle Rankin for her support +of this effort. +Other contributions are acknowledged in the Linux kernel's git archive. +The cartoon is copyright (c) 2013 by Melissa Broussard, +and is provided +under the terms of the Creative Commons Attribution-Share Alike 3.0 +United States license. + +

@@QQAL@@ + + diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh new file mode 100755 index 0000000000000000000000000000000000000000..d354f069559b8f9c186f87786d608c9fa773fe8e --- /dev/null +++ b/Documentation/RCU/Design/htmlqqz.sh @@ -0,0 +1,108 @@ +#!/bin/sh +# +# Usage: sh htmlqqz.sh file +# +# Extracts and converts quick quizzes in a proto-HTML document file.htmlx. +# Commands, all of which must be on a line by themselves: +# +# "

@@QQ@@": Start of a quick quiz. +# "

@@QQA@@": Start of a quick-quiz answer. +# "

@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz. +# "

@@QQAL@@": Place to put quick-quiz answer list. +# +# Places the result in file.html. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (c) 2013 Paul E. McKenney, IBM Corporation. + +fn=$1 +if test ! -r $fn.htmlx +then + echo "Error: $fn.htmlx unreadable." + exit 1 +fi + +echo "" > $fn.html +echo "" >> $fn.html +awk < $fn.htmlx >> $fn.html ' + +state == "" && $1 != "

@@QQ@@" && $1 != "

@@QQAL@@" { + print $0; + if ($0 ~ /^

@@QQ/) + print "Bad Quick Quiz command: " NR " (expected

@@QQ@@ or

@@QQAL@@)." > "/dev/stderr" + next; +} + +state == "" && $1 == "

@@QQ@@" { + qqn++; + qqlineno = NR; + haveqq = 1; + state = "qq"; + print "

Quick Quiz " qqn ":" + next; +} + +state == "qq" && $1 != "

@@QQA@@" { + qq[qqn] = qq[qqn] $0 "\n"; + print $0 + if ($0 ~ /^

@@QQ/) + print "Bad Quick Quiz command: " NR ". (expected

@@QQA@@)" > "/dev/stderr" + next; +} + +state == "qq" && $1 == "

@@QQA@@" { + state = "qqa"; + print "
Answer" + next; +} + +state == "qqa" && $1 != "

@@QQE@@" { + qqa[qqn] = qqa[qqn] $0 "\n"; + if ($0 ~ /^

@@QQ/) + print "Bad Quick Quiz command: " NR " (expected

@@QQE@@)." > "/dev/stderr" + next; +} + +state == "qqa" && $1 == "

@@QQE@@" { + state = ""; + next; +} + +state == "" && $1 == "

@@QQAL@@" { + haveqq = ""; + print "

" + print "Answers to Quick Quizzes

" + print ""; + for (i = 1; i <= qqn; i++) { + print "" + print "

Quick Quiz " i ":" + print qq[i]; + print ""; + print "

Answer:" + print qqa[i]; + print ""; + print "

Back to Quick Quiz " i "." + print ""; + } + next; +} + +END { + if (state != "") + print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr" + else if (haveqq) + print "Missing \"

@@QQAL@@\", no Quick Quiz." > "/dev/stderr" +}' diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt new file mode 100644 index 0000000000000000000000000000000000000000..58b71ddf9b602a5e0e81852fbbbc54ef0c5b92cd --- /dev/null +++ b/Documentation/arm64/silicon-errata.txt @@ -0,0 +1,58 @@ + Silicon Errata and Software Workarounds + ======================================= + +Author: Will Deacon +Date : 27 November 2015 + +It is an unfortunate fact of life that hardware is often produced with +so-called "errata", which can cause it to deviate from the architecture +under specific circumstances. For hardware produced by ARM, these +errata are broadly classified into the following categories: + + Category A: A critical error without a viable workaround. + Category B: A significant or critical error with an acceptable + workaround. + Category C: A minor error that is not expected to occur under normal + operation. + +For more information, consult one of the "Software Developers Errata +Notice" documents available on infocenter.arm.com (registration +required). + +As far as Linux is concerned, Category B errata may require some special +treatment in the operating system. For example, avoiding a particular +sequence of code, or configuring the processor in a particular way. A +less common situation may require similar actions in order to declassify +a Category A erratum into a Category C erratum. These are collectively +known as "software workarounds" and are only required in the minority of +cases (e.g. those cases that both require a non-secure workaround *and* +can be triggered by Linux). + +For software workarounds that may adversely impact systems unaffected by +the erratum in question, a Kconfig entry is added under "Kernel +Features" -> "ARM errata workarounds via the alternatives framework". +These are enabled by default and patched in at runtime when an affected +CPU is detected. For less-intrusive workarounds, a Kconfig option is not +available and the code is structured (preferably with a comment) in such +a way that the erratum will not be hit. + +This approach can make it slightly onerous to determine exactly which +errata are worked around in an arbitrary kernel source tree, so this +file acts as a registry of software workarounds in the Linux Kernel and +will be updated when new workarounds are committed and backported to +stable kernels. + +| Implementor | Component | Erratum ID | Kconfig | ++----------------+-----------------+-----------------+-------------------------+ +| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | +| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | +| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | +| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | +| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | +| ARM | Cortex-A57 | #852523 | N/A | +| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| | | | | +| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | +| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroup-v1/00-INDEX similarity index 95% rename from Documentation/cgroups/00-INDEX rename to Documentation/cgroup-v1/00-INDEX index 3f5a40f57d4a43822afacee6eca951423e948445..6ad425f7cf56737522833c862d985ea7dbacf669 100644 --- a/Documentation/cgroups/00-INDEX +++ b/Documentation/cgroup-v1/00-INDEX @@ -24,7 +24,5 @@ net_prio.txt - Network priority cgroups details and usages. pids.txt - Process number cgroups details and usages. -resource_counter.txt - - Resource Counter API. unified-hierarchy.txt - Description the new/next cgroup interface. diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt similarity index 80% rename from Documentation/cgroups/blkio-controller.txt rename to Documentation/cgroup-v1/blkio-controller.txt index 52fa9f353342cc76734f49938f536f37545fee93..673dc34d3f7812c5030133821e81d9772f8aa546 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroup-v1/blkio-controller.txt @@ -84,8 +84,7 @@ Throttling/Upper Limit policy - Run dd to read a file and see if rate is throttled to 1MB/s or not. - # dd if=/mnt/common/zerofile of=/dev/null bs=4K count=1024 - # iflag=direct + # dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024 1024+0 records in 1024+0 records out 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s @@ -374,82 +373,3 @@ One can experience an overall throughput drop if you have created multiple groups and put applications in that group which are not driving enough IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle on individual groups and throughput should improve. - -Writeback -========= - -Page cache is dirtied through buffered writes and shared mmaps and -written asynchronously to the backing filesystem by the writeback -mechanism. Writeback sits between the memory and IO domains and -regulates the proportion of dirty memory by balancing dirtying and -write IOs. - -On traditional cgroup hierarchies, relationships between different -controllers cannot be established making it impossible for writeback -to operate accounting for cgroup resource restrictions and all -writeback IOs are attributed to the root cgroup. - -If both the blkio and memory controllers are used on the v2 hierarchy -and the filesystem supports cgroup writeback, writeback operations -correctly follow the resource restrictions imposed by both memory and -blkio controllers. - -Writeback examines both system-wide and per-cgroup dirty memory status -and enforces the more restrictive of the two. Also, writeback control -parameters which are absolute values - vm.dirty_bytes and -vm.dirty_background_bytes - are distributed across cgroups according -to their current writeback bandwidth. - -There's a peculiarity stemming from the discrepancy in ownership -granularity between memory controller and writeback. While memory -controller tracks ownership per page, writeback operates on inode -basis. cgroup writeback bridges the gap by tracking ownership by -inode but migrating ownership if too many foreign pages, pages which -don't match the current inode ownership, have been encountered while -writing back the inode. - -This is a conscious design choice as writeback operations are -inherently tied to inodes making strictly following page ownership -complicated and inefficient. The only use case which suffers from -this compromise is multiple cgroups concurrently dirtying disjoint -regions of the same inode, which is an unlikely use case and decided -to be unsupported. Note that as memory controller assigns page -ownership on the first use and doesn't update it until the page is -released, even if cgroup writeback strictly follows page ownership, -multiple cgroups dirtying overlapping areas wouldn't work as expected. -In general, write-sharing an inode across multiple cgroups is not well -supported. - -Filesystem support for cgroup writeback ---------------------------------------- - -A filesystem can make writeback IOs cgroup-aware by updating -address_space_operations->writepage[s]() to annotate bio's using the -following two functions. - -* wbc_init_bio(@wbc, @bio) - - Should be called for each bio carrying writeback data and associates - the bio with the inode's owner cgroup. Can be called anytime - between bio allocation and submission. - -* wbc_account_io(@wbc, @page, @bytes) - - Should be called for each data segment being written out. While - this function doesn't care exactly when it's called during the - writeback session, it's the easiest and most natural to call it as - data segments are added to a bio. - -With writeback bio's annotated, cgroup support can be enabled per -super_block by setting MS_CGROUPWB in ->s_flags. This allows for -selective disabling of cgroup writeback support which is helpful when -certain filesystem features, e.g. journaled data mode, are -incompatible. - -wbc_init_bio() binds the specified bio to its cgroup. Depending on -the configuration, the bio may be executed at a lower priority and if -the writeback session is holding shared resources, e.g. a journal -entry, may lead to priority inversion. There is no one easy solution -for the problem. Filesystems can try to work around specific problem -cases by skipping wbc_init_bio() or using bio_associate_blkcg() -directly. diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroup-v1/cgroups.txt similarity index 100% rename from Documentation/cgroups/cgroups.txt rename to Documentation/cgroup-v1/cgroups.txt diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroup-v1/cpuacct.txt similarity index 100% rename from Documentation/cgroups/cpuacct.txt rename to Documentation/cgroup-v1/cpuacct.txt diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt similarity index 100% rename from Documentation/cgroups/cpusets.txt rename to Documentation/cgroup-v1/cpusets.txt diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroup-v1/devices.txt similarity index 100% rename from Documentation/cgroups/devices.txt rename to Documentation/cgroup-v1/devices.txt diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroup-v1/freezer-subsystem.txt similarity index 100% rename from Documentation/cgroups/freezer-subsystem.txt rename to Documentation/cgroup-v1/freezer-subsystem.txt diff --git a/Documentation/cgroups/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt similarity index 100% rename from Documentation/cgroups/hugetlb.txt rename to Documentation/cgroup-v1/hugetlb.txt diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroup-v1/memcg_test.txt similarity index 100% rename from Documentation/cgroups/memcg_test.txt rename to Documentation/cgroup-v1/memcg_test.txt diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroup-v1/memory.txt similarity index 100% rename from Documentation/cgroups/memory.txt rename to Documentation/cgroup-v1/memory.txt diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroup-v1/net_cls.txt similarity index 100% rename from Documentation/cgroups/net_cls.txt rename to Documentation/cgroup-v1/net_cls.txt diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroup-v1/net_prio.txt similarity index 100% rename from Documentation/cgroups/net_prio.txt rename to Documentation/cgroup-v1/net_prio.txt diff --git a/Documentation/cgroups/pids.txt b/Documentation/cgroup-v1/pids.txt similarity index 100% rename from Documentation/cgroups/pids.txt rename to Documentation/cgroup-v1/pids.txt diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt new file mode 100644 index 0000000000000000000000000000000000000000..31d1f7bf12a19ee4a658461569b7285e3fc5df19 --- /dev/null +++ b/Documentation/cgroup-v2.txt @@ -0,0 +1,1293 @@ + +Control Group v2 + +October, 2015 Tejun Heo + +This is the authoritative documentation on the design, interface and +conventions of cgroup v2. It describes all userland-visible aspects +of cgroup including core and specific controller behaviors. All +future changes must be reflected in this document. Documentation for +v1 is available under Documentation/cgroup-legacy/. + +CONTENTS + +1. Introduction + 1-1. Terminology + 1-2. What is cgroup? +2. Basic Operations + 2-1. Mounting + 2-2. Organizing Processes + 2-3. [Un]populated Notification + 2-4. Controlling Controllers + 2-4-1. Enabling and Disabling + 2-4-2. Top-down Constraint + 2-4-3. No Internal Process Constraint + 2-5. Delegation + 2-5-1. Model of Delegation + 2-5-2. Delegation Containment + 2-6. Guidelines + 2-6-1. Organize Once and Control + 2-6-2. Avoid Name Collisions +3. Resource Distribution Models + 3-1. Weights + 3-2. Limits + 3-3. Protections + 3-4. Allocations +4. Interface Files + 4-1. Format + 4-2. Conventions + 4-3. Core Interface Files +5. Controllers + 5-1. CPU + 5-1-1. CPU Interface Files + 5-2. Memory + 5-2-1. Memory Interface Files + 5-2-2. Usage Guidelines + 5-2-3. Memory Ownership + 5-3. IO + 5-3-1. IO Interface Files + 5-3-2. Writeback +P. Information on Kernel Programming + P-1. Filesystem Support for Writeback +D. Deprecated v1 Core Features +R. Issues with v1 and Rationales for v2 + R-1. Multiple Hierarchies + R-2. Thread Granularity + R-3. Competition Between Inner Nodes and Threads + R-4. Other Interface Issues + R-5. Controller Issues and Remedies + R-5-1. Memory + + +1. Introduction + +1-1. Terminology + +"cgroup" stands for "control group" and is never capitalized. The +singular form is used to designate the whole feature and also as a +qualifier as in "cgroup controllers". When explicitly referring to +multiple individual control groups, the plural form "cgroups" is used. + + +1-2. What is cgroup? + +cgroup is a mechanism to organize processes hierarchically and +distribute system resources along the hierarchy in a controlled and +configurable manner. + +cgroup is largely composed of two parts - the core and controllers. +cgroup core is primarily responsible for hierarchically organizing +processes. A cgroup controller is usually responsible for +distributing a specific type of system resource along the hierarchy +although there are utility controllers which serve purposes other than +resource distribution. + +cgroups form a tree structure and every process in the system belongs +to one and only one cgroup. All threads of a process belong to the +same cgroup. On creation, all processes are put in the cgroup that +the parent process belongs to at the time. A process can be migrated +to another cgroup. Migration of a process doesn't affect already +existing descendant processes. + +Following certain structural constraints, controllers may be enabled or +disabled selectively on a cgroup. All controller behaviors are +hierarchical - if a controller is enabled on a cgroup, it affects all +processes which belong to the cgroups consisting the inclusive +sub-hierarchy of the cgroup. When a controller is enabled on a nested +cgroup, it always restricts the resource distribution further. The +restrictions set closer to the root in the hierarchy can not be +overridden from further away. + + +2. Basic Operations + +2-1. Mounting + +Unlike v1, cgroup v2 has only single hierarchy. The cgroup v2 +hierarchy can be mounted with the following mount command. + + # mount -t cgroup2 none $MOUNT_POINT + +cgroup2 filesystem has the magic number 0x63677270 ("cgrp"). All +controllers which support v2 and are not bound to a v1 hierarchy are +automatically bound to the v2 hierarchy and show up at the root. +Controllers which are not in active use in the v2 hierarchy can be +bound to other hierarchies. This allows mixing v2 hierarchy with the +legacy v1 multiple hierarchies in a fully backward compatible way. + +A controller can be moved across hierarchies only after the controller +is no longer referenced in its current hierarchy. Because per-cgroup +controller states are destroyed asynchronously and controllers may +have lingering references, a controller may not show up immediately on +the v2 hierarchy after the final umount of the previous hierarchy. +Similarly, a controller should be fully disabled to be moved out of +the unified hierarchy and it may take some time for the disabled +controller to become available for other hierarchies; furthermore, due +to inter-controller dependencies, other controllers may need to be +disabled too. + +While useful for development and manual configurations, moving +controllers dynamically between the v2 and other hierarchies is +strongly discouraged for production use. It is recommended to decide +the hierarchies and controller associations before starting using the +controllers after system boot. + + +2-2. Organizing Processes + +Initially, only the root cgroup exists to which all processes belong. +A child cgroup can be created by creating a sub-directory. + + # mkdir $CGROUP_NAME + +A given cgroup may have multiple child cgroups forming a tree +structure. Each cgroup has a read-writable interface file +"cgroup.procs". When read, it lists the PIDs of all processes which +belong to the cgroup one-per-line. The PIDs are not ordered and the +same PID may show up more than once if the process got moved to +another cgroup and then back or the PID got recycled while reading. + +A process can be migrated into a cgroup by writing its PID to the +target cgroup's "cgroup.procs" file. Only one process can be migrated +on a single write(2) call. If a process is composed of multiple +threads, writing the PID of any thread migrates all threads of the +process. + +When a process forks a child process, the new process is born into the +cgroup that the forking process belongs to at the time of the +operation. After exit, a process stays associated with the cgroup +that it belonged to at the time of exit until it's reaped; however, a +zombie process does not appear in "cgroup.procs" and thus can't be +moved to another cgroup. + +A cgroup which doesn't have any children or live processes can be +destroyed by removing the directory. Note that a cgroup which doesn't +have any children and is associated only with zombie processes is +considered empty and can be removed. + + # rmdir $CGROUP_NAME + +"/proc/$PID/cgroup" lists a process's cgroup membership. If legacy +cgroup is in use in the system, this file may contain multiple lines, +one for each hierarchy. The entry for cgroup v2 is always in the +format "0::$PATH". + + # cat /proc/842/cgroup + ... + 0::/test-cgroup/test-cgroup-nested + +If the process becomes a zombie and the cgroup it was associated with +is removed subsequently, " (deleted)" is appended to the path. + + # cat /proc/842/cgroup + ... + 0::/test-cgroup/test-cgroup-nested (deleted) + + +2-3. [Un]populated Notification + +Each non-root cgroup has a "cgroup.events" file which contains +"populated" field indicating whether the cgroup's sub-hierarchy has +live processes in it. Its value is 0 if there is no live process in +the cgroup and its descendants; otherwise, 1. poll and [id]notify +events are triggered when the value changes. This can be used, for +example, to start a clean-up operation after all processes of a given +sub-hierarchy have exited. The populated state updates and +notifications are recursive. Consider the following sub-hierarchy +where the numbers in the parentheses represent the numbers of processes +in each cgroup. + + A(4) - B(0) - C(1) + \ D(0) + +A, B and C's "populated" fields would be 1 while D's 0. After the one +process in C exits, B and C's "populated" fields would flip to "0" and +file modified events will be generated on the "cgroup.events" files of +both cgroups. + + +2-4. Controlling Controllers + +2-4-1. Enabling and Disabling + +Each cgroup has a "cgroup.controllers" file which lists all +controllers available for the cgroup to enable. + + # cat cgroup.controllers + cpu io memory + +No controller is enabled by default. Controllers can be enabled and +disabled by writing to the "cgroup.subtree_control" file. + + # echo "+cpu +memory -io" > cgroup.subtree_control + +Only controllers which are listed in "cgroup.controllers" can be +enabled. When multiple operations are specified as above, either they +all succeed or fail. If multiple operations on the same controller +are specified, the last one is effective. + +Enabling a controller in a cgroup indicates that the distribution of +the target resource across its immediate children will be controlled. +Consider the following sub-hierarchy. The enabled controllers are +listed in parentheses. + + A(cpu,memory) - B(memory) - C() + \ D() + +As A has "cpu" and "memory" enabled, A will control the distribution +of CPU cycles and memory to its children, in this case, B. As B has +"memory" enabled but not "CPU", C and D will compete freely on CPU +cycles but their division of memory available to B will be controlled. + +As a controller regulates the distribution of the target resource to +the cgroup's children, enabling it creates the controller's interface +files in the child cgroups. In the above example, enabling "cpu" on B +would create the "cpu." prefixed controller interface files in C and +D. Likewise, disabling "memory" from B would remove the "memory." +prefixed controller interface files from C and D. This means that the +controller interface files - anything which doesn't start with +"cgroup." are owned by the parent rather than the cgroup itself. + + +2-4-2. Top-down Constraint + +Resources are distributed top-down and a cgroup can further distribute +a resource only if the resource has been distributed to it from the +parent. This means that all non-root "cgroup.subtree_control" files +can only contain controllers which are enabled in the parent's +"cgroup.subtree_control" file. A controller can be enabled only if +the parent has the controller enabled and a controller can't be +disabled if one or more children have it enabled. + + +2-4-3. No Internal Process Constraint + +Non-root cgroups can only distribute resources to their children when +they don't have any processes of their own. In other words, only +cgroups which don't contain any processes can have controllers enabled +in their "cgroup.subtree_control" files. + +This guarantees that, when a controller is looking at the part of the +hierarchy which has it enabled, processes are always only on the +leaves. This rules out situations where child cgroups compete against +internal processes of the parent. + +The root cgroup is exempt from this restriction. Root contains +processes and anonymous resource consumption which can't be associated +with any other cgroups and requires special treatment from most +controllers. How resource consumption in the root cgroup is governed +is up to each controller. + +Note that the restriction doesn't get in the way if there is no +enabled controller in the cgroup's "cgroup.subtree_control". This is +important as otherwise it wouldn't be possible to create children of a +populated cgroup. To control resource distribution of a cgroup, the +cgroup must create children and transfer all its processes to the +children before enabling controllers in its "cgroup.subtree_control" +file. + + +2-5. Delegation + +2-5-1. Model of Delegation + +A cgroup can be delegated to a less privileged user by granting write +access of the directory and its "cgroup.procs" file to the user. Note +that resource control interface files in a given directory control the +distribution of the parent's resources and thus must not be delegated +along with the directory. + +Once delegated, the user can build sub-hierarchy under the directory, +organize processes as it sees fit and further distribute the resources +it received from the parent. The limits and other settings of all +resource controllers are hierarchical and regardless of what happens +in the delegated sub-hierarchy, nothing can escape the resource +restrictions imposed by the parent. + +Currently, cgroup doesn't impose any restrictions on the number of +cgroups in or nesting depth of a delegated sub-hierarchy; however, +this may be limited explicitly in the future. + + +2-5-2. Delegation Containment + +A delegated sub-hierarchy is contained in the sense that processes +can't be moved into or out of the sub-hierarchy by the delegatee. For +a process with a non-root euid to migrate a target process into a +cgroup by writing its PID to the "cgroup.procs" file, the following +conditions must be met. + +- The writer's euid must match either uid or suid of the target process. + +- The writer must have write access to the "cgroup.procs" file. + +- The writer must have write access to the "cgroup.procs" file of the + common ancestor of the source and destination cgroups. + +The above three constraints ensure that while a delegatee may migrate +processes around freely in the delegated sub-hierarchy it can't pull +in from or push out to outside the sub-hierarchy. + +For an example, let's assume cgroups C0 and C1 have been delegated to +user U0 who created C00, C01 under C0 and C10 under C1 as follows and +all processes under C0 and C1 belong to U0. + + ~~~~~~~~~~~~~ - C0 - C00 + ~ cgroup ~ \ C01 + ~ hierarchy ~ + ~~~~~~~~~~~~~ - C1 - C10 + +Let's also say U0 wants to write the PID of a process which is +currently in C10 into "C00/cgroup.procs". U0 has write access to the +file and uid match on the process; however, the common ancestor of the +source cgroup C10 and the destination cgroup C00 is above the points +of delegation and U0 would not have write access to its "cgroup.procs" +files and thus the write will be denied with -EACCES. + + +2-6. Guidelines + +2-6-1. Organize Once and Control + +Migrating a process across cgroups is a relatively expensive operation +and stateful resources such as memory are not moved together with the +process. This is an explicit design decision as there often exist +inherent trade-offs between migration and various hot paths in terms +of synchronization cost. + +As such, migrating processes across cgroups frequently as a means to +apply different resource restrictions is discouraged. A workload +should be assigned to a cgroup according to the system's logical and +resource structure once on start-up. Dynamic adjustments to resource +distribution can be made by changing controller configuration through +the interface files. + + +2-6-2. Avoid Name Collisions + +Interface files for a cgroup and its children cgroups occupy the same +directory and it is possible to create children cgroups which collide +with interface files. + +All cgroup core interface files are prefixed with "cgroup." and each +controller's interface files are prefixed with the controller name and +a dot. A controller's name is composed of lower case alphabets and +'_'s but never begins with an '_' so it can be used as the prefix +character for collision avoidance. Also, interface file names won't +start or end with terms which are often used in categorizing workloads +such as job, service, slice, unit or workload. + +cgroup doesn't do anything to prevent name collisions and it's the +user's responsibility to avoid them. + + +3. Resource Distribution Models + +cgroup controllers implement several resource distribution schemes +depending on the resource type and expected use cases. This section +describes major schemes in use along with their expected behaviors. + + +3-1. Weights + +A parent's resource is distributed by adding up the weights of all +active children and giving each the fraction matching the ratio of its +weight against the sum. As only children which can make use of the +resource at the moment participate in the distribution, this is +work-conserving. Due to the dynamic nature, this model is usually +used for stateless resources. + +All weights are in the range [1, 10000] with the default at 100. This +allows symmetric multiplicative biases in both directions at fine +enough granularity while staying in the intuitive range. + +As long as the weight is in range, all configuration combinations are +valid and there is no reason to reject configuration changes or +process migrations. + +"cpu.weight" proportionally distributes CPU cycles to active children +and is an example of this type. + + +3-2. Limits + +A child can only consume upto the configured amount of the resource. +Limits can be over-committed - the sum of the limits of children can +exceed the amount of resource available to the parent. + +Limits are in the range [0, max] and defaults to "max", which is noop. + +As limits can be over-committed, all configuration combinations are +valid and there is no reason to reject configuration changes or +process migrations. + +"io.max" limits the maximum BPS and/or IOPS that a cgroup can consume +on an IO device and is an example of this type. + + +3-3. Protections + +A cgroup is protected to be allocated upto the configured amount of +the resource if the usages of all its ancestors are under their +protected levels. Protections can be hard guarantees or best effort +soft boundaries. Protections can also be over-committed in which case +only upto the amount available to the parent is protected among +children. + +Protections are in the range [0, max] and defaults to 0, which is +noop. + +As protections can be over-committed, all configuration combinations +are valid and there is no reason to reject configuration changes or +process migrations. + +"memory.low" implements best-effort memory protection and is an +example of this type. + + +3-4. Allocations + +A cgroup is exclusively allocated a certain amount of a finite +resource. Allocations can't be over-committed - the sum of the +allocations of children can not exceed the amount of resource +available to the parent. + +Allocations are in the range [0, max] and defaults to 0, which is no +resource. + +As allocations can't be over-committed, some configuration +combinations are invalid and should be rejected. Also, if the +resource is mandatory for execution of processes, process migrations +may be rejected. + +"cpu.rt.max" hard-allocates realtime slices and is an example of this +type. + + +4. Interface Files + +4-1. Format + +All interface files should be in one of the following formats whenever +possible. + + New-line separated values + (when only one value can be written at once) + + VAL0\n + VAL1\n + ... + + Space separated values + (when read-only or multiple values can be written at once) + + VAL0 VAL1 ...\n + + Flat keyed + + KEY0 VAL0\n + KEY1 VAL1\n + ... + + Nested keyed + + KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01... + KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11... + ... + +For a writable file, the format for writing should generally match +reading; however, controllers may allow omitting later fields or +implement restricted shortcuts for most common use cases. + +For both flat and nested keyed files, only the values for a single key +can be written at a time. For nested keyed files, the sub key pairs +may be specified in any order and not all pairs have to be specified. + + +4-2. Conventions + +- Settings for a single feature should be contained in a single file. + +- The root cgroup should be exempt from resource control and thus + shouldn't have resource control interface files. Also, + informational files on the root cgroup which end up showing global + information available elsewhere shouldn't exist. + +- If a controller implements weight based resource distribution, its + interface file should be named "weight" and have the range [1, + 10000] with 100 as the default. The values are chosen to allow + enough and symmetric bias in both directions while keeping it + intuitive (the default is 100%). + +- If a controller implements an absolute resource guarantee and/or + limit, the interface files should be named "min" and "max" + respectively. If a controller implements best effort resource + guarantee and/or limit, the interface files should be named "low" + and "high" respectively. + + In the above four control files, the special token "max" should be + used to represent upward infinity for both reading and writing. + +- If a setting has a configurable default value and keyed specific + overrides, the default entry should be keyed with "default" and + appear as the first entry in the file. + + The default value can be updated by writing either "default $VAL" or + "$VAL". + + When writing to update a specific override, "default" can be used as + the value to indicate removal of the override. Override entries + with "default" as the value must not appear when read. + + For example, a setting which is keyed by major:minor device numbers + with integer values may look like the following. + + # cat cgroup-example-interface-file + default 150 + 8:0 300 + + The default value can be updated by + + # echo 125 > cgroup-example-interface-file + + or + + # echo "default 125" > cgroup-example-interface-file + + An override can be set by + + # echo "8:16 170" > cgroup-example-interface-file + + and cleared by + + # echo "8:0 default" > cgroup-example-interface-file + # cat cgroup-example-interface-file + default 125 + 8:16 170 + +- For events which are not very high frequency, an interface file + "events" should be created which lists event key value pairs. + Whenever a notifiable event happens, file modified event should be + generated on the file. + + +4-3. Core Interface Files + +All cgroup core files are prefixed with "cgroup." + + cgroup.procs + + A read-write new-line separated values file which exists on + all cgroups. + + When read, it lists the PIDs of all processes which belong to + the cgroup one-per-line. The PIDs are not ordered and the + same PID may show up more than once if the process got moved + to another cgroup and then back or the PID got recycled while + reading. + + A PID can be written to migrate the process associated with + the PID to the cgroup. The writer should match all of the + following conditions. + + - Its euid is either root or must match either uid or suid of + the target process. + + - It must have write access to the "cgroup.procs" file. + + - It must have write access to the "cgroup.procs" file of the + common ancestor of the source and destination cgroups. + + When delegating a sub-hierarchy, write access to this file + should be granted along with the containing directory. + + cgroup.controllers + + A read-only space separated values file which exists on all + cgroups. + + It shows space separated list of all controllers available to + the cgroup. The controllers are not ordered. + + cgroup.subtree_control + + A read-write space separated values file which exists on all + cgroups. Starts out empty. + + When read, it shows space separated list of the controllers + which are enabled to control resource distribution from the + cgroup to its children. + + Space separated list of controllers prefixed with '+' or '-' + can be written to enable or disable controllers. A controller + name prefixed with '+' enables the controller and '-' + disables. If a controller appears more than once on the list, + the last one is effective. When multiple enable and disable + operations are specified, either all succeed or all fail. + + cgroup.events + + A read-only flat-keyed file which exists on non-root cgroups. + The following entries are defined. Unless specified + otherwise, a value change in this file generates a file + modified event. + + populated + + 1 if the cgroup or its descendants contains any live + processes; otherwise, 0. + + +5. Controllers + +5-1. CPU + +[NOTE: The interface for the cpu controller hasn't been merged yet] + +The "cpu" controllers regulates distribution of CPU cycles. This +controller implements weight and absolute bandwidth limit models for +normal scheduling policy and absolute bandwidth allocation model for +realtime scheduling policy. + + +5-1-1. CPU Interface Files + +All time durations are in microseconds. + + cpu.stat + + A read-only flat-keyed file which exists on non-root cgroups. + + It reports the following six stats. + + usage_usec + user_usec + system_usec + nr_periods + nr_throttled + throttled_usec + + cpu.weight + + A read-write single value file which exists on non-root + cgroups. The default is "100". + + The weight in the range [1, 10000]. + + cpu.max + + A read-write two value file which exists on non-root cgroups. + The default is "max 100000". + + The maximum bandwidth limit. It's in the following format. + + $MAX $PERIOD + + which indicates that the group may consume upto $MAX in each + $PERIOD duration. "max" for $MAX indicates no limit. If only + one number is written, $MAX is updated. + + cpu.rt.max + + [NOTE: The semantics of this file is still under discussion and the + interface hasn't been merged yet] + + A read-write two value file which exists on all cgroups. + The default is "0 100000". + + The maximum realtime runtime allocation. Over-committing + configurations are disallowed and process migrations are + rejected if not enough bandwidth is available. It's in the + following format. + + $MAX $PERIOD + + which indicates that the group may consume upto $MAX in each + $PERIOD duration. If only one number is written, $MAX is + updated. + + +5-2. Memory + +The "memory" controller regulates distribution of memory. Memory is +stateful and implements both limit and protection models. Due to the +intertwining between memory usage and reclaim pressure and the +stateful nature of memory, the distribution model is relatively +complex. + +While not completely water-tight, all major memory usages by a given +cgroup are tracked so that the total memory consumption can be +accounted and controlled to a reasonable extent. Currently, the +following types of memory usages are tracked. + +- Userland memory - page cache and anonymous memory. + +- Kernel data structures such as dentries and inodes. + +- TCP socket buffers. + +The above list may expand in the future for better coverage. + + +5-2-1. Memory Interface Files + +All memory amounts are in bytes. If a value which is not aligned to +PAGE_SIZE is written, the value may be rounded up to the closest +PAGE_SIZE multiple when read back. + + memory.current + + A read-only single value file which exists on non-root + cgroups. + + The total amount of memory currently being used by the cgroup + and its descendants. + + memory.low + + A read-write single value file which exists on non-root + cgroups. The default is "0". + + Best-effort memory protection. If the memory usages of a + cgroup and all its ancestors are below their low boundaries, + the cgroup's memory won't be reclaimed unless memory can be + reclaimed from unprotected cgroups. + + Putting more memory than generally available under this + protection is discouraged. + + memory.high + + A read-write single value file which exists on non-root + cgroups. The default is "max". + + Memory usage throttle limit. This is the main mechanism to + control memory usage of a cgroup. If a cgroup's usage goes + over the high boundary, the processes of the cgroup are + throttled and put under heavy reclaim pressure. + + Going over the high limit never invokes the OOM killer and + under extreme conditions the limit may be breached. + + memory.max + + A read-write single value file which exists on non-root + cgroups. The default is "max". + + Memory usage hard limit. This is the final protection + mechanism. If a cgroup's memory usage reaches this limit and + can't be reduced, the OOM killer is invoked in the cgroup. + Under certain circumstances, the usage may go over the limit + temporarily. + + This is the ultimate protection mechanism. As long as the + high limit is used and monitored properly, this limit's + utility is limited to providing the final safety net. + + memory.events + + A read-only flat-keyed file which exists on non-root cgroups. + The following entries are defined. Unless specified + otherwise, a value change in this file generates a file + modified event. + + low + + The number of times the cgroup is reclaimed due to + high memory pressure even though its usage is under + the low boundary. This usually indicates that the low + boundary is over-committed. + + high + + The number of times processes of the cgroup are + throttled and routed to perform direct memory reclaim + because the high memory boundary was exceeded. For a + cgroup whose memory usage is capped by the high limit + rather than global memory pressure, this event's + occurrences are expected. + + max + + The number of times the cgroup's memory usage was + about to go over the max boundary. If direct reclaim + fails to bring it down, the OOM killer is invoked. + + oom + + The number of times the OOM killer has been invoked in + the cgroup. This may not exactly match the number of + processes killed but should generally be close. + + +5-2-2. General Usage + +"memory.high" is the main mechanism to control memory usage. +Over-committing on high limit (sum of high limits > available memory) +and letting global memory pressure to distribute memory according to +usage is a viable strategy. + +Because breach of the high limit doesn't trigger the OOM killer but +throttles the offending cgroup, a management agent has ample +opportunities to monitor and take appropriate actions such as granting +more memory or terminating the workload. + +Determining whether a cgroup has enough memory is not trivial as +memory usage doesn't indicate whether the workload can benefit from +more memory. For example, a workload which writes data received from +network to a file can use all available memory but can also operate as +performant with a small amount of memory. A measure of memory +pressure - how much the workload is being impacted due to lack of +memory - is necessary to determine whether a workload needs more +memory; unfortunately, memory pressure monitoring mechanism isn't +implemented yet. + + +5-2-3. Memory Ownership + +A memory area is charged to the cgroup which instantiated it and stays +charged to the cgroup until the area is released. Migrating a process +to a different cgroup doesn't move the memory usages that it +instantiated while in the previous cgroup to the new cgroup. + +A memory area may be used by processes belonging to different cgroups. +To which cgroup the area will be charged is in-deterministic; however, +over time, the memory area is likely to end up in a cgroup which has +enough memory allowance to avoid high reclaim pressure. + +If a cgroup sweeps a considerable amount of memory which is expected +to be accessed repeatedly by other cgroups, it may make sense to use +POSIX_FADV_DONTNEED to relinquish the ownership of memory areas +belonging to the affected files to ensure correct memory ownership. + + +5-3. IO + +The "io" controller regulates the distribution of IO resources. This +controller implements both weight based and absolute bandwidth or IOPS +limit distribution; however, weight based distribution is available +only if cfq-iosched is in use and neither scheme is available for +blk-mq devices. + + +5-3-1. IO Interface Files + + io.stat + + A read-only nested-keyed file which exists on non-root + cgroups. + + Lines are keyed by $MAJ:$MIN device numbers and not ordered. + The following nested keys are defined. + + rbytes Bytes read + wbytes Bytes written + rios Number of read IOs + wios Number of write IOs + + An example read output follows. + + 8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 + 8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 + + io.weight + + A read-write flat-keyed file which exists on non-root cgroups. + The default is "default 100". + + The first line is the default weight applied to devices + without specific override. The rest are overrides keyed by + $MAJ:$MIN device numbers and not ordered. The weights are in + the range [1, 10000] and specifies the relative amount IO time + the cgroup can use in relation to its siblings. + + The default weight can be updated by writing either "default + $WEIGHT" or simply "$WEIGHT". Overrides can be set by writing + "$MAJ:$MIN $WEIGHT" and unset by writing "$MAJ:$MIN default". + + An example read output follows. + + default 100 + 8:16 200 + 8:0 50 + + io.max + + A read-write nested-keyed file which exists on non-root + cgroups. + + BPS and IOPS based IO limit. Lines are keyed by $MAJ:$MIN + device numbers and not ordered. The following nested keys are + defined. + + rbps Max read bytes per second + wbps Max write bytes per second + riops Max read IO operations per second + wiops Max write IO operations per second + + When writing, any number of nested key-value pairs can be + specified in any order. "max" can be specified as the value + to remove a specific limit. If the same key is specified + multiple times, the outcome is undefined. + + BPS and IOPS are measured in each IO direction and IOs are + delayed if limit is reached. Temporary bursts are allowed. + + Setting read limit at 2M BPS and write at 120 IOPS for 8:16. + + echo "8:16 rbps=2097152 wiops=120" > io.max + + Reading returns the following. + + 8:16 rbps=2097152 wbps=max riops=max wiops=120 + + Write IOPS limit can be removed by writing the following. + + echo "8:16 wiops=max" > io.max + + Reading now returns the following. + + 8:16 rbps=2097152 wbps=max riops=max wiops=max + + +5-3-2. Writeback + +Page cache is dirtied through buffered writes and shared mmaps and +written asynchronously to the backing filesystem by the writeback +mechanism. Writeback sits between the memory and IO domains and +regulates the proportion of dirty memory by balancing dirtying and +write IOs. + +The io controller, in conjunction with the memory controller, +implements control of page cache writeback IOs. The memory controller +defines the memory domain that dirty memory ratio is calculated and +maintained for and the io controller defines the io domain which +writes out dirty pages for the memory domain. Both system-wide and +per-cgroup dirty memory states are examined and the more restrictive +of the two is enforced. + +cgroup writeback requires explicit support from the underlying +filesystem. Currently, cgroup writeback is implemented on ext2, ext4 +and btrfs. On other filesystems, all writeback IOs are attributed to +the root cgroup. + +There are inherent differences in memory and writeback management +which affects how cgroup ownership is tracked. Memory is tracked per +page while writeback per inode. For the purpose of writeback, an +inode is assigned to a cgroup and all IO requests to write dirty pages +from the inode are attributed to that cgroup. + +As cgroup ownership for memory is tracked per page, there can be pages +which are associated with different cgroups than the one the inode is +associated with. These are called foreign pages. The writeback +constantly keeps track of foreign pages and, if a particular foreign +cgroup becomes the majority over a certain period of time, switches +the ownership of the inode to that cgroup. + +While this model is enough for most use cases where a given inode is +mostly dirtied by a single cgroup even when the main writing cgroup +changes over time, use cases where multiple cgroups write to a single +inode simultaneously are not supported well. In such circumstances, a +significant portion of IOs are likely to be attributed incorrectly. +As memory controller assigns page ownership on the first use and +doesn't update it until the page is released, even if writeback +strictly follows page ownership, multiple cgroups dirtying overlapping +areas wouldn't work as expected. It's recommended to avoid such usage +patterns. + +The sysctl knobs which affect writeback behavior are applied to cgroup +writeback as follows. + + vm.dirty_background_ratio + vm.dirty_ratio + + These ratios apply the same to cgroup writeback with the + amount of available memory capped by limits imposed by the + memory controller and system-wide clean memory. + + vm.dirty_background_bytes + vm.dirty_bytes + + For cgroup writeback, this is calculated into ratio against + total available memory and applied the same way as + vm.dirty[_background]_ratio. + + +P. Information on Kernel Programming + +This section contains kernel programming information in the areas +where interacting with cgroup is necessary. cgroup core and +controllers are not covered. + + +P-1. Filesystem Support for Writeback + +A filesystem can support cgroup writeback by updating +address_space_operations->writepage[s]() to annotate bio's using the +following two functions. + + wbc_init_bio(@wbc, @bio) + + Should be called for each bio carrying writeback data and + associates the bio with the inode's owner cgroup. Can be + called anytime between bio allocation and submission. + + wbc_account_io(@wbc, @page, @bytes) + + Should be called for each data segment being written out. + While this function doesn't care exactly when it's called + during the writeback session, it's the easiest and most + natural to call it as data segments are added to a bio. + +With writeback bio's annotated, cgroup support can be enabled per +super_block by setting SB_I_CGROUPWB in ->s_iflags. This allows for +selective disabling of cgroup writeback support which is helpful when +certain filesystem features, e.g. journaled data mode, are +incompatible. + +wbc_init_bio() binds the specified bio to its cgroup. Depending on +the configuration, the bio may be executed at a lower priority and if +the writeback session is holding shared resources, e.g. a journal +entry, may lead to priority inversion. There is no one easy solution +for the problem. Filesystems can try to work around specific problem +cases by skipping wbc_init_bio() or using bio_associate_blkcg() +directly. + + +D. Deprecated v1 Core Features + +- Multiple hierarchies including named ones are not supported. + +- All mount options and remounting are not supported. + +- The "tasks" file is removed and "cgroup.procs" is not sorted. + +- "cgroup.clone_children" is removed. + +- /proc/cgroups is meaningless for v2. Use "cgroup.controllers" file + at the root instead. + + +R. Issues with v1 and Rationales for v2 + +R-1. Multiple Hierarchies + +cgroup v1 allowed an arbitrary number of hierarchies and each +hierarchy could host any number of controllers. While this seemed to +provide a high level of flexibility, it wasn't useful in practice. + +For example, as there is only one instance of each controller, utility +type controllers such as freezer which can be useful in all +hierarchies could only be used in one. The issue is exacerbated by +the fact that controllers couldn't be moved to another hierarchy once +hierarchies were populated. Another issue was that all controllers +bound to a hierarchy were forced to have exactly the same view of the +hierarchy. It wasn't possible to vary the granularity depending on +the specific controller. + +In practice, these issues heavily limited which controllers could be +put on the same hierarchy and most configurations resorted to putting +each controller on its own hierarchy. Only closely related ones, such +as the cpu and cpuacct controllers, made sense to be put on the same +hierarchy. This often meant that userland ended up managing multiple +similar hierarchies repeating the same steps on each hierarchy +whenever a hierarchy management operation was necessary. + +Furthermore, support for multiple hierarchies came at a steep cost. +It greatly complicated cgroup core implementation but more importantly +the support for multiple hierarchies restricted how cgroup could be +used in general and what controllers was able to do. + +There was no limit on how many hierarchies there might be, which meant +that a thread's cgroup membership couldn't be described in finite +length. The key might contain any number of entries and was unlimited +in length, which made it highly awkward to manipulate and led to +addition of controllers which existed only to identify membership, +which in turn exacerbated the original problem of proliferating number +of hierarchies. + +Also, as a controller couldn't have any expectation regarding the +topologies of hierarchies other controllers might be on, each +controller had to assume that all other controllers were attached to +completely orthogonal hierarchies. This made it impossible, or at +least very cumbersome, for controllers to cooperate with each other. + +In most use cases, putting controllers on hierarchies which are +completely orthogonal to each other isn't necessary. What usually is +called for is the ability to have differing levels of granularity +depending on the specific controller. In other words, hierarchy may +be collapsed from leaf towards root when viewed from specific +controllers. For example, a given configuration might not care about +how memory is distributed beyond a certain level while still wanting +to control how CPU cycles are distributed. + + +R-2. Thread Granularity + +cgroup v1 allowed threads of a process to belong to different cgroups. +This didn't make sense for some controllers and those controllers +ended up implementing different ways to ignore such situations but +much more importantly it blurred the line between API exposed to +individual applications and system management interface. + +Generally, in-process knowledge is available only to the process +itself; thus, unlike service-level organization of processes, +categorizing threads of a process requires active participation from +the application which owns the target process. + +cgroup v1 had an ambiguously defined delegation model which got abused +in combination with thread granularity. cgroups were delegated to +individual applications so that they can create and manage their own +sub-hierarchies and control resource distributions along them. This +effectively raised cgroup to the status of a syscall-like API exposed +to lay programs. + +First of all, cgroup has a fundamentally inadequate interface to be +exposed this way. For a process to access its own knobs, it has to +extract the path on the target hierarchy from /proc/self/cgroup, +construct the path by appending the name of the knob to the path, open +and then read and/or write to it. This is not only extremely clunky +and unusual but also inherently racy. There is no conventional way to +define transaction across the required steps and nothing can guarantee +that the process would actually be operating on its own sub-hierarchy. + +cgroup controllers implemented a number of knobs which would never be +accepted as public APIs because they were just adding control knobs to +system-management pseudo filesystem. cgroup ended up with interface +knobs which were not properly abstracted or refined and directly +revealed kernel internal details. These knobs got exposed to +individual applications through the ill-defined delegation mechanism +effectively abusing cgroup as a shortcut to implementing public APIs +without going through the required scrutiny. + +This was painful for both userland and kernel. Userland ended up with +misbehaving and poorly abstracted interfaces and kernel exposing and +locked into constructs inadvertently. + + +R-3. Competition Between Inner Nodes and Threads + +cgroup v1 allowed threads to be in any cgroups which created an +interesting problem where threads belonging to a parent cgroup and its +children cgroups competed for resources. This was nasty as two +different types of entities competed and there was no obvious way to +settle it. Different controllers did different things. + +The cpu controller considered threads and cgroups as equivalents and +mapped nice levels to cgroup weights. This worked for some cases but +fell flat when children wanted to be allocated specific ratios of CPU +cycles and the number of internal threads fluctuated - the ratios +constantly changed as the number of competing entities fluctuated. +There also were other issues. The mapping from nice level to weight +wasn't obvious or universal, and there were various other knobs which +simply weren't available for threads. + +The io controller implicitly created a hidden leaf node for each +cgroup to host the threads. The hidden leaf had its own copies of all +the knobs with "leaf_" prefixed. While this allowed equivalent +control over internal threads, it was with serious drawbacks. It +always added an extra layer of nesting which wouldn't be necessary +otherwise, made the interface messy and significantly complicated the +implementation. + +The memory controller didn't have a way to control what happened +between internal tasks and child cgroups and the behavior was not +clearly defined. There were attempts to add ad-hoc behaviors and +knobs to tailor the behavior to specific workloads which would have +led to problems extremely difficult to resolve in the long term. + +Multiple controllers struggled with internal tasks and came up with +different ways to deal with it; unfortunately, all the approaches were +severely flawed and, furthermore, the widely different behaviors +made cgroup as a whole highly inconsistent. + +This clearly is a problem which needs to be addressed from cgroup core +in a uniform way. + + +R-4. Other Interface Issues + +cgroup v1 grew without oversight and developed a large number of +idiosyncrasies and inconsistencies. One issue on the cgroup core side +was how an empty cgroup was notified - a userland helper binary was +forked and executed for each event. The event delivery wasn't +recursive or delegatable. The limitations of the mechanism also led +to in-kernel event delivery filtering mechanism further complicating +the interface. + +Controller interfaces were problematic too. An extreme example is +controllers completely ignoring hierarchical organization and treating +all cgroups as if they were all located directly under the root +cgroup. Some controllers exposed a large amount of inconsistent +implementation details to userland. + +There also was no consistency across controllers. When a new cgroup +was created, some controllers defaulted to not imposing extra +restrictions while others disallowed any resource usage until +explicitly configured. Configuration knobs for the same type of +control used widely differing naming schemes and formats. Statistics +and information knobs were named arbitrarily and used different +formats and units even in the same controller. + +cgroup v2 establishes common conventions where appropriate and updates +controllers so that they expose minimal and consistent interfaces. + + +R-5. Controller Issues and Remedies + +R-5-1. Memory + +The original lower boundary, the soft limit, is defined as a limit +that is per default unset. As a result, the set of cgroups that +global reclaim prefers is opt-in, rather than opt-out. The costs for +optimizing these mostly negative lookups are so high that the +implementation, despite its enormous size, does not even provide the +basic desirable behavior. First off, the soft limit has no +hierarchical meaning. All configured groups are organized in a global +rbtree and treated like equal peers, regardless where they are located +in the hierarchy. This makes subtree delegation impossible. Second, +the soft limit reclaim pass is so aggressive that it not just +introduces high allocation latencies into the system, but also impacts +system performance due to overreclaim, to the point where the feature +becomes self-defeating. + +The memory.low boundary on the other hand is a top-down allocated +reserve. A cgroup enjoys reclaim protection when it and all its +ancestors are below their low boundaries, which makes delegation of +subtrees possible. Secondly, new cgroups have no reserve per default +and in the common case most cgroups are eligible for the preferred +reclaim pass. This allows the new low boundary to be efficiently +implemented with just a minor addition to the generic reclaim code, +without the need for out-of-band data structures and reclaim passes. +Because the generic reclaim code considers all cgroups except for the +ones running low in the preferred first reclaim pass, overreclaim of +individual groups is eliminated as well, resulting in much better +overall workload performance. + +The original high boundary, the hard limit, is defined as a strict +limit that can not budge, even if the OOM killer has to be called. +But this generally goes against the goal of making the most out of the +available memory. The memory consumption of workloads varies during +runtime, and that requires users to overcommit. But doing that with a +strict upper limit requires either a fairly accurate prediction of the +working set size or adding slack to the limit. Since working set size +estimation is hard and error prone, and getting it wrong results in +OOM kills, most users tend to err on the side of a looser limit and +end up wasting precious resources. + +The memory.high boundary on the other hand can be set much more +conservatively. When hit, it throttles allocations by forcing them +into direct reclaim to work off the excess, but it never invokes the +OOM killer. As a result, a high boundary that is chosen too +aggressively will not terminate the processes, but instead it will +lead to gradual performance degradation. The user can monitor this +and make corrections until the minimal memory footprint that still +gives acceptable performance is found. + +In extreme cases, with many concurrent allocations and a complete +breakdown of reclaim progress within the group, the high boundary can +be exceeded. But even then it's mostly better to satisfy the +allocation from the slack available in other groups or the rest of the +system than killing the group. Otherwise, memory.max is there to +limit this type of spillover and ultimately contain buggy or even +malicious applications. diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt deleted file mode 100644 index 781b1d475bcfc9666d0d3dad80832c74adb48b22..0000000000000000000000000000000000000000 --- a/Documentation/cgroups/unified-hierarchy.txt +++ /dev/null @@ -1,647 +0,0 @@ - -Cgroup unified hierarchy - -April, 2014 Tejun Heo - -This document describes the changes made by unified hierarchy and -their rationales. It will eventually be merged into the main cgroup -documentation. - -CONTENTS - -1. Background -2. Basic Operation - 2-1. Mounting - 2-2. cgroup.subtree_control - 2-3. cgroup.controllers -3. Structural Constraints - 3-1. Top-down - 3-2. No internal tasks -4. Delegation - 4-1. Model of delegation - 4-2. Common ancestor rule -5. Other Changes - 5-1. [Un]populated Notification - 5-2. Other Core Changes - 5-3. Controller File Conventions - 5-3-1. Format - 5-3-2. Control Knobs - 5-4. Per-Controller Changes - 5-4-1. io - 5-4-2. cpuset - 5-4-3. memory -6. Planned Changes - 6-1. CAP for resource control - - -1. Background - -cgroup allows an arbitrary number of hierarchies and each hierarchy -can host any number of controllers. While this seems to provide a -high level of flexibility, it isn't quite useful in practice. - -For example, as there is only one instance of each controller, utility -type controllers such as freezer which can be useful in all -hierarchies can only be used in one. The issue is exacerbated by the -fact that controllers can't be moved around once hierarchies are -populated. Another issue is that all controllers bound to a hierarchy -are forced to have exactly the same view of the hierarchy. It isn't -possible to vary the granularity depending on the specific controller. - -In practice, these issues heavily limit which controllers can be put -on the same hierarchy and most configurations resort to putting each -controller on its own hierarchy. Only closely related ones, such as -the cpu and cpuacct controllers, make sense to put on the same -hierarchy. This often means that userland ends up managing multiple -similar hierarchies repeating the same steps on each hierarchy -whenever a hierarchy management operation is necessary. - -Unfortunately, support for multiple hierarchies comes at a steep cost. -Internal implementation in cgroup core proper is dazzlingly -complicated but more importantly the support for multiple hierarchies -restricts how cgroup is used in general and what controllers can do. - -There's no limit on how many hierarchies there may be, which means -that a task's cgroup membership can't be described in finite length. -The key may contain any varying number of entries and is unlimited in -length, which makes it highly awkward to handle and leads to addition -of controllers which exist only to identify membership, which in turn -exacerbates the original problem. - -Also, as a controller can't have any expectation regarding what shape -of hierarchies other controllers would be on, each controller has to -assume that all other controllers are operating on completely -orthogonal hierarchies. This makes it impossible, or at least very -cumbersome, for controllers to cooperate with each other. - -In most use cases, putting controllers on hierarchies which are -completely orthogonal to each other isn't necessary. What usually is -called for is the ability to have differing levels of granularity -depending on the specific controller. In other words, hierarchy may -be collapsed from leaf towards root when viewed from specific -controllers. For example, a given configuration might not care about -how memory is distributed beyond a certain level while still wanting -to control how CPU cycles are distributed. - -Unified hierarchy is the next version of cgroup interface. It aims to -address the aforementioned issues by having more structure while -retaining enough flexibility for most use cases. Various other -general and controller-specific interface issues are also addressed in -the process. - - -2. Basic Operation - -2-1. Mounting - -Currently, unified hierarchy can be mounted with the following mount -command. Note that this is still under development and scheduled to -change soon. - - mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT - -All controllers which support the unified hierarchy and are not bound -to other hierarchies are automatically bound to unified hierarchy and -show up at the root of it. Controllers which are enabled only in the -root of unified hierarchy can be bound to other hierarchies. This -allows mixing unified hierarchy with the traditional multiple -hierarchies in a fully backward compatible way. - -A controller can be moved across hierarchies only after the controller -is no longer referenced in its current hierarchy. Because per-cgroup -controller states are destroyed asynchronously and controllers may -have lingering references, a controller may not show up immediately on -the unified hierarchy after the final umount of the previous -hierarchy. Similarly, a controller should be fully disabled to be -moved out of the unified hierarchy and it may take some time for the -disabled controller to become available for other hierarchies; -furthermore, due to dependencies among controllers, other controllers -may need to be disabled too. - -While useful for development and manual configurations, dynamically -moving controllers between the unified and other hierarchies is -strongly discouraged for production use. It is recommended to decide -the hierarchies and controller associations before starting using the -controllers. - - -2-2. cgroup.subtree_control - -All cgroups on unified hierarchy have a "cgroup.subtree_control" file -which governs which controllers are enabled on the children of the -cgroup. Let's assume a hierarchy like the following. - - root - A - B - C - \ D - -root's "cgroup.subtree_control" file determines which controllers are -enabled on A. A's on B. B's on C and D. This coincides with the -fact that controllers on the immediate sub-level are used to -distribute the resources of the parent. In fact, it's natural to -assume that resource control knobs of a child belong to its parent. -Enabling a controller in a "cgroup.subtree_control" file declares that -distribution of the respective resources of the cgroup will be -controlled. Note that this means that controller enable states are -shared among siblings. - -When read, the file contains a space-separated list of currently -enabled controllers. A write to the file should contain a -space-separated list of controllers with '+' or '-' prefixed (without -the quotes). Controllers prefixed with '+' are enabled and '-' -disabled. If a controller is listed multiple times, the last entry -wins. The specific operations are executed atomically - either all -succeed or fail. - - -2-3. cgroup.controllers - -Read-only "cgroup.controllers" file contains a space-separated list of -controllers which can be enabled in the cgroup's -"cgroup.subtree_control" file. - -In the root cgroup, this lists controllers which are not bound to -other hierarchies and the content changes as controllers are bound to -and unbound from other hierarchies. - -In non-root cgroups, the content of this file equals that of the -parent's "cgroup.subtree_control" file as only controllers enabled -from the parent can be used in its children. - - -3. Structural Constraints - -3-1. Top-down - -As it doesn't make sense to nest control of an uncontrolled resource, -all non-root "cgroup.subtree_control" files can only contain -controllers which are enabled in the parent's "cgroup.subtree_control" -file. A controller can be enabled only if the parent has the -controller enabled and a controller can't be disabled if one or more -children have it enabled. - - -3-2. No internal tasks - -One long-standing issue that cgroup faces is the competition between -tasks belonging to the parent cgroup and its children cgroups. This -is inherently nasty as two different types of entities compete and -there is no agreed-upon obvious way to handle it. Different -controllers are doing different things. - -The cpu controller considers tasks and cgroups as equivalents and maps -nice levels to cgroup weights. This works for some cases but falls -flat when children should be allocated specific ratios of CPU cycles -and the number of internal tasks fluctuates - the ratios constantly -change as the number of competing entities fluctuates. There also are -other issues. The mapping from nice level to weight isn't obvious or -universal, and there are various other knobs which simply aren't -available for tasks. - -The io controller implicitly creates a hidden leaf node for each -cgroup to host the tasks. The hidden leaf has its own copies of all -the knobs with "leaf_" prefixed. While this allows equivalent control -over internal tasks, it's with serious drawbacks. It always adds an -extra layer of nesting which may not be necessary, makes the interface -messy and significantly complicates the implementation. - -The memory controller currently doesn't have a way to control what -happens between internal tasks and child cgroups and the behavior is -not clearly defined. There have been attempts to add ad-hoc behaviors -and knobs to tailor the behavior to specific workloads. Continuing -this direction will lead to problems which will be extremely difficult -to resolve in the long term. - -Multiple controllers struggle with internal tasks and came up with -different ways to deal with it; unfortunately, all the approaches in -use now are severely flawed and, furthermore, the widely different -behaviors make cgroup as whole highly inconsistent. - -It is clear that this is something which needs to be addressed from -cgroup core proper in a uniform way so that controllers don't need to -worry about it and cgroup as a whole shows a consistent and logical -behavior. To achieve that, unified hierarchy enforces the following -structural constraint: - - Except for the root, only cgroups which don't contain any task may - have controllers enabled in their "cgroup.subtree_control" files. - -Combined with other properties, this guarantees that, when a -controller is looking at the part of the hierarchy which has it -enabled, tasks are always only on the leaves. This rules out -situations where child cgroups compete against internal tasks of the -parent. - -There are two things to note. Firstly, the root cgroup is exempt from -the restriction. Root contains tasks and anonymous resource -consumption which can't be associated with any other cgroup and -requires special treatment from most controllers. How resource -consumption in the root cgroup is governed is up to each controller. - -Secondly, the restriction doesn't take effect if there is no enabled -controller in the cgroup's "cgroup.subtree_control" file. This is -important as otherwise it wouldn't be possible to create children of a -populated cgroup. To control resource distribution of a cgroup, the -cgroup must create children and transfer all its tasks to the children -before enabling controllers in its "cgroup.subtree_control" file. - - -4. Delegation - -4-1. Model of delegation - -A cgroup can be delegated to a less privileged user by granting write -access of the directory and its "cgroup.procs" file to the user. Note -that the resource control knobs in a given directory concern the -resources of the parent and thus must not be delegated along with the -directory. - -Once delegated, the user can build sub-hierarchy under the directory, -organize processes as it sees fit and further distribute the resources -it got from the parent. The limits and other settings of all resource -controllers are hierarchical and regardless of what happens in the -delegated sub-hierarchy, nothing can escape the resource restrictions -imposed by the parent. - -Currently, cgroup doesn't impose any restrictions on the number of -cgroups in or nesting depth of a delegated sub-hierarchy; however, -this may in the future be limited explicitly. - - -4-2. Common ancestor rule - -On the unified hierarchy, to write to a "cgroup.procs" file, in -addition to the usual write permission to the file and uid match, the -writer must also have write access to the "cgroup.procs" file of the -common ancestor of the source and destination cgroups. This prevents -delegatees from smuggling processes across disjoint sub-hierarchies. - -Let's say cgroups C0 and C1 have been delegated to user U0 who created -C00, C01 under C0 and C10 under C1 as follows. - - ~~~~~~~~~~~~~ - C0 - C00 - ~ cgroup ~ \ C01 - ~ hierarchy ~ - ~~~~~~~~~~~~~ - C1 - C10 - -C0 and C1 are separate entities in terms of resource distribution -regardless of their relative positions in the hierarchy. The -resources the processes under C0 are entitled to are controlled by -C0's ancestors and may be completely different from C1. It's clear -that the intention of delegating C0 to U0 is allowing U0 to organize -the processes under C0 and further control the distribution of C0's -resources. - -On traditional hierarchies, if a task has write access to "tasks" or -"cgroup.procs" file of a cgroup and its uid agrees with the target, it -can move the target to the cgroup. In the above example, U0 will not -only be able to move processes in each sub-hierarchy but also across -the two sub-hierarchies, effectively allowing it to violate the -organizational and resource restrictions implied by the hierarchical -structure above C0 and C1. - -On the unified hierarchy, let's say U0 wants to write the pid of a -process which has a matching uid and is currently in C10 into -"C00/cgroup.procs". U0 obviously has write access to the file and -migration permission on the process; however, the common ancestor of -the source cgroup C10 and the destination cgroup C00 is above the -points of delegation and U0 would not have write access to its -"cgroup.procs" and thus be denied with -EACCES. - - -5. Other Changes - -5-1. [Un]populated Notification - -cgroup users often need a way to determine when a cgroup's -subhierarchy becomes empty so that it can be cleaned up. cgroup -currently provides release_agent for it; unfortunately, this mechanism -is riddled with issues. - -- It delivers events by forking and execing a userland binary - specified as the release_agent. This is a long deprecated method of - notification delivery. It's extremely heavy, slow and cumbersome to - integrate with larger infrastructure. - -- There is single monitoring point at the root. There's no way to - delegate management of a subtree. - -- The event isn't recursive. It triggers when a cgroup doesn't have - any tasks or child cgroups. Events for internal nodes trigger only - after all children are removed. This again makes it impossible to - delegate management of a subtree. - -- Events are filtered from the kernel side. A "notify_on_release" - file is used to subscribe to or suppress release events. This is - unnecessarily complicated and probably done this way because event - delivery itself was expensive. - -Unified hierarchy implements "populated" field in "cgroup.events" -interface file which can be used to monitor whether the cgroup's -subhierarchy has tasks in it or not. Its value is 0 if there is no -task in the cgroup and its descendants; otherwise, 1. poll and -[id]notify events are triggered when the value changes. - -This is significantly lighter and simpler and trivially allows -delegating management of subhierarchy - subhierarchy monitoring can -block further propagation simply by putting itself or another process -in the subhierarchy and monitor events that it's interested in from -there without interfering with monitoring higher in the tree. - -In unified hierarchy, the release_agent mechanism is no longer -supported and the interface files "release_agent" and -"notify_on_release" do not exist. - - -5-2. Other Core Changes - -- None of the mount options is allowed. - -- remount is disallowed. - -- rename(2) is disallowed. - -- The "tasks" file is removed. Everything should at process - granularity. Use the "cgroup.procs" file instead. - -- The "cgroup.procs" file is not sorted. pids will be unique unless - they got recycled in-between reads. - -- The "cgroup.clone_children" file is removed. - -- /proc/PID/cgroup keeps reporting the cgroup that a zombie belonged - to before exiting. If the cgroup is removed before the zombie is - reaped, " (deleted)" is appeneded to the path. - - -5-3. Controller File Conventions - -5-3-1. Format - -In general, all controller files should be in one of the following -formats whenever possible. - -- Values only files - - VAL0 VAL1...\n - -- Flat keyed files - - KEY0 VAL0\n - KEY1 VAL1\n - ... - -- Nested keyed files - - KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01... - KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11... - ... - -For a writeable file, the format for writing should generally match -reading; however, controllers may allow omitting later fields or -implement restricted shortcuts for most common use cases. - -For both flat and nested keyed files, only the values for a single key -can be written at a time. For nested keyed files, the sub key pairs -may be specified in any order and not all pairs have to be specified. - - -5-3-2. Control Knobs - -- Settings for a single feature should generally be implemented in a - single file. - -- In general, the root cgroup should be exempt from resource control - and thus shouldn't have resource control knobs. - -- If a controller implements ratio based resource distribution, the - control knob should be named "weight" and have the range [1, 10000] - and 100 should be the default value. The values are chosen to allow - enough and symmetric bias in both directions while keeping it - intuitive (the default is 100%). - -- If a controller implements an absolute resource guarantee and/or - limit, the control knobs should be named "min" and "max" - respectively. If a controller implements best effort resource - gurantee and/or limit, the control knobs should be named "low" and - "high" respectively. - - In the above four control files, the special token "max" should be - used to represent upward infinity for both reading and writing. - -- If a setting has configurable default value and specific overrides, - the default settings should be keyed with "default" and appear as - the first entry in the file. Specific entries can use "default" as - its value to indicate inheritance of the default value. - -- For events which are not very high frequency, an interface file - "events" should be created which lists event key value pairs. - Whenever a notifiable event happens, file modified event should be - generated on the file. - - -5-4. Per-Controller Changes - -5-4-1. io - -- blkio is renamed to io. The interface is overhauled anyway. The - new name is more in line with the other two major controllers, cpu - and memory, and better suited given that it may be used for cgroup - writeback without involving block layer. - -- Everything including stat is always hierarchical making separate - recursive stat files pointless and, as no internal node can have - tasks, leaf weights are meaningless. The operation model is - simplified and the interface is overhauled accordingly. - - io.stat - - The stat file. The reported stats are from the point where - bio's are issued to request_queue. The stats are counted - independent of which policies are enabled. Each line in the - file follows the following format. More fields may later be - added at the end. - - $MAJ:$MIN rbytes=$RBYTES wbytes=$WBYTES rios=$RIOS wrios=$WIOS - - io.weight - - The weight setting, currently only available and effective if - cfq-iosched is in use for the target device. The weight is - between 1 and 10000 and defaults to 100. The first line - always contains the default weight in the following format to - use when per-device setting is missing. - - default $WEIGHT - - Subsequent lines list per-device weights of the following - format. - - $MAJ:$MIN $WEIGHT - - Writing "$WEIGHT" or "default $WEIGHT" changes the default - setting. Writing "$MAJ:$MIN $WEIGHT" sets per-device weight - while "$MAJ:$MIN default" clears it. - - This file is available only on non-root cgroups. - - io.max - - The maximum bandwidth and/or iops setting, only available if - blk-throttle is enabled. The file is of the following format. - - $MAJ:$MIN rbps=$RBPS wbps=$WBPS riops=$RIOPS wiops=$WIOPS - - ${R|W}BPS are read/write bytes per second and ${R|W}IOPS are - read/write IOs per second. "max" indicates no limit. Writing - to the file follows the same format but the individual - settings may be omitted or specified in any order. - - This file is available only on non-root cgroups. - - -5-4-2. cpuset - -- Tasks are kept in empty cpusets after hotplug and take on the masks - of the nearest non-empty ancestor, instead of being moved to it. - -- A task can be moved into an empty cpuset, and again it takes on the - masks of the nearest non-empty ancestor. - - -5-4-3. memory - -- use_hierarchy is on by default and the cgroup file for the flag is - not created. - -- The original lower boundary, the soft limit, is defined as a limit - that is per default unset. As a result, the set of cgroups that - global reclaim prefers is opt-in, rather than opt-out. The costs - for optimizing these mostly negative lookups are so high that the - implementation, despite its enormous size, does not even provide the - basic desirable behavior. First off, the soft limit has no - hierarchical meaning. All configured groups are organized in a - global rbtree and treated like equal peers, regardless where they - are located in the hierarchy. This makes subtree delegation - impossible. Second, the soft limit reclaim pass is so aggressive - that it not just introduces high allocation latencies into the - system, but also impacts system performance due to overreclaim, to - the point where the feature becomes self-defeating. - - The memory.low boundary on the other hand is a top-down allocated - reserve. A cgroup enjoys reclaim protection when it and all its - ancestors are below their low boundaries, which makes delegation of - subtrees possible. Secondly, new cgroups have no reserve per - default and in the common case most cgroups are eligible for the - preferred reclaim pass. This allows the new low boundary to be - efficiently implemented with just a minor addition to the generic - reclaim code, without the need for out-of-band data structures and - reclaim passes. Because the generic reclaim code considers all - cgroups except for the ones running low in the preferred first - reclaim pass, overreclaim of individual groups is eliminated as - well, resulting in much better overall workload performance. - -- The original high boundary, the hard limit, is defined as a strict - limit that can not budge, even if the OOM killer has to be called. - But this generally goes against the goal of making the most out of - the available memory. The memory consumption of workloads varies - during runtime, and that requires users to overcommit. But doing - that with a strict upper limit requires either a fairly accurate - prediction of the working set size or adding slack to the limit. - Since working set size estimation is hard and error prone, and - getting it wrong results in OOM kills, most users tend to err on the - side of a looser limit and end up wasting precious resources. - - The memory.high boundary on the other hand can be set much more - conservatively. When hit, it throttles allocations by forcing them - into direct reclaim to work off the excess, but it never invokes the - OOM killer. As a result, a high boundary that is chosen too - aggressively will not terminate the processes, but instead it will - lead to gradual performance degradation. The user can monitor this - and make corrections until the minimal memory footprint that still - gives acceptable performance is found. - - In extreme cases, with many concurrent allocations and a complete - breakdown of reclaim progress within the group, the high boundary - can be exceeded. But even then it's mostly better to satisfy the - allocation from the slack available in other groups or the rest of - the system than killing the group. Otherwise, memory.max is there - to limit this type of spillover and ultimately contain buggy or even - malicious applications. - -- The original control file names are unwieldy and inconsistent in - many different ways. For example, the upper boundary hit count is - exported in the memory.failcnt file, but an OOM event count has to - be manually counted by listening to memory.oom_control events, and - lower boundary / soft limit events have to be counted by first - setting a threshold for that value and then counting those events. - Also, usage and limit files encode their units in the filename. - That makes the filenames very long, even though this is not - information that a user needs to be reminded of every time they type - out those names. - - To address these naming issues, as well as to signal clearly that - the new interface carries a new configuration model, the naming - conventions in it necessarily differ from the old interface. - -- The original limit files indicate the state of an unset limit with a - Very High Number, and a configured limit can be unset by echoing -1 - into those files. But that very high number is implementation and - architecture dependent and not very descriptive. And while -1 can - be understood as an underflow into the highest possible value, -2 or - -10M etc. do not work, so it's not consistent. - - memory.low, memory.high, and memory.max will use the string "max" to - indicate and set the highest possible value. - -6. Planned Changes - -6-1. CAP for resource control - -Unified hierarchy will require one of the capabilities(7), which is -yet to be decided, for all resource control related knobs. Process -organization operations - creation of sub-cgroups and migration of -processes in sub-hierarchies may be delegated by changing the -ownership and/or permissions on the cgroup directory and -"cgroup.procs" interface file; however, all operations which affect -resource control - writes to a "cgroup.subtree_control" file or any -controller-specific knobs - will require an explicit CAP privilege. - -This, in part, is to prevent the cgroup interface from being -inadvertently promoted to programmable API used by non-privileged -binaries. cgroup exposes various aspects of the system in ways which -aren't properly abstracted for direct consumption by regular programs. -This is an administration interface much closer to sysctl knobs than -system calls. Even the basic access model, being filesystem path -based, isn't suitable for direct consumption. There's no way to -access "my cgroup" in a race-free way or make multiple operations -atomic against migration to another cgroup. - -Another aspect is that, for better or for worse, the cgroup interface -goes through far less scrutiny than regular interfaces for -unprivileged userland. The upside is that cgroup is able to expose -useful features which may not be suitable for general consumption in a -reasonable time frame. It provides a relatively short path between -internal details and userland-visible interface. Of course, this -shortcut comes with high risk. We go through what we go through for -general kernel APIs for good reasons. It may end up leaking internal -details in a way which can exert significant pain by locking the -kernel into a contract that can't be maintained in a reasonable -manner. - -Also, due to the specific nature, cgroup and its controllers don't -tend to attract attention from a wide scope of developers. cgroup's -short history is already fraught with severely mis-designed -interfaces, unnecessary commitments to and exposing of internal -details, broken and dangerous implementations of various features. - -Keeping cgroup as an administration interface is both advantageous for -its role and imperative given its nature. Some of the cgroup features -may make sense for unprivileged access. If deemed justified, those -must be further abstracted and implemented as a different interface, -be it a system call or process-private filesystem, and survive through -the scrutiny that any interface for general consumption is required to -go through. - -Requiring CAP is not a complete solution but should serve as a -significant deterrent against spraying cgroup usages in non-privileged -programs. diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index be8d4006bf767d37b7b1882ba962363abb1f81d3..f7b12c071d5356ceed76231a07b4e49f59e9f8d8 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -1,61 +1,131 @@ -Intel P-state driver +Intel P-State driver -------------------- -This driver provides an interface to control the P state selection for -SandyBridge+ Intel processors. The driver can operate two different -modes based on the processor model, legacy mode and Hardware P state (HWP) -mode. - -In legacy mode, the Intel P-state implements two internal governors, -performance and powersave, that differ from the general cpufreq governors of -the same name (the general cpufreq governors implement target(), whereas the -internal Intel P-state governors implement setpolicy()). The internal -performance governor sets the max_perf_pct and min_perf_pct to 100; that is, -the governor selects the highest available P state to maximize the performance -of the core. The internal powersave governor selects the appropriate P state -based on the current load on the CPU. - -In HWP mode P state selection is implemented in the processor -itself. The driver provides the interfaces between the cpufreq core and -the processor to control P state selection based on user preferences -and reporting frequency to the cpufreq core. In this mode the -internal Intel P-state governor code is disabled. - -In addition to the interfaces provided by the cpufreq core for -controlling frequency the driver provides sysfs files for -controlling P state selection. These files have been added to -/sys/devices/system/cpu/intel_pstate/ - - max_perf_pct: limits the maximum P state that will be requested by - the driver stated as a percentage of the available performance. The - available (P states) performance may be reduced by the no_turbo +This driver provides an interface to control the P-State selection for the +SandyBridge+ Intel processors. + +The following document explains P-States: +http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf +As stated in the document, P-State doesn’t exactly mean a frequency. However, for +the sake of the relationship with cpufreq, P-State and frequency are used +interchangeably. + +Understanding the cpufreq core governors and policies are important before +discussing more details about the Intel P-State driver. Based on what callbacks +a cpufreq driver provides to the cpufreq core, it can support two types of +drivers: +- with target_index() callback: In this mode, the drivers using cpufreq core +simply provide the minimum and maximum frequency limits and an additional +interface target_index() to set the current frequency. The cpufreq subsystem +has a number of scaling governors ("performance", "powersave", "ondemand", +etc.). Depending on which governor is in use, cpufreq core will call for +transitions to a specific frequency using target_index() callback. +- setpolicy() callback: In this mode, drivers do not provide target_index() +callback, so cpufreq core can't request a transition to a specific frequency. +The driver provides minimum and maximum frequency limits and callbacks to set a +policy. The policy in cpufreq sysfs is referred to as the "scaling governor". +The cpufreq core can request the driver to operate in any of the two policies: +"performance: and "powersave". The driver decides which frequency to use based +on the above policy selection considering minimum and maximum frequency limits. + +The Intel P-State driver falls under the latter category, which implements the +setpolicy() callback. This driver decides what P-State to use based on the +requested policy from the cpufreq core. If the processor is capable of +selecting its next P-State internally, then the driver will offload this +responsibility to the processor (aka HWP: Hardware P-States). If not, the +driver implements algorithms to select the next P-State. + +Since these policies are implemented in the driver, they are not same as the +cpufreq scaling governors implementation, even if they have the same name in +the cpufreq sysfs (scaling_governors). For example the "performance" policy is +similar to cpufreq’s "performance" governor, but "powersave" is completely +different than the cpufreq "powersave" governor. The strategy here is similar +to cpufreq "ondemand", where the requested P-State is related to the system load. + +Sysfs Interface + +In addition to the frequency-controlling interfaces provided by the cpufreq +core, the driver provides its own sysfs files to control the P-State selection. +These files have been added to /sys/devices/system/cpu/intel_pstate/. +Any changes made to these files are applicable to all CPUs (even in a +multi-package system). + + max_perf_pct: Limits the maximum P-State that will be requested by + the driver. It states it as a percentage of the available performance. The + available (P-State) performance may be reduced by the no_turbo setting described below. - min_perf_pct: limits the minimum P state that will be requested by - the driver stated as a percentage of the max (non-turbo) + min_perf_pct: Limits the minimum P-State that will be requested by + the driver. It states it as a percentage of the max (non-turbo) performance level. - no_turbo: limits the driver to selecting P states below the turbo + no_turbo: Limits the driver to selecting P-State below the turbo frequency range. - turbo_pct: displays the percentage of the total performance that - is supported by hardware that is in the turbo range. This number + turbo_pct: Displays the percentage of the total performance that + is supported by hardware that is in the turbo range. This number is independent of whether turbo has been disabled or not. - num_pstates: displays the number of pstates that are supported - by hardware. This number is independent of whether turbo has + num_pstates: Displays the number of P-States that are supported + by hardware. This number is independent of whether turbo has been disabled or not. +For example, if a system has these parameters: + Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State) + Max non turbo ratio: 0x17 + Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio) + +Sysfs will show : + max_perf_pct:100, which corresponds to 1 core ratio + min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio + no_turbo:0, turbo is not disabled + num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1) + turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates + +Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual +Volume 3: System Programming Guide" to understand ratios. + +cpufreq sysfs for Intel P-State + +Since this driver registers with cpufreq, cpufreq sysfs is also presented. +There are some important differences, which need to be considered. + +scaling_cur_freq: This displays the real frequency which was used during +the last sample period instead of what is requested. Some other cpufreq driver, +like acpi-cpufreq, displays what is requested (Some changes are on the +way to fix this for acpi-cpufreq driver). The same is true for frequencies +displayed at /proc/cpuinfo. + +scaling_governor: This displays current active policy. Since each CPU has a +cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this +is not possible with Intel P-States, as there is one common policy for all +CPUs. Here, the last requested policy will be applicable to all CPUs. It is +suggested that one use the cpupower utility to change policy to all CPUs at the +same time. + +scaling_setspeed: This attribute can never be used with Intel P-State. + +scaling_max_freq/scaling_min_freq: This interface can be used similarly to +the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies +are converted to nearest possible P-State, this is prone to rounding errors. +This method is not preferred to limit performance. + +affected_cpus: Not used +related_cpus: Not used + For contemporary Intel processors, the frequency is controlled by the -processor itself and the P-states exposed to software are related to +processor itself and the P-State exposed to software is related to performance levels. The idea that frequency can be set to a single -frequency is fiction for Intel Core processors. Even if the scaling -driver selects a single P state the actual frequency the processor +frequency is fictional for Intel Core processors. Even if the scaling +driver selects a single P-State, the actual frequency the processor will run at is selected by the processor itself. -For legacy mode debugfs files have also been added to allow tuning of -the internal governor algorythm. These files are located at -/sys/kernel/debug/pstate_snb/ These files are NOT present in HWP mode. +Tuning Intel P-State driver + +When HWP mode is not used, debugfs files have also been added to allow the +tuning of the internal governor algorithm. These files are located at +/sys/kernel/debug/pstate_snb/. The algorithm uses a PID (Proportional +Integral Derivative) controller. The PID tunable parameters are: deadband d_gain_pct @@ -63,3 +133,90 @@ the internal governor algorythm. These files are located at p_gain_pct sample_rate_ms setpoint + +To adjust these parameters, some understanding of driver implementation is +necessary. There are some tweeks described here, but be very careful. Adjusting +them requires expert level understanding of power and performance relationship. +These limits are only useful when the "powersave" policy is active. + +-To make the system more responsive to load changes, sample_rate_ms can +be adjusted (current default is 10ms). +-To make the system use higher performance, even if the load is lower, setpoint +can be adjusted to a lower number. This will also lead to faster ramp up time +to reach the maximum P-State. +If there are no derivative and integral coefficients, The next P-State will be +equal to: + current P-State - ((setpoint - current cpu load) * p_gain_pct) + +For example, if the current PID parameters are (Which are defaults for the core +processors like SandyBridge): + deadband = 0 + d_gain_pct = 0 + i_gain_pct = 0 + p_gain_pct = 20 + sample_rate_ms = 10 + setpoint = 97 + +If the current P-State = 0x08 and current load = 100, this will result in the +next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State +goes up by only 1. If during next sample interval the current load doesn't +change and still 100, then P-State goes up by one again. This process will +continue as long as the load is more than the setpoint until the maximum P-State +is reached. + +For the same load at setpoint = 60, this will result in the next P-State += 0x08 - ((60 - 100) * 0.2) = 16 +So by changing the setpoint from 97 to 60, there is an increase of the +next P-State from 9 to 16. So this will make processor execute at higher +P-State for the same CPU load. If the load continues to be more than the +setpoint during next sample intervals, then P-State will go up again till the +maximum P-State is reached. But the ramp up time to reach the maximum P-State +will be much faster when the setpoint is 60 compared to 97. + +Debugging Intel P-State driver + +Event tracing +To debug P-State transition, the Linux event tracing interface can be used. +There are two specific events, which can be enabled (Provided the kernel +configs related to event tracing are enabled). + +# cd /sys/kernel/debug/tracing/ +# echo 1 > events/power/pstate_sample/enable +# echo 1 > events/power/cpu_frequency/enable +# cat trace +gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 + scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 + freq=2474476 +cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 + + +Using ftrace + +If function level tracing is required, the Linux ftrace interface can be used. +For example if we want to check how often a function to set a P-State is +called, we can set ftrace filter to intel_pstate_set_pstate. + +# cd /sys/kernel/debug/tracing/ +# cat available_filter_functions | grep -i pstate +intel_pstate_set_pstate +intel_pstate_cpu_init +... + +# echo intel_pstate_set_pstate > set_ftrace_filter +# echo function > current_tracer +# cat trace | head -15 +# tracer: function +# +# entries-in-buffer/entries-written: 80/80 #P:4 +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | + Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func + -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt index 9e3c3b33514c688dc253861af1c11b650af361de..0a94224ad2965bff7c25e2137c3834ea3d132c72 100644 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ b/Documentation/cpu-freq/pcc-cpufreq.txt @@ -159,8 +159,8 @@ to be strictly associated with a P-state. 2.2 cpuinfo_transition_latency: ------------------------------- -The cpuinfo_transition_latency field is 0. The PCC specification does -not include a field to expose this value currently. +The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification +does not include a field to expose this value currently. 2.3 cpuinfo_cur_freq: --------------------- diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index e15bc1a0fb98ab23563681210cc6ed1865234816..89fd8f9a259f69b9c9423da9bb16771ed0596cad 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -18,11 +18,11 @@ Construction Parameters 0 is the original format used in the Chromium OS. The salt is appended when hashing, digests are stored continuously and - the rest of the block is padded with zeros. + the rest of the block is padded with zeroes. 1 is the current format that should be used for new devices. The salt is prepended when hashing and each digest is - padded with zeros to the power of two. + padded with zeroes to the power of two. This is the device containing data, the integrity of which needs to be @@ -79,6 +79,37 @@ restart_on_corruption not compatible with ignore_corruption and requires user space support to avoid restart loops. +ignore_zero_blocks + Do not verify blocks that are expected to contain zeroes and always return + zeroes instead. This may be useful if the partition contains unused blocks + that are not guaranteed to contain zeroes. + +use_fec_from_device + Use forward error correction (FEC) to recover from corruption if hash + verification fails. Use encoding data from the specified device. This + may be the same device where data and hash blocks reside, in which case + fec_start must be outside data and hash areas. + + If the encoding data covers additional metadata, it must be accessible + on the hash device after the hash blocks. + + Note: block sizes for data and hash devices must match. Also, if the + verity is encrypted the should be too. + +fec_roots + Number of generator roots. This equals to the number of parity bytes in + the encoding data. For example, in RS(M, N) encoding, the number of roots + is M-N. + +fec_blocks + The number of encoding data blocks on the FEC device. The block size for + the FEC device is . + +fec_start + This is the offset, in blocks, from the start of the + FEC device to the beginning of the encoding data. + + Theory of operation =================== @@ -98,6 +129,11 @@ per-block basis. This allows for a lightweight hash computation on first read into the page cache. Block hashes are stored linearly, aligned to the nearest block size. +If forward error correction (FEC) support is enabled any recovery of +corrupted data will be verified using the cryptographic hash of the +corresponding data. This is why combining error correction with +integrity checking is essential. + Hash Tree --------- diff --git a/Documentation/devicetree/bindings/arm/arm,scpi.txt b/Documentation/devicetree/bindings/arm/arm,scpi.txt index 86302de67c2c73626850c3dfd07b0fd9a63a368b..313dabdc14f9874273e4ed87b32b27c75c3c4bdb 100644 --- a/Documentation/devicetree/bindings/arm/arm,scpi.txt +++ b/Documentation/devicetree/bindings/arm/arm,scpi.txt @@ -63,7 +63,7 @@ Required properties: - compatible : should be "arm,juno-sram-ns" for Non-secure SRAM on Juno The rest of the properties should follow the generic mmio-sram description -found in ../../misc/sysram.txt +found in ../../sram/sram.txt Each sub-node represents the reserved area for SCPI. diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index 3a07a87fef2087550cb24f0c4aff5f8e2fecab21..c352c11bd6411c95f6a8ee381f3471f60a991337 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -157,6 +157,7 @@ nodes to be present and contain the properties described below. "arm,cortex-a17" "arm,cortex-a53" "arm,cortex-a57" + "arm,cortex-a72" "arm,cortex-m0" "arm,cortex-m0+" "arm,cortex-m1" @@ -242,6 +243,23 @@ nodes to be present and contain the properties described below. Definition: Specifies the syscon node controlling the cpu core power domains. + - dynamic-power-coefficient + Usage: optional + Value type: + Definition: A u32 value that represents the running time dynamic + power coefficient in units of mW/MHz/uVolt^2. The + coefficient can either be calculated from power + measurements or derived by analysis. + + The dynamic power consumption of the CPU is + proportional to the square of the Voltage (V) and + the clock frequency (f). The coefficient is used to + calculate the dynamic power as below - + + Pdyn = dynamic-power-coefficient * V^2 * f + + where voltage is in uV, frequency is in MHz. + Example 1 (dual-cluster big.LITTLE system 32-bit): cpus { diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt similarity index 77% rename from Documentation/devicetree/bindings/arm/l2cc.txt rename to Documentation/devicetree/bindings/arm/l2c2x0.txt index 06c88a4d28aced38cdc2386a10364900266b7c6e..fe0398c5c77b3ba94c2dc9f4030a0b0eaa7f227d 100644 --- a/Documentation/devicetree/bindings/arm/l2cc.txt +++ b/Documentation/devicetree/bindings/arm/l2c2x0.txt @@ -1,7 +1,8 @@ * ARM L2 Cache Controller -ARM cores often have a separate level 2 cache controller. There are various -implementations of the L2 cache controller with compatible programming models. +ARM cores often have a separate L2C210/L2C220/L2C310 (also known as PL210/PL220/ +PL310 and variants) based level 2 cache controller. All these various implementations +of the L2 cache controller have compatible programming models (Note 1). Some of the properties that are just prefixed "cache-*" are taken from section 3.7.3 of the ePAPR v1.1 specification which can be found at: https://www.power.org/wp-content/uploads/2012/06/Power_ePAPR_APPROVED_v1.1.pdf @@ -67,12 +68,17 @@ Optional properties: disable if zero. - arm,prefetch-offset : Override prefetch offset value. Valid values are 0-7, 15, 23, and 31. -- arm,shared-override : The default behavior of the pl310 cache controller with - respect to the shareable attribute is to transform "normal memory - non-cacheable transactions" into "cacheable no allocate" (for reads) or - "write through no write allocate" (for writes). +- arm,shared-override : The default behavior of the L220 or PL310 cache + controllers with respect to the shareable attribute is to transform "normal + memory non-cacheable transactions" into "cacheable no allocate" (for reads) + or "write through no write allocate" (for writes). On systems where this may cause DMA buffer corruption, this property must be specified to indicate that such transforms are precluded. +- arm,parity-enable : enable parity checking on the L2 cache (L220 or PL310). +- arm,parity-disable : disable parity checking on the L2 cache (L220 or PL310). +- arm,outer-sync-disable : disable the outer sync operation on the L2 cache. + Some core tiles, especially ARM PB11MPCore have a faulty L220 cache that + will randomly hang unless outer sync operations are disabled. - prefetch-data : Data prefetch. Value: <0> (forcibly disable), <1> (forcibly enable), property absent (retain settings set by firmware) - prefetch-instr : Instruction prefetch. Value: <0> (forcibly disable), @@ -91,3 +97,9 @@ L2: cache-controller { cache-level = <2>; interrupts = <45>; }; + +Note 1: The description in this document doesn't apply to integrated L2 + cache controllers as found in e.g. Cortex-A15/A7/A57/A53. These + integrated L2 controllers are assumed to be all preconfigured by + early secure boot code. Thus no need to deal with their configuration + in the kernel at all. diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 97ba45af04fc693f831c00f15f9388df864434a6..56518839f52a7908922aa9a88d60575be57cdbe3 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -9,8 +9,9 @@ Required properties: - compatible : should be one of "apm,potenza-pmu" "arm,armv8-pmuv3" - "arm.cortex-a57-pmu" - "arm.cortex-a53-pmu" + "arm,cortex-a72-pmu" + "arm,cortex-a57-pmu" + "arm,cortex-a53-pmu" "arm,cortex-a17-pmu" "arm,cortex-a15-pmu" "arm,cortex-a12-pmu" diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt index a9adab84e2feb78596ef9e0b74b3d440c9cf4ff0..a2c4f1d524929bb788360542690061ade0c4f543 100644 --- a/Documentation/devicetree/bindings/arm/psci.txt +++ b/Documentation/devicetree/bindings/arm/psci.txt @@ -23,17 +23,20 @@ Main node required properties: - compatible : should contain at least one of: - * "arm,psci" : for implementations complying to PSCI versions prior to - 0.2. For these cases function IDs must be provided. - - * "arm,psci-0.2" : for implementations complying to PSCI 0.2. Function - IDs are not required and should be ignored by an OS with PSCI 0.2 - support, but are permitted to be present for compatibility with - existing software when "arm,psci" is later in the compatible list. - - * "arm,psci-1.0" : for implementations complying to PSCI 1.0. PSCI 1.0 is - backward compatible with PSCI 0.2 with minor specification updates, - as defined in the PSCI specification[2]. + * "arm,psci" : For implementations complying to PSCI versions prior + to 0.2. + For these cases function IDs must be provided. + + * "arm,psci-0.2" : For implementations complying to PSCI 0.2. + Function IDs are not required and should be ignored by + an OS with PSCI 0.2 support, but are permitted to be + present for compatibility with existing software when + "arm,psci" is later in the compatible list. + + * "arm,psci-1.0" : For implementations complying to PSCI 1.0. + PSCI 1.0 is backward compatible with PSCI 0.2 with + minor specification updates, as defined in the PSCI + specification[2]. - method : The method of calling the PSCI firmware. Permitted values are: diff --git a/Documentation/devicetree/bindings/arm/secure.txt b/Documentation/devicetree/bindings/arm/secure.txt new file mode 100644 index 0000000000000000000000000000000000000000..e31303fb233add7a7ae163fd0407cae7229e7bb4 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/secure.txt @@ -0,0 +1,53 @@ +* ARM Secure world bindings + +ARM CPUs with TrustZone support have two distinct address spaces, +"Normal" and "Secure". Most devicetree consumers (including the Linux +kernel) are not TrustZone aware and run entirely in either the Normal +world or the Secure world. However some devicetree consumers are +TrustZone aware and need to be able to determine whether devices are +visible only in the Secure address space, only in the Normal address +space, or visible in both. (One example of that situation would be a +virtual machine which boots Secure firmware and wants to tell the +firmware about the layout of the machine via devicetree.) + +The general principle of the naming scheme for Secure world bindings +is that any property that needs a different value in the Secure world +can be supported by prefixing the property name with "secure-". So for +instance "secure-foo" would override "foo". For property names with +a vendor prefix, the Secure variant of "vendor,foo" would be +"vendor,secure-foo". If there is no "secure-" property then the Secure +world value is the same as specified for the Normal world by the +non-prefixed property. However, only the properties listed below may +validly have "secure-" versions; this list will be enlarged on a +case-by-case basis. + +Defining the bindings in this way means that a device tree which has +been annotated to indicate the presence of Secure-only devices can +still be processed unmodified by existing Non-secure software (and in +particular by the kernel). + +Note that it is still valid for bindings intended for purely Secure +world consumers (like kernels that run entirely in Secure) to simply +describe the view of Secure world using the standard bindings. These +secure- bindings only need to be used where both the Secure and Normal +world views need to be described in a single device tree. + +Valid Secure world properties: + +- secure-status : specifies whether the device is present and usable + in the secure world. The combination of this with "status" allows + the various possible combinations of device visibility to be + specified. If "secure-status" is not specified it defaults to the + same value as "status"; if "status" is not specified either then + both default to "okay". This means the following combinations are + possible: + + /* Neither specified: default to visible in both S and NS */ + secure-status = "okay"; /* visible in both */ + status = "okay"; /* visible in both */ + status = "okay"; secure-status = "okay"; /* visible in both */ + secure-status = "disabled"; /* NS-only */ + status = "okay"; secure-status = "disabled"; /* NS-only */ + status = "disabled"; secure-status = "okay"; /* S-only */ + status = "disabled"; /* disabled in both */ + status = "disabled"; secure-status = "disabled"; /* disabled in both */ diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt b/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt index 20ac9bbfa1fda45e4d59e67ace1a202f98f1ee25..60872838f1adb772d04e0db6b50678d850be2a73 100644 --- a/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt +++ b/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt @@ -4,7 +4,9 @@ SATA nodes are defined to describe on-chip Serial ATA controllers. Each SATA controller should have its own node. Required properties: -- compatible : compatible list, may contain "brcm,bcm7445-ahci" and/or +- compatible : should be one or more of + "brcm,bcm7425-ahci" + "brcm,bcm7445-ahci" "brcm,sata3-ahci" - reg : register mappings for AHCI and SATA_TOP_CTRL - reg-names : "ahci" and "top-ctrl" diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt index 2493a5a316551de878c1be87d6e86f20f090e985..0764f9ab63dcde31f7efff01f74e56c5c6fa1c56 100644 --- a/Documentation/devicetree/bindings/ata/sata_rcar.txt +++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt @@ -8,6 +8,7 @@ Required properties: - "renesas,sata-r8a7790" for R-Car H2 other than ES1 - "renesas,sata-r8a7791" for R-Car M2-W - "renesas,sata-r8a7793" for R-Car M2-N + - "renesas,sata-r8a7795" for R-Car H3 - reg : address and length of the SATA registers; - interrupts : must consist of one interrupt specifier. - clocks : must contain a reference to the functional clock. diff --git a/Documentation/devicetree/bindings/clock/samsung,s2mps11.txt b/Documentation/devicetree/bindings/clock/samsung,s2mps11.txt new file mode 100644 index 0000000000000000000000000000000000000000..2726c1d58a79dd6a1eb2dfb8a3555dccc0e17cf0 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/samsung,s2mps11.txt @@ -0,0 +1,49 @@ +Binding for Samsung S2M and S5M family clock generator block +============================================================ + +This is a part of device tree bindings for S2M and S5M family multi-function +devices. +More information can be found in bindings/mfd/sec-core.txt file. + +The S2MPS11/13/15 and S5M8767 provide three(AP/CP/BT) buffered 32.768 kHz +outputs. The S2MPS14 provides two (AP/BT) buffered 32.768 KHz outputs. + +To register these as clocks with common clock framework instantiate under +main device node a sub-node named "clocks". + +It uses the common clock binding documented in: + - Documentation/devicetree/bindings/clock/clock-bindings.txt + + +Required properties of the "clocks" sub-node: + - #clock-cells: should be 1. + - compatible: Should be one of: "samsung,s2mps11-clk", "samsung,s2mps13-clk", + "samsung,s2mps14-clk", "samsung,s5m8767-clk" + The S2MPS15 uses the same compatible as S2MPS13, as both provides similar + clocks. + + +Each clock is assigned an identifier and client nodes use this identifier +to specify the clock which they consume. + Clock ID Devices + ---------------------------------------------------------- + 32KhzAP 0 S2MPS11/13/14/15, S5M8767 + 32KhzCP 1 S2MPS11/13/15, S5M8767 + 32KhzBT 2 S2MPS11/13/14/15, S5M8767 + +Include dt-bindings/clock/samsung,s2mps11.h file to use preprocessor defines +in device tree sources. + + +Example: + + s2mps11_pmic@66 { + compatible = "samsung,s2mps11-pmic"; + reg = <0x66>; + + s2m_osc: clocks { + compatible = "samsung,s2mps11-clk"; + #clock-cells = <1>; + clock-output-names = "xx", "yy", "zz"; + }; + }; diff --git a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt index 0715695e94a98575f0db40ad3dcc21549cfcc1f3..2aa06ac0fac594ae48b85041fd1c3aed21f674ba 100644 --- a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt +++ b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt @@ -12,7 +12,7 @@ must be present contiguously. Generic DT driver will check only node 'x' for cpu:x. Required properties: -- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt +- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt for details Optional properties: diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt index e41c98ffbccb2a53e408e83b9886ec6e56713557..dd3929e85decc562e5621cf99ac408033a5930f9 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt @@ -11,7 +11,7 @@ Required properties: - None Optional properties: -- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt for +- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt for details. OPPs *must* be supplied either via DT, i.e. this property, or populated at runtime. - clock-latency: Specify the possible maximum transition latency for clock, diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt new file mode 100644 index 0000000000000000000000000000000000000000..d91a02a3b6b0721a3e3ccb59367cc47f2f062c1f --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt @@ -0,0 +1,91 @@ +Binding for ST's CPUFreq driver +=============================== + +ST's CPUFreq driver attempts to read 'process' and 'version' attributes +from the SoC, then supplies the OPP framework with 'prop' and 'supported +hardware' information respectively. The framework is then able to read +the DT and operate in the usual way. + +For more information about the expected DT format [See: ../opp/opp.txt]. + +Frequency Scaling only +---------------------- + +No vendor specific driver required for this. + +Located in CPU's node: + +- operating-points : [See: ../power/opp.txt] + +Example [safe] +-------------- + +cpus { + cpu@0 { + /* kHz uV */ + operating-points = <1500000 0 + 1200000 0 + 800000 0 + 500000 0>; + }; +}; + +Dynamic Voltage and Frequency Scaling (DVFS) +-------------------------------------------- + +This requires the ST CPUFreq driver to supply 'process' and 'version' info. + +Located in CPU's node: + +- operating-points-v2 : [See ../power/opp.txt] + +Example [unsafe] +---------------- + +cpus { + cpu@0 { + operating-points-v2 = <&cpu0_opp_table>; + }; +}; + +cpu0_opp_table: opp_table { + compatible = "operating-points-v2"; + + /* ############################################################### */ + /* # WARNING: Do not attempt to copy/replicate these nodes, # */ + /* # they are only to be supplied by the bootloader !!! # */ + /* ############################################################### */ + opp0 { + /* Major Minor Substrate */ + /* 2 all all */ + opp-supported-hw = <0x00000004 0xffffffff 0xffffffff>; + opp-hz = /bits/ 64 <1500000000>; + clock-latency-ns = <10000000>; + + opp-microvolt-pcode0 = <1200000>; + opp-microvolt-pcode1 = <1200000>; + opp-microvolt-pcode2 = <1200000>; + opp-microvolt-pcode3 = <1200000>; + opp-microvolt-pcode4 = <1170000>; + opp-microvolt-pcode5 = <1140000>; + opp-microvolt-pcode6 = <1100000>; + opp-microvolt-pcode7 = <1070000>; + }; + + opp1 { + /* Major Minor Substrate */ + /* all all all */ + opp-supported-hw = <0xffffffff 0xffffffff 0xffffffff>; + opp-hz = /bits/ 64 <1200000000>; + clock-latency-ns = <10000000>; + + opp-microvolt-pcode0 = <1110000>; + opp-microvolt-pcode1 = <1150000>; + opp-microvolt-pcode2 = <1100000>; + opp-microvolt-pcode3 = <1080000>; + opp-microvolt-pcode4 = <1040000>; + opp-microvolt-pcode5 = <1020000>; + opp-microvolt-pcode6 = <980000>; + opp-microvolt-pcode7 = <930000>; + }; +}; diff --git a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt new file mode 100644 index 0000000000000000000000000000000000000000..096df34b11c162a84711f73ea426d86a6864f4a7 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt @@ -0,0 +1,29 @@ +Rockchip Electronics And Security Accelerator + +Required properties: +- compatible: Should be "rockchip,rk3288-crypto" +- reg: Base physical address of the engine and length of memory mapped + region +- interrupts: Interrupt number +- clocks: Reference to the clocks about crypto +- clock-names: "aclk" used to clock data + "hclk" used to clock data + "sclk" used to clock crypto accelerator + "apb_pclk" used to clock dma +- resets: Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names: Must include the name "crypto-rst". + +Examples: + + crypto: cypto-controller@ff8a0000 { + compatible = "rockchip,rk3288-crypto"; + reg = <0xff8a0000 0x4000>; + interrupts = ; + clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>, + <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>; + clock-names = "aclk", "hclk", "sclk", "apb_pclk"; + resets = <&cru SRST_CRYPTO>; + reset-names = "crypto-rst"; + status = "okay"; + }; diff --git a/Documentation/devicetree/bindings/display/bridge/tda998x.txt b/Documentation/devicetree/bindings/display/bridge/tda998x.txt index e9e4bce40760e2d742160956943930b72c01b656..e178e6b9f9ee1ace419d5c9ccd4bf27a64799116 100644 --- a/Documentation/devicetree/bindings/display/bridge/tda998x.txt +++ b/Documentation/devicetree/bindings/display/bridge/tda998x.txt @@ -5,6 +5,10 @@ Required properties; - reg: I2C address +Required node: + - port: Input port node with endpoint definition, as described + in Documentation/devicetree/bindings/graph.txt + Optional properties: - interrupts: interrupt number and trigger type default: polling diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt index 040f365954cc4d105c64a3a204b9fc342fc22ce2..e7780a186a36bc37c383f28f404e4d23916d1b9e 100644 --- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt +++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt @@ -1,7 +1,13 @@ * Renesas USB DMA Controller Device Tree bindings Required Properties: -- compatible: must contain "renesas,usb-dmac" +-compatible: "renesas,-usb-dmac", "renesas,usb-dmac" as fallback. + Examples with soctypes are: + - "renesas,r8a7790-usb-dmac" (R-Car H2) + - "renesas,r8a7791-usb-dmac" (R-Car M2-W) + - "renesas,r8a7793-usb-dmac" (R-Car M2-N) + - "renesas,r8a7794-usb-dmac" (R-Car E2) + - "renesas,r8a7795-usb-dmac" (R-Car H3) - reg: base address and length of the registers block for the DMAC - interrupts: interrupt specifiers for the DMAC, one for each entry in interrupt-names. @@ -15,7 +21,7 @@ Required Properties: Example: R8A7790 (R-Car H2) USB-DMACs usb_dmac0: dma-controller@e65a0000 { - compatible = "renesas,usb-dmac"; + compatible = "renesas,r8a7790-usb-dmac", "renesas,usb-dmac"; reg = <0 0xe65a0000 0 0x100>; interrupts = <0 109 IRQ_TYPE_LEVEL_HIGH 0 109 IRQ_TYPE_LEVEL_HIGH>; diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt new file mode 100644 index 0000000000000000000000000000000000000000..70cd13f1588abea6f266afe65e3ad4b4f66683ca --- /dev/null +++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt @@ -0,0 +1,82 @@ +* STMicroelectronics STM32 DMA controller + +The STM32 DMA is a general-purpose direct memory access controller capable of +supporting 8 independent DMA channels. Each channel can have up to 8 requests. + +Required properties: +- compatible: Should be "st,stm32-dma" +- reg: Should contain DMA registers location and length. This should include + all of the per-channel registers. +- interrupts: Should contain all of the per-channel DMA interrupts in + ascending order with respect to the DMA channel index. +- clocks: Should contain the input clock of the DMA instance. +- #dma-cells : Must be <4>. See DMA client paragraph for more details. + +Optional properties: +- resets: Reference to a reset controller asserting the DMA controller +- st,mem2mem: boolean; if defined, it indicates that the controller supports + memory-to-memory transfer + +Example: + + dma2: dma-controller@40026400 { + compatible = "st,stm32-dma"; + reg = <0x40026400 0x400>; + interrupts = <56>, + <57>, + <58>, + <59>, + <60>, + <68>, + <69>, + <70>; + clocks = <&clk_hclk>; + #dma-cells = <4>; + st,mem2mem; + resets = <&rcc 150>; + }; + +* DMA client + +DMA clients connected to the STM32 DMA controller must use the format +described in the dma.txt file, using a five-cell specifier for each +channel: a phandle plus four integer cells. +The four cells in order are: + +1. The channel id +2. The request line number +3. A 32bit mask specifying the DMA channel configuration which are device + dependent: + -bit 9: Peripheral Increment Address + 0x0: no address increment between transfers + 0x1: increment address between transfers + -bit 10: Memory Increment Address + 0x0: no address increment between transfers + 0x1: increment address between transfers + -bit 15: Peripheral Increment Offset Size + 0x0: offset size is linked to the peripheral bus width + 0x1: offset size is fixed to 4 (32-bit alignment) + -bit 16-17: Priority level + 0x0: low + 0x1: medium + 0x2: high + 0x3: very high +5. A 32bit mask specifying the DMA FIFO threshold configuration which are device + dependent: + -bit 0-1: Fifo threshold + 0x0: 1/4 full FIFO + 0x1: 1/2 full FIFO + 0x2: 3/4 full FIFO + 0x3: full FIFO + +Example: + + usart1: serial@40011000 { + compatible = "st,stm32-usart", "st,stm32-uart"; + reg = <0x40011000 0x400>; + interrupts = <37>; + clocks = <&clk_pclk2>; + dmas = <&dma2 2 4 0x10400 0x3>, + <&dma2 7 5 0x10200 0x3>; + dma-names = "rx", "tx"; + }; diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt index b152a75dceaebea57bd1d5fd3925c76cb7f193bc..aead5869a28d9cbd71846caf6e94e7844adb9153 100644 --- a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt +++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt @@ -14,6 +14,10 @@ The DMA controller node need to have the following poroperties: Optional properties: - ti,dma-safe-map: Safe routing value for unused request lines +- ti,reserved-dma-request-ranges: DMA request ranges which should not be used + when mapping xbar input to DMA request, they are either + allocated to be used by for example the DSP or they are used as + memcpy channels in eDMA. Notes: When requesting channel via ti,dra7-dma-crossbar, the DMA clinet must request @@ -46,6 +50,8 @@ sdma_xbar: dma-router@4a002b78 { #dma-cells = <1>; dma-requests = <205>; ti,dma-safe-map = <0>; + /* Protect the sDMA request ranges: 10-14 and 100-126 */ + ti,reserved-dma-request-ranges = <10 5>, <100 27>; dma-masters = <&sdma>; }; diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt index d3d0a4fb1c733b8288884869088fda088328546d..079b42a81d7cf3c5af086872fc86127023ab2c15 100644 --- a/Documentation/devicetree/bindings/dma/ti-edma.txt +++ b/Documentation/devicetree/bindings/dma/ti-edma.txt @@ -22,8 +22,7 @@ Required properties: Optional properties: - ti,hwmods: Name of the hwmods associated to the eDMA CC - ti,edma-memcpy-channels: List of channels allocated to be used for memcpy, iow - these channels will be SW triggered channels. The list must - contain 16 bits numbers, see example. + these channels will be SW triggered channels. See example. - ti,edma-reserved-slot-ranges: PaRAM slot ranges which should not be used by the driver, they are allocated to be used by for example the DSP. See example. @@ -56,10 +55,9 @@ edma: edma@49000000 { ti,tptcs = <&edma_tptc0 7>, <&edma_tptc1 7>, <&edma_tptc2 0>; /* Channel 20 and 21 is allocated for memcpy */ - ti,edma-memcpy-channels = /bits/ 16 <20 21>; - /* The following PaRAM slots are reserved: 35-45 and 100-110 */ - ti,edma-reserved-slot-ranges = /bits/ 16 <35 10>, - /bits/ 16 <100 10>; + ti,edma-memcpy-channels = <20 21>; + /* The following PaRAM slots are reserved: 35-44 and 100-109 */ + ti,edma-reserved-slot-ranges = <35 10>, <100 10>; }; edma_tptc0: tptc@49800000 { diff --git a/Documentation/devicetree/bindings/eeprom/eeprom.txt b/Documentation/devicetree/bindings/eeprom/eeprom.txt index 4342c10de1bf8db1a5216f3bc63300c32add54ef..735bc94444bbd7eaf921ea12c12ea4dcad6d63c3 100644 --- a/Documentation/devicetree/bindings/eeprom/eeprom.txt +++ b/Documentation/devicetree/bindings/eeprom/eeprom.txt @@ -2,11 +2,22 @@ EEPROMs (I2C) Required properties: - - compatible : should be "," - If there is no specific driver for , a generic - driver based on is selected. Possible types are: - 24c00, 24c01, 24c02, 24c04, 24c08, 24c16, 24c32, 24c64, - 24c128, 24c256, 24c512, 24c1024, spd + - compatible : should be ",", like these: + + "atmel,24c00", "atmel,24c01", "atmel,24c02", "atmel,24c04", + "atmel,24c08", "atmel,24c16", "atmel,24c32", "atmel,24c64", + "atmel,24c128", "atmel,24c256", "atmel,24c512", "atmel,24c1024" + + "catalyst,24c32" + + "ramtron,24c64" + + "renesas,r1ex24002" + + If there is no specific driver for , a generic + driver based on is selected. Possible types are: + "24c00", "24c01", "24c02", "24c04", "24c08", "24c16", "24c32", "24c64", + "24c128", "24c256", "24c512", "24c1024", "spd" - reg : the I2C address of the EEPROM diff --git a/Documentation/devicetree/bindings/extcon/extcon-arizona.txt b/Documentation/devicetree/bindings/extcon/extcon-arizona.txt index e1705fae63a86be9167d2a2aa99a635cf43fbff2..e27341f8a4c7e3a3a58a31f37914d2b9a21b9d88 100644 --- a/Documentation/devicetree/bindings/extcon/extcon-arizona.txt +++ b/Documentation/devicetree/bindings/extcon/extcon-arizona.txt @@ -13,3 +13,63 @@ Optional properties: ARIZONA_ACCDET_MODE_HPR or 2 - Headphone detect mode is set to HPDETR If this node is not mentioned or if the value is unknown, then headphone detection mode is set to HPDETL. + + - wlf,use-jd2 : Use the additional JD input along with JD1 for dual pin jack + detection. + - wlf,use-jd2-nopull : Internal pull on JD2 is disabled when used for + jack detection. + - wlf,jd-invert : Invert the polarity of the jack detection switch + + - wlf,micd-software-compare : Use a software comparison to determine mic + presence + - wlf,micd-detect-debounce : Additional software microphone detection + debounce specified in milliseconds. + - wlf,micd-pol-gpio : GPIO specifier for the GPIO controlling the headset + polarity if one exists. + - wlf,micd-bias-start-time : Time allowed for MICBIAS to startup prior to + performing microphone detection, specified as per the ARIZONA_MICD_TIME_XXX + defines. + - wlf,micd-rate : Delay between successive microphone detection measurements, + specified as per the ARIZONA_MICD_TIME_XXX defines. + - wlf,micd-dbtime : Microphone detection hardware debounces specified as the + number of measurements to take, valid values being 2 and 4. + - wlf,micd-timeout-ms : Timeout for microphone detection, specified in + milliseconds. + - wlf,micd-force-micbias : Force MICBIAS continuously on during microphone + detection. + - wlf,micd-configs : Headset polarity configurations (generally used for + detection of CTIA / OMTP headsets), the field can be of variable length + but should always be a multiple of 3 cells long, each three cell group + represents one polarity configuration. + The first cell defines the accessory detection pin, zero will use MICDET1 + and all other values will use MICDET2. + The second cell represents the MICBIAS to be used. + The third cell represents the value of the micd-pol-gpio pin. + + - wlf,gpsw : Settings for the general purpose switch + +Example: + +codec: wm8280@0 { + compatible = "wlf,wm8280"; + reg = <0>; + ... + + wlf,use-jd2; + wlf,use-jd2-nopull; + wlf,jd-invert; + + wlf,micd-software-compare; + wlf,micd-detect-debounce = <0>; + wlf,micd-pol-gpio = <&codec 2 0>; + wlf,micd-rate = ; + wlf,micd-dbtime = <4>; + wlf,micd-timeout-ms = <100>; + wlf,micd-force-micbias; + wlf,micd-configs = < + 0 1 0 /* MICDET1 MICBIAS1 GPIO=low */ + 1 2 1 /* MICDET2 MICBIAS2 GPIO=high */ + >; + + wlf,gpsw = <0>; +}; diff --git a/Documentation/devicetree/bindings/extcon/extcon-max3355.txt b/Documentation/devicetree/bindings/extcon/extcon-max3355.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2288ea9eb8288a52abe5d7147defedab3474234 --- /dev/null +++ b/Documentation/devicetree/bindings/extcon/extcon-max3355.txt @@ -0,0 +1,21 @@ +Maxim Integrated MAX3355 USB OTG chip +------------------------------------- + +MAX3355 integrates a charge pump and comparators to enable a system with an +integrated USB OTG dual-role transceiver to function as a USB OTG dual-role +device. + +Required properties: +- compatible: should be "maxim,max3355"; +- maxim,shdn-gpios: should contain a phandle and GPIO specifier for the GPIO pin + connected to the MAX3355's SHDN# pin; +- id-gpios: should contain a phandle and GPIO specifier for the GPIO pin + connected to the MAX3355's ID_OUT pin. + +Example: + + usb-otg { + compatible = "maxim,max3355"; + maxim,shdn-gpios = <&gpio2 4 GPIO_ACTIVE_LOW>; + id-gpios = <&gpio5 31 GPIO_ACTIVE_HIGH>; + }; diff --git a/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt b/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt index dd5d2c0394b139b286f43d90a13d732c90d7f208..4d6c8cdc85860567a09a1f1a2be269c3781bbee6 100644 --- a/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt +++ b/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt @@ -24,7 +24,7 @@ controller. - #interrupt-cells : Specifies the number of cells needed to encode an interrupt. Shall be set to 2. The first cell defines the interrupt number, the second encodes the triger flags encoded as described in - Documentation/devicetree/bindings/interrupts.txt + Documentation/devicetree/bindings/interrupt-controller/interrupts.txt - interrupt-parent : The parent interrupt controller. - interrupts : The interrupt to the parent controller raised when GPIOs generate the interrupts. diff --git a/Documentation/devicetree/bindings/i2c/i2c-at91.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt index 6e81dc153f3b9570f617cf73cac90f238debb392..ef973a0343c7b48a95cd18c3cc44b48b0ae08e62 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-at91.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt @@ -3,7 +3,7 @@ I2C for Atmel platforms Required properties : - compatible : Must be "atmel,at91rm9200-i2c", "atmel,at91sam9261-i2c", "atmel,at91sam9260-i2c", "atmel,at91sam9g20-i2c", "atmel,at91sam9g10-i2c", - "atmel,at91sam9x5-i2c" or "atmel,sama5d2-i2c" + "atmel,at91sam9x5-i2c", "atmel,sama5d4-i2c" or "atmel,sama5d2-i2c" - reg: physical base address of the controller and length of memory mapped region. - interrupts: interrupt number to the cpu. @@ -17,6 +17,8 @@ Optional properties: - dma-names: should contain "tx" and "rx". - atmel,fifo-size: maximum number of data the RX and TX FIFOs can store for FIFO capable I2C controllers. +- i2c-sda-hold-time-ns: TWD hold time, only available for "atmel,sama5d4-i2c" + and "atmel,sama5d2-i2c". - Child nodes conforming to i2c bus binding Examples : @@ -52,6 +54,7 @@ i2c0: i2c@f8034600 { #size-cells = <0>; clocks = <&flx0>; atmel,fifo-size = <16>; + i2c-sda-hold-time-ns = <336>; wm8731: wm8731@1a { compatible = "wm8731"; diff --git a/Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt b/Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt index d6f724efdcf21fa66d481f0f281dd21ab6021705..aeceaceba3c5168ab01ec0ea4e78b1a79a09dee9 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-brcmstb.txt @@ -2,7 +2,7 @@ Broadcom stb bsc iic master controller Required properties: -- compatible: should be "brcm,brcmstb-i2c" +- compatible: should be "brcm,brcmstb-i2c" or "brcm,brcmper-i2c" - clock-frequency: 32-bit decimal value of iic master clock freqency in Hz valid values are 375000, 390000, 187500, 200000 93750, 97500, 46875 and 50000 diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt index ea406eb20fa5ad117b3590cd88c6f1fa13253c2a..95e97223a71c83005c1d0acca9b39c7479f34002 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt @@ -20,6 +20,10 @@ Optional properties: propoerty indicates the default frequency 100 kHz. - clocks: clock specifier. +- i2c-scl-falling-time-ns: see i2c.txt +- i2c-scl-internal-delay-ns: see i2c.txt +- i2c-scl-rising-time-ns: see i2c.txt + Examples : i2c0: i2c@e6508000 { diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index 8a99150ac3a7fd296322dcd03819981dc34f2679..c8d977ed847f3c32a04cd35ac8f995e1730659b2 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -29,12 +29,38 @@ Optional properties These properties may not be supported by all drivers. However, if a driver wants to support one of the below features, it should adapt the bindings below. -- clock-frequency - frequency of bus clock in Hz. -- wakeup-source - device can be used as a wakeup source. +- clock-frequency + frequency of bus clock in Hz. -- interrupts - interrupts used by the device. -- interrupt-names - "irq" and "wakeup" names are recognized by I2C core, - other names are left to individual drivers. +- i2c-scl-falling-time-ns + Number of nanoseconds the SCL signal takes to fall; t(f) in the I2C + specification. + +- i2c-scl-internal-delay-ns + Number of nanoseconds the IP core additionally needs to setup SCL. + +- i2c-scl-rising-time-ns + Number of nanoseconds the SCL signal takes to rise; t(r) in the I2C + specification. + +- i2c-sda-falling-time-ns + Number of nanoseconds the SDA signal takes to fall; t(f) in the I2C + specification. + +- interrupts + interrupts used by the device. + +- interrupt-names + "irq" and "wakeup" names are recognized by I2C core, other names are + left to individual drivers. + +- multi-master + states that there is another master active on this bus. The OS can use + this information to adapt power management to keep the arbitration awake + all the time, for example. + +- wakeup-source + device can be used as a wakeup source. Binding may contain optional "interrupts" property, describing interrupts used by the device. I2C core will assign "irq" interrupt (or the very first diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index c50cf13c852e518cff4664370eb1a72947e9cca7..539874490492de666274aff20fda6dc1660f681d 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -20,22 +20,11 @@ adi,adt7476 +/-1C TDM Extended Temp Range I.C adi,adt7490 +/-1C TDM Extended Temp Range I.C adi,adxl345 Three-Axis Digital Accelerometer adi,adxl346 Three-Axis Digital Accelerometer (backward-compatibility value "adi,adxl345" must be listed too) +ams,iaq-core AMS iAQ-Core VOC Sensor at,24c08 i2c serial eeprom (24cxx) -atmel,24c00 i2c serial eeprom (24cxx) -atmel,24c01 i2c serial eeprom (24cxx) -atmel,24c02 i2c serial eeprom (24cxx) -atmel,24c04 i2c serial eeprom (24cxx) -atmel,24c16 i2c serial eeprom (24cxx) -atmel,24c32 i2c serial eeprom (24cxx) -atmel,24c64 i2c serial eeprom (24cxx) -atmel,24c128 i2c serial eeprom (24cxx) -atmel,24c256 i2c serial eeprom (24cxx) -atmel,24c512 i2c serial eeprom (24cxx) -atmel,24c1024 i2c serial eeprom (24cxx) atmel,at97sc3204t i2c trusted platform module (TPM) capella,cm32181 CM32181: Ambient Light Sensor capella,cm3232 CM3232: Ambient Light Sensor -catalyst,24c32 i2c serial eeprom cirrus,cs42l51 Cirrus Logic CS42L51 audio codec dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock dallas,ds1338 I2C RTC with 56-Byte NV RAM @@ -49,11 +38,13 @@ dallas,ds4510 CPU Supervisor with Nonvolatile Memory and Programmable I/O dallas,ds75 Digital Thermometer and Thermostat dlg,da9053 DA9053: flexible system level PMIC with multicore support dlg,da9063 DA9063: system PMIC for quad-core application processors +epson,rx8010 I2C-BUS INTERFACE REAL TIME CLOCK MODULE epson,rx8025 High-Stability. I2C-Bus INTERFACE REAL TIME CLOCK MODULE epson,rx8581 I2C-BUS INTERFACE REAL TIME CLOCK MODULE fsl,mag3110 MAG3110: Xtrinsic High Accuracy, 3D Magnetometer fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51 fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer +fsl,mpl3115 MPL3115: Absolute Digital Pressure Sensor fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface @@ -80,7 +71,6 @@ ovti,ov5642 OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI an pericom,pt7c4338 Real-time Clock Module plx,pex8648 48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch pulsedlight,lidar-lite-v2 Pulsedlight LIDAR range-finding sensor -ramtron,24c64 i2c serial eeprom (24cxx) ricoh,r2025sd I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC ricoh,r2221tl I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC ricoh,rs5c372a I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC diff --git a/Documentation/devicetree/bindings/iio/accel/mma8452.txt b/Documentation/devicetree/bindings/iio/accel/mma8452.txt index e3c37467d7dacb853bc2d2df7c3f26064c7566e5..3c10e8581144a128f380240ea8537bfc2a963956 100644 --- a/Documentation/devicetree/bindings/iio/accel/mma8452.txt +++ b/Documentation/devicetree/bindings/iio/accel/mma8452.txt @@ -7,13 +7,18 @@ Required properties: * "fsl,mma8453" * "fsl,mma8652" * "fsl,mma8653" + - reg: the I2C address of the chip Optional properties: - interrupt-parent: should be the phandle for the interrupt controller + - interrupts: interrupt mapping for GPIO IRQ + - interrupt-names: should contain "INT1" and/or "INT2", the accelerometer's + interrupt line in use. + Example: mma8453fc@1d { @@ -21,4 +26,5 @@ Example: reg = <0x1d>; interrupt-parent = <&gpio1>; interrupts = <5 0>; + interrupt-names = "INT2"; }; diff --git a/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt b/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c184b9406693f68b28ecb3e4fcfda32470c92ec --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt @@ -0,0 +1,22 @@ +Freescale imx7d ADC bindings + +The devicetree bindings are for the ADC driver written for +imx7d SoC. + +Required properties: +- compatible: Should be "fsl,imx7d-adc" +- reg: Offset and length of the register set for the ADC device +- interrupts: The interrupt number for the ADC device +- clocks: The root clock of the ADC controller +- clock-names: Must contain "adc", matching entry in the clocks property +- vref-supply: The regulator supply ADC reference voltage + +Example: +adc1: adc@30610000 { + compatible = "fsl,imx7d-adc"; + reg = <0x30610000 0x10000>; + interrupts = ; + clocks = <&clks IMX7D_ADC_ROOT_CLK>; + clock-names = "adc"; + vref-supply = <®_vcc_3v3_mcu>; +}; diff --git a/Documentation/devicetree/bindings/iio/adc/mcp320x.txt b/Documentation/devicetree/bindings/iio/adc/mcp320x.txt index 2a1f3af30155d0f3a88d0c20ab4fc6711ded2ad1..bcd3ac8e6e0c57414b26aa60c91e543260208cdc 100644 --- a/Documentation/devicetree/bindings/iio/adc/mcp320x.txt +++ b/Documentation/devicetree/bindings/iio/adc/mcp320x.txt @@ -10,16 +10,28 @@ must be specified. Required properties: - compatible: Must be one of the following, depending on the model: - "mcp3001" - "mcp3002" - "mcp3004" - "mcp3008" - "mcp3201" - "mcp3202" - "mcp3204" - "mcp3208" - "mcp3301" + "mcp3001" (DEPRECATED) + "mcp3002" (DEPRECATED) + "mcp3004" (DEPRECATED) + "mcp3008" (DEPRECATED) + "mcp3201" (DEPRECATED) + "mcp3202" (DEPRECATED) + "mcp3204" (DEPRECATED) + "mcp3208" (DEPRECATED) + "mcp3301" (DEPRECATED) + "microchip,mcp3001" + "microchip,mcp3002" + "microchip,mcp3004" + "microchip,mcp3008" + "microchip,mcp3201" + "microchip,mcp3202" + "microchip,mcp3204" + "microchip,mcp3208" + "microchip,mcp3301" + + NOTE: The use of the compatibles with no vendor prefix + is deprecated and only listed because old DT use them. Examples: spi_controller { diff --git a/Documentation/devicetree/bindings/iio/adc/mcp3422.txt b/Documentation/devicetree/bindings/iio/adc/mcp3422.txt index 333139cc0bfb3cbdb615b077864050289dadaee0..dcae4ccfcc5284b8f116e2c788fbc30c996603c9 100644 --- a/Documentation/devicetree/bindings/iio/adc/mcp3422.txt +++ b/Documentation/devicetree/bindings/iio/adc/mcp3422.txt @@ -1,7 +1,8 @@ -* Microchip mcp3422/3/4/6/7/8 chip family (ADC) +* Microchip mcp3421/2/3/4/6/7/8 chip family (ADC) Required properties: - compatible: Should be + "microchip,mcp3421" or "microchip,mcp3422" or "microchip,mcp3423" or "microchip,mcp3424" or diff --git a/Documentation/devicetree/bindings/iio/adc/palmas-gpadc.txt b/Documentation/devicetree/bindings/iio/adc/palmas-gpadc.txt new file mode 100644 index 0000000000000000000000000000000000000000..4bb9a86065d13a26484b3be287e2afd25aaa5a61 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/palmas-gpadc.txt @@ -0,0 +1,48 @@ +* Palmas general purpose ADC IP block devicetree bindings + +Channels list: + 0 battery type + 1 battery temp NTC (optional current source) + 2 GP + 3 temp (with ext. diode, optional current source) + 4 GP + 5 GP + 6 VBAT_SENSE + 7 VCC_SENSE + 8 Backup Battery voltage + 9 external charger (VCHG) + 10 VBUS + 11 DC-DC current probe (how does this work?) + 12 internal die temp + 13 internal die temp + 14 USB ID pin voltage + 15 test network + +Required properties: +- compatible : Must be "ti,palmas-gpadc". +- #io-channel-cells: Should be set to <1>. + +Optional sub-nodes: +ti,channel0-current-microamp: Channel 0 current in uA. + Values are rounded to derive 0uA, 5uA, 15uA, 20uA. +ti,channel3-current-microamp: Channel 3 current in uA. + Values are rounded to derive 0uA, 10uA, 400uA, 800uA. +ti,enable-extended-delay: Enable extended delay. + +Example: + +pmic { + compatible = "ti,twl6035-pmic", "ti,palmas-pmic"; + ... + gpadc { + compatible = "ti,palmas-gpadc"; + interrupts = <18 0 + 16 0 + 17 0>; + #io-channel-cells = <1>; + ti,channel0-current-microamp = <5>; + ti,channel3-current-microamp = <10>; + }; + }; + ... +}; diff --git a/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt b/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt index 15ca6b47958e970727c0db575c1c215d3fefeee8..daa2b2c294285f4e717d25d473690582a3bc0329 100644 --- a/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt +++ b/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt @@ -1,7 +1,7 @@ -* Texas Instruments' ADC128S052 and ADC122S021 ADC chip +* Texas Instruments' ADC128S052, ADC122S021 and ADC124S021 ADC chip Required properties: - - compatible: Should be "ti,adc128s052" or "ti,adc122s021" + - compatible: Should be "ti,adc128s052", "ti,adc122s021" or "ti,adc124s021" - reg: spi chip select number for the device - vref-supply: The regulator supply for ADC reference voltage diff --git a/Documentation/devicetree/bindings/iio/adc/ti-ads8688.txt b/Documentation/devicetree/bindings/iio/adc/ti-ads8688.txt new file mode 100644 index 0000000000000000000000000000000000000000..a02337d7efa447f46265dbd4a460e8c760ea45f5 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/ti-ads8688.txt @@ -0,0 +1,20 @@ +* Texas Instruments' ADS8684 and ADS8688 ADC chip + +Required properties: + - compatible: Should be "ti,ads8684" or "ti,ads8688" + - reg: spi chip select number for the device + +Recommended properties: + - spi-max-frequency: Definition as per + Documentation/devicetree/bindings/spi/spi-bus.txt + +Optional properties: + - vref-supply: The regulator supply for ADC reference voltage + +Example: +adc@0 { + compatible = "ti,ads8688"; + reg = <0>; + vref-supply = <&vdd_supply>; + spi-max-frequency = <1000000>; +}; diff --git a/Documentation/devicetree/bindings/iio/health/max30100.txt b/Documentation/devicetree/bindings/iio/health/max30100.txt new file mode 100644 index 0000000000000000000000000000000000000000..f6fbac66ad06e7429e9ab218a48b20d16abe0ab9 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/health/max30100.txt @@ -0,0 +1,21 @@ +Maxim MAX30100 heart rate and pulse oximeter sensor + +* https://datasheets.maximintegrated.com/en/ds/MAX30100.pdf + +Required properties: + - compatible: must be "maxim,max30100" + - reg: the I2C address of the sensor + - interrupt-parent: should be the phandle for the interrupt controller + - interrupts: the sole interrupt generated by the device + + Refer to interrupt-controller/interrupts.txt for generic + interrupt client node bindings. + +Example: + +max30100@057 { + compatible = "maxim,max30100"; + reg = <57>; + interrupt-parent = <&gpio1>; + interrupts = <16 2>; +}; diff --git a/Documentation/devicetree/bindings/iio/light/us5182d.txt b/Documentation/devicetree/bindings/iio/light/us5182d.txt index 6f0a530144fde8c2570ece125acdaa06aca22a4b..a61979997f373c6e784865dbf5fe145fd388906e 100644 --- a/Documentation/devicetree/bindings/iio/light/us5182d.txt +++ b/Documentation/devicetree/bindings/iio/light/us5182d.txt @@ -7,13 +7,24 @@ Required properties: Optional properties: - upisemi,glass-coef: glass attenuation factor - compensation factor of resolution 1000 for material transmittance. + - upisemi,dark-ths: array of 8 elements containing 16-bit thresholds (adc counts) corresponding to every scale. + - upisemi,upper-dark-gain: 8-bit dark gain compensation factor(4 int and 4 fractional bits - Q4.4) applied when light > threshold + - upisemi,lower-dark-gain: 8-bit dark gain compensation factor(4 int and 4 fractional bits - Q4.4) applied when light < threshold +- upisemi,continuous: This chip has two power modes: one-shot (chip takes one + measurement and then shuts itself down) and continuous ( + chip takes continuous measurements). The one-shot mode is + more power-friendly but the continuous mode may be more + reliable. If this property is specified the continuous + mode will be used instead of the default one-shot one for + raw reads. + If the optional properties are not specified these factors will default to the values in the below example. The glass-coef defaults to no compensation for the covering material. diff --git a/Documentation/devicetree/bindings/iio/st-sensors.txt b/Documentation/devicetree/bindings/iio/st-sensors.txt index d3ccdb190c53b581f347ef2eb8444fcb3ac84592..d4b87cc1e446ec6dd73e367a2d1707a2ba0d5b96 100644 --- a/Documentation/devicetree/bindings/iio/st-sensors.txt +++ b/Documentation/devicetree/bindings/iio/st-sensors.txt @@ -36,6 +36,7 @@ Accelerometers: - st,lsm303dlm-accel - st,lsm330-accel - st,lsm303agr-accel +- st,lis2dh12-accel Gyroscopes: - st,l3g4200d-gyro diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt index b9c32f6fd687b04a3737c871bcf5c694665d3b84..4357e498ef0422d5fdc2715e3daf065ac80e41f4 100644 --- a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt +++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt @@ -12,7 +12,7 @@ Each key is represented as a sub-node of "allwinner,sun4i-a10-lradc-keys": Required subnode-properties: - label: Descriptive name of the key. - linux,code: Keycode to emit. - - channel: Channel this key is attached to, mut be 0 or 1. + - channel: Channel this key is attached to, must be 0 or 1. - voltage: Voltage in µV at lradc input when this key is pressed. Example: diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt index 8ba98eec765bc5cf27ee1e620bb53a282bf8d921..c98757a69110dbc6a997998d1e27753c205bdbdd 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt @@ -13,6 +13,17 @@ Required properties: - interrupt-parent : Interrupt controller to which the chip is connected - interrupts : Interrupt to which the chip is connected +Optional properties: + + - irq-gpios : GPIO pin used for IRQ. The driver uses the + interrupt gpio pin as output to reset the device. + - reset-gpios : GPIO pin used for reset + + - touchscreen-inverted-x : X axis is inverted (boolean) + - touchscreen-inverted-y : Y axis is inverted (boolean) + - touchscreen-swapped-x-y : X and Y axis are swapped (boolean) + (swapping is done after inverting the axis) + Example: i2c@00000000 { @@ -23,6 +34,9 @@ Example: reg = <0x5d>; interrupt-parent = <&gpio>; interrupts = <0 0>; + + irq-gpios = <&gpio1 0 0>; + reset-gpios = <&gpio1 1 0>; }; /* ... */ diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt index 8eb240a287c8aec13881a395eaa25a3da06c7194..697a3e7831e734e00173ffee012d3bc0f40381da 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt @@ -9,7 +9,9 @@ Required properties: - touchscreen-size-y: vertical resolution of touchscreen (in pixels) Optional properties: -- reset-gpio: GPIO connected to the RESET line of the chip +- reset-gpios: GPIO connected to the RESET line of the chip +- enable-gpios: GPIO connected to the ENABLE line of the chip +- wake-gpios: GPIO connected to the WAKE line of the chip Example: diff --git a/Documentation/devicetree/bindings/input/touchscreen/ts4800-ts.txt b/Documentation/devicetree/bindings/input/touchscreen/ts4800-ts.txt new file mode 100644 index 0000000000000000000000000000000000000000..4c1c092c276bc5c602223aa88fa810aa6cbef11b --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/ts4800-ts.txt @@ -0,0 +1,11 @@ +* TS-4800 Touchscreen bindings + +Required properties: +- compatible: must be "technologic,ts4800-ts" +- reg: physical base address of the controller and length of memory mapped + region. +- syscon: phandle / integers array that points to the syscon node which + describes the FPGA's syscon registers. + - phandle to FPGA's syscon + - offset to the touchscreen register + - offset to the touchscreen enable bit diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt similarity index 94% rename from Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt rename to Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt index d1c5cdabc3e03cc0ed79ce9fee23d9a38d74f4da..81cd3692405e5fc7c74ab2ea6aca0d9d942c0969 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt @@ -4,7 +4,7 @@ Allwinner Sunxi NMI Controller Required properties: - compatible : should be "allwinner,sun7i-a20-sc-nmi" or - "allwinner,sun6i-a31-sc-nmi" + "allwinner,sun6i-a31-sc-nmi" or "allwinner,sun9i-a80-nmi" - reg : Specifies base physical address and size of the registers. - interrupt-controller : Identifies the node as an interrupt controller - #interrupt-cells : Specifies the number of cells needed to encode an diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt index cc56021eb60babea6c580bfbe594aca3c17dfce5..5a1cb4bc3dfe84c67664c65eeb2a64ac4a92c1ff 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt @@ -18,6 +18,7 @@ Main node required properties: "arm,cortex-a9-gic" "arm,gic-400" "arm,pl390" + "arm,tc11mp-gic" "brcm,brahma-b15-gic" "qcom,msm-8660-qgic" "qcom,msm-qgic2" diff --git a/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt new file mode 100644 index 0000000000000000000000000000000000000000..720f7c92e9a1016cd3b174bb5b74f3b2acabab90 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt @@ -0,0 +1,74 @@ +Hisilicon mbigen device tree bindings. +======================================= + +Mbigen means: message based interrupt generator. + +MBI is kind of msi interrupt only used on Non-PCI devices. + +To reduce the wired interrupt number connected to GIC, +Hisilicon designed mbigen to collect and generate interrupt. + + +Non-pci devices can connect to mbigen and generate the +interrupt by writing ITS register. + +The mbigen chip and devices connect to mbigen have the following properties: + +Mbigen main node required properties: +------------------------------------------- +- compatible: Should be "hisilicon,mbigen-v2" + +- reg: Specifies the base physical address and size of the Mbigen + registers. + +- interrupt controller: Identifies the node as an interrupt controller + +- msi-parent: Specifies the MSI controller this mbigen use. + For more detail information,please refer to the generic msi-parent binding in + Documentation/devicetree/bindings/interrupt-controller/msi.txt. + +- num-pins: the total number of pins implemented in this Mbigen + instance. + +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. The value must be 2. + + The 1st cell is hardware pin number of the interrupt.This number is local to + each mbigen chip and in the range from 0 to the maximum interrupts number + of the mbigen. + + The 2nd cell is the interrupt trigger type. + The value of this cell should be: + 1: rising edge triggered + or + 4: high level triggered + +Examples: + + mbigen_device_gmac:intc { + compatible = "hisilicon,mbigen-v2"; + reg = <0x0 0xc0080000 0x0 0x10000>; + interrupt-controller; + msi-parent = <&its_dsa 0x40b1c>; + num-pins = <9>; + #interrupt-cells = <2>; + }; + +Devices connect to mbigen required properties: +---------------------------------------------------- +-interrupt-parent: Specifies the mbigen device node which device connected. + +-interrupts:Specifies the interrupt source. + For the specific information of each cell in this property,please refer to + the "interrupt-cells" description mentioned above. + +Examples: + gmac0: ethernet@c2080000 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0 0xc2080000 0 0x20000>, + <0 0xc0000000 0 0x1000>; + interrupt-parent = <&mbigen_device_gmac>; + interrupts = <656 1>, + <657 1>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt index ec96b1f0147886102554c16e3bd260dc3f425619..475ae9bd562b4496ae6eda481aaa3ac0da7b6cee 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt @@ -22,7 +22,7 @@ Interrupt Controllers bindings used by client devices. Example: interrupt-controller@18060010 { - compatible = "qca,ar9132-misc-intc", qca,ar7100-misc-intc"; + compatible = "qca,ar9132-misc-intc", "qca,ar7100-misc-intc"; reg = <0x18060010 0x4>; interrupt-parent = <&cpuintc>; diff --git a/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt b/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt new file mode 100644 index 0000000000000000000000000000000000000000..7f15f1b0325b51754e227717e9f86ca8784a5ca5 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt @@ -0,0 +1,16 @@ +TS-4800 FPGA interrupt controller + +TS-4800 FPGA has an internal interrupt controller. When one of the +interrupts is triggered, the SoC is notified, usually using a GPIO as +parent interrupt source. + +Required properties: +- compatible: should be "technologic,ts4800-irqc" +- interrupt-controller: identifies the node as an interrupt controller +- reg: physical base address of the controller and length of memory mapped + region +- #interrupt-cells: specifies the number of cells needed to encode an interrupt + source, should be 1. +- interrupt-parent: phandle to the parent interrupt controller this one is + cascaded from +- interrupts: specifies the interrupt line in the interrupt-parent controller diff --git a/Documentation/devicetree/bindings/media/i2c/adp1653.txt b/Documentation/devicetree/bindings/media/i2c/adp1653.txt index 5ce66f2104e37f83c8c5d794a5ddb63f7b2826af..4cce0de40ee9208566a917b71400b7a7d7482725 100644 --- a/Documentation/devicetree/bindings/media/i2c/adp1653.txt +++ b/Documentation/devicetree/bindings/media/i2c/adp1653.txt @@ -12,12 +12,13 @@ There are two LED outputs available - flash and indicator. One LED is represented by one child node, nodes need to be named "flash" and "indicator". Required properties of the LED child node: -- max-microamp : see Documentation/devicetree/bindings/leds/common.txt +- led-max-microamp : see Documentation/devicetree/bindings/leds/common.txt Required properties of the flash LED child node: - flash-max-microamp : see Documentation/devicetree/bindings/leds/common.txt - flash-timeout-us : see Documentation/devicetree/bindings/leds/common.txt +- led-max-microamp : see Documentation/devicetree/bindings/leds/common.txt Example: @@ -29,9 +30,9 @@ Example: flash { flash-timeout-us = <500000>; flash-max-microamp = <320000>; - max-microamp = <50000>; + led-max-microamp = <50000>; }; indicator { - max-microamp = <17500>; + led-max-microamp = <17500>; }; }; diff --git a/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt b/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt index d4def767bdfea346df695d2d5a1a74577f0fbfea..cc51b1fd6e0cee6e7e6a17123be60f20b9c318f8 100644 --- a/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt +++ b/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt @@ -35,7 +35,7 @@ Required properties (tsin (child) node): - tsin-num : tsin id of the InputBlock (must be between 0 to 6) - i2c-bus : phandle to the I2C bus DT node which the demodulators & tuners on this tsin channel are connected. -- rst-gpio : reset gpio for this tsin channel. +- reset-gpios : reset gpio for this tsin channel. Optional properties (tsin (child) node): @@ -55,27 +55,27 @@ Example: status = "okay"; reg = <0x08a20000 0x10000>, <0x08a00000 0x4000>; reg-names = "stfe", "stfe-ram"; - interrupts = <0 34 0>, <0 35 0>; + interrupts = , ; interrupt-names = "stfe-error-irq", "stfe-idle-irq"; - - pinctrl-names = "tsin0-serial", "tsin0-parallel", "tsin3-serial", - "tsin4-serial", "tsin5-serial"; - pinctrl-0 = <&pinctrl_tsin0_serial>; pinctrl-1 = <&pinctrl_tsin0_parallel>; pinctrl-2 = <&pinctrl_tsin3_serial>; pinctrl-3 = <&pinctrl_tsin4_serial_alt3>; pinctrl-4 = <&pinctrl_tsin5_serial_alt1>; - + pinctrl-names = "tsin0-serial", + "tsin0-parallel", + "tsin3-serial", + "tsin4-serial", + "tsin5-serial"; clocks = <&clk_s_c0_flexgen CLK_PROC_STFE>; - clock-names = "stfe"; + clock-names = "c8sectpfe"; /* tsin0 is TSA on NIMA */ tsin0: port@0 { tsin-num = <0>; serial-not-parallel; i2c-bus = <&ssc2>; - rst-gpio = <&pio15 4 0>; + reset-gpios = <&pio15 4 GPIO_ACTIVE_HIGH>; dvb-card = ; }; @@ -83,7 +83,7 @@ Example: tsin-num = <3>; serial-not-parallel; i2c-bus = <&ssc3>; - rst-gpio = <&pio15 7 0>; + reset-gpios = <&pio15 7 GPIO_ACTIVE_HIGH>; dvb-card = ; }; }; diff --git a/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt b/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt index efe35a065714e95c924a2b0d99b9f814f45e1307..c81af75bcd88697a227f1824d74cd555a84d6bc4 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt +++ b/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt @@ -1,6 +1,6 @@ Binding for Qualcomm Atheros AR7xxx/AR9xxx DDR controller -The DDR controller of the ARxxx and AR9xxx families provides an interface +The DDR controller of the AR7xxx and AR9xxx families provides an interface to flush the FIFO between various devices and the DDR. This is mainly used by the IRQ controller to flush the FIFO before running the interrupt handler of such devices. @@ -11,9 +11,9 @@ Required properties: "qca,[ar7100|ar7240]-ddr-controller" as fallback. On SoC with PCI support "qca,ar7100-ddr-controller" should be used as fallback, otherwise "qca,ar7240-ddr-controller" should be used. -- reg: Base address and size of the controllers memory area -- #qca,ddr-wb-channel-cells: has to be 1, the index of the write buffer - channel +- reg: Base address and size of the controller's memory area +- #qca,ddr-wb-channel-cells: Specifies the number of cells needed to encode + the write buffer channel index, should be 1. Example: diff --git a/Documentation/devicetree/bindings/mfd/arizona.txt b/Documentation/devicetree/bindings/mfd/arizona.txt index 18be0cbfb456c92d47d959a98bdc9d27bb8f08bc..9b30011ecabe767652d8a1dbc1baaf729e950b70 100644 --- a/Documentation/devicetree/bindings/mfd/arizona.txt +++ b/Documentation/devicetree/bindings/mfd/arizona.txt @@ -1,4 +1,4 @@ -Wolfson Arizona class audio SoCs +Cirrus Logic/Wolfson Microelectronics Arizona class audio SoCs These devices are audio SoCs with extensive digital capabilites and a range of analogue I/O. @@ -6,12 +6,14 @@ of analogue I/O. Required properties: - compatible : One of the following chip-specific strings: + "cirrus,cs47l24" "wlf,wm5102" "wlf,wm5110" "wlf,wm8280" "wlf,wm8997" "wlf,wm8998" "wlf,wm1814" + "wlf,wm1831" - reg : I2C slave address when connected using I2C, chip select number when using SPI. @@ -24,7 +26,7 @@ Required properties: - #interrupt-cells: the number of cells to describe an IRQ, this should be 2. The first cell is the IRQ number. The second cell is the flags, encoded as the trigger masks from - Documentation/devicetree/bindings/interrupts.txt + Documentation/devicetree/bindings/interrupt-controller/interrupts.txt - gpio-controller : Indicates this device is a GPIO controller. - #gpio-cells : Must be 2. The first cell is the pin number and the @@ -41,10 +43,21 @@ Required properties: - SPKVDD-supply : Speaker driver power supply (wm8997) + - DCVDD-supply : Main power supply (cs47l24, wm1831) + + - MICVDD-supply : Microphone power supply (cs47l24, wm1831) + Optional properties: - wlf,reset : GPIO specifier for the GPIO controlling /RESET + - clocks: Should reference the clocks supplied on MCLK1 and MCLK2 + - clock-names: Should contains two strings: + "mclk1" for the clock supplied on MCLK1, recommended to be a high + quality audio reference clock + "mclk2" for the clock supplied on MCLK2, recommended to be an always on + 32k clock + - wlf,gpio-defaults : A list of GPIO configuration register values. Defines for the appropriate values can found in . If absent, no configuration of these registers is performed. If any entry has @@ -59,6 +72,12 @@ Optional properties: that have not been specified are set to 0 by default. Entries are: (wm5102, wm5110, wm8280, wm8997) (wm8998, wm1814) + - wlf,out-mono : A list of boolean values indicating whether each output is + mono or stereo. Position within the list indicates the output affected + (eg. First entry in the list corresponds to output 1). A non-zero value + indicates a mono output. If present, the number of values should be less + than or equal to the number of outputs, if less values are supplied the + additional outputs will be treated as stereo. - wlf,dmic-ref : DMIC reference voltage source for each input, can be selected from either MICVDD or one of the MICBIAS's, defines @@ -69,6 +88,7 @@ Optional properties: - DCVDD-supply, MICVDD-supply : Power supplies, only need to be specified if they are being externally supplied. As covered in Documentation/devicetree/bindings/regulator/regulator.txt + (wm5102, wm5110, wm8280, wm8997, wm8998, wm1814) Also see child specific device properties: Regulator - ../regulator/arizona-regulator.txt diff --git a/Documentation/devicetree/bindings/mfd/palmas.txt b/Documentation/devicetree/bindings/mfd/palmas.txt index eda898978d3332514890f55ed9bca5c3f3e4da41..8ae1a32bfb7eb4998c59f7cce4554ade8e5e4163 100644 --- a/Documentation/devicetree/bindings/mfd/palmas.txt +++ b/Documentation/devicetree/bindings/mfd/palmas.txt @@ -24,7 +24,7 @@ and also the generic series names - #interrupt-cells : should be set to 2 for IRQ number and flags The first cell is the IRQ number. The second cell is the flags, encoded as the trigger masks from - Documentation/devicetree/bindings/interrupts.txt + Documentation/devicetree/bindings/interrupt-controller/interrupts.txt - interrupt-parent : The parent interrupt controller. Optional properties: diff --git a/Documentation/devicetree/bindings/mfd/s2mpa01.txt b/Documentation/devicetree/bindings/mfd/s2mpa01.txt deleted file mode 100644 index c13d3d8c39470598e01483ba1828f70ced6381ed..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/mfd/s2mpa01.txt +++ /dev/null @@ -1,90 +0,0 @@ - -* Samsung S2MPA01 Voltage and Current Regulator - -The Samsung S2MPA01 is a multi-function device which includes high -efficiency buck converters including Dual-Phase buck converter, various LDOs, -and an RTC. It is interfaced to the host controller using an I2C interface. -Each sub-block is addressed by the host system using different I2C slave -addresses. - -Required properties: -- compatible: Should be "samsung,s2mpa01-pmic". -- reg: Specifies the I2C slave address of the PMIC block. It should be 0x66. - -Optional properties: -- interrupt-parent: Specifies the phandle of the interrupt controller to which - the interrupts from s2mpa01 are delivered to. -- interrupts: An interrupt specifier for the sole interrupt generated by the - device. - -Optional nodes: -- regulators: The regulators of s2mpa01 that have to be instantiated should be - included in a sub-node named 'regulators'. Regulator nodes and constraints - included in this sub-node use the standard regulator bindings which are - documented elsewhere. - -Properties for BUCK regulator nodes: -- regulator-ramp-delay: ramp delay in uV/us. May be 6250, 12500 - (default), 25000, or 50000. May be 0 for disabling the ramp delay on - BUCK{1,2,3,4}. - - In the absence of the regulator-ramp-delay property, the default ramp - delay will be used. - - NOTE: Some BUCKs share the ramp rate setting i.e. same ramp value will be set - for a particular group of BUCKs. So provide same regulator-ramp-delay=. - - The following BUCKs share ramp settings: - * 1 and 6 - * 2 and 4 - * 8, 9, and 10 - -The following are the names of the regulators that the s2mpa01 PMIC block -supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number -as per the datasheet of s2mpa01. - - - LDOn - - valid values for n are 1 to 26 - - Example: LDO1, LD02, LDO26 - - BUCKn - - valid values for n are 1 to 10. - - Example: BUCK1, BUCK2, BUCK9 - -Example: - - s2mpa01_pmic@66 { - compatible = "samsung,s2mpa01-pmic"; - reg = <0x66>; - - regulators { - ldo1_reg: LDO1 { - regulator-name = "VDD_ALIVE"; - regulator-min-microvolt = <1000000>; - regulator-max-microvolt = <1000000>; - }; - - ldo2_reg: LDO2 { - regulator-name = "VDDQ_MMC2"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - regulator-always-on; - }; - - buck1_reg: BUCK1 { - regulator-name = "vdd_mif"; - regulator-min-microvolt = <950000>; - regulator-max-microvolt = <1350000>; - regulator-always-on; - regulator-boot-on; - }; - - buck2_reg: BUCK2 { - regulator-name = "vdd_arm"; - regulator-min-microvolt = <950000>; - regulator-max-microvolt = <1350000>; - regulator-always-on; - regulator-boot-on; - regulator-ramp-delay = <50000>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/mfd/s2mps11.txt b/Documentation/devicetree/bindings/mfd/s2mps11.txt deleted file mode 100644 index 09b94c97faaccf1914253bf54a514dad9867a1cb..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/mfd/s2mps11.txt +++ /dev/null @@ -1,153 +0,0 @@ - -* Samsung S2MPS11/13/14/15 and S2MPU02 Voltage and Current Regulator - -The Samsung S2MPS11 is a multi-function device which includes voltage and -current regulators, RTC, charger controller and other sub-blocks. It is -interfaced to the host controller using an I2C interface. Each sub-block is -addressed by the host system using different I2C slave addresses. - -Required properties: -- compatible: Should be one of the following - - "samsung,s2mps11-pmic" - - "samsung,s2mps13-pmic" - - "samsung,s2mps14-pmic" - - "samsung,s2mps15-pmic" - - "samsung,s2mpu02-pmic". -- reg: Specifies the I2C slave address of the pmic block. It should be 0x66. - -Optional properties: -- interrupt-parent: Specifies the phandle of the interrupt controller to which - the interrupts from s2mps11 are delivered to. -- interrupts: Interrupt specifiers for interrupt sources. -- samsung,s2mps11-wrstbi-ground: Indicates that WRSTBI pin of PMIC is pulled - down. When the system is suspended it will always go down thus triggerring - unwanted buck warm reset (setting buck voltages to default values). -- samsung,s2mps11-acokb-ground: Indicates that ACOKB pin of S2MPS11 PMIC is - connected to the ground so the PMIC must manually set PWRHOLD bit in CTRL1 - register to turn off the power. Usually the ACOKB is pulled up to VBATT so - when PWRHOLD pin goes low, the rising ACOKB will trigger power off. - -Optional nodes: -- clocks: s2mps11, s2mps13, s2mps15 and s5m8767 provide three(AP/CP/BT) buffered 32.768 - KHz outputs, so to register these as clocks with common clock framework - instantiate a sub-node named "clocks". It uses the common clock binding - documented in : - [Documentation/devicetree/bindings/clock/clock-bindings.txt] - The s2mps14 provides two (AP/BT) buffered 32.768 KHz outputs. - - #clock-cells: should be 1. - - - The following is the list of clocks generated by the controller. Each clock - is assigned an identifier and client nodes use this identifier to specify - the clock which they consume. - Clock ID Devices - ---------------------------------------------------------- - 32KhzAP 0 S2MPS11, S2MPS13, S2MPS14, S2MPS15, S5M8767 - 32KhzCP 1 S2MPS11, S2MPS13, S2MPS15, S5M8767 - 32KhzBT 2 S2MPS11, S2MPS13, S2MPS14, S2MPS15, S5M8767 - - - compatible: Should be one of: "samsung,s2mps11-clk", "samsung,s2mps13-clk", - "samsung,s2mps14-clk", "samsung,s5m8767-clk" - The s2msp15 uses the same compatible as s2mps13, as both provides similar clocks. - -- regulators: The regulators of s2mps11 that have to be instantiated should be -included in a sub-node named 'regulators'. Regulator nodes included in this -sub-node should be of the format as listed below. - - regulator_name { - [standard regulator constraints....]; - }; - - regulator-ramp-delay for BUCKs = [6250/12500/25000(default)/50000] uV/us - - BUCK[2/3/4/6] supports disabling ramp delay on hardware, so explicitly - regulator-ramp-delay = <0> can be used for them to disable ramp delay. - In the absence of the regulator-ramp-delay property, the default ramp - delay will be used. - -NOTE: Some BUCKs share the ramp rate setting i.e. same ramp value will be set -for a particular group of BUCKs. So provide same regulator-ramp-delay. -Grouping of BUCKs sharing ramp rate setting is as follow : BUCK[1, 6], -BUCK[3, 4], and BUCK[7, 8, 10] - -On S2MPS14 the LDO10, LDO11 and LDO12 can be configured to external control -over GPIO. To turn this feature on this property must be added to the regulator -sub-node: - - samsung,ext-control-gpios: GPIO specifier for one GPIO - controlling this regulator (enable/disable); -Example: - LDO12 { - regulator-name = "V_EMMC_2.8V"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - samsung,ext-control-gpios = <&gpk0 2 0>; - }; - - -The regulator constraints inside the regulator nodes use the standard regulator -bindings which are documented elsewhere. - -The following are the names of the regulators that the s2mps11 pmic block -supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number -as per the datasheet of s2mps11. - - - LDOn - - valid values for n are: - - S2MPS11: 1 to 38 - - S2MPS13: 1 to 40 - - S2MPS14: 1 to 25 - - S2MPS15: 1 to 27 - - S2MPU02: 1 to 28 - - Example: LDO1, LDO2, LDO28 - - BUCKn - - valid values for n are: - - S2MPS11: 1 to 10 - - S2MPS13: 1 to 10 - - S2MPS14: 1 to 5 - - S2MPS15: 1 to 10 - - S2MPU02: 1 to 7 - - Example: BUCK1, BUCK2, BUCK9 - -Example: - - s2mps11_pmic@66 { - compatible = "samsung,s2mps11-pmic"; - reg = <0x66>; - - s2m_osc: clocks { - compatible = "samsung,s2mps11-clk"; - #clock-cells = <1>; - clock-output-names = "xx", "yy", "zz"; - }; - - regulators { - ldo1_reg: LDO1 { - regulator-name = "VDD_ABB_3.3V"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; - - ldo2_reg: LDO2 { - regulator-name = "VDD_ALIVE_1.1V"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; - regulator-always-on; - }; - - buck1_reg: BUCK1 { - regulator-name = "vdd_mif"; - regulator-min-microvolt = <950000>; - regulator-max-microvolt = <1350000>; - regulator-always-on; - regulator-boot-on; - }; - - buck2_reg: BUCK2 { - regulator-name = "vdd_arm"; - regulator-min-microvolt = <950000>; - regulator-max-microvolt = <1350000>; - regulator-always-on; - regulator-boot-on; - regulator-ramp-delay = <50000>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/mfd/samsung,sec-core.txt b/Documentation/devicetree/bindings/mfd/samsung,sec-core.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdd079bfc287f064170f0d6bbd4d175f7a0049e6 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/samsung,sec-core.txt @@ -0,0 +1,88 @@ +Binding for Samsung S2M and S5M family multi-function device +============================================================ + +This is a part of device tree bindings for S2M and S5M family multi-function +devices. + +The Samsung S2MPA01, S2MPS11/13/14/15, S2MPU02 and S5M8767 is a family +of multi-function devices which include voltage and current regulators, RTC, +charger controller, clock outputs and other sub-blocks. It is interfaced +to the host controller using an I2C interface. Each sub-block is usually +addressed by the host system using different I2C slave addresses. + + +This document describes bindings for main device node. Optional sub-blocks +must be a sub-nodes to it. Bindings for them can be found in: + - bindings/regulator/samsung,s2mpa01.txt + - bindings/regulator/samsung,s2mps11.txt + - bindings/regulator/samsung,s5m8767.txt + - bindings/clock/samsung,s2mps11.txt + + +Required properties: + - compatible: Should be one of the following + - "samsung,s2mpa01-pmic", + - "samsung,s2mps11-pmic", + - "samsung,s2mps13-pmic", + - "samsung,s2mps14-pmic", + - "samsung,s2mps15-pmic", + - "samsung,s2mpu02-pmic", + - "samsung,s5m8767-pmic". + - reg: Specifies the I2C slave address of the pmic block. It should be 0x66. + +Optional properties: + - interrupt-parent: Specifies the phandle of the interrupt controller to which + the interrupts from s2mps11 are delivered to. + - interrupts: Interrupt specifiers for interrupt sources. + - samsung,s2mps11-wrstbi-ground: Indicates that WRSTBI pin of PMIC is pulled + down. When the system is suspended it will always go down thus triggerring + unwanted buck warm reset (setting buck voltages to default values). + - samsung,s2mps11-acokb-ground: Indicates that ACOKB pin of S2MPS11 PMIC is + connected to the ground so the PMIC must manually set PWRHOLD bit in CTRL1 + register to turn off the power. Usually the ACOKB is pulled up to VBATT so + when PWRHOLD pin goes low, the rising ACOKB will trigger power off. + +Example: + + s2mps11_pmic@66 { + compatible = "samsung,s2mps11-pmic"; + reg = <0x66>; + + s2m_osc: clocks { + compatible = "samsung,s2mps11-clk"; + #clock-cells = <1>; + clock-output-names = "xx", "yy", "zz"; + }; + + regulators { + ldo1_reg: LDO1 { + regulator-name = "VDD_ABB_3.3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + ldo2_reg: LDO2 { + regulator-name = "VDD_ALIVE_1.1V"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + + buck1_reg: BUCK1 { + regulator-name = "vdd_mif"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + }; + + buck2_reg: BUCK2 { + regulator-name = "vdd_arm"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + regulator-ramp-delay = <50000>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/mfd/syscon.txt b/Documentation/devicetree/bindings/mfd/syscon.txt index fe8150bb3248e9920f8a24f4b91efe39440b38d6..408f768686f1e9c9606dc5a188cbd7ba1b8f663f 100644 --- a/Documentation/devicetree/bindings/mfd/syscon.txt +++ b/Documentation/devicetree/bindings/mfd/syscon.txt @@ -13,6 +13,10 @@ Required properties: - compatible: Should contain "syscon". - reg: the register region can be accessed from syscon +Optional property: +- reg-io-width: the size (in bytes) of the IO accesses that should be + performed on the device. + Examples: gpr: iomuxc-gpr@020e0000 { compatible = "fsl,imx6q-iomuxc-gpr", "syscon"; diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt index cae29eb5733d8a529d2ec2c3606e4c2255a01884..ff611fa66871dec93363ea875df4237df90423f0 100644 --- a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt +++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt @@ -11,6 +11,7 @@ Required properties: - "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs - "renesas,mmcif-r8a7790" for the MMCIF found in r8a7790 SoCs - "renesas,mmcif-r8a7791" for the MMCIF found in r8a7791 SoCs + - "renesas,mmcif-r8a7793" for the MMCIF found in r8a7793 SoCs - "renesas,mmcif-r8a7794" for the MMCIF found in r8a7794 SoCs - clocks: reference to the functional clock diff --git a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt index 4ff7128ee3b20297d427dc9fb98ac0cb0c1966c7..c2546ced9c02a379b2c67c498f0cdc2fbccd03a4 100644 --- a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt +++ b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.txt @@ -45,6 +45,8 @@ Required properties: - #size-cells : <0> Optional properties: +- clock : reference to the clock for the NAND controller +- clock-names : "nand" (required for the above clock) - brcm,nand-has-wp : Some versions of this IP include a write-protect (WP) control bit. It is always available on >= v7.0. Use this property to describe the rare @@ -72,6 +74,12 @@ we define additional 'compatible' properties and associated register resources w and enable registers - reg-names: (required) "nand-int-base" + * "brcm,nand-bcm6368" + - compatible: should contain "brcm,nand-bcm", "brcm,nand-bcm6368" + - reg: (required) the 'NAND_INTR_BASE' register range, with combined status + and enable registers, and boot address registers + - reg-names: (required) "nand-int-base" + * "brcm,nand-iproc" - reg: (required) the "IDM" register range, for interrupt enable and APB bus access endianness configuration, and the "EXT" register range, @@ -148,3 +156,27 @@ nand@f0442800 { }; }; }; + +nand@10000200 { + compatible = "brcm,nand-bcm63168", "brcm,nand-bcm6368", + "brcm,brcmnand-v4.0", "brcm,brcmnand"; + reg = <0x10000200 0x180>, + <0x10000600 0x200>, + <0x100000b0 0x10>; + reg-names = "nand", "nand-cache", "nand-int-base"; + interrupt-parent = <&periph_intc>; + interrupts = <50>; + clocks = <&periph_clk 20>; + clock-names = "nand"; + + #address-cells = <1>; + #size-cells = <0>; + + nand0: nandcs@0 { + compatible = "brcm,nandcs"; + reg = <0>; + nand-on-flash-bbt; + nand-ecc-strength = <1>; + nand-ecc-step-size = <512>; + }; +}; diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt index 862aa2f8837af35f24317890e1ddd162017ac2da..00c587b3d3ae77eb958ba769b7a494edbee127e6 100644 --- a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt +++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt @@ -2,7 +2,8 @@ Required properties: - compatible : Should be "fsl,vf610-qspi", "fsl,imx6sx-qspi", - "fsl,imx7d-qspi", "fsl,imx6ul-qspi" + "fsl,imx7d-qspi", "fsl,imx6ul-qspi", + "fsl,ls1021-qspi" - reg : the first contains the register location and length, the second contains the memory mapping address and length - reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory" diff --git a/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt b/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt new file mode 100644 index 0000000000000000000000000000000000000000..29ea5853ca9147bbc7f8277e33925193792b91c9 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt @@ -0,0 +1,86 @@ +* Ingenic JZ4780 NAND/BCH + +This file documents the device tree bindings for NAND flash devices on the +JZ4780. NAND devices are connected to the NEMC controller (described in +memory-controllers/ingenic,jz4780-nemc.txt), and thus NAND device nodes must +be children of the NEMC node. + +Required NAND controller device properties: +- compatible: Should be set to "ingenic,jz4780-nand". +- reg: For each bank with a NAND chip attached, should specify a bank number, + an offset of 0 and a size of 0x1000000 (i.e. the whole NEMC bank). + +Optional NAND controller device properties: +- ingenic,bch-controller: To make use of the hardware BCH controller, this + property must contain a phandle for the BCH controller node. The required + properties for this node are described below. If this is not specified, + software BCH will be used instead. + +Optional children nodes: +- Individual NAND chips are children of the NAND controller node. + +Required children node properties: +- reg: An integer ranging from 1 to 6 representing the CS line to use. + +Optional children node properties: +- nand-ecc-step-size: ECC block size in bytes. +- nand-ecc-strength: ECC strength (max number of correctable bits). +- nand-ecc-mode: String, operation mode of the NAND ecc mode. "hw" by default +- nand-on-flash-bbt: boolean to enable on flash bbt option, if not present false +- rb-gpios: GPIO specifier for the busy pin. +- wp-gpios: GPIO specifier for the write protect pin. + +Optional child node of NAND chip nodes: +- partitions: see Documentation/devicetree/bindings/mtd/partition.txt + +Example: + +nemc: nemc@13410000 { + ... + + nandc: nand-controller@1 { + compatible = "ingenic,jz4780-nand"; + reg = <1 0 0x1000000>; /* Bank 1 */ + + #address-cells = <1>; + #size-cells = <0>; + + ingenic,bch-controller = <&bch>; + + nand@1 { + reg = <1>; + + nand-ecc-step-size = <1024>; + nand-ecc-strength = <24>; + nand-ecc-mode = "hw"; + nand-on-flash-bbt; + + rb-gpios = <&gpa 20 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpf 22 GPIO_ACTIVE_LOW>; + + partitions { + #address-cells = <2>; + #size-cells = <2>; + ... + } + }; + }; +}; + +The BCH controller is a separate SoC component used for error correction on +NAND devices. The following is a description of the device properties for a +BCH controller. + +Required BCH properties: +- compatible: Should be set to "ingenic,jz4780-bch". +- reg: Should specify the BCH controller registers location and length. +- clocks: Clock for the BCH controller. + +Example: + +bch: bch@134d0000 { + compatible = "ingenic,jz4780-bch"; + reg = <0x134d0000 0x10000>; + + clocks = <&cgu JZ4780_CLK_BCH>; +}; diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt index 2bee68103b0112ecc106edc25a74a33f69462cc8..2c91c03e7eb0265e1c7db5edc4d548600555e613 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt @@ -1,15 +1,61 @@ -* MTD SPI driver for ST M25Pxx (and similar) serial flash chips +* SPI NOR flash: ST M25Pxx (and similar) serial flash chips Required properties: - #address-cells, #size-cells : Must be present if the device has sub-nodes representing partitions. - compatible : May include a device-specific string consisting of the - manufacturer and name of the chip. Bear in mind the DT binding - is not Linux-only, but in case of Linux, see the "m25p_ids" - table in drivers/mtd/devices/m25p80.c for the list of supported - chips. + manufacturer and name of the chip. A list of supported chip + names follows. Must also include "jedec,spi-nor" for any SPI NOR flash that can be identified by the JEDEC READ ID opcode (0x9F). + + Supported chip names: + at25df321a + at25df641 + at26df081a + mr25h256 + mx25l4005a + mx25l1606e + mx25l6405d + mx25l12805d + mx25l25635e + n25q064 + n25q128a11 + n25q128a13 + n25q512a + s25fl256s1 + s25fl512s + s25sl12801 + s25fl008k + s25fl064k + sst25vf040b + m25p40 + m25p80 + m25p16 + m25p32 + m25p64 + m25p128 + w25x80 + w25x32 + w25q32 + w25q32dw + w25q80bl + w25q128 + w25q256 + + The following chip names have been used historically to + designate quirky versions of flash chips that do not support the + JEDEC READ ID opcode (0x9F): + m25p05-nonjedec + m25p10-nonjedec + m25p20-nonjedec + m25p40-nonjedec + m25p80-nonjedec + m25p16-nonjedec + m25p32-nonjedec + m25p64-nonjedec + m25p128-nonjedec + - reg : Chip-Select number - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at diff --git a/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb314f09861b81334496dbe2aaec08e0d7102913 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/mtk-quadspi.txt @@ -0,0 +1,41 @@ +* Serial NOR flash controller for MTK MT81xx (and similar) + +Required properties: +- compatible: should be "mediatek,mt8173-nor"; +- reg: physical base address and length of the controller's register +- clocks: the phandle of the clocks needed by the nor controller +- clock-names: the names of the clocks + the clocks should be named "spi" and "sf". "spi" is used for spi bus, + and "sf" is used for controller, these are the clocks witch + hardware needs to enabling nor flash and nor flash controller. + See Documentation/devicetree/bindings/clock/clock-bindings.txt for details. +- #address-cells: should be <1> +- #size-cells: should be <0> + +The SPI flash must be a child of the nor_flash node and must have a +compatible property. Also see jedec,spi-nor.txt. + +Required properties: +- compatible: May include a device-specific string consisting of the manufacturer + and name of the chip. Must also include "jedec,spi-nor" for any + SPI NOR flash that can be identified by the JEDEC READ ID opcode (0x9F). +- reg : Chip-Select number + +Example: + +nor_flash: spi@1100d000 { + compatible = "mediatek,mt8173-nor"; + reg = <0 0x1100d000 0 0xe0>; + clocks = <&pericfg CLK_PERI_SPI>, + <&topckgen CLK_TOP_SPINFI_IFR_SEL>; + clock-names = "spi", "sf"; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + + flash@0 { + compatible = "jedec,spi-nor"; + reg = <0>; + }; +}; + diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt index f1e2a02381a498a8f94e379870a8dcb1d2dec4ca..81a224da63be54a4b7a5b248f57a985423b36213 100644 --- a/Documentation/devicetree/bindings/mtd/partition.txt +++ b/Documentation/devicetree/bindings/mtd/partition.txt @@ -6,7 +6,9 @@ used for what purposes, but which don't use an on-flash partition table such as RedBoot. The partition table should be a subnode of the mtd node and should be named -'partitions'. Partitions are defined in subnodes of the partitions node. +'partitions'. This node should have the following property: +- compatible : (required) must be "fixed-partitions" +Partitions are then defined in subnodes of the partitions node. For backwards compatibility partitions as direct subnodes of the mtd device are supported. This use is discouraged. @@ -30,12 +32,15 @@ Optional properties: partition should only be mounted read-only. This is usually used for flash partitions containing early-boot firmware images or data which should not be clobbered. +- lock : Do not unlock the partition at initialization time (not supported on + all devices) Examples: flash@0 { partitions { + compatible = "fixed-partitions"; #address-cells = <1>; #size-cells = <1>; @@ -53,6 +58,7 @@ flash@0 { flash@1 { partitions { + compatible = "fixed-partitions"; #address-cells = <1>; #size-cells = <2>; @@ -66,6 +72,7 @@ flash@1 { flash@2 { partitions { + compatible = "fixed-partitions"; #address-cells = <2>; #size-cells = <2>; diff --git a/Documentation/devicetree/bindings/net/cdns-emac.txt b/Documentation/devicetree/bindings/net/cdns-emac.txt deleted file mode 100644 index 4451ee9732239b50d8329339eb57662e639c24b4..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/net/cdns-emac.txt +++ /dev/null @@ -1,20 +0,0 @@ -* Cadence EMAC Ethernet controller - -Required properties: -- compatible: Should be "cdns,[-]{emac}" - Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. - Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC. - Or the generic form: "cdns,emac". -- reg: Address and length of the register set for the device -- interrupts: Should contain macb interrupt -- phy-mode: see ethernet.txt file in the same directory. - -Examples: - - macb0: ethernet@fffc4000 { - compatible = "cdns,at91rm9200-emac"; - reg = <0xfffc4000 0x4000>; - interrupts = <21>; - phy-mode = "rmii"; - local-mac-address = [3a 0e 03 04 05 06]; - }; diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 9853f8e7096613e990c28cc545b39ab7f26afddc..28a4781ab6d7b9d6a1ab553ed96857f0f509e250 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -40,18 +40,18 @@ Optional properties: Slave Properties: Required properties: -- phy_id : Specifies slave phy id - phy-mode : See ethernet.txt file in the same directory Optional properties: - dual_emac_res_vlan : Specifies VID to be used to segregate the ports - mac-address : See ethernet.txt file in the same directory +- phy_id : Specifies slave phy id - phy-handle : See ethernet.txt file in the same directory Slave sub-nodes: - fixed-link : See fixed-link.txt file in the same directory - Either the properties phy_id and phy-mode, - or the sub-node fixed-link can be specified + Either the property phy_id, or the sub-node + fixed-link can be specified Note: "ti,hwmods" field is used to fetch the base address and irq resources from TI, omap hwmod data base during device registration. diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index 04e6bef3ac3ff431560f53456a34d4b932c7eca0..5fdbbcdf8c4b84109cfeb28d0c77397d0f93aa42 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -31,6 +31,8 @@ A switch child node has the following optional property: switch. Must be set if the switch can not detect the presence and/or size of a connected EEPROM, otherwise optional. +- reset-gpios : phandle and specifier to a gpio line connected to + reset pin of the switch chip. A switch may have multiple "port" children nodes @@ -114,6 +116,7 @@ Example: #size-cells = <0>; reg = <17 1>; /* MDIO address 17, switch 1 in tree */ mii-bus = <&mii_bus1>; + reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; switch1port0: port@0 { reg = <0>; diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-mdio.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-mdio.txt index 9c23fdf25018ba7b6b8a87fd41653f216c908167..4a7ede9657b08211675a7329ff5cc3b5bd6e90e7 100644 --- a/Documentation/devicetree/bindings/net/hisilicon-hns-mdio.txt +++ b/Documentation/devicetree/bindings/net/hisilicon-hns-mdio.txt @@ -1,7 +1,12 @@ Hisilicon MDIO bus controller Properties: -- compatible: "hisilicon,mdio","hisilicon,hns-mdio". +- compatible: can be one of: + "hisilicon,hns-mdio" + "hisilicon,mdio" + "hisilicon,hns-mdio" is recommended to be used for hip05 and later SOCs, + while "hisilicon,mdio" is optional for backwards compatibility only on + hip04 Soc. - reg: The base address of the MDIO bus controller register bank. - #address-cells: Must be <1>. - #size-cells: Must be <0>. MDIO addresses have no size component. diff --git a/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt b/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt new file mode 100644 index 0000000000000000000000000000000000000000..dea5124cdc52d7227215d15bddfd9c403a7cd904 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ieee802154/adf7242.txt @@ -0,0 +1,18 @@ +* ADF7242 IEEE 802.15.4 * + +Required properties: + - compatible: should be "adi,adf7242" + - spi-max-frequency: maximal bus speed (12.5 MHz) + - reg: the chipselect index + - interrupts: the interrupt generated by the device via pin IRQ1. + IRQ_TYPE_LEVEL_HIGH (4) or IRQ_TYPE_EDGE_FALLING (1) + +Example: + + adf7242@0 { + compatible = "adi,adf7242"; + spi-max-frequency = <10000000>; + reg = <0>; + interrupts = <98 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gpio3>; + }; diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index b5d79761ac970acb7ae506853def327f0eaa5147..d2e243b1ec0e14cae6a576e615857928777e40f8 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -2,15 +2,19 @@ Required properties: - compatible: Should be "cdns,[-]{macb|gem}" + Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs or the 10/100Mbit IP available on sama5d3 SoCs. + Use "cdns,np4-macb" for NP4 SoC devices. Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb". Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on the Cadence GEM, or the generic form: "cdns,gem". Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs. Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs. Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs. + Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC. Use "cdns,zynqmp-gem" for Zynq Ultrascale+ MPSoC. + Or the generic form: "cdns,emac". - reg: Address and length of the register set for the device - interrupts: Should contain macb interrupt - phy-mode: See ethernet.txt file in the same directory. @@ -19,6 +23,9 @@ Required properties: Optional elements: 'tx_clk' - clocks: Phandles to input clocks. +Optional properties for PHY child node: +- reset-gpios : Should specify the gpio for phy reset + Examples: macb0: ethernet@fffc4000 { @@ -29,4 +36,8 @@ Examples: local-mac-address = [3a 0e 03 04 05 06]; clock-names = "pclk", "hclk", "tx_clk"; clocks = <&clkc 30>, <&clkc 30>, <&clkc 13>; + ethernet-phy@1 { + reg = <0x1>; + reset-gpios = <&pioE 6 1>; + }; }; diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index 692076fda0e589788567a5283d79525f04f01852..f9c32adab5c6414b6d17577a37e31245e5fa30a7 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -1,8 +1,9 @@ Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY -Some boards require special tuning values, particularly when it comes to -clock delays. You can specify clock delay values by adding -micrel-specific properties to an Ethernet OF device node. +Some boards require special tuning values, particularly when it comes +to clock delays. You can specify clock delay values in the PHY OF +device node. Deprecated, but still supported, these properties can +also be added to an Ethernet OF device node. Note that these settings are applied after any phy-specific fixup from phy_fixup_list (see phy_init_hw() from drivers/net/phy/phy_device.c), @@ -57,16 +58,6 @@ KSZ9031: Examples: - /* Attach to an Ethernet device with autodetected PHY */ - &enet { - rxc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - txc-skew-ps = <3000>; - txen-skew-ps = <0>; - status = "okay"; - }; - - /* Attach to an explicitly-specified PHY */ mdio { phy0: ethernet-phy@0 { rxc-skew-ps = <3000>; diff --git a/Documentation/devicetree/bindings/net/nfc/st95hf.txt b/Documentation/devicetree/bindings/net/nfc/st95hf.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea3178bc9ddd5f85dc6bc02f1ac0b5060b44fb99 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/st95hf.txt @@ -0,0 +1,50 @@ +* STMicroelectronics : NFC Transceiver ST95HF + +ST NFC Transceiver is required to attach with SPI bus. +ST95HF node should be defined in DT as SPI slave device of SPI +master with which ST95HF transceiver is physically connected. +The properties defined below are required to be the part of DT +to include ST95HF transceiver into the platform. + +Required properties: +=================== +- reg: Address of SPI slave "ST95HF transceiver" on SPI master bus. + +- compatible: should be "st,st95hf" for ST95HF NFC transceiver + +- spi-max-frequency: Max. operating SPI frequency for ST95HF + transceiver. + +- enable-gpio: GPIO line to enable ST95HF transceiver. + +- interrupt-parent : Standard way to specify the controller to which + ST95HF transceiver's interrupt is routed. + +- interrupts : Standard way to define ST95HF transceiver's out + interrupt. + +Optional property: +================= +- st95hfvin-supply : This is an optional property. It contains a + phandle to ST95HF transceiver's regulator supply node in DT. + +Example: +======= +spi@9840000 { + reg = <0x9840000 0x110>; + #address-cells = <1>; + #size-cells = <0>; + cs-gpios = <&pio0 4>; + status = "okay"; + + st95hf@0{ + reg = <0>; + compatible = "st,st95hf"; + status = "okay"; + spi-max-frequency = <1000000>; + enable-gpio = <&pio4 0>; + interrupt-parent = <&pio0>; + interrupts = <7 IRQ_TYPE_EDGE_FALLING>; + }; + +}; diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index b486f3f5f6a3264037c07d05d335b1737e8d455e..81a9f9e6b45ff85c83da488fbafc2c2b89839600 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -5,8 +5,18 @@ interface contains. Required properties: - compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC. + "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC. + "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC. + "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC. "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC. "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC. + "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface. + "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface. + + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first + followed by the generic version. + - reg: offset and length of (1) the register block and (2) the stream buffer. - interrupts: A list of interrupt-specifiers, one for each entry in interrupt-names. @@ -37,7 +47,7 @@ Optional properties: Example: ethernet@e6800000 { - compatible = "renesas,etheravb-r8a7795"; + compatible = "renesas,etheravb-r8a7795", "renesas,etheravb-rcar-gen3"; reg = <0 0xe6800000 0 0x800>, <0 0xe6a00000 0 0x10000>; interrupt-parent = <&gic>; interrupts = , diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt index 3a9d6795160654080ec4aaa114304b62d5f0754f..72d82d684342b116efdbe2f7e05246ee854a1103 100644 --- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt +++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt @@ -11,6 +11,8 @@ Required properties: designware version numbers documented in stmmac.txt - altr,sysmgr-syscon : Should be the phandle to the system manager node that encompasses the glue register, the register offset, and the register shift. + - altr,f2h_ptp_ref_clk use f2h_ptp_ref_clk instead of default eosc1 clock + for ptp ref clk. This affects all emacs as the clock is common. Optional properties: altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index f34fc3c81a755c8a70573c50fa0a5b6dfae96a3c..e862a922bd3f957daca0989d1a94ae246afa48ca 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -35,18 +35,18 @@ Optional properties: - reset-names: Should contain the reset signal name "stmmaceth", if a reset phandle is given - max-frame-size: See ethernet.txt file in the same directory -- clocks: If present, the first clock should be the GMAC main clock and - the second clock should be peripheral's register interface clock. Further - clocks may be specified in derived bindings. -- clock-names: One name for each entry in the clocks property, the - first one should be "stmmaceth" and the second one should be "pclk". -- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is - available this clock is used for programming the Timestamp Addend Register. - If not passed then the system clock will be used and this is fine on some - platforms. +- clocks: If present, the first clock should be the GMAC main clock + The optional second clock should be peripheral's register interface clock. + The third optional clock should be the ptp reference clock. + Further clocks may be specified in derived bindings. +- clock-names: One name for each entry in the clocks property. + The first one should be "stmmaceth". + The optional second one should be "pclk". + The optional third one should be "clk_ptp_ref". - snps,burst_len: The AXI burst lenth value of the AXI BUS MODE register. - tx-fifo-depth: See ethernet.txt file in the same directory - rx-fifo-depth: See ethernet.txt file in the same directory +- mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus. Examples: @@ -65,4 +65,11 @@ Examples: tx-fifo-depth = <16384>; clocks = <&clock>; clock-names = "stmmaceth"; + mdio0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + phy1: ethernet-phy@0 { + }; + }; }; diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 0cb44dc21f97ca7cfde5056ce7d44941d3439546..601256fe8c0dd99d2df3ff5a77b2cee23a852e5d 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -45,21 +45,10 @@ Devices supporting OPPs must set their "operating-points-v2" property with phandle to a OPP table in their DT node. The OPP core will use this phandle to find the operating points for the device. -Devices may want to choose OPP tables at runtime and so can provide a list of -phandles here. But only *one* of them should be chosen at runtime. This must be -accompanied by a corresponding "operating-points-names" property, to uniquely -identify the OPP tables. - If required, this can be extended for SoC vendor specfic bindings. Such bindings should be documented as Documentation/devicetree/bindings/power/-opp.txt and should have a compatible description like: "operating-points-v2-". -Optional properties: -- operating-points-names: Names of OPP tables (required if multiple OPP - tables are present), to uniquely identify them. The same list must be present - for all the CPUs which are sharing clock/voltage rails and hence the OPP - tables. - * OPP Table Node This describes the OPPs belonging to a device. This node can have following @@ -100,6 +89,14 @@ Optional properties: Entries for multiple regulators must be present in the same order as regulators are specified in device's DT node. +- opp-microvolt-: Named opp-microvolt property. This is exactly similar to + the above opp-microvolt property, but allows multiple voltage ranges to be + provided for the same OPP. At runtime, the platform can pick a and + matching opp-microvolt- property will be enabled for all OPPs. If the + platform doesn't pick a specific or the doesn't match with any + opp-microvolt- properties, then opp-microvolt property shall be used, if + present. + - opp-microamp: The maximum current drawn by the device in microamperes considering system specific parameters (such as transients, process, aging, maximum operating temperature range etc.) as necessary. This may be used to @@ -112,6 +109,9 @@ Optional properties: for few regulators, then this should be marked as zero for them. If it isn't required for any regulator, then this property need not be present. +- opp-microamp-: Named opp-microamp property. Similar to + opp-microvolt- property, but for microamp instead. + - clock-latency-ns: Specifies the maximum possible transition latency (in nanoseconds) for switching to this OPP from any other OPP. @@ -123,6 +123,26 @@ Optional properties: - opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in the table should have this. +- opp-supported-hw: This enables us to select only a subset of OPPs from the + larger OPP table, based on what version of the hardware we are running on. We + still can't have multiple nodes with the same opp-hz value in OPP table. + + It's an user defined array containing a hierarchy of hardware version numbers, + supported by the OPP. For example: a platform with hierarchy of three levels + of versions (A, B and C), this field should be like , where X + corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z + corresponds to version hierarchy C. + + Each level of hierarchy is represented by a 32 bit value, and so there can be + only 32 different supported version per hierarchy. i.e. 1 bit per version. A + value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy + level. And a value of 0x00000000 will disable the OPP completely, and so we + never want that to happen. + + If 32 values aren't sufficient for a version hierarchy, than that version + hierarchy can be contained in multiple 32 bit values. i.e. in the + above example, Z1 & Z2 refer to the version hierarchy Z. + - status: Marks the node enabled/disabled. Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. @@ -157,20 +177,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; clock-latency-ns = <290000>; @@ -236,20 +256,20 @@ independently. * independently. */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000; @@ -312,20 +332,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000>; @@ -338,20 +358,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp10 { + opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1045000 1050000 1055000>; opp-microamp = <95000>; clock-latency-ns = <400000>; opp-suspend; }; - opp11 { + opp@1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1075000>; opp-microamp = <100000>; clock-latency-ns = <400000>; }; - opp12 { + opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1010000 1100000 1110000>; opp-microamp = <95000>; @@ -378,7 +398,7 @@ Example 4: Handling multiple regulators compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000>, /* Supply 0 */ <960000>, /* Supply 1 */ @@ -391,7 +411,7 @@ Example 4: Handling multiple regulators /* OR */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>, /* Supply 0 */ <960000 965000 975000>, /* Supply 1 */ @@ -404,7 +424,7 @@ Example 4: Handling multiple regulators /* OR */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>, /* Supply 0 */ <960000 965000 975000>, /* Supply 1 */ @@ -417,7 +437,8 @@ Example 4: Handling multiple regulators }; }; -Example 5: Multiple OPP tables +Example 5: opp-supported-hw +(example: three level hierarchy of versions: cuts, substrate and process) / { cpus { @@ -426,40 +447,73 @@ Example 5: Multiple OPP tables ... cpu-supply = <&cpu_supply> - operating-points-v2 = <&cpu0_opp_table_slow>, <&cpu0_opp_table_fast>; - operating-points-names = "slow", "fast"; + operating-points-v2 = <&cpu0_opp_table_slow>; }; }; - cpu0_opp_table_slow: opp_table_slow { + opp_table { compatible = "operating-points-v2"; status = "okay"; opp-shared; - opp00 { + opp@600000000 { + /* + * Supports all substrate and process versions for 0xF + * cuts, i.e. only first four cuts. + */ + opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <900000 915000 925000>; ... }; - opp01 { + opp@800000000 { + /* + * Supports: + * - cuts: only one, 6th cut (represented by 6th bit). + * - substrate: supports 16 different substrate versions + * - process: supports 9 different process versions + */ + opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> opp-hz = /bits/ 64 <800000000>; + opp-microvolt = <900000 915000 925000>; ... }; }; +}; + +Example 6: opp-microvolt-, opp-microamp-: +(example: device with two possible microvolt ranges: slow and fast) - cpu0_opp_table_fast: opp_table_fast { +/ { + cpus { + cpu@0 { + compatible = "arm,cortex-a7"; + ... + + operating-points-v2 = <&cpu0_opp_table>; + }; + }; + + cpu0_opp_table: opp_table0 { compatible = "operating-points-v2"; - status = "okay"; opp-shared; - opp10 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - ... + opp-microvolt-slow = <900000 915000 925000>; + opp-microvolt-fast = <970000 975000 985000>; + opp-microamp-slow = <70000>; + opp-microamp-fast = <71000>; }; - opp11 { - opp-hz = /bits/ 64 <1100000000>; - ... + opp@1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */ + <910000 925000 935000>; /* Supply vcc1 */ + opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */ + <960000 965000 975000>; /* Supply vcc1 */ + opp-microamp = <70000>; /* Will be used for both slow/fast */ }; }; }; diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt b/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt index 7f81ef90146ab8f3dbce86e0a5d690917b3dfe55..d87ab7c127b8a0c439feee9d181ba1fd8f32283a 100644 --- a/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt +++ b/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt @@ -2,6 +2,7 @@ Required properties: - compatible: should be one or more of + "brcm,bcm7425-sata-phy" "brcm,bcm7445-sata-phy" "brcm,phy-sata3" - address-cells: should be 1 diff --git a/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt b/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt new file mode 100644 index 0000000000000000000000000000000000000000..f17a56e2152f0442c320bec6130b05d9f84c48dc --- /dev/null +++ b/Documentation/devicetree/bindings/phy/phy-hi6220-usb.txt @@ -0,0 +1,16 @@ +Hisilicon hi6220 usb PHY +----------------------- + +Required properties: +- compatible: should be "hisilicon,hi6220-usb-phy" +- #phy-cells: must be 0 +- hisilicon,peripheral-syscon: phandle of syscon used to control phy. +Refer to phy/phy-bindings.txt for the generic PHY binding properties + +Example: + usb_phy: usbphy { + compatible = "hisilicon,hi6220-usb-phy"; + #phy-cells = <0>; + phy-supply = <&fixed_5v_hub>; + hisilicon,peripheral-syscon = <&sys_ctrl>; + }; diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt new file mode 100644 index 0000000000000000000000000000000000000000..2390e4e9c84c64838e86480b4e6554768e8d124c --- /dev/null +++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt @@ -0,0 +1,39 @@ +* Renesas R-Car generation 3 USB 2.0 PHY + +This file provides information on what the device node for the R-Car generation +3 USB 2.0 PHY contains. + +Required properties: +- compatible: "renesas,usb2-phy-r8a7795" if the device is a part of an R8A7795 + SoC. +- reg: offset and length of the partial USB 2.0 Host register block. +- reg-names: must be "usb2_host". +- clocks: clock phandle and specifier pair(s). +- #phy-cells: see phy-bindings.txt in the same directory, must be <0>. + +Optional properties: +To use a USB channel where USB 2.0 Host and HSUSB (USB 2.0 Peripheral) are +combined, the device tree node should set HSUSB properties to reg and reg-names +properties. This is because HSUSB has registers to select USB 2.0 host or +peripheral at that channel: +- reg: offset and length of the partial HSUSB register block. +- reg-names: must be "hsusb". +- interrupts: interrupt specifier for the PHY. + +Example (R-Car H3): + + usb-phy@ee080200 { + compatible = "renesas,usb2-phy-r8a7795"; + reg = <0 0xee080200 0 0x700>, <0 0xe6590100 0 0x100>; + reg-names = "usb2_host", "hsusb"; + interrupts = ; + clocks = <&mstp7_clks R8A7795_CLK_EHCI0>, + <&mstp7_clks R8A7795_CLK_HSUSB>; + }; + + usb-phy@ee0a0200 { + compatible = "renesas,usb2-phy-r8a7795"; + reg = <0 0xee0a0200 0 0x700>; + reg-names = "usb2_host"; + clocks = <&mstp7_clks R8A7795_CLK_EHCI0>; + }; diff --git a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt index 826454ac43bb6c1cee88bfe7aa505cf1cacd5635..68498d5603540343ffbf6c3d72f0c8fc65dc1a83 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt +++ b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt @@ -1,7 +1,10 @@ ROCKCHIP USB2 PHY Required properties: - - compatible: rockchip,rk3288-usb-phy + - compatible: matching the soc type, one of + "rockchip,rk3066a-usb-phy" + "rockchip,rk3188-usb-phy" + "rockchip,rk3288-usb-phy" - rockchip,grf : phandle to the syscon managing the "general register files" - #address-cells: should be 1 @@ -21,6 +24,7 @@ required properties: Optional Properties: - clocks : phandle + clock specifier for the phy clocks - clock-names: string, clock name, must be "phyclk" +- #clock-cells: for users of the phy-pll, should be 0 Example: diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt index 0cebf745451760d103097ee73e5160b285610d81..95736d77fbb7168f045f031f8bba7f63980ad14d 100644 --- a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt +++ b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt @@ -9,6 +9,7 @@ Required properties: * allwinner,sun7i-a20-usb-phy * allwinner,sun8i-a23-usb-phy * allwinner,sun8i-a33-usb-phy + * allwinner,sun8i-h3-usb-phy - reg : a list of offset + length pairs - reg-names : * "phy_ctrl" diff --git a/Documentation/devicetree/bindings/phy/ti-phy.txt b/Documentation/devicetree/bindings/phy/ti-phy.txt index 9cf9446eaf2eac41d57251cb5853037e2b31e7c2..a3b394587874dcaeaa34bd08f3fd40d07b19f573 100644 --- a/Documentation/devicetree/bindings/phy/ti-phy.txt +++ b/Documentation/devicetree/bindings/phy/ti-phy.txt @@ -31,6 +31,8 @@ OMAP USB2 PHY Required properties: - compatible: Should be "ti,omap-usb2" + Should be "ti,dra7x-usb2-phy2" for the 2nd instance of USB2 PHY + in DRA7x - reg : Address and length of the register set for the device. - #phy-cells: determine the number of cells that should be given in the phandle while referencing this phy. @@ -40,10 +42,14 @@ Required properties: * "wkupclk" - wakeup clock. * "refclk" - reference clock (optional). -Optional properties: +Deprecated properties: - ctrl-module : phandle of the control module used by PHY driver to power on the PHY. +Recommended properies: +- syscon-phy-power : phandle/offset pair. Phandle to the system control + module and the register offset to power on/off the PHY. + This is usually a subnode of ocp2scp to which it is connected. usb2phy@4a0ad080 { @@ -77,14 +83,22 @@ Required properties: * "div-clk" - apll clock Optional properties: - - ctrl-module : phandle of the control module used by PHY driver to power on - the PHY. - id: If there are multiple instance of the same type, in order to differentiate between each instance "id" can be used (e.g., multi-lane PCIe PHY). If "id" is not provided, it is set to default value of '1'. - syscon-pllreset: Handle to system control region that contains the CTRL_CORE_SMA_SW_0 register and register offset to the CTRL_CORE_SMA_SW_0 register that contains the SATA_PLL_SOFT_RESET bit. Only valid for sata_phy. + - syscon-pcs : phandle/offset pair. Phandle to the system control module and the + register offset to write the PCS delay value. + +Deprecated properties: + - ctrl-module : phandle of the control module used by PHY driver to power on + the PHY. + +Recommended properies: + - syscon-phy-power : phandle/offset pair. Phandle to the system control + module and the register offset to power on/off the PHY. This is usually a subnode of ocp2scp to which it is connected. diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt index b321b26780dc0697e54f0821289126c2a5c6e801..9213b27e1036666f98fa077fb05e066b793fca0e 100644 --- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt @@ -17,7 +17,10 @@ Required properties: "allwinner,sun8i-a23-pinctrl" "allwinner,sun8i-a23-r-pinctrl" "allwinner,sun8i-a33-pinctrl" + "allwinner,sun9i-a80-pinctrl" + "allwinner,sun9i-a80-r-pinctrl" "allwinner,sun8i-a83t-pinctrl" + "allwinner,sun8i-h3-pinctrl" - reg: Should contain the register physical address and length for the pin controller. diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt similarity index 94% rename from Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt rename to Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt index 16589fb6f42059c4d3710d87cdf0e7bb37c323e1..e4277921f3e30a6dd8e883434eb733d59d4e34dc 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt +++ b/Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt @@ -1,4 +1,4 @@ -Broadcom Cygnus GPIO/PINCONF Controller +Broadcom iProc GPIO/PINCONF Controller Required properties: @@ -7,9 +7,12 @@ Required properties: "brcm,cygnus-crmu-gpio" or "brcm,iproc-gpio" - reg: - Define the base and range of the I/O address space that contains the Cygnus + Define the base and range of the I/O address space that contains SoC GPIO/PINCONF controller registers +- ngpios: + Total number of in-use slots in GPIO controller + - #gpio-cells: Must be two. The first cell is the GPIO pin number (within the controller's pin space) and the second cell is used for the following: @@ -57,6 +60,7 @@ Example: compatible = "brcm,cygnus-ccm-gpio"; reg = <0x1800a000 0x50>, <0x0301d164 0x20>; + ngpios = <24>; #gpio-cells = <2>; gpio-controller; interrupts = ; @@ -78,6 +82,7 @@ Example: gpio_asiu: gpio@180a5000 { compatible = "brcm,cygnus-asiu-gpio"; reg = <0x180a5000 0x668>; + ngpios = <146>; #gpio-cells = <2>; gpio-controller; interrupts = ; diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt new file mode 100644 index 0000000000000000000000000000000000000000..0844168a6dd4b0a68be830f9d868c143df84e737 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt @@ -0,0 +1,80 @@ +Broadcom Northstar plus (NSP) GPIO/PINCONF Controller + +Required properties: +- compatible: + Must be "brcm,nsp-gpio-a" + +- reg: + Should contain the register physical address and length for each of + GPIO base, IO control registers + +- #gpio-cells: + Must be two. The first cell is the GPIO pin number (within the + controller's pin space) and the second cell is used for the following: + bit[0]: polarity (0 for active high and 1 for active low) + +- gpio-controller: + Specifies that the node is a GPIO controller + +- ngpios: + Number of gpios supported (58x25 supports 32 and 58x23 supports 24) + +Optional properties: +- interrupts: + Interrupt ID + +- interrupt-controller: + Specifies that the node is an interrupt controller + +- gpio-ranges: + Specifies the mapping between gpio controller and pin-controllers pins. + This requires 4 fields in cells defined as - + 1. Phandle of pin-controller. + 2. GPIO base pin offset. + 3 Pin-control base pin offset. + 4. number of gpio pins which are linearly mapped from pin base. + +Supported generic PINCONF properties in child nodes: +- pins: + The list of pins (within the controller's own pin space) that properties + in the node apply to. Pin names are "gpio-" + +- bias-disable: + Disable pin bias + +- bias-pull-up: + Enable internal pull up resistor + +- bias-pull-down: + Enable internal pull down resistor + +- drive-strength: + Valid drive strength values include 2, 4, 6, 8, 10, 12, 14, 16 (mA) + +Example: + + gpioa: gpio@18000020 { + compatible = "brcm,nsp-gpio-a"; + reg = <0x18000020 0x100>, + <0x1803f1c4 0x1c>; + #gpio-cells = <2>; + gpio-controller; + ngpios = <32>; + gpio-ranges = <&pinctrl 0 0 31>; + interrupt-controller; + interrupts = ; + + /* Hog a few default settings */ + pinctrl-names = "default"; + pinctrl-0 = <&led>; + led: led { + pins = "gpio-1"; + bias-pull-up; + }; + + pwr: pwr { + gpio-hog; + gpios = <3 1>; + output-high; + }; + }; diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt index e89b4677567d0fcd9a4550905ea806d905d951c0..8e5216bcd74852cb6f675002d31866a5e53de206 100644 --- a/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt +++ b/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt @@ -1,7 +1,16 @@ Lantiq XWAY pinmux controller Required properties: -- compatible: "lantiq,pinctrl-xway" or "lantiq,pinctrl-xr9" +- compatible: "lantiq,pinctrl-xway", (DEPRECATED: Use "lantiq,pinctrl-danube") + "lantiq,pinctrl-xr9", (DEPRECATED: Use "lantiq,xrx100-pinctrl" or + "lantiq,xrx200-pinctrl") + "lantiq,pinctrl-ase", (DEPRECATED: Use "lantiq,ase-pinctrl") + "lantiq,-pinctrl", where is: + "ase" (XWAY AMAZON Family) + "danube" (XWAY DANUBE Family) + "xrx100" (XWAY xRX100 Family) + "xrx200" (XWAY xRX200 Family) + "xrx300" (XWAY xRX300 Family) - reg: Should contain the physical address and length of the gpio/pinmux register range @@ -36,19 +45,87 @@ Required subnode-properties: Valid values for group and function names: +XWAY: (DEPRECATED: Use DANUBE) mux groups: exin0, exin1, exin2, jtag, ebu a23, ebu a24, ebu a25, ebu clk, ebu cs1, ebu wait, nand ale, nand cs1, nand cle, spi, spi_cs1, spi_cs2, spi_cs3, - spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi , gpt1, gpt2, + spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1, clkout2, clkout3, gnt1, gnt2, gnt3, req1, req2, req3 - additional mux groups (XR9 only): - mdio, nand rdy, nand rd, exin3, exin4, gnt4, req4 + functions: + spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu + +XR9: ( DEPRECATED: Use xRX100/xRX200) + mux groups: + exin0, exin1, exin2, exin3, exin4, jtag, ebu a23, ebu a24, ebu a25, + ebu clk, ebu cs1, ebu wait, nand ale, nand cs1, nand cle, nand rdy, + nand rd, spi, spi_cs1, spi_cs2, spi_cs3, spi_cs4, spi_cs5, spi_cs6, + asc0, asc0 cts rts, stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1, + clkout2, clkout3, gnt1, gnt2, gnt3, gnt4, req1, req2, req3, req4, mdio, + gphy0 led0, gphy0 led1, gphy0 led2, gphy1 led0, gphy1 led1, gphy1 led2 + + functions: + spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, mdio, gphy + +AMAZON: + mux groups: + exin0, exin1, exin2, jtag, spi_di, spi_do, spi_clk, spi_cs1, spi_cs2, + spi_cs3, spi_cs4, spi_cs5, spi_cs6, asc, stp, gpt1, gpt2, gpt3, clkout0, + clkout1, clkout2, mdio, dfe led0, dfe led1, ephy led0, ephy led1, ephy led2 + + functions: + spi, asc, cgu, jtag, exin, stp, gpt, mdio, ephy, dfe + +DANUBE: + mux groups: + exin0, exin1, exin2, jtag, ebu a23, ebu a24, ebu a25, ebu clk, ebu cs1, + ebu wait, nand ale, nand cs1, nand cle, spi_di, spi_do, spi_clk, spi_cs1, + spi_cs2, spi_cs3, spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi, + gpt1, gpt2, gpt3, clkout0, clkout1, clkout2, clkout3, gnt1, gnt2, gnt3, + req1, req2, req3, dfe led0, dfe led1 functions: - spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, mdio + spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, dfe +xRX100: + mux groups: + exin0, exin1, exin2, exin3, exin4, ebu a23, ebu a24, ebu a25, ebu clk, + ebu cs1, ebu wait, nand ale, nand cs1, nand cle, nand rdy, nand rd, + spi_di, spi_do, spi_clk, spi_cs1, spi_cs2, spi_cs3, spi_cs4, spi_cs5, + spi_cs6, asc0, asc0 cts rts, stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1, + clkout2, clkout3, gnt1, gnt2, gnt3, gnt4, req1, req2, req3, req4, mdio, + dfe led0, dfe led1 + + functions: + spi, asc, cgu, exin, stp, gpt, nmi, pci, ebu, mdio, dfe + +xRX200: + mux groups: + exin0, exin1, exin2, exin3, exin4, ebu a23, ebu a24, ebu a25, ebu clk, + ebu cs1, ebu wait, nand ale, nand cs1, nand cle, nand rdy, nand rd, + spi_di, spi_do, spi_clk, spi_cs1, spi_cs2, spi_cs3, spi_cs4, spi_cs5, + spi_cs6, usif uart_rx, usif uart_tx, usif uart_rts, usif uart_cts, + usif uart_dtr, usif uart_dsr, usif uart_dcd, usif uart_ri, usif spi_di, + usif spi_do, usif spi_clk, usif spi_cs0, usif spi_cs1, usif spi_cs2, + stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1, clkout2, clkout3, gnt1, + gnt2, gnt3, gnt4, req1, req2, req3, req4, mdio, dfe led0, dfe led1, + gphy0 led0, gphy0 led1, gphy0 led2, gphy1 led0, gphy1 led1, gphy1 led2 + + functions: + spi, usif, cgu, exin, stp, gpt, nmi, pci, ebu, mdio, dfe, gphy + +xRX300: + mux groups: + exin0, exin1, exin2, exin4, nand ale, nand cs0, nand cs1, nand cle, + nand rdy, nand rd, nand_d0, nand_d1, nand_d2, nand_d3, nand_d4, nand_d5, + nand_d6, nand_d7, nand_d1, nand wr, nand wp, nand se, spi_di, spi_do, + spi_clk, spi_cs1, spi_cs4, spi_cs6, usif uart_rx, usif uart_tx, + usif spi_di, usif spi_do, usif spi_clk, usif spi_cs0, stp, clkout2, + mdio, dfe led0, dfe led1, ephy0 led0, ephy0 led1, ephy1 led0, ephy1 led1 + + functions: + spi, usif, cgu, exin, stp, ebu, mdio, dfe, ephy Definition of pin configurations: @@ -62,15 +139,32 @@ Optional subnode-properties: 0: none, 1: down, 2: up. - lantiq,open-drain: Boolean, enables open-drain on the defined pin. -Valid values for XWAY pin names: +Valid values for XWAY pin names: (DEPRECATED: Use DANUBE) Pinconf pins can be referenced via the names io0-io31. -Valid values for XR9 pin names: +Valid values for XR9 pin names: (DEPRECATED: Use xrX100/xRX200) Pinconf pins can be referenced via the names io0-io55. +Valid values for AMAZON pin names: + Pinconf pins can be referenced via the names io0-io31. + +Valid values for DANUBE pin names: + Pinconf pins can be referenced via the names io0-io31. + +Valid values for xRX100 pin names: + Pinconf pins can be referenced via the names io0-io55. + +Valid values for xRX200 pin names: + Pinconf pins can be referenced via the names io0-io49. + +Valid values for xRX300 pin names: + Pinconf pins can be referenced via the names io0-io1,io3-io6,io8-io11, + io13-io19,io23-io27,io34-io36, + io42-io43,io48-io61. + Example: gpio: pinmux@E100B10 { - compatible = "lantiq,pinctrl-xway"; + compatible = "lantiq,danube-pinctrl"; pinctrl-names = "default"; pinctrl-0 = <&state_default>; diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt index 0480bc31bfd71c624ef33d3e94e571fbd09083e7..9ffb0b276bb4864eab7a41368d754f035d4ab56b 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt @@ -4,10 +4,11 @@ The Mediatek's Pin controller is used to control SoC pins. Required properties: - compatible: value should be one of the following. - (a) "mediatek,mt8135-pinctrl", compatible with mt8135 pinctrl. - (b) "mediatek,mt8173-pinctrl", compatible with mt8173 pinctrl. - (c) "mediatek,mt6397-pinctrl", compatible with mt6397 pinctrl. - (d) "mediatek,mt8127-pinctrl", compatible with mt8127 pinctrl. + "mediatek,mt2701-pinctrl", compatible with mt2701 pinctrl. + "mediatek,mt6397-pinctrl", compatible with mt6397 pinctrl. + "mediatek,mt8127-pinctrl", compatible with mt8127 pinctrl. + "mediatek,mt8135-pinctrl", compatible with mt8135 pinctrl. + "mediatek,mt8173-pinctrl", compatible with mt8173 pinctrl. - pins-are-numbered: Specify the subnodes are using numbered pinmux to specify pins. - gpio-controller : Marks the device node as a gpio controller. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt new file mode 100644 index 0000000000000000000000000000000000000000..e312a71b2f94fcab78cdbcfeba3ea27c6c5798dc --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt @@ -0,0 +1,199 @@ +Qualcomm MSM8996 TLMM block + +This binding describes the Top Level Mode Multiplexer block found in the +MSM8996 platform. + +- compatible: + Usage: required + Value type: + Definition: must be "qcom,msm8996-pinctrl" + +- reg: + Usage: required + Value type: + Definition: the base address and size of the TLMM register space. + +- interrupts: + Usage: required + Value type: + Definition: should specify the TLMM summary IRQ. + +- interrupt-controller: + Usage: required + Value type: + Definition: identifies this node as an interrupt controller + +- #interrupt-cells: + Usage: required + Value type: + Definition: must be 2. Specifying the pin number and flags, as defined + in + +- gpio-controller: + Usage: required + Value type: + Definition: identifies this node as a gpio controller + +- #gpio-cells: + Usage: required + Value type: + Definition: must be 2. Specifying the pin number and flags, as defined + in + +Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for +a general description of GPIO and interrupt bindings. + +Please refer to pinctrl-bindings.txt in this directory for details of the +common pinctrl bindings used by client devices, including the meaning of the +phrase "pin configuration node". + +The pin configuration nodes act as a container for an arbitrary number of +subnodes. Each of these subnodes represents some desired configuration for a +pin, a group, or a list of pins or groups. This configuration can include the +mux function to select on those pin(s)/group(s), and various pin configuration +parameters, such as pull-up, drive strength, etc. + + +PIN CONFIGURATION NODES: + +The name of each subnode is not important; all subnodes should be enumerated +and processed purely based on their content. + +Each subnode only affects those parameters that are explicitly listed. In +other words, a subnode that lists a mux function but no pin configuration +parameters implies no information about any pin configuration parameters. +Similarly, a pin subnode that describes a pullup parameter implies no +information about e.g. the mux function. + + +The following generic properties as defined in pinctrl-bindings.txt are valid +to specify in a pin configuration subnode: + +- pins: + Usage: required + Value type: + Definition: List of gpio pins affected by the properties specified in + this subnode. + + Valid pins are: + gpio0-gpio149 + Supports mux, bias and drive-strength + + sdc1_clk, sdc1_cmd, sdc1_data sdc2_clk, sdc2_cmd, + sdc2_data sdc1_rclk + Supports bias and drive-strength + +- function: + Usage: required + Value type: + Definition: Specify the alternative function to be configured for the + specified pins. Functions are only valid for gpio pins. + Valid values are: + + blsp_uart1, blsp_spi1, blsp_i2c1, blsp_uim1, atest_tsens, + bimc_dte1, dac_calib0, blsp_spi8, blsp_uart8, blsp_uim8, + qdss_cti_trig_out_b, bimc_dte0, dac_calib1, qdss_cti_trig_in_b, + dac_calib2, atest_tsens2, atest_usb1, blsp_spi10, blsp_uart10, + blsp_uim10, atest_bbrx1, atest_usb13, atest_bbrx0, atest_usb12, + mdp_vsync, edp_lcd, blsp_i2c10, atest_gpsadc1, atest_usb11, + atest_gpsadc0, edp_hot, atest_usb10, m_voc, dac_gpio, atest_char, + cam_mclk, pll_bypassnl, qdss_stm7, blsp_i2c8, qdss_tracedata_b, + pll_reset, qdss_stm6, qdss_stm5, qdss_stm4, atest_usb2, cci_i2c, + qdss_stm3, dac_calib3, atest_usb23, atest_char3, dac_calib4, + qdss_stm2, atest_usb22, atest_char2, qdss_stm1, dac_calib5, + atest_usb21, atest_char1, dbg_out, qdss_stm0, dac_calib6, + atest_usb20, atest_char0, dac_calib10, qdss_stm10, + qdss_cti_trig_in_a, cci_timer4, blsp_spi6, blsp_uart6, blsp_uim6, + blsp2_spi, qdss_stm9, qdss_cti_trig_out_a, dac_calib11, + qdss_stm8, cci_timer0, qdss_stm13, dac_calib7, cci_timer1, + qdss_stm12, dac_calib8, cci_timer2, blsp1_spi, qdss_stm11, + dac_calib9, cci_timer3, cci_async, dac_calib12, blsp_i2c6, + qdss_tracectl_a, dac_calib13, qdss_traceclk_a, dac_calib14, + dac_calib15, hdmi_rcv, dac_calib16, hdmi_cec, pwr_modem, + dac_calib17, hdmi_ddc, pwr_nav, dac_calib18, pwr_crypto, + dac_calib19, hdmi_hot, dac_calib20, dac_calib21, pci_e0, + dac_calib22, dac_calib23, dac_calib24, tsif1_sync, dac_calib25, + sd_write, tsif1_error, blsp_spi2, blsp_uart2, blsp_uim2, + qdss_cti, blsp_i2c2, blsp_spi3, blsp_uart3, blsp_uim3, blsp_i2c3, + uim3, blsp_spi9, blsp_uart9, blsp_uim9, blsp10_spi, blsp_i2c9, + blsp_spi7, blsp_uart7, blsp_uim7, qdss_tracedata_a, blsp_i2c7, + qua_mi2s, gcc_gp1_clk_a, ssc_irq, uim4, blsp_spi11, blsp_uart11, + blsp_uim11, gcc_gp2_clk_a, gcc_gp3_clk_a, blsp_i2c11, cri_trng0, + cri_trng1, cri_trng, qdss_stm18, pri_mi2s, qdss_stm17, blsp_spi4, + blsp_uart4, blsp_uim4, qdss_stm16, qdss_stm15, blsp_i2c4, + qdss_stm14, dac_calib26, spkr_i2s, audio_ref, lpass_slimbus, + isense_dbg, tsense_pwm1, tsense_pwm2, btfm_slimbus, ter_mi2s, + qdss_stm22, qdss_stm21, qdss_stm20, qdss_stm19, gcc_gp1_clk_b, + sec_mi2s, blsp_spi5, blsp_uart5, blsp_uim5, gcc_gp2_clk_b, + gcc_gp3_clk_b, blsp_i2c5, blsp_spi12, blsp_uart12, blsp_uim12, + qdss_stm25, qdss_stm31, blsp_i2c12, qdss_stm30, qdss_stm29, + tsif1_clk, qdss_stm28, tsif1_en, tsif1_data, sdc4_cmd, qdss_stm27, + qdss_traceclk_b, tsif2_error, sdc43, vfr_1, qdss_stm26, tsif2_clk, + sdc4_clk, qdss_stm24, tsif2_en, sdc42, qdss_stm23, qdss_tracectl_b, + sd_card, tsif2_data, sdc41, tsif2_sync, sdc40, mdp_vsync_p_b, + ldo_en, mdp_vsync_s_b, ldo_update, blsp11_uart_tx_b, blsp11_uart_rx_b, + blsp11_i2c_sda_b, prng_rosc, blsp11_i2c_scl_b, uim2, uim1, uim_batt, + pci_e2, pa_indicator, adsp_ext, ddr_bist, qdss_tracedata_11, + qdss_tracedata_12, modem_tsync, nav_dr, nav_pps, pci_e1, gsm_tx, + qspi_cs, ssbi2, ssbi1, mss_lte, qspi_clk, qspi0, qspi1, qspi2, qspi3, + gpio + +- bias-disable: + Usage: optional + Value type: + Definition: The specified pins should be configued as no pull. + +- bias-pull-down: + Usage: optional + Value type: + Definition: The specified pins should be configued as pull down. + +- bias-pull-up: + Usage: optional + Value type: + Definition: The specified pins should be configued as pull up. + +- output-high: + Usage: optional + Value type: + Definition: The specified pins are configured in output mode, driven + high. + Not valid for sdc pins. + +- output-low: + Usage: optional + Value type: + Definition: The specified pins are configured in output mode, driven + low. + Not valid for sdc pins. + +- drive-strength: + Usage: optional + Value type: + Definition: Selects the drive strength for the specified pins, in mA. + Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16 + +Example: + + tlmm: pinctrl@01010000 { + compatible = "qcom,msm8996-pinctrl"; + reg = <0x01010000 0x300000>; + interrupts = <0 208 0>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + + uart_console_active: uart_console_active { + mux { + pins = "gpio4", "gpio5"; + function = "blsp_uart8"; + }; + + config { + pins = "gpio4", "gpio5"; + drive-strength = <2>; + bias-disable; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt index 1ae63c0acd40b2609e173380d6785d7174248525..a90c812ad6429abe0ef5282982e6fea66560b599 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt @@ -14,6 +14,7 @@ PMIC's from Qualcomm. "qcom,pm8917-gpio" "qcom,pm8921-gpio" "qcom,pm8941-gpio" + "qcom,pm8994-gpio" "qcom,pma8084-gpio" - reg: @@ -79,6 +80,7 @@ to specify in a pin configuration subnode: gpio1-gpio38 for pm8917 gpio1-gpio44 for pm8921 gpio1-gpio36 for pm8941 + gpio1-gpio22 for pm8994 gpio1-gpio22 for pma8084 - function: diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt index d7803a2a94e9be4c089a3ed69f8eaa23e935ee80..d74e631e10da1eb35252c97660d7044e1c502941 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt @@ -15,6 +15,7 @@ of PMIC's from Qualcomm. "qcom,pm8917-mpp", "qcom,pm8921-mpp", "qcom,pm8941-mpp", + "qcom,pm8994-mpp", "qcom,pma8084-mpp", - reg: diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt index 391ef4be8d509006446120fb83e73712e7384628..0cd701b1947fdc35bc8ec036cac82b053f237712 100644 --- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt @@ -21,7 +21,8 @@ defined as gpio sub-nodes of the pinmux controller. Required properties for iomux controller: - compatible: one of "rockchip,rk2928-pinctrl", "rockchip,rk3066a-pinctrl" "rockchip,rk3066b-pinctrl", "rockchip,rk3188-pinctrl" - "rockchip,rk3288-pinctrl", "rockchip,rk3368-pinctrl" + "rockchip,rk3228-pinctrl", "rockchip,rk3288-pinctrl" + "rockchip,rk3368-pinctrl" - rockchip,grf: phandle referencing a syscon providing the "general register files" diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt index 9d2a995293e650064923f0cc5f791ed57a34836a..6db16b90873a4d4fbcd820d4626e2ea80ee7c8ea 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt @@ -17,6 +17,7 @@ Required Properties: - "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller. - "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller. - "samsung,exynos5260-pinctrl": for Exynos5260 compatible pin-controller. + - "samsung,exynos5410-pinctrl": for Exynos5410 compatible pin-controller. - "samsung,exynos5420-pinctrl": for Exynos5420 compatible pin-controller. - "samsung,exynos7-pinctrl": for Exynos7 compatible pin-controller. diff --git a/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt deleted file mode 100644 index 20191315e444e94594e03da0965b1d8904be312a..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt +++ /dev/null @@ -1,163 +0,0 @@ -* Samsung S5M8767 Voltage and Current Regulator - -The Samsung S5M8767 is a multi-function device which includes voltage and -current regulators, rtc, charger controller and other sub-blocks. It is -interfaced to the host controller using a i2c interface. Each sub-block is -addressed by the host system using different i2c slave address. This document -describes the bindings for 'pmic' sub-block of s5m8767. - -Required properties: -- compatible: Should be "samsung,s5m8767-pmic". -- reg: Specifies the i2c slave address of the pmic block. It should be 0x66. - -- s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV) - units for buck2 when changing voltage using gpio dvs. Refer to [1] below - for additional information. - -- s5m8767,pmic-buck3-dvs-voltage: A set of 8 voltage values in micro-volt (uV) - units for buck3 when changing voltage using gpio dvs. Refer to [1] below - for additional information. - -- s5m8767,pmic-buck4-dvs-voltage: A set of 8 voltage values in micro-volt (uV) - units for buck4 when changing voltage using gpio dvs. Refer to [1] below - for additional information. - -- s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used - for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. - -[1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage' - property should specify atleast one voltage level (which would be a - safe operating voltage). - - If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, then all the eight voltage values for the - 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. - -Optional properties: -- interrupt-parent: Specifies the phandle of the interrupt controller to which - the interrupts from s5m8767 are delivered to. -- interrupts: Interrupt specifiers for two interrupt sources. - - First interrupt specifier is for 'irq1' interrupt. - - Second interrupt specifier is for 'alert' interrupt. -- s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs. -- s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs. -- s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs. - -Additional properties required if either of the optional properties are used: - -- s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from - the possible 8 options selectable by the dvs gpios. The value of this - property should be between 0 and 7. If not specified or if out of range, the - default value of this property is set to 0. - -- s5m8767,pmic-buck-dvs-gpios: GPIO specifiers for three host gpio's used - for dvs. The format of the gpio specifier depends in the gpio controller. - -Regulators: The regulators of s5m8767 that have to be instantiated should be -included in a sub-node named 'regulators'. Regulator nodes included in this -sub-node should be of the format as listed below. - - regulator_name { - ldo1_reg: LDO1 { - regulator-name = "VDD_ALIVE_1.0V"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; - regulator-always-on; - regulator-boot-on; - op_mode = <1>; /* Normal Mode */ - }; - }; -The above regulator entries are defined in regulator bindings documentation -except these properties: - - op_mode: describes the different operating modes of the LDO's with - power mode change in SOC. The different possible values are, - 0 - always off mode - 1 - on in normal mode - 2 - low power mode - 3 - suspend mode - - s5m8767,pmic-ext-control-gpios: (optional) GPIO specifier for one - GPIO controlling this regulator (enable/disable); This is - valid only for buck9. - -The following are the names of the regulators that the s5m8767 pmic block -supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number -as per the datasheet of s5m8767. - - - LDOn - - valid values for n are 1 to 28 - - Example: LDO1, LDO2, LDO28 - - BUCKn - - valid values for n are 1 to 9. - - Example: BUCK1, BUCK2, BUCK9 - -The bindings inside the regulator nodes use the standard regulator bindings -which are documented elsewhere. - -Example: - - s5m8767_pmic@66 { - compatible = "samsung,s5m8767-pmic"; - reg = <0x66>; - - s5m8767,pmic-buck2-uses-gpio-dvs; - s5m8767,pmic-buck3-uses-gpio-dvs; - s5m8767,pmic-buck4-uses-gpio-dvs; - - s5m8767,pmic-buck-default-dvs-idx = <0>; - - s5m8767,pmic-buck-dvs-gpios = <&gpx0 0 0>, /* DVS1 */ - <&gpx0 1 0>, /* DVS2 */ - <&gpx0 2 0>; /* DVS3 */ - - s5m8767,pmic-buck-ds-gpios = <&gpx2 3 0>, /* SET1 */ - <&gpx2 4 0>, /* SET2 */ - <&gpx2 5 0>; /* SET3 */ - - s5m8767,pmic-buck2-dvs-voltage = <1350000>, <1300000>, - <1250000>, <1200000>, - <1150000>, <1100000>, - <1000000>, <950000>; - - s5m8767,pmic-buck3-dvs-voltage = <1100000>, <1100000>, - <1100000>, <1100000>, - <1000000>, <1000000>, - <1000000>, <1000000>; - - s5m8767,pmic-buck4-dvs-voltage = <1200000>, <1200000>, - <1200000>, <1200000>, - <1200000>, <1200000>, - <1200000>, <1200000>; - - regulators { - ldo1_reg: LDO1 { - regulator-name = "VDD_ABB_3.3V"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - op_mode = <1>; /* Normal Mode */ - }; - - ldo2_reg: LDO2 { - regulator-name = "VDD_ALIVE_1.1V"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; - regulator-always-on; - }; - - buck1_reg: BUCK1 { - regulator-name = "VDD_MIF_1.2V"; - regulator-min-microvolt = <950000>; - regulator-max-microvolt = <1350000>; - regulator-always-on; - regulator-boot-on; - }; - - vemmc_reg: BUCK9 { - regulator-name = "VMEM_VDD_2.8V"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; - op_mode = <3>; /* Standby Mode */ - s5m8767,pmic-ext-control-gpios = <&gpk0 2 0>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.txt b/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.txt new file mode 100644 index 0000000000000000000000000000000000000000..bae3c7f838cf710d890480c9b139d096fa98524c --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mpa01.txt @@ -0,0 +1,79 @@ +Binding for Samsung S2MPA01 regulator block +=========================================== + +This is a part of device tree bindings for S2M family multi-function devices. +More information can be found in bindings/mfd/sec-core.txt file. + +The S2MPA01 device provide buck and LDO regulators. + +To register these with regulator framework instantiate under main device node +a sub-node named "regulators" with more sub-nodes for each regulator using the +common regulator binding documented in: + - Documentation/devicetree/bindings/regulator/regulator.txt + + +Names of regulators supported by S2MPA01 device: + - LDOn + - valid values for n are 1 to 26 + - Example: LDO1, LD02, LDO26 + - BUCKn + - valid values for n are 1 to 10. + - Example: BUCK1, BUCK2, BUCK9 +Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number +as per the datasheet of device. + + +Optional properties of buck regulator nodes under "regulators" sub-node: + - regulator-ramp-delay: ramp delay in uV/us. May be 6250, 12500 + (default), 25000, or 50000. May be 0 for disabling the ramp delay on + BUCK{1,2,3,4}. + + In the absence of the regulator-ramp-delay property, the default ramp + delay will be used. + + Note: Some bucks share the ramp rate setting i.e. same ramp value + will be set for a particular group of bucks so provide the same + regulator-ramp-delay value for them. + Groups sharing ramp rate: + - buck{1,6}, + - buck{2,4}, + - buck{8,9,10}. + +Example: + + s2mpa01_pmic@66 { + compatible = "samsung,s2mpa01-pmic"; + reg = <0x66>; + + regulators { + ldo1_reg: LDO1 { + regulator-name = "VDD_ALIVE"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + }; + + ldo2_reg: LDO2 { + regulator-name = "VDDQ_MMC2"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + }; + + buck1_reg: BUCK1 { + regulator-name = "vdd_mif"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + }; + + buck2_reg: BUCK2 { + regulator-name = "vdd_arm"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + regulator-ramp-delay = <50000>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/samsung,s2mps11.txt b/Documentation/devicetree/bindings/regulator/samsung,s2mps11.txt new file mode 100644 index 0000000000000000000000000000000000000000..27a48bf1b185b1c5e4757d21c52daf94841e2bf4 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/samsung,s2mps11.txt @@ -0,0 +1,102 @@ +Binding for Samsung S2M family regulator block +============================================== + +This is a part of device tree bindings for S2M family multi-function devices. +More information can be found in bindings/mfd/sec-core.txt file. + +The S2MPS11/13/14/15 and S2MPU02 devices provide buck and LDO regulators. + +To register these with regulator framework instantiate under main device node +a sub-node named "regulators" with more sub-nodes for each regulator using the +common regulator binding documented in: + - Documentation/devicetree/bindings/regulator/regulator.txt + + +Names of regulators supported by different devices: + - LDOn + - valid values for n are: + - S2MPS11: 1 to 38 + - S2MPS13: 1 to 40 + - S2MPS14: 1 to 25 + - S2MPS15: 1 to 27 + - S2MPU02: 1 to 28 + - Example: LDO1, LDO2, LDO28 + - BUCKn + - valid values for n are: + - S2MPS11: 1 to 10 + - S2MPS13: 1 to 10 + - S2MPS14: 1 to 5 + - S2MPS15: 1 to 10 + - S2MPU02: 1 to 7 + - Example: BUCK1, BUCK2, BUCK9 +Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number +as per the datasheet of device. + + +Optional properties of the nodes under "regulators" sub-node: + - regulator-ramp-delay: ramp delay in uV/us. May be 6250, 12500, + 25000 (default) or 50000. + + Additionally S2MPS11 supports disabling ramp delay for BUCK{2,3,4,6} + by setting it to <0>. + + Note: On S2MPS11 some bucks share the ramp rate setting i.e. same ramp value + will be set for a particular group of bucks so provide the same + regulator-ramp-delay value for them. + Groups sharing ramp rate: + - buck{1,6}, + - buck{3,4}, + - buck{7,8,10}. + + - samsung,ext-control-gpios: On S2MPS14 the LDO10, LDO11 and LDO12 can be + configured to external control over GPIO. To turn this feature on this + property must be added to the regulator sub-node: + - samsung,ext-control-gpios: GPIO specifier for one GPIO + controlling this regulator (enable/disable) + Example: + LDO12 { + regulator-name = "V_EMMC_2.8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + samsung,ext-control-gpios = <&gpk0 2 0>; + }; + + +Example: + + s2mps11_pmic@66 { + compatible = "samsung,s2mps11-pmic"; + reg = <0x66>; + + regulators { + ldo1_reg: LDO1 { + regulator-name = "VDD_ABB_3.3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + ldo2_reg: LDO2 { + regulator-name = "VDD_ALIVE_1.1V"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + + buck1_reg: BUCK1 { + regulator-name = "vdd_mif"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + }; + + buck2_reg: BUCK2 { + regulator-name = "vdd_arm"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + regulator-ramp-delay = <50000>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt new file mode 100644 index 0000000000000000000000000000000000000000..093edda0c8dfcaa4a3b1d7fbb731ba7e84577117 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt @@ -0,0 +1,145 @@ +Binding for Samsung S5M8767 regulator block +=========================================== + +This is a part of device tree bindings for S5M family multi-function devices. +More information can be found in bindings/mfd/sec-core.txt file. + +The S5M8767 device provide buck and LDO regulators. + +To register these with regulator framework instantiate under main device node +a sub-node named "regulators" with more sub-nodes for each regulator using the +common regulator binding documented in: + - Documentation/devicetree/bindings/regulator/regulator.txt + + +Required properties of the main device node (the parent!): + - s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV) + units for buck2 when changing voltage using gpio dvs. Refer to [1] below + for additional information. + + - s5m8767,pmic-buck3-dvs-voltage: A set of 8 voltage values in micro-volt (uV) + units for buck3 when changing voltage using gpio dvs. Refer to [1] below + for additional information. + + - s5m8767,pmic-buck4-dvs-voltage: A set of 8 voltage values in micro-volt (uV) + units for buck4 when changing voltage using gpio dvs. Refer to [1] below + for additional information. + + - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used + for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. + + [1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional + property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage' + property should specify atleast one voltage level (which would be a + safe operating voltage). + + If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional + property is specified, then all the eight voltage values for the + 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. + +Optional properties of the main device node (the parent!): + - s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs. + - s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs. + - s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs. + +Additional properties required if either of the optional properties are used: + + - s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from + the possible 8 options selectable by the dvs gpios. The value of this + property should be between 0 and 7. If not specified or if out of range, the + default value of this property is set to 0. + + - s5m8767,pmic-buck-dvs-gpios: GPIO specifiers for three host gpio's used + for dvs. The format of the gpio specifier depends in the gpio controller. + + +Names of regulators supported by S5M8767 device: + - LDOn + - valid values for n are 1 to 28 + - Example: LDO1, LDO2, LDO28 + - BUCKn + - valid values for n are 1 to 9. + - Example: BUCK1, BUCK2, BUCK9 +Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number +as per the datasheet of device. + + +Optional properties of the nodes under "regulators" sub-node: + - op_mode: describes the different operating modes of the LDO's with + power mode change in SOC. The different possible values are, + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + - s5m8767,pmic-ext-control-gpios: (optional) GPIO specifier for one + GPIO controlling this regulator + (enable/disable); This is valid only + for buck9. + +Example: + + s5m8767_pmic@66 { + compatible = "samsung,s5m8767-pmic"; + reg = <0x66>; + + s5m8767,pmic-buck2-uses-gpio-dvs; + s5m8767,pmic-buck3-uses-gpio-dvs; + s5m8767,pmic-buck4-uses-gpio-dvs; + + s5m8767,pmic-buck-default-dvs-idx = <0>; + + s5m8767,pmic-buck-dvs-gpios = <&gpx0 0 0>, /* DVS1 */ + <&gpx0 1 0>, /* DVS2 */ + <&gpx0 2 0>; /* DVS3 */ + + s5m8767,pmic-buck-ds-gpios = <&gpx2 3 0>, /* SET1 */ + <&gpx2 4 0>, /* SET2 */ + <&gpx2 5 0>; /* SET3 */ + + s5m8767,pmic-buck2-dvs-voltage = <1350000>, <1300000>, + <1250000>, <1200000>, + <1150000>, <1100000>, + <1000000>, <950000>; + + s5m8767,pmic-buck3-dvs-voltage = <1100000>, <1100000>, + <1100000>, <1100000>, + <1000000>, <1000000>, + <1000000>, <1000000>; + + s5m8767,pmic-buck4-dvs-voltage = <1200000>, <1200000>, + <1200000>, <1200000>, + <1200000>, <1200000>, + <1200000>, <1200000>; + + regulators { + ldo1_reg: LDO1 { + regulator-name = "VDD_ABB_3.3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + op_mode = <1>; /* Normal Mode */ + }; + + ldo2_reg: LDO2 { + regulator-name = "VDD_ALIVE_1.1V"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + + buck1_reg: BUCK1 { + regulator-name = "VDD_MIF_1.2V"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1350000>; + regulator-always-on; + regulator-boot-on; + }; + + vemmc_reg: BUCK9 { + regulator-name = "VMEM_VDD_2.8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + op_mode = <3>; /* Standby Mode */ + s5m8767,pmic-ext-control-gpios = <&gpk0 2 0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt b/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt new file mode 100644 index 0000000000000000000000000000000000000000..f67e761bcc18e8f37f5f56e89f7f8e514ab6c2f4 --- /dev/null +++ b/Documentation/devicetree/bindings/scsi/hisilicon-sas.txt @@ -0,0 +1,69 @@ +* HiSilicon SAS controller + +The HiSilicon SAS controller supports SAS/SATA. + +Main node required properties: + - compatible : value should be as follows: + (a) "hisilicon,hip05-sas-v1" for v1 hw in hip05 chipset + - sas-addr : array of 8 bytes for host SAS address + - reg : Address and length of the SAS register + - hisilicon,sas-syscon: phandle of syscon used for sas control + - ctrl-reset-reg : offset to controller reset register in ctrl reg + - ctrl-reset-sts-reg : offset to controller reset status register in ctrl reg + - ctrl-clock-ena-reg : offset to controller clock enable register in ctrl reg + - queue-count : number of delivery and completion queues in the controller + - phy-count : number of phys accessible by the controller + - interrupts : Interrupts for phys, completion queues, and fatal + sources; the interrupts are ordered in 3 groups, as follows: + - Phy interrupts + - Completion queue interrupts + - Fatal interrupts + Phy interrupts : Each phy has 3 interrupt sources: + - broadcast + - phyup + - abnormal + The phy interrupts are ordered into groups of 3 per phy + (broadcast, phyup, and abnormal) in increasing order. + Completion queue interrupts : each completion queue has 1 + interrupt source. + The interrupts are ordered in increasing order. + Fatal interrupts : the fatal interrupts are ordered as follows: + - ECC + - AXI bus + +Example: + sas0: sas@c1000000 { + compatible = "hisilicon,hip05-sas-v1"; + sas-addr = [50 01 88 20 16 00 00 0a]; + reg = <0x0 0xc1000000 0x0 0x10000>; + hisilicon,sas-syscon = <&pcie_sas>; + ctrl-reset-reg = <0xa60>; + ctrl-reset-sts-reg = <0x5a30>; + ctrl-clock-ena-reg = <0x338>; + queue-count = <32>; + phy-count = <8>; + dma-coherent; + interrupt-parent = <&mbigen_dsa>; + interrupts = <259 4>,<263 4>,<264 4>,/* phy0 */ + <269 4>,<273 4>,<274 4>,/* phy1 */ + <279 4>,<283 4>,<284 4>,/* phy2 */ + <289 4>,<293 4>,<294 4>,/* phy3 */ + <299 4>,<303 4>,<304 4>,/* phy4 */ + <309 4>,<313 4>,<314 4>,/* phy5 */ + <319 4>,<323 4>,<324 4>,/* phy6 */ + <329 4>,<333 4>,<334 4>,/* phy7 */ + <336 1>,<337 1>,<338 1>,/* cq0-2 */ + <339 1>,<340 1>,<341 1>,/* cq3-5 */ + <342 1>,<343 1>,<344 1>,/* cq6-8 */ + <345 1>,<346 1>,<347 1>,/* cq9-11 */ + <348 1>,<349 1>,<350 1>,/* cq12-14 */ + <351 1>,<352 1>,<353 1>,/* cq15-17 */ + <354 1>,<355 1>,<356 1>,/* cq18-20 */ + <357 1>,<358 1>,<359 1>,/* cq21-23 */ + <360 1>,<361 1>,<362 1>,/* cq24-26 */ + <363 1>,<364 1>,<365 1>,/* cq27-29 */ + <366 1>,<367 1>/* cq30-31 */ + <376 4>,/* fatal ecc */ + <381 4>;/* fatal axi */ + status = "disabled"; + }; diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt index 73f825e5e64488151b8a7b49c0f99f06f0edb73c..401b1b33c2c406520b2dd881e506cf75dc9d3042 100644 --- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt +++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt @@ -2,7 +2,7 @@ Required properties: - - compatible: Must contain one of the following: + - compatible: Must contain one or more of the following: - "renesas,scif-r7s72100" for R7S72100 (RZ/A1H) SCIF compatible UART. - "renesas,scifa-r8a73a4" for R8A73A4 (R-Mobile APE6) SCIFA compatible UART. @@ -15,10 +15,14 @@ Required properties: - "renesas,scifa-r8a7790" for R8A7790 (R-Car H2) SCIFA compatible UART. - "renesas,scifb-r8a7790" for R8A7790 (R-Car H2) SCIFB compatible UART. - "renesas,hscif-r8a7790" for R8A7790 (R-Car H2) HSCIF compatible UART. - - "renesas,scif-r8a7791" for R8A7791 (R-Car M2) SCIF compatible UART. - - "renesas,scifa-r8a7791" for R8A7791 (R-Car M2) SCIFA compatible UART. - - "renesas,scifb-r8a7791" for R8A7791 (R-Car M2) SCIFB compatible UART. - - "renesas,hscif-r8a7791" for R8A7791 (R-Car M2) HSCIF compatible UART. + - "renesas,scif-r8a7791" for R8A7791 (R-Car M2-W) SCIF compatible UART. + - "renesas,scifa-r8a7791" for R8A7791 (R-Car M2-W) SCIFA compatible UART. + - "renesas,scifb-r8a7791" for R8A7791 (R-Car M2-W) SCIFB compatible UART. + - "renesas,hscif-r8a7791" for R8A7791 (R-Car M2-W) HSCIF compatible UART. + - "renesas,scif-r8a7793" for R8A7793 (R-Car M2-N) SCIF compatible UART. + - "renesas,scifa-r8a7793" for R8A7793 (R-Car M2-N) SCIFA compatible UART. + - "renesas,scifb-r8a7793" for R8A7793 (R-Car M2-N) SCIFB compatible UART. + - "renesas,hscif-r8a7793" for R8A7793 (R-Car M2-N) HSCIF compatible UART. - "renesas,scif-r8a7794" for R8A7794 (R-Car E2) SCIF compatible UART. - "renesas,scifa-r8a7794" for R8A7794 (R-Car E2) SCIFA compatible UART. - "renesas,scifb-r8a7794" for R8A7794 (R-Car E2) SCIFB compatible UART. @@ -27,6 +31,14 @@ Required properties: - "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART. - "renesas,scifa-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFA compatible UART. - "renesas,scifb-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFB compatible UART. + - "renesas,rcar-gen1-scif" for R-Car Gen1 SCIF compatible UART, + - "renesas,rcar-gen2-scif" for R-Car Gen2 SCIF compatible UART, + - "renesas,rcar-gen3-scif" for R-Car Gen3 SCIF compatible UART, + - "renesas,rcar-gen2-scifa" for R-Car Gen2 SCIFA compatible UART, + - "renesas,rcar-gen2-scifb" for R-Car Gen2 SCIFB compatible UART, + - "renesas,rcar-gen1-hscif" for R-Car Gen1 HSCIF compatible UART, + - "renesas,rcar-gen2-hscif" for R-Car Gen2 HSCIF compatible UART, + - "renesas,rcar-gen3-hscif" for R-Car Gen3 HSCIF compatible UART, - "renesas,scif" for generic SCIF compatible UART. - "renesas,scifa" for generic SCIFA compatible UART. - "renesas,scifb" for generic SCIFB compatible UART. @@ -34,15 +46,26 @@ Required properties: - "renesas,sci" for generic SCI compatible UART. When compatible with the generic version, nodes must list the - SoC-specific version corresponding to the platform first followed by the - generic version. + SoC-specific version corresponding to the platform first, followed by the + family-specific and/or generic versions. - reg: Base address and length of the I/O registers used by the UART. - interrupts: Must contain an interrupt-specifier for the SCIx interrupt. - clocks: Must contain a phandle and clock-specifier pair for each entry in clock-names. - - clock-names: Must contain "sci_ick" for the SCIx UART interface clock. + - clock-names: Must contain "fck" for the SCIx UART functional clock. + Apart from the divided functional clock, there may be other possible + sources for the sampling clock, depending on SCIx variant. + On (H)SCI(F) and some SCIFA, an additional clock may be specified: + - "hsck" for the optional external clock input (on HSCIF), + - "sck" for the optional external clock input (on other variants). + On UARTs equipped with a Baud Rate Generator for External Clock (BRG) + (some SCIF and HSCIF), additional clocks may be specified: + - "brg_int" for the optional internal clock source for the frequency + divider (typically the (AXI or SHwy) bus clock), + - "scif_clk" for the optional external clock source for the frequency + divider (SCIF_CLK). Note: Each enabled SCIx UART should have an alias correctly numbered in the "aliases" node. @@ -58,12 +81,13 @@ Example: }; scifa0: serial@e6c40000 { - compatible = "renesas,scifa-r8a7790", "renesas,scifa"; + compatible = "renesas,scifa-r8a7790", + "renesas,rcar-gen2-scifa", "renesas,scifa"; reg = <0 0xe6c40000 0 64>; interrupt-parent = <&gic>; interrupts = <0 144 IRQ_TYPE_LEVEL_HIGH>; clocks = <&mstp2_clks R8A7790_CLK_SCIFA0>; - clock-names = "sci_ick"; + clock-names = "fck"; dmas = <&dmac0 0x21>, <&dmac0 0x22>; dma-names = "tx", "rx"; }; diff --git a/Documentation/devicetree/bindings/sound/atmel-classd.txt b/Documentation/devicetree/bindings/sound/atmel-classd.txt index 0018451c435148c4cd39b592e04c8e338d4ee3c9..549e701cb7a1c58620863fc0a5e5b08c313ff4ef 100644 --- a/Documentation/devicetree/bindings/sound/atmel-classd.txt +++ b/Documentation/devicetree/bindings/sound/atmel-classd.txt @@ -16,6 +16,10 @@ Required properties: Required elements: "pclk", "gclk" and "aclk". - clocks Please refer to clock-bindings.txt. +- assigned-clocks + Should be <&classd_gclk>. +- assigned-clock-parents + Should be <&audio_pll_pmc>. Optional properties: - pinctrl-names, pinctrl-0 @@ -43,6 +47,8 @@ classd: classd@fc048000 { dma-names = "tx"; clocks = <&classd_clk>, <&classd_gclk>, <&audio_pll_pmc>; clock-names = "pclk", "gclk", "aclk"; + assigned-clocks = <&classd_gclk>; + assigned-clock-parents = <&audio_pll_pmc>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_classd_default>; diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt index e045e90a0924bc6e4d8a1861cd1a43b1a7a8d178..68c4e8d96bed68c4c3d0ea09b10cbe8cb8170515 100644 --- a/Documentation/devicetree/bindings/sound/wm8994.txt +++ b/Documentation/devicetree/bindings/sound/wm8994.txt @@ -30,7 +30,7 @@ Optional properties: - #interrupt-cells: the number of cells to describe an IRQ, this should be 2. The first cell is the IRQ number. The second cell is the flags, encoded as the trigger masks from - Documentation/devicetree/bindings/interrupts.txt + Documentation/devicetree/bindings/interrupt-controller/interrupts.txt - clocks : A list of up to two phandle and clock specifier pairs - clock-names : A list of clock names sorted in the same order as clocks. diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt index 705075da2f10156e92a60828177c8483ee16eeec..aa005c1d10d95756921ebb946d54d682f72942d5 100644 --- a/Documentation/devicetree/bindings/spi/sh-msiof.txt +++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt @@ -10,6 +10,7 @@ Required properties: "renesas,msiof-r8a7792" (R-Car V2H) "renesas,msiof-r8a7793" (R-Car M2-N) "renesas,msiof-r8a7794" (R-Car E2) + "renesas,msiof-sh73a0" (SH-Mobile AG5) - reg : A list of offsets and lengths of the register sets for the device. If only one register set is present, it is to be used diff --git a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt index ce363c923f446b66dcfffc51274da4623630f15d..e43f4cf4cf35c9e4e61537b73e1f55e9ee67d452 100644 --- a/Documentation/devicetree/bindings/spi/spi-mt65xx.txt +++ b/Documentation/devicetree/bindings/spi/spi-mt65xx.txt @@ -2,9 +2,10 @@ Binding for MTK SPI controller Required properties: - compatible: should be one of the following. - - mediatek,mt8173-spi: for mt8173 platforms - - mediatek,mt8135-spi: for mt8135 platforms + - mediatek,mt2701-spi: for mt2701 platforms - mediatek,mt6589-spi: for mt6589 platforms + - mediatek,mt8135-spi: for mt8135 platforms + - mediatek,mt8173-spi: for mt8173 platforms - #address-cells: should be 1. @@ -29,10 +30,10 @@ Required properties: muxes clock, and "spi-clk" for the clock gate. Optional properties: --cs-gpios: see spi-bus.txt, only required for MT8173. +-cs-gpios: see spi-bus.txt. - mediatek,pad-select: specify which pins group(ck/mi/mo/cs) spi - controller used. This is a array, the element value should be 0~3, + controller used. This is an array, the element value should be 0~3, only required for MT8173. 0: specify GPIO69,70,71,72 for spi pins. 1: specify GPIO102,103,104,105 for spi pins. diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu-sram.txt b/Documentation/devicetree/bindings/sram/rockchip-pmu-sram.txt similarity index 100% rename from Documentation/devicetree/bindings/arm/rockchip/pmu-sram.txt rename to Documentation/devicetree/bindings/sram/rockchip-pmu-sram.txt diff --git a/Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt b/Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt similarity index 92% rename from Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt rename to Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt index d9416fb8db6f7f0e5d2ab43031bf88c6d227360c..800701ecffca34235d46d00f29fd4cbabdaac5bd 100644 --- a/Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt +++ b/Documentation/devicetree/bindings/sram/rockchip-smp-sram.txt @@ -12,7 +12,7 @@ Required sub-node properties: - compatible : should be "rockchip,rk3066-smp-sram" The rest of the properties should follow the generic mmio-sram discription -found in ../../misc/sram.txt +found in Documentation/devicetree/bindings/sram/sram.txt Example: diff --git a/Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt b/Documentation/devicetree/bindings/sram/samsung-sram.txt similarity index 95% rename from Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt rename to Documentation/devicetree/bindings/sram/samsung-sram.txt index 4a0a4f70a0ce31eec52698a62a8d5a549bd5739f..6bc474b2b885a624ca0b08d1fc7dc83fc60deebf 100644 --- a/Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt +++ b/Documentation/devicetree/bindings/sram/samsung-sram.txt @@ -15,7 +15,7 @@ Required sub-node properties: "samsung,exynos4210-sysram-ns" : for Non-secure SYSRAM The rest of the properties should follow the generic mmio-sram discription -found in ../../misc/sysram.txt +found in Documentation/devicetree/bindings/sram/sram.txt Example: diff --git a/Documentation/devicetree/bindings/misc/sram.txt b/Documentation/devicetree/bindings/sram/sram.txt similarity index 100% rename from Documentation/devicetree/bindings/misc/sram.txt rename to Documentation/devicetree/bindings/sram/sram.txt diff --git a/Documentation/devicetree/bindings/soc/sunxi/sram.txt b/Documentation/devicetree/bindings/sram/sunxi-sram.txt similarity index 97% rename from Documentation/devicetree/bindings/soc/sunxi/sram.txt rename to Documentation/devicetree/bindings/sram/sunxi-sram.txt index 067698112f5fda3366bfeac849dc2071981281f9..8d5665468fe7e6919d3bf4f3e646e0ae571eeb5d 100644 --- a/Documentation/devicetree/bindings/soc/sunxi/sram.txt +++ b/Documentation/devicetree/bindings/sram/sunxi-sram.txt @@ -16,7 +16,7 @@ SRAM nodes ---------- Each SRAM is described using the mmio-sram bindings documented in -Documentation/devicetree/bindings/misc/sram.txt +Documentation/devicetree/bindings/sram/sram.txt Each SRAM will have SRAM sections that are going to be handled by the SRAM controller as subnodes. These sections are represented following diff --git a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt b/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt new file mode 100644 index 0000000000000000000000000000000000000000..c59e27c632c1778883bbadfbff151290f2413c13 --- /dev/null +++ b/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt @@ -0,0 +1,31 @@ +Hi6220 SoC ION +=================================================================== +Required properties: +- compatible : "hisilicon,hi6220-ion" +- list of the ION heaps + - heap name : maybe heap_sys_user@0 + - heap id : id should be unique in the system. + - heap base : base ddr address of the heap,0 means that + it is dynamic. + - heap size : memory size and 0 means it is dynamic. + - heap type : the heap type of the heap, please also + see the define in ion.h(drivers/staging/android/uapi/ion.h) +------------------------------------------------------------------- +Example: + hi6220-ion { + compatible = "hisilicon,hi6220-ion"; + heap_sys_user@0 { + heap-name = "sys_user"; + heap-id = <0x0>; + heap-base = <0x0>; + heap-size = <0x0>; + heap-type = "ion_system"; + }; + heap_sys_contig@0 { + heap-name = "sys_contig"; + heap-id = <0x1>; + heap-base = <0x0>; + heap-size = <0x0>; + heap-type = "ion_system_contig"; + }; + }; diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt index fd132cbee70eaf8b9ca14272ec888367c580dc46..221368207ca4c60a57794a3288244dd9307b493e 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.txt +++ b/Documentation/devicetree/bindings/usb/dwc2.txt @@ -4,6 +4,7 @@ Platform DesignWare HS OTG USB 2.0 controller Required properties: - compatible : One of: - brcm,bcm2835-usb: The DWC2 USB controller instance in the BCM2835 SoC. + - hisilicon,hi6220-usb: The DWC2 USB controller instance in the hi6220 SoC. - rockchip,rk3066-usb: The DWC2 USB controller instance in the rk3066 Soc; - "rockchip,rk3188-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3188 Soc; - "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc; diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.txt b/Documentation/devicetree/bindings/usb/dwc3-xilinx.txt new file mode 100644 index 0000000000000000000000000000000000000000..30361b32a46024ef118a9fe5858eb1eb29288bf6 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.txt @@ -0,0 +1,33 @@ +Xilinx SuperSpeed DWC3 USB SoC controller + +Required properties: +- compatible: Should contain "xlnx,zynqmp-dwc3" +- clocks: A list of phandles for the clocks listed in clock-names +- clock-names: Should contain the following: + "bus_clk" Master/Core clock, have to be >= 125 MHz for SS + operation and >= 60MHz for HS operation + + "ref_clk" Clock source to core during PHY power down + +Required child node: +A child node must exist to represent the core DWC3 IP block. The name of +the node is not important. The content of the node is defined in dwc3.txt. + +Example device node: + + usb@0 { + #address-cells = <0x2>; + #size-cells = <0x1>; + status = "okay"; + compatible = "xlnx,zynqmp-dwc3"; + clock-names = "bus_clk" "ref_clk"; + clocks = <&clk125>, <&clk125>; + ranges; + + dwc3@fe200000 { + compatible = "snps,dwc3"; + reg = <0x0 0xfe200000 0x40000>; + interrupts = <0x0 0x41 0x4>; + dr_mode = "host"; + }; + }; diff --git a/Documentation/devicetree/bindings/usb/mt8173-xhci.txt b/Documentation/devicetree/bindings/usb/mt8173-xhci.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3a7ffa48852f798bdee848ac8f8b2c52fd7b33a --- /dev/null +++ b/Documentation/devicetree/bindings/usb/mt8173-xhci.txt @@ -0,0 +1,51 @@ +MT8173 xHCI + +The device node for Mediatek SOC USB3.0 host controller + +Required properties: + - compatible : should contain "mediatek,mt8173-xhci" + - reg : specifies physical base address and size of the registers, + the first one for MAC, the second for IPPC + - interrupts : interrupt used by the controller + - power-domains : a phandle to USB power domain node to control USB's + mtcmos + - vusb33-supply : regulator of USB avdd3.3v + + - clocks : a list of phandle + clock-specifier pairs, one for each + entry in clock-names + - clock-names : must contain + "sys_ck": for clock of xHCI MAC + "wakeup_deb_p0": for USB wakeup debounce clock of port0 + "wakeup_deb_p1": for USB wakeup debounce clock of port1 + + - phys : a list of phandle + phy specifier pairs + +Optional properties: + - mediatek,wakeup-src : 1: ip sleep wakeup mode; 2: line state wakeup + mode; + - mediatek,syscon-wakeup : phandle to syscon used to access USB wakeup + control register, it depends on "mediatek,wakeup-src". + - vbus-supply : reference to the VBUS regulator; + - usb3-lpm-capable : supports USB3.0 LPM + +Example: +usb30: usb@11270000 { + compatible = "mediatek,mt8173-xhci"; + reg = <0 0x11270000 0 0x1000>, + <0 0x11280700 0 0x0100>; + interrupts = ; + power-domains = <&scpsys MT8173_POWER_DOMAIN_USB>; + clocks = <&topckgen CLK_TOP_USB30_SEL>, + <&pericfg CLK_PERI_USB0>, + <&pericfg CLK_PERI_USB1>; + clock-names = "sys_ck", + "wakeup_deb_p0", + "wakeup_deb_p1"; + phys = <&phy_port0 PHY_TYPE_USB3>, + <&phy_port1 PHY_TYPE_USB2>; + vusb33-supply = <&mt6397_vusb_reg>; + vbus-supply = <&usb_p1_vbus>; + usb3-lpm-capable; + mediatek,syscon-wakeup = <&pericfg>; + mediatek,wakeup-src = <1>; +}; diff --git a/Documentation/devicetree/bindings/usb/octeon-usb.txt b/Documentation/devicetree/bindings/usb/octeon-usb.txt new file mode 100644 index 0000000000000000000000000000000000000000..205c8d24d6e31487f412cc88fe860a4794765197 --- /dev/null +++ b/Documentation/devicetree/bindings/usb/octeon-usb.txt @@ -0,0 +1,62 @@ +OCTEON/OCTEON+ USB BLOCK + +1) Main node + + Required properties: + + - compatible: must be "cavium,octeon-5750-usbn" + + - reg: specifies the physical base address of the USBN block and + the length of the memory mapped region. + + - #address-cells: specifies the number of cells needed to encode an + address. The value must be 2. + + - #size-cells: specifies the number of cells used to represent the size + of an address. The value must be 2. + + - ranges: specifies the translation between child address space and parent + address space. + + - clock-frequency: speed of the USB reference clock. Allowed values are + 12000000, 24000000 or 48000000. + + - cavium,refclk-type: type of the USB reference clock. Allowed values are + "crystal" or "external". + + - refclk-frequency: deprecated, use "clock-frequency". + + - refclk-type: deprecated, use "cavium,refclk-type". + +2) Child node + + The main node must have one child node which describes the built-in + USB controller. + + Required properties: + + - compatible: must be "cavium,octeon-5750-usbc" + + - reg: specifies the physical base address of the USBC block and + the length of the memory mapped region. + + - interrupts: specifies the interrupt number for the USB controller. + +3) Example: + + usbn: usbn@1180068000000 { + compatible = "cavium,octeon-5750-usbn"; + reg = <0x11800 0x68000000 0x0 0x1000>; + ranges; /* Direct mapping */ + #address-cells = <2>; + #size-cells = <2>; + clock-frequency = <12000000>; + cavium,refclk-type = "crystal"; + + usbc@16f0010000000 { + compatible = "cavium,octeon-5750-usbc"; + reg = <0x16f00 0x10000000 0x0 0x80000>; + interrupts = <0 56>; + }; + }; + diff --git a/Documentation/devicetree/bindings/usb/renesas_usb3.txt b/Documentation/devicetree/bindings/usb/renesas_usb3.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d52766f07b907b187b27c336c02ba2a9bc6210d --- /dev/null +++ b/Documentation/devicetree/bindings/usb/renesas_usb3.txt @@ -0,0 +1,23 @@ +Renesas Electronics USB3.0 Peripheral driver + +Required properties: + - compatible: Must contain one of the following: + - "renesas,r8a7795-usb3-peri" + - reg: Base address and length of the register for the USB3.0 Peripheral + - interrupts: Interrupt specifier for the USB3.0 Peripheral + - clocks: clock phandle and specifier pair + +Example: + usb3_peri0: usb@ee020000 { + compatible = "renesas,r8a7795-usb3-peri"; + reg = <0 0xee020000 0 0x400>; + interrupts = ; + clocks = <&cpg CPG_MOD 328>; + }; + + usb3_peri1: usb@ee060000 { + compatible = "renesas,r8a7795-usb3-peri"; + reg = <0 0xee060000 0 0x400>; + interrupts = ; + clocks = <&cpg CPG_MOD 327>; + }; diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt index 7d48f63db44ec9b9c0aa68ea0a54a70fc1a7014e..b6040563e51a36de00900c2b5c0192cdd4b5d540 100644 --- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt +++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt @@ -1,11 +1,21 @@ Renesas Electronics USBHS driver Required properties: - - compatible: Must contain one of the following: - - "renesas,usbhs-r8a7790" - - "renesas,usbhs-r8a7791" - - "renesas,usbhs-r8a7794" - - "renesas,usbhs-r8a7795" + - compatible: Must contain one or more of the following: + + - "renesas,usbhs-r8a7790" for r8a7790 (R-Car H2) compatible device + - "renesas,usbhs-r8a7791" for r8a7791 (R-Car M2-W) compatible device + - "renesas,usbhs-r8a7792" for r8a7792 (R-Car V2H) compatible device + - "renesas,usbhs-r8a7793" for r8a7793 (R-Car M2-N) compatible device + - "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device + - "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device + - "renesas,rcar-gen2-usbhs" for R-Car Gen2 compatible device + - "renesas,rcar-gen3-usbhs" for R-Car Gen3 compatible device + + When compatible with the generic version, nodes must list the + SoC-specific version corresponding to the platform first followed + by the generic version. + - reg: Base address and length of the register for the USBHS - interrupts: Interrupt specifier for the USBHS - clocks: A list of phandle + clock specifier pairs @@ -22,7 +32,7 @@ Optional properties: Example: usbhs: usb@e6590000 { - compatible = "renesas,usbhs-r8a7790"; + compatible = "renesas,usbhs-r8a7790", "renesas,rcar-gen2-usbhs"; reg = <0 0xe6590000 0 0x100>; interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>; clocks = <&mstp7_clks R8A7790_CLK_HSUSB>; diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt index 86f67f0886bc4ea577e2847a9a97289ab077b62d..082573289f1e5577f5e0173e6d1810c23d8dfea3 100644 --- a/Documentation/devicetree/bindings/usb/usb-xhci.txt +++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt @@ -3,8 +3,8 @@ USB xHCI controllers Required properties: - compatible: should be one of "generic-xhci", "marvell,armada-375-xhci", "marvell,armada-380-xhci", - "renesas,xhci-r8a7790", "renesas,xhci-r8a7791" (deprecated: - "xhci-platform"). + "renesas,xhci-r8a7790", "renesas,xhci-r8a7791", "renesas,xhci-r8a7793", + "renesas,xhci-r8a7795" (deprecated: "xhci-platform"). - reg: should contain address and length of the standard XHCI register set for the device. - interrupts: one XHCI interrupt should be described here. diff --git a/Documentation/devicetree/bindings/usb/usb3503.txt b/Documentation/devicetree/bindings/usb/usb3503.txt index 52493b1480e275145510b67251ab50a972b0b711..c1a0a9191d26180bd0d96be32f8bbdc5ad3eafa9 100644 --- a/Documentation/devicetree/bindings/usb/usb3503.txt +++ b/Documentation/devicetree/bindings/usb/usb3503.txt @@ -18,7 +18,8 @@ Optional properties: - refclk: Clock used for driving REFCLK signal (optional, if not provided the driver assumes that clock signal is always available, its rate is specified by REF_SEL pins and a value from the primary - reference clock frequencies table is used) + reference clock frequencies table is used). Use clocks and + clock-names in order to assign it - refclk-frequency: Frequency of the REFCLK signal as defined by REF_SEL pins (optional, if not provided, driver will not set rate of the REFCLK signal and assume that a value from the primary reference @@ -33,4 +34,6 @@ Examples: intn-gpios = <&gpx3 4 1>; reset-gpios = <&gpx3 5 1>; initial-mode = <1>; + clocks = <&clks 80>; + clock-names = "refclk"; }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 55df1d444e9f82c150ef144c02357d13169ead31..a4f2035569ce7257ea0601a4f75c41a9e1d54c6e 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -161,6 +161,7 @@ nuvoton Nuvoton Technology Corporation nvidia NVIDIA nxp NXP Semiconductors okaya Okaya Electric America, Inc. +olimex OLIMEX Ltd. onnn ON Semiconductor Corp. opencores OpenCores.org option Option NV diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt index 11fb87ff6cd09f17d19d972fd968a48de6b77ed1..9e33189745f00e0d2d443a3a718b8012e197c8a7 100644 --- a/Documentation/dmaengine/client.txt +++ b/Documentation/dmaengine/client.txt @@ -22,25 +22,14 @@ The slave DMA usage consists of following steps: Channel allocation is slightly different in the slave DMA context, client drivers typically need a channel from a particular DMA controller only and even in some cases a specific channel is desired. - To request a channel dma_request_channel() API is used. + To request a channel dma_request_chan() API is used. Interface: - struct dma_chan *dma_request_channel(dma_cap_mask_t mask, - dma_filter_fn filter_fn, - void *filter_param); - where dma_filter_fn is defined as: - typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + struct dma_chan *dma_request_chan(struct device *dev, const char *name); - The 'filter_fn' parameter is optional, but highly recommended for - slave and cyclic channels as they typically need to obtain a specific - DMA channel. - - When the optional 'filter_fn' parameter is NULL, dma_request_channel() - simply returns the first channel that satisfies the capability mask. - - Otherwise, the 'filter_fn' routine will be called once for each free - channel which has a capability in 'mask'. 'filter_fn' is expected to - return 'true' when the desired DMA channel is found. + Which will find and return the 'name' DMA channel associated with the 'dev' + device. The association is done via DT, ACPI or board file based + dma_slave_map matching table. A channel allocated via this interface is exclusive to the caller, until dma_release_channel() is called. @@ -128,7 +117,7 @@ The slave DMA usage consists of following steps: transaction. For cyclic DMA, a callback function may wish to terminate the - DMA via dmaengine_terminate_all(). + DMA via dmaengine_terminate_async(). Therefore, it is important that DMA engine drivers drop any locks before calling the callback function which may cause a @@ -166,12 +155,29 @@ The slave DMA usage consists of following steps: Further APIs: -1. int dmaengine_terminate_all(struct dma_chan *chan) +1. int dmaengine_terminate_sync(struct dma_chan *chan) + int dmaengine_terminate_async(struct dma_chan *chan) + int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */ This causes all activity for the DMA channel to be stopped, and may discard data in the DMA FIFO which hasn't been fully transferred. No callback functions will be called for any incomplete transfers. + Two variants of this function are available. + + dmaengine_terminate_async() might not wait until the DMA has been fully + stopped or until any running complete callbacks have finished. But it is + possible to call dmaengine_terminate_async() from atomic context or from + within a complete callback. dmaengine_synchronize() must be called before it + is safe to free the memory accessed by the DMA transfer or free resources + accessed from within the complete callback. + + dmaengine_terminate_sync() will wait for the transfer and any running + complete callbacks to finish before it returns. But the function must not be + called from atomic context or from within a complete callback. + + dmaengine_terminate_all() is deprecated and should not be used in new code. + 2. int dmaengine_pause(struct dma_chan *chan) This pauses activity on the DMA channel without data loss. @@ -197,3 +203,20 @@ Further APIs: a running DMA channel. It is recommended that DMA engine users pause or stop (via dmaengine_terminate_all()) the channel before using this API. + +5. void dmaengine_synchronize(struct dma_chan *chan) + + Synchronize the termination of the DMA channel to the current context. + + This function should be used after dmaengine_terminate_async() to synchronize + the termination of the DMA channel to the current context. The function will + wait for the transfer and any running complete callbacks to finish before it + returns. + + If dmaengine_terminate_async() is used to stop the DMA channel this function + must be called before it is safe to free memory accessed by previously + submitted descriptors or to free any resources accessed within the complete + callback of previously submitted descriptors. + + The behavior of this function is undefined if dma_async_issue_pending() has + been called between dmaengine_terminate_async() and this function. diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt index 67d4ce4df1096efab9c027f026af22a46ea43809..122b7f4876bb637bf4023826703a12936bd326c7 100644 --- a/Documentation/dmaengine/provider.txt +++ b/Documentation/dmaengine/provider.txt @@ -327,8 +327,24 @@ supported. * device_terminate_all - Aborts all the pending and ongoing transfers on the channel - - This command should operate synchronously on the channel, - terminating right away all the channels + - For aborted transfers the complete callback should not be called + - Can be called from atomic context or from within a complete + callback of a descriptor. Must not sleep. Drivers must be able + to handle this correctly. + - Termination may be asynchronous. The driver does not have to + wait until the currently active transfer has completely stopped. + See device_synchronize. + + * device_synchronize + - Must synchronize the termination of a channel to the current + context. + - Must make sure that memory for previously submitted + descriptors is no longer accessed by the DMA controller. + - Must make sure that all complete callbacks for previously + submitted descriptors have finished running and none are + scheduled to run. + - May sleep. + Misc notes (stuff that should be documented, but don't really know where to put them) diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb index 8eb92264ee047163caa25738a30c09ad04d20fcc..669dc6ce43305e17f29ea95066bec2a80b59b70d 100644 --- a/Documentation/dvb/README.dvb-usb +++ b/Documentation/dvb/README.dvb-usb @@ -45,7 +45,7 @@ Supported devices See the LinuxTV DVB Wiki at www.linuxtv.org for a complete list of cards/drivers/firmwares: -http://www.linuxtv.org/wiki/index.php/DVB_USB +https://linuxtv.org/wiki/index.php/DVB_USB 0. History & News: 2005-06-30 - added support for WideView WT-220U (Thanks to Steve Chang) @@ -121,7 +121,7 @@ working. Have a look at the Wikipage for the DVB-USB-drivers to find out, which firmware you need for your device: -http://www.linuxtv.org/wiki/index.php/DVB_USB +https://linuxtv.org/wiki/index.php/DVB_USB 1.2. Compiling diff --git a/Documentation/dvb/faq.txt b/Documentation/dvb/faq.txt index 97b1373f2428e11bc38ed7e2a5adf71a570a4de2..a0be92012877d7c55246112c6dd61a104304d253 100644 --- a/Documentation/dvb/faq.txt +++ b/Documentation/dvb/faq.txt @@ -76,7 +76,7 @@ Some very frequently asked questions about linuxtv-dvb the TuxBox CVS many interesting DVB applications and the dBox2 DVB source - http://www.linuxtv.org/downloads/ + https://linuxtv.org/downloads DVB Swiss Army Knife library and utilities http://www.nenie.org/misc/mpsys/ diff --git a/Documentation/dvb/get_dvb_firmware b/Documentation/dvb/get_dvb_firmware index 91b43d2738c7935d8a919a6655b52fc95add8541..1a0a04125f713cb586549e1f5315501447eb65de 100755 --- a/Documentation/dvb/get_dvb_firmware +++ b/Documentation/dvb/get_dvb_firmware @@ -152,7 +152,7 @@ sub tda10046lifeview { sub av7110 { my $sourcefile = "dvb-ttpci-01.fw-261d"; - my $url = "http://www.linuxtv.org/downloads/firmware/$sourcefile"; + my $url = "https://linuxtv.org/downloads/firmware/$sourcefile"; my $hash = "603431b6259715a8e88f376a53b64e2f"; my $outfile = "dvb-ttpci-01.fw"; @@ -303,7 +303,7 @@ sub vp7049 { } sub dibusb { - my $url = "http://www.linuxtv.org/downloads/firmware/dvb-usb-dibusb-5.0.0.11.fw"; + my $url = "https://linuxtv.org/downloads/firmware/dvb-usb-dibusb-5.0.0.11.fw"; my $outfile = "dvb-dibusb-5.0.0.11.fw"; my $hash = "fa490295a527360ca16dcdf3224ca243"; @@ -351,7 +351,7 @@ sub nxt2004 { sub or51211 { my $fwfile = "dvb-fe-or51211.fw"; - my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; + my $url = "https://linuxtv.org/downloads/firmware/$fwfile"; my $hash = "d830949c771a289505bf9eafc225d491"; checkstandard(); @@ -364,7 +364,7 @@ sub or51211 { sub cx231xx { my $fwfile = "v4l-cx231xx-avcore-01.fw"; - my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; + my $url = "https://linuxtv.org/downloads/firmware/$fwfile"; my $hash = "7d3bb956dc9df0eafded2b56ba57cc42"; checkstandard(); @@ -376,7 +376,7 @@ sub cx231xx { } sub cx18 { - my $url = "http://linuxtv.org/downloads/firmware/"; + my $url = "https://linuxtv.org/downloads/firmware/"; my %files = ( 'v4l-cx23418-apu.fw' => '588f081b562f5c653a3db1ad8f65939a', @@ -450,7 +450,7 @@ sub mpc718 { } sub cx23885 { - my $url = "http://linuxtv.org/downloads/firmware/"; + my $url = "https://linuxtv.org/downloads/firmware/"; my %files = ( 'v4l-cx23885-avcore-01.fw' => 'a9f8f5d901a7fb42f552e1ee6384f3bb', @@ -472,7 +472,7 @@ sub cx23885 { } sub pvrusb2 { - my $url = "http://linuxtv.org/downloads/firmware/"; + my $url = "https://linuxtv.org/downloads/firmware/"; my %files = ( 'v4l-cx25840.fw' => 'dadb79e9904fc8af96e8111d9cb59320', @@ -494,7 +494,7 @@ sub pvrusb2 { sub or51132_qam { my $fwfile = "dvb-fe-or51132-qam.fw"; - my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; + my $url = "https://linuxtv.org/downloads/firmware/$fwfile"; my $hash = "7702e8938612de46ccadfe9b413cb3b5"; checkstandard(); @@ -507,7 +507,7 @@ sub or51132_qam { sub or51132_vsb { my $fwfile = "dvb-fe-or51132-vsb.fw"; - my $url = "http://linuxtv.org/downloads/firmware/$fwfile"; + my $url = "https://linuxtv.org/downloads/firmware/$fwfile"; my $hash = "c16208e02f36fc439a557ad4c613364a"; checkstandard(); @@ -519,7 +519,7 @@ sub or51132_vsb { } sub bluebird { - my $url = "http://www.linuxtv.org/download/dvb/firmware/dvb-usb-bluebird-01.fw"; + my $url = "https://linuxtv.org/download/dvb/firmware/dvb-usb-bluebird-01.fw"; my $outfile = "dvb-usb-bluebird-01.fw"; my $hash = "658397cb9eba9101af9031302671f49d"; @@ -677,7 +677,7 @@ sub drxk_hauppauge_hvr930c { } sub drxk_terratec_h5 { - my $url = "http://www.linuxtv.org/downloads/firmware/"; + my $url = "https://linuxtv.org/downloads/firmware/"; my $hash = "19000dada8e2741162ccc50cc91fa7f1"; my $fwfile = "dvb-usb-terratec-h5-drxk.fw"; diff --git a/Documentation/dvb/readme.txt b/Documentation/dvb/readme.txt index 0b0380c919902c012d1450f7202280fb8a5aefd6..89965041a2667d3b4b2f29aa23b28b6c2f73f57d 100644 --- a/Documentation/dvb/readme.txt +++ b/Documentation/dvb/readme.txt @@ -2,12 +2,12 @@ Linux Digital Video Broadcast (DVB) subsystem ============================================= The main development site and CVS repository for these -drivers is http://linuxtv.org/. +drivers is https://linuxtv.org. The developer mailing list linux-dvb is also hosted there, -see http://linuxtv.org/lists.php. Please check -the archive http://linuxtv.org/pipermail/linux-dvb/ -and the Wiki http://linuxtv.org/wiki/ +see https://linuxtv.org/lists.php. Please check +the archive https://linuxtv.org/pipermail/linux-dvb/ +and the Wiki https://linuxtv.org/wiki/ before asking newbie questions on the list. API documentation, utilities and test/example programs @@ -16,7 +16,7 @@ are available as part of the old driver package for Linux 2.4 We plan to split this into separate packages, but it's not been done yet. -http://linuxtv.org/downloads/ +https://linuxtv.org/downloads/ What's inside this directory: diff --git a/Documentation/edac.txt b/Documentation/edac.txt index 80841a2d640cf5aeca8e16db4ebba5d224c5a008..f89cfd85ae134b1530ef76a46970d4fc044f66a6 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt @@ -1,9 +1,13 @@ EDAC - Error Detection And Correction ===================================== -"bluesmoke" was the name for this device driver when it was "out-of-tree" -and maintained at sourceforge.net. When it was pushed into 2.6.16 for the -first time, it was renamed to 'EDAC'. +"bluesmoke" was the name for this device driver when it +was "out-of-tree" and maintained at sourceforge.net - +bluesmoke.sourceforge.net. That site is mostly archaic now and can be +used only for historical purposes. + +When the subsystem was pushed into 2.6.16 for the first time, it was +renamed to 'EDAC'. PURPOSE ------- diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt index 09adabef513ff863ae6f413a239b0b79f90d575b..83d3f4e43e91726201ba56392f0723c40927fc67 100644 --- a/Documentation/fault-injection/notifier-error-inject.txt +++ b/Documentation/fault-injection/notifier-error-inject.txt @@ -10,6 +10,7 @@ modules that can be used to test the following notifiers. * PM notifier * Memory hotplug notifier * powerpc pSeries reconfig notifier + * Netdevice notifier CPU notifier error injection module ----------------------------------- @@ -87,6 +88,30 @@ Possible pSeries reconfig notifier events to be failed are: * PSERIES_DRCONF_MEM_ADD * PSERIES_DRCONF_MEM_REMOVE +Netdevice notifier error injection module +---------------------------------------------- +This feature is controlled through debugfs interface +/sys/kernel/debug/notifier-error-inject/netdev/actions//error + +Netdevice notifier events which can be failed are: + + * NETDEV_REGISTER + * NETDEV_CHANGEMTU + * NETDEV_CHANGENAME + * NETDEV_PRE_UP + * NETDEV_PRE_TYPE_CHANGE + * NETDEV_POST_INIT + * NETDEV_PRECHANGEMTU + * NETDEV_PRECHANGEUPPER + * NETDEV_CHANGEUPPER + +Example: Inject netdevice mtu change error (-22 == -EINVAL) + + # cd /sys/kernel/debug/notifier-error-inject/netdev + # echo -22 > actions/NETDEV_CHANGEMTU/error + # ip link set eth0 mtu 1024 + RTNETLINK answers: Invalid argument + For more usage examples ----------------------- There are tools/testing/selftests using the notifier error injection features diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt index 76d39d66a5d754ed15d39285bd7742d9af1c2be7..4f66ec13395112ae9be41a7250f0152963995a1c 100644 --- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt +++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt @@ -33,7 +33,7 @@ | sh: | TODO | | sparc: | TODO | | tile: | ok | - | um: | TODO | + | um: | ok | | unicore32: | TODO | | x86: | ok | | xtensa: | TODO | diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt index e63316239938164be150d0a9d73dac94fa9b4b92..4199ffecc0ff06bde4b26fa5f09d1fd0a9a08601 100644 --- a/Documentation/features/time/irq-time-acct/arch-support.txt +++ b/Documentation/features/time/irq-time-acct/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | .. | | arc: | TODO | | arm: | ok | - | arm64: | .. | + | arm64: | ok | | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 06d443450f2138fc8595ef43be0e64ebc33f66f7..619af9bfdcb3eb4baceac7824c655fd9ba495950 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -50,8 +50,7 @@ prototypes: int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); - const char *(*follow_link) (struct dentry *, void **); - void (*put_link) (struct inode *, void *); + const char *(*get_link) (struct dentry *, struct inode *, void **); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); @@ -83,8 +82,7 @@ rmdir: yes (both) (see below) rename: yes (all) (see below) rename2: yes (all) (see below) readlink: no -follow_link: no -put_link: no +get_link: no setattr: yes permission: no (may not block if called in rcu-walk mode) get_acl: no diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index af68efdbbfaddb0827b29f1fc4fb4b0eacc43b3a..e5fe521eea1d3b568404831efbbfc15bb935430c 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -51,15 +51,27 @@ configfs tree is always there, whether mounted on /config or not. An item is created via mkdir(2). The item's attributes will also appear at this time. readdir(3) can determine what the attributes are, read(2) can query their default values, and write(2) can store new -values. Like sysfs, attributes should be ASCII text files, preferably -with only one value per file. The same efficiency caveats from sysfs -apply. Don't mix more than one attribute in one attribute file. - -Like sysfs, configfs expects write(2) to store the entire buffer at -once. When writing to configfs attributes, userspace processes should -first read the entire file, modify the portions they wish to change, and -then write the entire buffer back. Attribute files have a maximum size -of one page (PAGE_SIZE, 4096 on i386). +values. Don't mix more than one attribute in one attribute file. + +There are two types of configfs attributes: + +* Normal attributes, which similar to sysfs attributes, are small ASCII text +files, with a maximum size of one page (PAGE_SIZE, 4096 on i386). Preferably +only one value per file should be used, and the same caveats from sysfs apply. +Configfs expects write(2) to store the entire buffer at once. When writing to +normal configfs attributes, userspace processes should first read the entire +file, modify the portions they wish to change, and then write the entire +buffer back. + +* Binary attributes, which are somewhat similar to sysfs binary attributes, +but with a few slight changes to semantics. The PAGE_SIZE limitation does not +apply, but the whole binary item must fit in single kernel vmalloc'ed buffer. +The write(2) calls from user space are buffered, and the attributes' +write_bin_attribute method will be invoked on the final close, therefore it is +imperative for user-space to check the return code of close(2) in order to +verify that the operation finished successfully. +To avoid a malicious user OOMing the kernel, there's a per-binary attribute +maximum buffer value. When an item needs to be destroyed, remove it with rmdir(2). An item cannot be destroyed if any other item has a link to it (via @@ -171,6 +183,7 @@ among other things. For that, it needs a type. struct configfs_item_operations *ct_item_ops; struct configfs_group_operations *ct_group_ops; struct configfs_attribute **ct_attrs; + struct configfs_bin_attribute **ct_bin_attrs; }; The most basic function of a config_item_type is to define what @@ -201,6 +214,32 @@ be called whenever userspace asks for a read(2) on the attribute. If an attribute is writable and provides a ->store method, that method will be be called whenever userspace asks for a write(2) on the attribute. +[struct configfs_bin_attribute] + + struct configfs_attribute { + struct configfs_attribute cb_attr; + void *cb_private; + size_t cb_max_size; + }; + +The binary attribute is used when the one needs to use binary blob to +appear as the contents of a file in the item's configfs directory. +To do so add the binary attribute to the NULL-terminated array +config_item_type->ct_bin_attrs, and the item appears in configfs, the +attribute file will appear with the configfs_bin_attribute->cb_attr.ca_name +filename. configfs_bin_attribute->cb_attr.ca_mode specifies the file +permissions. +The cb_private member is provided for use by the driver, while the +cb_max_size member specifies the maximum amount of vmalloc buffer +to be used. + +If binary attribute is readable and the config_item provides a +ct_item_ops->read_bin_attribute() method, that method will be called +whenever userspace asks for a read(2) on the attribute. The converse +will happen for write(2). The reads/writes are bufferred so only a +single read/write will occur; the attributes' need not concern itself +with it. + [struct config_group] A config_item cannot live in a vacuum. The only way one can be created diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b102b436563ebd60fa194df5be1a34c4365c688a..e1c9f0849da6d99ebcf03b6d6fec56f09c205e67 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -102,7 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage collection, triggered in background when I/O subsystem is idle. If background_gc=on, it will turn on the garbage collection and if background_gc=off, garbage collection - will be truned off. If background_gc=sync, it will turn + will be turned off. If background_gc=sync, it will turn on synchronous garbage collection running in background. Default value for this option is on. So garbage collection is on by default. @@ -145,10 +145,12 @@ extent_cache Enable an extent cache based on rb-tree, it can cache as many as extent which map between contiguous logical address and physical address per inode, resulting in increasing the cache hit ratio. Set by default. -noextent_cache Diable an extent cache based on rb-tree explicitly, see +noextent_cache Disable an extent cache based on rb-tree explicitly, see the above extent_cache mount option. noinline_data Disable the inline data feature, inline data feature is enabled by default. +data_flush Enable data flushing before checkpoint in order to + persist data of regular and symlink. ================================================================================ DEBUGFS ENTRIES @@ -192,7 +194,7 @@ Files in /sys/fs/f2fs/ policy for garbage collection. Setting gc_idle = 0 (default) will disable this option. Setting gc_idle = 1 will select the Cost Benefit approach - & setting gc_idle = 2 will select the greedy aproach. + & setting gc_idle = 2 will select the greedy approach. reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree @@ -298,7 +300,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to file. Each file is dump_ssa and dump_sit. The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. -It shows on-disk inode information reconized by a given inode number, and is +It shows on-disk inode information recognized by a given inode number, and is able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and ./dump_sit respectively. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index f24d1b8339576e96c46045f5da8f275ee9250056..f1b87d8aa2da5355e68d01995238cf93e8a47cf3 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -504,3 +504,24 @@ in your dentry operations instead. [mandatory] __fd_install() & fd_install() can now sleep. Callers should not hold a spinlock or other resources that do not allow a schedule. +-- +[mandatory] + any symlink that might use page_follow_link_light/page_put_link() must + have inode_nohighmem(inode) called before anything might start playing with + its pagecache. No highmem pages should end up in the pagecache of such + symlinks. That includes any preseeding that might be done during symlink + creation. __page_symlink() will honour the mapping gfp flags, so once + you've done inode_nohighmem() it's safe to use, but if you allocate and + insert the page manually, make sure to use the right gfp flags. +-- +[mandatory] + ->follow_link() is replaced with ->get_link(); same API, except that + * ->get_link() gets inode as a separate argument + * ->get_link() may be called in RCU mode - in that case NULL + dentry is passed +-- +[mandatory] + ->get_link() gets struct delayed_call *done now, and should do + set_delayed_call() where it used to set *cookie. + ->put_link() is gone - just give the destructor to set_delayed_call() + in ->get_link(). diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 8c6f07ad373aa6b8402e457e0705cdebd0f3271d..b02a7d598258542e890eae7ab3b4b4503db251e2 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -350,8 +350,8 @@ struct inode_operations { int (*rename2) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); - const char *(*follow_link) (struct dentry *, void **); - void (*put_link) (struct inode *, void *); + const char *(*get_link) (struct dentry *, struct inode *, + struct delayed_call *); int (*permission) (struct inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); @@ -434,20 +434,19 @@ otherwise noted. readlink: called by the readlink(2) system call. Only required if you want to support reading symbolic links - follow_link: called by the VFS to follow a symbolic link to the + get_link: called by the VFS to follow a symbolic link to the inode it points to. Only required if you want to support symbolic links. This method returns the symlink body to traverse (and possibly resets the current position with nd_jump_link()). If the body won't go away until the inode is gone, nothing else is needed; if it needs to be otherwise - pinned, the data needed to release whatever we'd grabbed - is to be stored in void * variable passed by address to - follow_link() instance. - - put_link: called by the VFS to release resources allocated by - follow_link(). The cookie stored by follow_link() is passed - to this method as the last parameter; only called when - cookie isn't NULL. + pinned, arrange for its release by having get_link(..., ..., done) + do set_delayed_call(done, destructor, argument). + In that case destructor(argument) will be called once VFS is + done with the body you've returned. + May be called in RCU mode; that is indicated by NULL dentry + argument. If request can't be handled without leaving RCU mode, + have it return ERR_PTR(-ECHILD). permission: called by the VFS to check for access rights on a POSIX-like filesystem. diff --git a/Documentation/hwmon/htu21 b/Documentation/hwmon/htu21 deleted file mode 100644 index f39a215fb6ae22e541bb9f0d9cd895121587f44e..0000000000000000000000000000000000000000 --- a/Documentation/hwmon/htu21 +++ /dev/null @@ -1,46 +0,0 @@ -Kernel driver htu21 -=================== - -Supported chips: - * Measurement Specialties HTU21D - Prefix: 'htu21' - Addresses scanned: none - Datasheet: Publicly available at the Measurement Specialties website - http://www.meas-spec.com/downloads/HTU21D.pdf - - -Author: - William Markezana - -Description ------------ - -The HTU21D is a humidity and temperature sensor in a DFN package of -only 3 x 3 mm footprint and 0.9 mm height. - -The devices communicate with the I2C protocol. All sensors are set to the -same I2C address 0x40, so an entry with I2C_BOARD_INFO("htu21", 0x40) can -be used in the board setup code. - -This driver does not auto-detect devices. You will have to instantiate the -devices explicitly. Please see Documentation/i2c/instantiating-devices -for details. - -sysfs-Interface ---------------- - -temp1_input - temperature input -humidity1_input - humidity input - -Notes ------ - -The driver uses the default resolution settings of 12 bit for humidity and 14 -bit for temperature, which results in typical measurement times of 11 ms for -humidity and 44 ms for temperature. To keep self heating below 0.1 degree -Celsius, the device should not be active for more than 10% of the time. For -this reason, the driver performs no more than two measurements per second and -reports cached information if polled more frequently. - -Different resolutions, the on-chip heater, using the CRC checksum and reading -the serial number are not supported yet. diff --git a/Documentation/hwmon/ltc3815 b/Documentation/hwmon/ltc3815 new file mode 100644 index 0000000000000000000000000000000000000000..eb7db2d1358763cfad0afa367c91863f9b7bbd75 --- /dev/null +++ b/Documentation/hwmon/ltc3815 @@ -0,0 +1,61 @@ +Kernel driver ltc3815 +===================== + +Supported chips: + * Linear Technology LTC3815 + Prefix: 'ltc3815' + Addresses scanned: - + Datasheet: http://www.linear.com/product/ltc3815 + +Author: Guenter Roeck + + +Description +----------- + +LTC3815 is a Monolithic Synchronous DC/DC Step-Down Converter. + + +Usage Notes +----------- + +This driver does not probe for PMBus devices. You will have to instantiate +devices explicitly. + +Example: the following commands will load the driver for an LTC3815 +at address 0x20 on I2C bus #1: + +# modprobe ltc3815 +# echo ltc3815 0x20 > /sys/bus/i2c/devices/i2c-1/new_device + + +Sysfs attributes +---------------- + +in1_label "vin" +in1_input Measured input voltage. +in1_alarm Input voltage alarm. +in1_highest Highest input voltage. +in1_reset_history Reset input voltage history. + +in2_label "vout1". +in2_input Measured output voltage. +in2_alarm Output voltage alarm. +in2_highest Highest output voltage. +in2_reset_history Reset output voltage history. + +temp1_input Measured chip temperature. +temp1_alarm Temperature alarm. +temp1_highest Highest measured temperature. +temp1_reset_history Reset temperature history. + +curr1_label "iin". +curr1_input Measured input current. +curr1_highest Highest input current. +curr1_reset_history Reset input current history. + +curr2_label "iout1". +curr2_input Measured output current. +curr2_alarm Output current alarm. +curr2_highest Highest output current. +curr2_reset_history Reset output current history. diff --git a/Documentation/iio/iio_configfs.txt b/Documentation/iio/iio_configfs.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0add35cd52edae6c3eb2162d5e493b704b2f864 --- /dev/null +++ b/Documentation/iio/iio_configfs.txt @@ -0,0 +1,93 @@ +Industrial IIO configfs support + +1. Overview + +Configfs is a filesystem-based manager of kernel objects. IIO uses some +objects that could be easily configured using configfs (e.g.: devices, +triggers). + +See Documentation/filesystems/configfs/configfs.txt for more information +about how configfs works. + +2. Usage + +In order to use configfs support in IIO we need to select it at compile +time via CONFIG_IIO_CONFIGFS config option. + +Then, mount the configfs filesystem (usually under /config directory): + +$ mkdir /config +$ mount -t configfs none /config + +At this point, all default IIO groups will be created and can be accessed +under /config/iio. Next chapters will describe available IIO configuration +objects. + +3. Software triggers + +One of the IIO default configfs groups is the "triggers" group. It is +automagically accessible when the configfs is mounted and can be found +under /config/iio/triggers. + +IIO software triggers implementation offers support for creating multiple +trigger types. A new trigger type is usually implemented as a separate +kernel module following the interface in include/linux/iio/sw_trigger.h: + +/* + * drivers/iio/trigger/iio-trig-sample.c + * sample kernel module implementing a new trigger type + */ +#include + + +static struct iio_sw_trigger *iio_trig_sample_probe(const char *name) +{ + /* + * This allocates and registers an IIO trigger plus other + * trigger type specific initialization. + */ +} + +static int iio_trig_hrtimer_remove(struct iio_sw_trigger *swt) +{ + /* + * This undoes the actions in iio_trig_sample_probe + */ +} + +static const struct iio_sw_trigger_ops iio_trig_sample_ops = { + .probe = iio_trig_sample_probe, + .remove = iio_trig_sample_remove, +}; + +static struct iio_sw_trigger_type iio_trig_sample = { + .name = "trig-sample", + .owner = THIS_MODULE, + .ops = &iio_trig_sample_ops, +}; + +module_iio_sw_trigger_driver(iio_trig_sample); + +Each trigger type has its own directory under /config/iio/triggers. Loading +iio-trig-sample module will create 'trig-sample' trigger type directory +/config/iio/triggers/trig-sample. + +We support the following interrupt sources (trigger types): + * hrtimer, uses high resolution timers as interrupt source + +3.1 Hrtimer triggers creation and destruction + +Loading iio-trig-hrtimer module will register hrtimer trigger types allowing +users to create hrtimer triggers under /config/iio/triggers/hrtimer. + +e.g: + +$ mkdir /config/triggers/hrtimer/instance1 +$ rmdir /config/triggers/hrtimer/instance1 + +Each trigger can have one or more attributes specific to the trigger type. + +3.2 "hrtimer" trigger types attributes + +"hrtimer" trigger type doesn't have any configurable attribute from /config dir. +It does introduce the sampling_frequency attribute to trigger directory. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 742f69d18fc8989ae28d9c0662d6bf334109dddd..b7d44871effc2c49f194a3c3c986733f2a497b2c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -472,6 +472,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Change the amount of debugging information output when initialising the APIC and IO-APIC components. + apic_extnmi= [APIC,X86] External NMI delivery setting + Format: { bsp (default) | all | none } + bsp: External NMI is delivered only to CPU 0 + all: External NMIs are broadcast to all CPUs as a + backup of CPU 0 + none: External NMI is masked for all CPUs. This is + useful so that a dump capture kernel won't be + shot down by NMI + autoconf= [IPV6] See Documentation/networking/ipv6.txt. @@ -721,16 +730,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. uart[8250],io,[,options] uart[8250],mmio,[,options] + uart[8250],mmio16,[,options] uart[8250],mmio32,[,options] uart[8250],0x[,options] Start an early, polled-mode console on the 8250/16550 UART at the specified I/O port or MMIO address, switching to the matching ttyS device later. MMIO inter-register address stride is either 8-bit - (mmio) or 32-bit (mmio32). - If none of [io|mmio|mmio32], is assumed to be - equivalent to 'mmio'. 'options' are specified in the - same format described for ttyS above; if unspecified, + (mmio), 16-bit (mmio16), or 32-bit (mmio32). + If none of [io|mmio|mmio16|mmio32], is assumed + to be equivalent to 'mmio'. 'options' are specified in + the same format described for ttyS above; if unspecified, the h/w is not re-initialized. hvc Use the hypervisor console device . This is for @@ -1002,10 +1012,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. unspecified, the h/w is not initialized. pl011, + pl011,mmio32, Start an early, polled-mode console on a pl011 serial port at the specified address. The pl011 serial port must already be setup and configured. Options are not - yet supported. + yet supported. If 'mmio32' is specified, then only + the driver will use only 32-bit accessors to read/write + the device registers. msm_serial, Start an early, polled-mode console on an msm serial @@ -2575,8 +2588,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. notsc [BUGS=X86-32] Disable Time Stamp Counter - nousb [USB] Disable the USB subsystem - nowatchdog [KNL] Disable both lockup detectors, i.e. soft-lockup and NMI watchdog (hard-lockup). @@ -3296,18 +3307,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutorture.verbose= [KNL] Enable additional printk() statements. + rcupdate.rcu_cpu_stall_suppress= [KNL] + Suppress RCU CPU stall warning messages. + + rcupdate.rcu_cpu_stall_timeout= [KNL] + Set timeout for RCU CPU stall warning messages. + rcupdate.rcu_expedited= [KNL] Use expedited grace-period primitives, for example, synchronize_rcu_expedited() instead of synchronize_rcu(). This reduces latency, but can increase CPU utilization, degrade real-time latency, and degrade energy efficiency. - - rcupdate.rcu_cpu_stall_suppress= [KNL] - Suppress RCU CPU stall warning messages. - - rcupdate.rcu_cpu_stall_timeout= [KNL] - Set timeout for RCU CPU stall warning messages. + No effect on CONFIG_TINY_RCU kernels. + + rcupdate.rcu_normal= [KNL] + Use only normal grace-period primitives, + for example, synchronize_rcu() instead of + synchronize_rcu_expedited(). This improves + real-time latency, CPU utilization, and + energy efficiency, but can expose users to + increased grace-period latency. This parameter + overrides rcupdate.rcu_expedited. No effect on + CONFIG_TINY_RCU kernels. + + rcupdate.rcu_normal_after_boot= [KNL] + Once boot has completed (that is, after + rcu_end_inkernel_boot() has been invoked), use + only normal grace-period primitives. No effect + on CONFIG_TINY_RCU kernels. rcupdate.rcu_task_stall_timeout= [KNL] Set timeout in jiffies for RCU task stall warning @@ -3874,6 +3902,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. usbcore.usbfs_snoop= [USB] Set to log all usbfs traffic (default 0 = off). + usbcore.usbfs_snoop_max= + [USB] Maximum number of bytes to snoop in each URB + (default = 65536). + usbcore.blinkenlights= [USB] Set to cycle leds on hubs (default 0 = off). @@ -3894,6 +3926,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. USB_REQ_GET_DESCRIPTOR request in milliseconds (default 5000 = 5.0 seconds). + usbcore.nousb [USB] Disable the USB subsystem + usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. @@ -4114,6 +4148,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. or other driver-specific files in the Documentation/watchdog/ directory. + workqueue.watchdog_thresh= + If CONFIG_WQ_WATCHDOG is configured, workqueue can + warn stall conditions and dump internal state to + help debugging. 0 disables workqueue stall + detection; otherwise, it's the stall threshold + duration in seconds. The default value is 30 and + it can be updated at runtime by writing to the + corresponding sysfs file. + workqueue.disable_numa By default, all work items queued to unbound workqueues are affine to the NUMA nodes they're diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt index 62261c04060a73bef82a3a6784cef794d39e5d50..d406d98339b25abbe3d1f5f05c4ac7f9856d3e9c 100644 --- a/Documentation/leds/leds-class.txt +++ b/Documentation/leds/leds-class.txt @@ -52,6 +52,19 @@ above leaves scope for further attributes should they be needed. If sections of the name don't apply, just leave that section blank. +Brightness setting API +====================== + +LED subsystem core exposes following API for setting brightness: + + - led_set_brightness : it is guaranteed not to sleep, passing LED_OFF stops + blinking, + - led_set_brightness_sync : for use cases when immediate effect is desired - + it can block the caller for the time required for accessing + device registers and can sleep, passing LED_OFF stops hardware + blinking, returns -EBUSY if software blink fallback is enabled. + + Hardware accelerated blink of LEDs ================================== diff --git a/Documentation/media-framework.txt b/Documentation/media-framework.txt deleted file mode 100644 index f552a75c0e70b22b3800a3fa93c0783075228250..0000000000000000000000000000000000000000 --- a/Documentation/media-framework.txt +++ /dev/null @@ -1,372 +0,0 @@ -Linux kernel media framework -============================ - -This document describes the Linux kernel media framework, its data structures, -functions and their usage. - - -Introduction ------------- - -The media controller API is documented in DocBook format in -Documentation/DocBook/media/v4l/media-controller.xml. This document will focus -on the kernel-side implementation of the media framework. - - -Abstract media device model ---------------------------- - -Discovering a device internal topology, and configuring it at runtime, is one -of the goals of the media framework. To achieve this, hardware devices are -modelled as an oriented graph of building blocks called entities connected -through pads. - -An entity is a basic media hardware building block. It can correspond to -a large variety of logical blocks such as physical hardware devices -(CMOS sensor for instance), logical hardware devices (a building block -in a System-on-Chip image processing pipeline), DMA channels or physical -connectors. - -A pad is a connection endpoint through which an entity can interact with -other entities. Data (not restricted to video) produced by an entity -flows from the entity's output to one or more entity inputs. Pads should -not be confused with physical pins at chip boundaries. - -A link is a point-to-point oriented connection between two pads, either -on the same entity or on different entities. Data flows from a source -pad to a sink pad. - - -Media device ------------- - -A media device is represented by a struct media_device instance, defined in -include/media/media-device.h. Allocation of the structure is handled by the -media device driver, usually by embedding the media_device instance in a -larger driver-specific structure. - -Drivers register media device instances by calling - - media_device_register(struct media_device *mdev); - -The caller is responsible for initializing the media_device structure before -registration. The following fields must be set: - - - dev must point to the parent device (usually a pci_dev, usb_interface or - platform_device instance). - - - model must be filled with the device model name as a NUL-terminated UTF-8 - string. The device/model revision must not be stored in this field. - -The following fields are optional: - - - serial is a unique serial number stored as a NUL-terminated ASCII string. - The field is big enough to store a GUID in text form. If the hardware - doesn't provide a unique serial number this field must be left empty. - - - bus_info represents the location of the device in the system as a - NUL-terminated ASCII string. For PCI/PCIe devices bus_info must be set to - "PCI:" (or "PCIe:") followed by the value of pci_name(). For USB devices, - the usb_make_path() function must be used. This field is used by - applications to distinguish between otherwise identical devices that don't - provide a serial number. - - - hw_revision is the hardware device revision in a driver-specific format. - When possible the revision should be formatted with the KERNEL_VERSION - macro. - - - driver_version is formatted with the KERNEL_VERSION macro. The version - minor must be incremented when new features are added to the userspace API - without breaking binary compatibility. The version major must be - incremented when binary compatibility is broken. - -Upon successful registration a character device named media[0-9]+ is created. -The device major and minor numbers are dynamic. The model name is exported as -a sysfs attribute. - -Drivers unregister media device instances by calling - - media_device_unregister(struct media_device *mdev); - -Unregistering a media device that hasn't been registered is *NOT* safe. - - -Entities, pads and links ------------------------- - -- Entities - -Entities are represented by a struct media_entity instance, defined in -include/media/media-entity.h. The structure is usually embedded into a -higher-level structure, such as a v4l2_subdev or video_device instance, -although drivers can allocate entities directly. - -Drivers initialize entities by calling - - media_entity_init(struct media_entity *entity, u16 num_pads, - struct media_pad *pads, u16 extra_links); - -The media_entity name, type, flags, revision and group_id fields can be -initialized before or after calling media_entity_init. Entities embedded in -higher-level standard structures can have some of those fields set by the -higher-level framework. - -As the number of pads is known in advance, the pads array is not allocated -dynamically but is managed by the entity driver. Most drivers will embed the -pads array in a driver-specific structure, avoiding dynamic allocation. - -Drivers must set the direction of every pad in the pads array before calling -media_entity_init. The function will initialize the other pads fields. - -Unlike the number of pads, the total number of links isn't always known in -advance by the entity driver. As an initial estimate, media_entity_init -pre-allocates a number of links equal to the number of pads plus an optional -number of extra links. The links array will be reallocated if it grows beyond -the initial estimate. - -Drivers register entities with a media device by calling - - media_device_register_entity(struct media_device *mdev, - struct media_entity *entity); - -Entities are identified by a unique positive integer ID. Drivers can provide an -ID by filling the media_entity id field prior to registration, or request the -media controller framework to assign an ID automatically. Drivers that provide -IDs manually must ensure that all IDs are unique. IDs are not guaranteed to be -contiguous even when they are all assigned automatically by the framework. - -Drivers unregister entities by calling - - media_device_unregister_entity(struct media_entity *entity); - -Unregistering an entity will not change the IDs of the other entities, and the -ID will never be reused for a newly registered entity. - -When a media device is unregistered, all its entities are unregistered -automatically. No manual entities unregistration is then required. - -Drivers free resources associated with an entity by calling - - media_entity_cleanup(struct media_entity *entity); - -This function must be called during the cleanup phase after unregistering the -entity. Note that the media_entity instance itself must be freed explicitly by -the driver if required. - -Entities have flags that describe the entity capabilities and state. - - MEDIA_ENT_FL_DEFAULT indicates the default entity for a given type. - This can be used to report the default audio and video devices or the - default camera sensor. - -Logical entity groups can be defined by setting the group ID of all member -entities to the same non-zero value. An entity group serves no purpose in the -kernel, but is reported to userspace during entities enumeration. The group_id -field belongs to the media device driver and must not by touched by entity -drivers. - -Media device drivers should define groups if several entities are logically -bound together. Example usages include reporting - - - ALSA, VBI and video nodes that carry the same media stream - - lens and flash controllers associated with a sensor - -- Pads - -Pads are represented by a struct media_pad instance, defined in -include/media/media-entity.h. Each entity stores its pads in a pads array -managed by the entity driver. Drivers usually embed the array in a -driver-specific structure. - -Pads are identified by their entity and their 0-based index in the pads array. -Both information are stored in the media_pad structure, making the media_pad -pointer the canonical way to store and pass link references. - -Pads have flags that describe the pad capabilities and state. - - MEDIA_PAD_FL_SINK indicates that the pad supports sinking data. - MEDIA_PAD_FL_SOURCE indicates that the pad supports sourcing data. - -One and only one of MEDIA_PAD_FL_SINK and MEDIA_PAD_FL_SOURCE must be set for -each pad. - -- Links - -Links are represented by a struct media_link instance, defined in -include/media/media-entity.h. Each entity stores all links originating at or -targeting any of its pads in a links array. A given link is thus stored -twice, once in the source entity and once in the target entity. The array is -pre-allocated and grows dynamically as needed. - -Drivers create links by calling - - media_entity_create_link(struct media_entity *source, u16 source_pad, - struct media_entity *sink, u16 sink_pad, - u32 flags); - -An entry in the link array of each entity is allocated and stores pointers -to source and sink pads. - -Links have flags that describe the link capabilities and state. - - MEDIA_LNK_FL_ENABLED indicates that the link is enabled and can be used - to transfer media data. When two or more links target a sink pad, only - one of them can be enabled at a time. - MEDIA_LNK_FL_IMMUTABLE indicates that the link enabled state can't be - modified at runtime. If MEDIA_LNK_FL_IMMUTABLE is set, then - MEDIA_LNK_FL_ENABLED must also be set since an immutable link is always - enabled. - - -Graph traversal ---------------- - -The media framework provides APIs to iterate over entities in a graph. - -To iterate over all entities belonging to a media device, drivers can use the -media_device_for_each_entity macro, defined in include/media/media-device.h. - - struct media_entity *entity; - - media_device_for_each_entity(entity, mdev) { - /* entity will point to each entity in turn */ - ... - } - -Drivers might also need to iterate over all entities in a graph that can be -reached only through enabled links starting at a given entity. The media -framework provides a depth-first graph traversal API for that purpose. - -Note that graphs with cycles (whether directed or undirected) are *NOT* -supported by the graph traversal API. To prevent infinite loops, the graph -traversal code limits the maximum depth to MEDIA_ENTITY_ENUM_MAX_DEPTH, -currently defined as 16. - -Drivers initiate a graph traversal by calling - - media_entity_graph_walk_start(struct media_entity_graph *graph, - struct media_entity *entity); - -The graph structure, provided by the caller, is initialized to start graph -traversal at the given entity. - -Drivers can then retrieve the next entity by calling - - media_entity_graph_walk_next(struct media_entity_graph *graph); - -When the graph traversal is complete the function will return NULL. - -Graph traversal can be interrupted at any moment. No cleanup function call is -required and the graph structure can be freed normally. - -Helper functions can be used to find a link between two given pads, or a pad -connected to another pad through an enabled link - - media_entity_find_link(struct media_pad *source, - struct media_pad *sink); - - media_entity_remote_pad(struct media_pad *pad); - -Refer to the kerneldoc documentation for more information. - - -Use count and power handling ----------------------------- - -Due to the wide differences between drivers regarding power management needs, -the media controller does not implement power management. However, the -media_entity structure includes a use_count field that media drivers can use to -track the number of users of every entity for power management needs. - -The use_count field is owned by media drivers and must not be touched by entity -drivers. Access to the field must be protected by the media device graph_mutex -lock. - - -Links setup ------------ - -Link properties can be modified at runtime by calling - - media_entity_setup_link(struct media_link *link, u32 flags); - -The flags argument contains the requested new link flags. - -The only configurable property is the ENABLED link flag to enable/disable a -link. Links marked with the IMMUTABLE link flag can not be enabled or disabled. - -When a link is enabled or disabled, the media framework calls the -link_setup operation for the two entities at the source and sink of the link, -in that order. If the second link_setup call fails, another link_setup call is -made on the first entity to restore the original link flags. - -Media device drivers can be notified of link setup operations by setting the -media_device::link_notify pointer to a callback function. If provided, the -notification callback will be called before enabling and after disabling -links. - -Entity drivers must implement the link_setup operation if any of their links -is non-immutable. The operation must either configure the hardware or store -the configuration information to be applied later. - -Link configuration must not have any side effect on other links. If an enabled -link at a sink pad prevents another link at the same pad from being enabled, -the link_setup operation must return -EBUSY and can't implicitly disable the -first enabled link. - - -Pipelines and media streams ---------------------------- - -When starting streaming, drivers must notify all entities in the pipeline to -prevent link states from being modified during streaming by calling - - media_entity_pipeline_start(struct media_entity *entity, - struct media_pipeline *pipe); - -The function will mark all entities connected to the given entity through -enabled links, either directly or indirectly, as streaming. - -The media_pipeline instance pointed to by the pipe argument will be stored in -every entity in the pipeline. Drivers should embed the media_pipeline structure -in higher-level pipeline structures and can then access the pipeline through -the media_entity pipe field. - -Calls to media_entity_pipeline_start() can be nested. The pipeline pointer must -be identical for all nested calls to the function. - -media_entity_pipeline_start() may return an error. In that case, it will -clean up any of the changes it did by itself. - -When stopping the stream, drivers must notify the entities with - - media_entity_pipeline_stop(struct media_entity *entity); - -If multiple calls to media_entity_pipeline_start() have been made the same -number of media_entity_pipeline_stop() calls are required to stop streaming. The -media_entity pipe field is reset to NULL on the last nested stop call. - -Link configuration will fail with -EBUSY by default if either end of the link is -a streaming entity. Links that can be modified while streaming must be marked -with the MEDIA_LNK_FL_DYNAMIC flag. - -If other operations need to be disallowed on streaming entities (such as -changing entities configuration parameters) drivers can explicitly check the -media_entity stream_count field to find out if an entity is streaming. This -operation must be done with the media_device graph_mutex held. - - -Link validation ---------------- - -Link validation is performed by media_entity_pipeline_start() for any -entity which has sink pads in the pipeline. The -media_entity::link_validate() callback is used for that purpose. In -link_validate() callback, entity driver should check that the properties of -the source pad of the connected entity and its own sink pad match. It is up -to the type of the entity (and in the end, the properties of the hardware) -what matching actually means. - -Subsystems should facilitate link validation by providing subsystem specific -helper functions to provide easy access for commonly needed information, and -in the end provide a way to use driver-specific callbacks. diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index aef9487303d023cd00d27dceadb441c78f12413f..a61be39c7b516a1e3b081afd1fea02a3f1068187 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -194,7 +194,7 @@ There are some minimal guarantees that may be expected of a CPU: (*) On any given CPU, dependent memory accesses will be issued in order, with respect to itself. This means that for: - WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q); + Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q); the CPU will issue the following memory operations: @@ -202,9 +202,9 @@ There are some minimal guarantees that may be expected of a CPU: and always in that order. On most systems, smp_read_barrier_depends() does nothing, but it is required for DEC Alpha. The READ_ONCE() - and WRITE_ONCE() are required to prevent compiler mischief. Please - note that you should normally use something like rcu_dereference() - instead of open-coding smp_read_barrier_depends(). + is required to prevent compiler mischief. Please note that you + should normally use something like rcu_dereference() instead of + open-coding smp_read_barrier_depends(). (*) Overlapping loads and stores within a particular CPU will appear to be ordered within that CPU. This means that for: @@ -1673,8 +1673,8 @@ There are some more advanced barrier functions: (*) smp_store_mb(var, value) This assigns the value to the variable and then inserts a full memory - barrier after it, depending on the function. It isn't guaranteed to - insert anything more than a compiler barrier in a UP compilation. + barrier after it. It isn't guaranteed to insert anything more than a + compiler barrier in a UP compilation. (*) smp_mb__before_atomic(); diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt index e129b2479ea8def68c74a0cf65c705334814ef7f..f8c3284bf6a7f3e18c1df3f17dcc4503f6234109 100644 --- a/Documentation/mtd/nand_ecc.txt +++ b/Documentation/mtd/nand_ecc.txt @@ -107,7 +107,7 @@ for (i = 0; i < 256; i++) if (i & 0x01) rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1; else - rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1; + rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0; if (i & 0x02) rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3; else @@ -127,7 +127,7 @@ for (i = 0; i < 256; i++) if (i & 0x20) rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11; else - rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10; + rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10; if (i & 0x40) rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13; else @@ -158,7 +158,7 @@ the values in any order. So instead of calculating all the bits individually, let us try to rearrange things. For the column parity this is easy. We can just xor the bytes and in the end filter out the relevant bits. This is pretty nice as it will bring -all cp calculation out of the if loop. +all cp calculation out of the for loop. Similarly we can first xor the bytes for the various rows. This leads to: @@ -271,11 +271,11 @@ to write our code in such a way that we process data in 32 bit chunks. Of course this means some modification as the row parity is byte by byte. A quick analysis: for the column parity we use the par variable. When extending to 32 bits -we can in the end easily calculate p0 and p1 from it. +we can in the end easily calculate rp0 and rp1 from it. (because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0 -respectively) +respectively, from MSB to LSB) also rp2 and rp3 can be easily retrieved from par as rp3 covers the -first two bytes and rp2 the last two bytes. +first two MSBs and rp2 covers the last two LSBs. Note that of course now the loop is executed only 64 times (256/4). And note that care must taken wrt byte ordering. The way bytes are @@ -387,11 +387,11 @@ Analysis 2 The code (of course) works, and hurray: we are a little bit faster than the linux driver code (about 15%). But wait, don't cheer too quickly. -THere is more to be gained. +There is more to be gained. If we look at e.g. rp14 and rp15 we see that we either xor our data with rp14 or with rp15. However we also have par which goes over all data. This means there is no need to calculate rp14 as it can be calculated from -rp15 through rp14 = par ^ rp15; +rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15; (or if desired we can avoid calculating rp15 and calculate it from rp14). That is why some places refer to inverse parity. Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13. @@ -419,12 +419,12 @@ with if (i & 0x20) rp15 ^= cur; and outside the loop added: - rp4 = par ^ rp5; - rp6 = par ^ rp7; - rp8 = par ^ rp9; - rp10 = par ^ rp11; - rp12 = par ^ rp13; - rp14 = par ^ rp15; + rp4 = par ^ rp5; + rp6 = par ^ rp7; + rp8 = par ^ rp9; + rp10 = par ^ rp11; + rp12 = par ^ rp13; + rp14 = par ^ rp15; And after that the code takes about 30% more time, although the number of statements is reduced. This is also reflected in the assembly code. @@ -524,12 +524,12 @@ THe code within the for loop was changed to: cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; cur = *bp++; tmppar ^= cur; rp6 ^= cur; - cur = *bp++; tmppar ^= cur; rp4 ^= cur; - cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; - cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur; cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur; - cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur; cur = *bp++; tmppar ^= cur; rp8 ^= cur; cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; @@ -537,7 +537,7 @@ THe code within the for loop was changed to: cur = *bp++; tmppar ^= cur; rp4 ^= cur; cur = *bp++; tmppar ^= cur; - par ^= tmppar; + par ^= tmppar; if ((i & 0x1) == 0) rp12 ^= tmppar; if ((i & 0x2) == 0) rp14 ^= tmppar; } @@ -548,8 +548,8 @@ to rp12 and rp14. While making the changes I also found that I could exploit that tmppar contains the running parity for this iteration. So instead of having: -rp4 ^= cur; rp6 = cur; -I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next +rp4 ^= cur; rp6 ^= cur; +I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next statement. A similar change was done for rp8 and rp10 @@ -593,22 +593,22 @@ The new code now looks like: cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; cur = *bp++; tmppar ^= cur; rp6 ^= cur; - cur = *bp++; tmppar ^= cur; rp4 ^= cur; - cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp10 ^= tmppar; - notrp8 = tmppar; - cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; + notrp8 = tmppar; + cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; cur = *bp++; tmppar ^= cur; rp6 ^= cur; - cur = *bp++; tmppar ^= cur; rp4 ^= cur; + cur = *bp++; tmppar ^= cur; rp4 ^= cur; cur = *bp++; tmppar ^= cur; - rp8 = rp8 ^ tmppar ^ notrp8; + rp8 = rp8 ^ tmppar ^ notrp8; cur = *bp++; tmppar ^= cur; rp4_6 ^= cur; cur = *bp++; tmppar ^= cur; rp6 ^= cur; cur = *bp++; tmppar ^= cur; rp4 ^= cur; cur = *bp++; tmppar ^= cur; - par ^= tmppar; + par ^= tmppar; if ((i & 0x1) == 0) rp12 ^= tmppar; if ((i & 0x2) == 0) rp14 ^= tmppar; } @@ -700,7 +700,7 @@ Conclusion The gain when calculating the ecc is tremendous. Om my development hardware a speedup of a factor of 18 for ecc calculation was achieved. On a test on an embedded system with a MIPS core a factor 7 was obtained. -On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor +On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor 5 (big endian mode, gcc 4.1.2, -O3) For correction not much gain could be obtained (as bitflips are rare). Then again there are also much less cycles spent there. diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 58e49042fc208291e3aec884c96684a606fe06d3..ff23b755f5e45cc8b6d367f2e55564cdf3a1782d 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -115,14 +115,17 @@ The "bat0" interface can be used like any other regular inter- face. It needs an IP address which can be either statically con- figured or dynamically (by using DHCP or similar services): -# NodeA: ifconfig bat0 192.168.0.1 -# NodeB: ifconfig bat0 192.168.0.2 +# NodeA: ip link set up dev bat0 +# NodeA: ip addr add 192.168.0.1/24 dev bat0 + +# NodeB: ip link set up dev bat0 +# NodeB: ip addr add 192.168.0.2/24 dev bat0 # NodeB: ping 192.168.0.1 Note: In order to avoid problems remove all IP addresses previ- ously assigned to interfaces now used by batman advanced, e.g. -# ifconfig eth0 0.0.0.0 +# ip addr flush dev eth0 LOGGING/DEBUGGING diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt index f862cf3aff3495d96f699cd666af6dbf3d18a453..42ddbd4b52a9d646a539ad154d7400c0a61c02c7 100644 --- a/Documentation/networking/e100.txt +++ b/Documentation/networking/e100.txt @@ -181,17 +181,3 @@ For general information, go to the Intel support website at: If an issue is identified with the released source code on the supported kernel with a supported adapter, email the specific information related to the issue to e1000-devel@lists.sourceforge.net. - - -License -======= - -This software program is released under the terms of a license agreement -between you ('Licensee') and Intel. Do not use or load this software or any -associated materials (collectively, the 'Software') until you have carefully -read the full terms and conditions of the file COPYING located in this software -package. By loading or using the Software, you agree to the terms of this -Agreement. If you do not agree with the terms of this Agreement, do not install -or use the Software. - -* Other names and brands may be claimed as the property of others. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2ea4c45cf1c8736ccd577eff75058bb32ed0ca95..ceb44a095a27ba98804442c1f7aa91ed616b7d42 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -335,6 +335,14 @@ tcp_keepalive_intvl - INTEGER after probes started. Default value: 75sec i.e. connection will be aborted after ~11 minutes of retries. +tcp_l3mdev_accept - BOOLEAN + Enables child sockets to inherit the L3 master device index. + Enabling this option allows a "global" listen socket to work + across L3 master domains (e.g., VRFs) with connected sockets + derived from the listen socket to be bound to the L3 domain in + which the packets originated. Only valid when the kernel was + compiled with CONFIG_NET_L3_MASTER_DEV. + tcp_low_latency - BOOLEAN If set, the TCP stack makes decisions that prefer lower latency as opposed to higher throughput. By default, this @@ -1723,6 +1731,25 @@ addip_enable - BOOLEAN Default: 0 +pf_enable - INTEGER + Enable or disable pf (pf is short for potentially failed) state. A value + of pf_retrans > path_max_retrans also disables pf state. That is, one of + both pf_enable and pf_retrans > path_max_retrans can disable pf state. + Since pf_retrans and path_max_retrans can be changed by userspace + application, sometimes user expects to disable pf state by the value of + pf_retrans > path_max_retrans, but occasionally the value of pf_retrans + or path_max_retrans is changed by the user application, this pf state is + enabled. As such, it is necessary to add this to dynamically enable + and disable pf state. See: + https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for + details. + + 1: Enable pf. + + 0: Disable pf. + + Default: 1 + addip_noauth_enable - BOOLEAN Dynamic Address Reconfiguration (ADD-IP) requires the use of authentication to protect the operations of adding or removing new @@ -1799,7 +1826,9 @@ pf_retrans - INTEGER having to reduce path_max_retrans to a very low value. See: http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt for details. Note also that a value of pf_retrans > path_max_retrans - disables this feature + disables this feature. Since both pf_retrans and path_max_retrans can + be changed by userspace application, a variable pf_enable is used to + disable pf state. Default: 0 diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 91994134efca9c92635eb5fe8503e5d5b63ae4b0..fad63136ee3eb8ecf5ad013da54b4013fc9c6e10 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -304,8 +304,12 @@ certain netdevs from flooding unicast traffic for which there is no FDB entry. IGMP Snooping ^^^^^^^^^^^^^ -XXX: complete this section - +In order to support IGMP snooping, the port netdevs should trap to the bridge +driver all IGMP join and leave messages. +The bridge multicast module will notify port netdevs on every multicast group +changed whether it is static configured or dynamically joined/leave. +The hardware implementation should be forwarding all registered multicast +traffic groups only to the configured ports. L3 Routing Offload ------------------ diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index b0e911e0e8f50ad749686961e494377a1a49db45..44558882aa602816f3b1579ac94b3894562ac2b9 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -999,7 +999,7 @@ from its probe routine to make runtime PM work for the device. It is important to remember that the driver's runtime_suspend() callback may be executed right after the usage counter has been decremented, because -user space may already have cuased the pm_runtime_allow() helper function +user space may already have caused the pm_runtime_allow() helper function unblocking the runtime PM of the device to run via sysfs, so the driver must be prepared to cope with that. diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 0784bc3a2ab51bd2d624644d6ea5a41e44529307..7328cf85236c2b2bc5a4c078dc9f81f2e3febd11 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -371,6 +371,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: - increment the device's usage counter, run pm_runtime_resume(dev) and return its result + int pm_runtime_get_if_in_use(struct device *dev); + - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the + runtime PM status is RPM_ACTIVE and the runtime PM usage counter is + nonzero, increment the counter and return 1; otherwise return 0 without + changing the counter + void pm_runtime_put_noidle(struct device *dev); - decrement the device's usage counter diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index b784c270105f40e8320cd388cb9a8ef1e2d463f4..6389551bbad6a7971b89e53451428d3b77b4e8d0 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -250,6 +250,12 @@ dentry names: Passed by reference. +block_device names: + + %pg sda, sda1 or loop0p1 + + For printing name of block_device pointers. + struct va_format: %pV diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.txt index dc929be9601615f4e9e3e2393b6b4fbb83f50009..b064aa59714dba472d43562ecdeb6988c755d70e 100644 --- a/Documentation/s390/zfcpdump.txt +++ b/Documentation/s390/zfcpdump.txt @@ -15,19 +15,15 @@ the s390-tools package) to make the device bootable. The operator of a Linux system can then trigger a SCSI dump by booting the SCSI disk, where zfcpdump resides on. -The kernel part of zfcpdump is implemented as a debugfs file under "zcore/mem", -which exports memory and registers of the crashed Linux in an s390 -standalone dump format. It can be used in the same way as e.g. /dev/mem. The -dump format defines a 4K header followed by plain uncompressed memory. The -register sets are stored in the prefix pages of the respective CPUs. To build a -dump enabled kernel with the zcore driver, the kernel config option -CONFIG_CRASH_DUMP has to be set. When reading from "zcore/mem", the part of -memory, which has been saved by hardware is read by the driver via the SCLP -hardware interface. The second part is just copied from the non overwritten real -memory. - -Since kernel version 3.12 also the /proc/vmcore file can also be used to access -the dump. +The user space dump tool accesses the memory of the crashed system by means +of the /proc/vmcore interface. This interface exports the crashed system's +memory and registers in ELF core dump format. To access the memory which has +been saved by the hardware SCLP requests will be created at the time the data +is needed by /proc/vmcore. The tail part of the crashed systems memory which +has not been stashed by hardware can just be copied from real memory. + +To build a dump enabled kernel the kernel config option CONFIG_CRASH_DUMP +has to be set. To get a valid zfcpdump kernel configuration use "make zfcpdump_defconfig". diff --git a/Documentation/spi/00-INDEX b/Documentation/spi/00-INDEX index a128fa83551297a47bd9cbab2e0630d1bde51d35..4644bf0d9832ee47d96a9f4ec923cb31a6208f85 100644 --- a/Documentation/spi/00-INDEX +++ b/Documentation/spi/00-INDEX @@ -10,13 +10,9 @@ pxa2xx - PXA2xx SPI master controller build by spi_message fifo wq spidev - Intro to the userspace API for spi devices -spidev_fdx.c - - spidev example file spi-lm70llp - Connecting an LM70-LLP sensor to the kernel via the SPI subsys. spi-sc18is602 - NXP SC18IS602/603 I2C-bus to SPI bridge spi-summary - (Linux) SPI overview. If unsure about SPI or SPI in Linux, start here. -spidev_test.c - - SPI testing utility. diff --git a/Documentation/spi/Makefile b/Documentation/spi/Makefile deleted file mode 100644 index efa255813e9d3af413a54f36e110929d12d41997..0000000000000000000000000000000000000000 --- a/Documentation/spi/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# List of programs to build -hostprogs-y := spidev_test spidev_fdx - -# Tell kbuild to always build the programs -always := $(hostprogs-y) - -HOSTCFLAGS_spidev_test.o += -I$(objtree)/usr/include -HOSTCFLAGS_spidev_fdx.o += -I$(objtree)/usr/include diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index af70d1541d3af5b18834bce320ddd3e37009d29b..73c6b1ef0e8456a5de653305c1f9c783f0d958c1 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -551,6 +551,21 @@ the recommended setting is 60. ============================================================== +panic_on_io_nmi: + +Controls the kernel's behavior when a CPU receives an NMI caused by +an IO error. + +0: try to continue operation (default) + +1: panic immediately. The IO error triggered an NMI. This indicates a + serious system condition which could result in IO data corruption. + Rather than continuing, panicking might be a better choice. Some + servers issue this sort of NMI when the dump button is pushed, + and you can use this option to take a crash dump. + +============================================================== + panic_on_oops: Controls the kernel's behaviour when an oops or BUG is encountered. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index f72370b440b121e718183ea6a7dbb9bafc2dcc25..8ee925c046aa67c12a07a9c3eb9f1d4b1c812dc2 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -135,7 +135,7 @@ Contains, as a percentage of total available memory that contains free pages and reclaimable pages, the number of pages at which the background kernel flusher threads will start writing out dirty data. -The total avaiable memory is not equal to total system memory. +The total available memory is not equal to total system memory. ============================================================== @@ -170,7 +170,7 @@ Contains, as a percentage of total available memory that contains free pages and reclaimable pages, the number of pages at which a process which is generating disk writes will itself start writing out dirty data. -The total avaiable memory is not equal to total system memory. +The total available memory is not equal to total system memory. ============================================================== diff --git a/Documentation/trace/events-msr.txt b/Documentation/trace/events-msr.txt new file mode 100644 index 0000000000000000000000000000000000000000..78c383bf06aa1c831b0b33584d9576e4ae2642f8 --- /dev/null +++ b/Documentation/trace/events-msr.txt @@ -0,0 +1,37 @@ + +The x86 kernel supports tracing most MSR (Model Specific Register) accesses. +To see the definition of the MSRs on Intel systems please see the SDM +at http://www.intel.com/sdm (Volume 3) + +Available trace points: + +/sys/kernel/debug/tracing/events/msr/ + +Trace MSR reads + +read_msr + +msr: MSR number +val: Value written +failed: 1 if the access failed, otherwise 0 + + +Trace MSR writes + +write_msr + +msr: MSR number +val: Value written +failed: 1 if the access failed, otherwise 0 + + +Trace RDPMC in kernel + +rdpmc + +The trace data can be post processed with the postprocess/decode_msr.py script + +cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h + +to add symbolic MSR names. + diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py new file mode 100644 index 0000000000000000000000000000000000000000..0ab40e0db5809921a395e18ca23e0cfe2823a280 --- /dev/null +++ b/Documentation/trace/postprocess/decode_msr.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +# add symbolic names to read_msr / write_msr in trace +# decode_msr msr-index.h < trace +import sys +import re + +msrs = dict() + +with open(sys.argv[1] if len(sys.argv) > 1 else "msr-index.h", "r") as f: + for j in f: + m = re.match(r'#define (MSR_\w+)\s+(0x[0-9a-fA-F]+)', j) + if m: + msrs[int(m.group(2), 16)] = m.group(1) + +extra_ranges = ( + ( "MSR_LASTBRANCH_%d_FROM_IP", 0x680, 0x69F ), + ( "MSR_LASTBRANCH_%d_TO_IP", 0x6C0, 0x6DF ), + ( "LBR_INFO_%d", 0xdc0, 0xddf ), +) + +for j in sys.stdin: + m = re.search(r'(read|write)_msr:\s+([0-9a-f]+)', j) + if m: + r = None + num = int(m.group(2), 16) + if num in msrs: + r = msrs[num] + else: + for er in extra_ranges: + if er[1] <= num <= er[2]: + r = er[0] % (num - er[1],) + break + if r: + j = j.replace(" " + m.group(2), " " + r + "(" + m.group(2) + ")") + print j, + + diff --git a/Documentation/usb/chipidea.txt b/Documentation/usb/chipidea.txt index 3f848c1f2940d279b111945e0256140c8f71329e..05f735a1b5a574cb73875e64275480ae63f83b40 100644 --- a/Documentation/usb/chipidea.txt +++ b/Documentation/usb/chipidea.txt @@ -7,8 +7,8 @@ with 2 Freescale i.MX6Q sabre SD boards. --------------------------------------- Select CONFIG_USB_OTG_FSM, rebuild kernel Image and modules. If you want to check some internal variables for otg fsm, -select CONFIG_USB_CHIPIDEA_DEBUG, there are 2 files which -can show otg fsm variables and some controller registers value: +mount debugfs, there are 2 files which can show otg fsm +variables and some controller registers value: cat /sys/kernel/debug/ci_hdrc.0/otg cat /sys/kernel/debug/ci_hdrc.0/registers diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt index b24d3ef89166bc16f228e97aceb6f300063edc2a..58196057488941ebc2aa23e116dbb01d1d153c8b 100644 --- a/Documentation/usb/gadget-testing.txt +++ b/Documentation/usb/gadget-testing.txt @@ -434,7 +434,7 @@ On host: serialc -v -p -i -a1 -s1024 \ where seriald and serialc are Felipe's utilities found here: -https://git.gitorious.org/usb/usb-tools.git master +https://github.com/felipebalbi/usb-tools.git master 12. PHONET function =================== @@ -579,6 +579,8 @@ The SOURCESINK function provides these attributes in its function directory: isoc_mult - 0..2 (hs/ss only) isoc_maxburst - 0..15 (ss only) bulk_buflen - buffer length + bulk_qlen - depth of queue for bulk + iso_qlen - depth of queue for iso Testing the SOURCESINK function ------------------------------- diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index 4a15c90bc11d93d362b0397899091d048992a545..0a94ffe17ab6f1d3b11774924411431da08a3385 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -537,17 +537,18 @@ relevant attribute files are usb2_hardware_lpm and usb3_hardware_lpm. can write y/Y/1 or n/N/0 to the file to enable/disable USB2 hardware LPM manually. This is for test purpose mainly. - power/usb3_hardware_lpm + power/usb3_hardware_lpm_u1 + power/usb3_hardware_lpm_u2 When a USB 3.0 lpm-capable device is plugged in to a xHCI host which supports link PM, it will check if U1 and U2 exit latencies have been set in the BOS descriptor; if the check is is passed and the host supports USB3 hardware LPM, USB3 hardware LPM will be - enabled for the device and this file will be created. - The file holds a string value (enable or disable) - indicating whether or not USB3 hardware LPM is - enabled for the device. + enabled for the device and these files will be created. + The files hold a string value (enable or disable) + indicating whether or not USB3 hardware LPM U1 or U2 + is enabled for the device. USB Port Power Control ---------------------- diff --git a/Documentation/video4linux/API.html b/Documentation/video4linux/API.html index 256f8efa992c27a30c34bcbeca0c9bc3053f9e2e..eaf948cf1ae7176293bf3312392fbee5875f6b92 100644 --- a/Documentation/video4linux/API.html +++ b/Documentation/video4linux/API.html @@ -9,7 +9,7 @@

- V4L original API + V4L original API Obsoleted by V4L2 API diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 9e57ce43c4f499fbe3bf8c27775bcd3f5a50bc69..67209998a439ee55c9169194c368e7c6a192c40d 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -41,8 +41,8 @@ 40 -> Plextor ConvertX PX-TV100U (em2861) [093b:a005] 41 -> Kworld 350 U DVB-T (em2870) [eb1a:e350] 42 -> Kworld 355 U DVB-T (em2870) [eb1a:e355,eb1a:e357,eb1a:e359] - 43 -> Terratec Cinergy T XS (em2870) [0ccd:0043] - 44 -> Terratec Cinergy T XS (MT2060) (em2870) + 43 -> Terratec Cinergy T XS (em2870) + 44 -> Terratec Cinergy T XS (MT2060) (em2870) [0ccd:0043] 45 -> Pinnacle PCTV DVB-T (em2870) 46 -> Compro, VideoMate U3 (em2870) [185b:2870] 47 -> KWorld DVB-T 305U (em2880) [eb1a:e305] diff --git a/Documentation/video4linux/fimc.txt b/Documentation/video4linux/fimc.txt index e0c6b8bc4743736c2f98dce83238d24bf86aa8af..4fab231be52eb188bc59ac34595c57949d908ee5 100644 --- a/Documentation/video4linux/fimc.txt +++ b/Documentation/video4linux/fimc.txt @@ -58,7 +58,7 @@ Not currently supported: 4.1. Media device interface The driver supports Media Controller API as defined at -http://linuxtv.org/downloads/v4l-dvb-apis/media_common.html +https://linuxtv.org/downloads/v4l-dvb-apis/media_common.html The media device driver name is "SAMSUNG S5P FIMC". The purpose of this interface is to allow changing assignment of FIMC instances @@ -83,11 +83,11 @@ undefined behaviour. 4.3. Capture video node The driver supports V4L2 Video Capture Interface as defined at: -http://linuxtv.org/downloads/v4l-dvb-apis/devices.html +https://linuxtv.org/downloads/v4l-dvb-apis/devices.html At the capture and mem-to-mem video nodes only the multi-planar API is supported. For more details see: -http://linuxtv.org/downloads/v4l-dvb-apis/planar-apis.html +https://linuxtv.org/downloads/v4l-dvb-apis/planar-apis.html 4.4. Camera capture subdevs diff --git a/Documentation/video4linux/omap4_camera.txt b/Documentation/video4linux/omap4_camera.txt index 25d9b40a4651f0d2694819960384a48ffd98c5dc..a6734aa77242625d0bb9de6b4a946aea9b38f26c 100644 --- a/Documentation/video4linux/omap4_camera.txt +++ b/Documentation/video4linux/omap4_camera.txt @@ -47,7 +47,7 @@ Tested platforms File list --------- drivers/staging/media/omap4iss/ -include/media/omap4iss.h +include/linux/platform_data/media/omap4iss.h References ---------- diff --git a/Documentation/video4linux/si4713.txt b/Documentation/video4linux/si4713.txt index 2e7392a4fee1a736dcd6135f6f08341b38505e3b..2ddc6b095a760e80ed2202f96e045823f8b7f849 100644 --- a/Documentation/video4linux/si4713.txt +++ b/Documentation/video4linux/si4713.txt @@ -157,7 +157,7 @@ int main (int argc, char *argv[]) } The struct si4713_rnl and SI4713_IOC_MEASURE_RNL are defined under -include/media/si4713.h. +include/linux/platform_data/media/si4713.h. Stereo/Mono and RDS subchannels =============================== diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt index 75d5c18d689aaaaa14b7e365c9650d0502c95799..fa41608ab2b4f48c4ecb4c012be76b7802dc2958 100644 --- a/Documentation/video4linux/v4l2-framework.txt +++ b/Documentation/video4linux/v4l2-framework.txt @@ -295,16 +295,16 @@ module owner. This is done for you if you use the i2c helper functions. If integration with the media framework is needed, you must initialize the media_entity struct embedded in the v4l2_subdev struct (entity field) by -calling media_entity_init(): +calling media_entity_pads_init(), if the entity has pads: struct media_pad *pads = &my_sd->pads; int err; - err = media_entity_init(&sd->entity, npads, pads, 0); + err = media_entity_pads_init(&sd->entity, npads, pads); The pads array must have been previously initialized. There is no need to -manually set the struct media_entity type and name fields, but the revision -field must be initialized if needed. +manually set the struct media_entity function and name fields, but the +revision field must be initialized if needed. A reference to the entity will be automatically acquired/released when the subdev device node (if any) is opened/closed. @@ -695,12 +695,12 @@ difference is that the inode argument is omitted since it is never used. If integration with the media framework is needed, you must initialize the media_entity struct embedded in the video_device struct (entity field) by -calling media_entity_init(): +calling media_entity_pads_init(): struct media_pad *pad = &my_vdev->pad; int err; - err = media_entity_init(&vdev->entity, 1, pad, 0); + err = media_entity_pads_init(&vdev->entity, 1, pad); The pads array must have been previously initialized. There is no need to manually set the struct media_entity type and name fields. diff --git a/Documentation/video4linux/v4l2-pci-skeleton.c b/Documentation/video4linux/v4l2-pci-skeleton.c index 95ae8286009280a568e18a98cd6ecd0d59f1fc36..79af0c0410565f0eaae17b4be4e5185aff3f9ea8 100644 --- a/Documentation/video4linux/v4l2-pci-skeleton.c +++ b/Documentation/video4linux/v4l2-pci-skeleton.c @@ -163,11 +163,10 @@ static irqreturn_t skeleton_irq(int irq, void *dev_id) * minimum number: many DMA engines need a minimum of 2 buffers in the * queue and you need to have another available for userspace processing. */ -static int queue_setup(struct vb2_queue *vq, const void *parg, +static int queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], void *alloc_ctxs[]) { - const struct v4l2_format *fmt = parg; struct skeleton *skel = vb2_get_drv_priv(vq); skel->field = skel->format.field; @@ -183,12 +182,12 @@ static int queue_setup(struct vb2_queue *vq, const void *parg, if (vq->num_buffers + *nbuffers < 3) *nbuffers = 3 - vq->num_buffers; + alloc_ctxs[0] = skel->alloc_ctx; - if (fmt && fmt->fmt.pix.sizeimage < skel->format.sizeimage) - return -EINVAL; + if (*nplanes) + return sizes[0] < skel->format.sizeimage ? -EINVAL : 0; *nplanes = 1; - sizes[0] = fmt ? fmt->fmt.pix.sizeimage : skel->format.sizeimage; - alloc_ctxs[0] = skel->alloc_ctx; + sizes[0] = skel->format.sizeimage; return 0; } @@ -509,7 +508,7 @@ static int skeleton_s_dv_timings(struct file *file, void *_fh, return -EINVAL; /* Return 0 if the new timings are the same as the current timings. */ - if (v4l2_match_dv_timings(timings, &skel->timings, 0)) + if (v4l2_match_dv_timings(timings, &skel->timings, 0, false)) return 0; /* diff --git a/Documentation/zh_CN/video4linux/v4l2-framework.txt b/Documentation/zh_CN/video4linux/v4l2-framework.txt index 2b828e631e318c3cf2ace087b732bcaa18cb06ff..698660b7f21fd8d0453c12a1c29c0614557cf38c 100644 --- a/Documentation/zh_CN/video4linux/v4l2-framework.txt +++ b/Documentation/zh_CN/video4linux/v4l2-framework.txt @@ -289,13 +289,13 @@ struct v4l2_subdev_ops { 然后,你必须用一个唯一的名字初始化 subdev->name,并初始化模块的 owner 域。若使用 i2c 辅助函数,这些都会帮你处理好。 -若需同媒体框架整合,你必须调用 media_entity_init() 初始化 v4l2_subdev +若需同媒体框架整合,你必须调用 media_entity_pads_init() 初始化 v4l2_subdev 结构体中的 media_entity 结构体(entity 域): struct media_pad *pads = &my_sd->pads; int err; - err = media_entity_init(&sd->entity, npads, pads, 0); + err = media_entity_pads_init(&sd->entity, npads, pads); pads 数组必须预先初始化。无须手动设置 media_entity 的 type 和 name 域,但如有必要,revision 域必须初始化。 @@ -596,13 +596,13 @@ void v4l2_disable_ioctl(struct video_device *vdev, unsigned int cmd); v4l2_file_operations 结构体是 file_operations 的一个子集。其主要 区别在于:因 inode 参数从未被使用,它将被忽略。 -如果需要与媒体框架整合,你必须通过调用 media_entity_init() 初始化 +如果需要与媒体框架整合,你必须通过调用 media_entity_pads_init() 初始化 嵌入在 video_device 结构体中的 media_entity(entity 域)结构体: struct media_pad *pad = &my_vdev->pad; int err; - err = media_entity_init(&vdev->entity, 1, pad, 0); + err = media_entity_pads_init(&vdev->entity, 1, pad); pads 数组必须预先初始化。没有必要手动设置 media_entity 的 type 和 name 域。 diff --git a/MAINTAINERS b/MAINTAINERS index 8e92b45876740a39040aa7256cc7f7464567bd79..5696e08918f2df762fc0517100574a4d6511076f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -206,7 +206,7 @@ F: include/trace/events/9p.h A8293 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -371,6 +371,15 @@ ADDRESS SPACE LAYOUT RANDOMIZATION (ASLR) M: Jiri Kosina S: Maintained +ADF7242 IEEE 802.15.4 RADIO DRIVER +M: Michael Hennerich +W: https://wiki.analog.com/ADF7242 +W: http://ez.analog.com/community/linux-device-drivers +L: linux-wpan@vger.kernel.org +S: Supported +F: drivers/net/ieee802154/adf7242.c +F: Documentation/devicetree/bindings/net/ieee802154/adf7242.txt + ADM1025 HARDWARE MONITOR DRIVER M: Jean Delvare L: lm-sensors@lm-sensors.org @@ -388,14 +397,14 @@ ADM8211 WIRELESS DRIVER L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ S: Orphan -F: drivers/net/wireless/adm8211.* +F: drivers/net/wireless/admtek/adm8211.* ADP1653 FLASH CONTROLLER DRIVER M: Sakari Ailus L: linux-media@vger.kernel.org S: Maintained F: drivers/media/i2c/adp1653.c -F: include/media/adp1653.h +F: include/media/i2c/adp1653.h ADP5520 BACKLIGHT DRIVER WITH IO EXPANDER (ADP5520/ADP5501) M: Michael Hennerich @@ -466,7 +475,7 @@ F: sound/oss/aedsp16.c AF9013 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -476,7 +485,7 @@ F: drivers/media/dvb-frontends/af9013* AF9033 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -522,7 +531,7 @@ AIMSLAB FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-aimslab* @@ -536,7 +545,7 @@ F: include/linux/*aio*.h AIRSPY MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -614,9 +623,9 @@ F: drivers/crypto/ccp/ F: include/linux/ccp.h AMD FAM15H PROCESSOR POWER MONITORING DRIVER -M: Andreas Herrmann +M: Huang Rui L: lm-sensors@lm-sensors.org -S: Maintained +S: Supported F: Documentation/hwmon/fam15h_power F: drivers/hwmon/fam15h_power.c @@ -1773,7 +1782,7 @@ L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/i2c/as3645a.c -F: include/media/as3645a.h +F: include/media/i2c/as3645a.h ASC7621 HARDWARE MONITOR DRIVER M: George Joseph @@ -1896,7 +1905,6 @@ ATMEL AT91 / AT32 MCI DRIVER M: Ludovic Desroches S: Maintained F: drivers/mmc/host/atmel-mci.c -F: drivers/mmc/host/atmel-mci-regs.h ATMEL AT91 / AT32 SERIAL DRIVER M: Nicolas Ferre @@ -1993,7 +2001,7 @@ L: linux-wireless@vger.kernel.org W: http://www.thekelleys.org.uk/atmel W: http://atmelwlandriver.sourceforge.net/ S: Maintained -F: drivers/net/wireless/atmel* +F: drivers/net/wireless/atmel/atmel* ATMEL MAXTOUCH DRIVER M: Nick Dyer @@ -2064,7 +2072,7 @@ F: net/ax25/ AZ6007 DVB DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/usb/dvb-usb-v2/az6007.c @@ -2073,7 +2081,7 @@ AZTECH FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-aztech* @@ -2082,7 +2090,7 @@ L: linux-wireless@vger.kernel.org L: b43-dev@lists.infradead.org W: http://wireless.kernel.org/en/users/Drivers/b43 S: Odd Fixes -F: drivers/net/wireless/b43/ +F: drivers/net/wireless/broadcom/b43/ B43LEGACY WIRELESS DRIVER M: Larry Finger @@ -2090,7 +2098,7 @@ L: linux-wireless@vger.kernel.org L: b43-dev@lists.infradead.org W: http://wireless.kernel.org/en/users/Drivers/b43 S: Maintained -F: drivers/net/wireless/b43legacy/ +F: drivers/net/wireless/broadcom/b43legacy/ BACKLIGHT CLASS/SUBSYSTEM M: Jingoo Han @@ -2102,7 +2110,7 @@ F: include/linux/backlight.h BATMAN ADVANCED M: Marek Lindner M: Simon Wunderlich -M: Antonio Quartulli +M: Antonio Quartulli L: b.a.t.m.a.n@lists.open-mesh.org W: http://www.open-mesh.org/ S: Maintained @@ -2126,7 +2134,7 @@ BDISP ST MEDIA DRIVER M: Fabien Dessenne L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Supported F: drivers/media/platform/sti/bdisp @@ -2392,7 +2400,7 @@ M: Hante Meuleman L: linux-wireless@vger.kernel.org L: brcm80211-dev-list@broadcom.com S: Supported -F: drivers/net/wireless/brcm80211/ +F: drivers/net/wireless/broadcom/brcm80211/ BROADCOM BNX2FC 10 GIGABIT FCOE DRIVER M: QLogic-Storage-Upstream@qlogic.com @@ -2519,7 +2527,7 @@ F: fs/btrfs/ BTTV VIDEO4LINUX DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Odd fixes F: Documentation/video4linux/bttv/ @@ -2558,7 +2566,7 @@ CADET FM/AM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-cadet* @@ -2759,7 +2767,7 @@ S: Maintained F: Documentation/zh_CN/ CHIPIDEA USB HIGH SPEED DUAL ROLE CONTROLLER -M: Peter Chen +M: Peter Chen T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git L: linux-usb@vger.kernel.org S: Maintained @@ -2851,7 +2859,7 @@ COBALT MEDIA DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Supported F: drivers/media/pci/cobalt/ @@ -2939,7 +2947,8 @@ F: drivers/usb/atm/cxacru.c CONFIGFS M: Joel Becker -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/configfs.git +M: Christoph Hellwig +T: git git://git.infradead.org/users/hch/configfs.git S: Supported F: fs/configfs/ F: include/linux/configfs.h @@ -3096,6 +3105,15 @@ S: Maintained F: crypto/ansi_cprng.c F: crypto/rng.c +CS3308 MEDIA DRIVER +M: Hans Verkuil +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +W: http://linuxtv.org +S: Odd Fixes +F: drivers/media/i2c/cs3308.c +F: drivers/media/i2c/cs3308.h + CS5535 Audio ALSA driver M: Jaya Kumar S: Maintained @@ -3104,14 +3122,14 @@ F: sound/pci/cs5535audio/ CW1200 WLAN driver M: Solomon Peachy S: Maintained -F: drivers/net/wireless/cw1200/ +F: drivers/net/wireless/st/cw1200/ CX18 VIDEO4LINUX DRIVER M: Andy Walls L: ivtv-devel@ivtvdriver.org (subscribers-only) L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org W: http://www.ivtvdriver.org/index.php/Cx18 S: Maintained F: Documentation/video4linux/cx18.txt @@ -3122,7 +3140,7 @@ CX2341X MPEG ENCODER HELPER MODULE M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/common/cx2341x* F: include/media/cx2341x* @@ -3131,7 +3149,7 @@ CX24120 MEDIA DRIVER M: Jemma Denson M: Patrick Boettcher L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/dvb-frontends/cx24120* @@ -3139,7 +3157,7 @@ F: drivers/media/dvb-frontends/cx24120* CX88 VIDEO4LINUX DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Odd fixes F: Documentation/video4linux/cx88/ @@ -3148,7 +3166,7 @@ F: drivers/media/pci/cx88/ CXD2820R MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3254,7 +3272,7 @@ F: drivers/net/wan/pc300* CYPRESS_FIRMWARE MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3761,14 +3779,14 @@ DT3155 MEDIA DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/pci/dt3155/ DVB_USB_AF9015 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3778,7 +3796,7 @@ F: drivers/media/usb/dvb-usb-v2/af9015* DVB_USB_AF9035 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3788,7 +3806,7 @@ F: drivers/media/usb/dvb-usb-v2/af9035* DVB_USB_ANYSEE MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3798,7 +3816,7 @@ F: drivers/media/usb/dvb-usb-v2/anysee* DVB_USB_AU6610 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3808,7 +3826,7 @@ F: drivers/media/usb/dvb-usb-v2/au6610* DVB_USB_CE6230 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3818,7 +3836,7 @@ F: drivers/media/usb/dvb-usb-v2/ce6230* DVB_USB_CXUSB MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/media_tree.git @@ -3828,7 +3846,7 @@ F: drivers/media/usb/dvb-usb/cxusb* DVB_USB_EC168 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3838,7 +3856,7 @@ F: drivers/media/usb/dvb-usb-v2/ec168* DVB_USB_GL861 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git S: Maintained @@ -3847,7 +3865,7 @@ F: drivers/media/usb/dvb-usb-v2/gl861* DVB_USB_MXL111SF MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/mxl111sf.git @@ -3857,7 +3875,7 @@ F: drivers/media/usb/dvb-usb-v2/mxl111sf* DVB_USB_RTL28XXU MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3867,7 +3885,7 @@ F: drivers/media/usb/dvb-usb-v2/rtl28xxu* DVB_USB_V2 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3897,7 +3915,7 @@ F: Documentation/devicetree/bindings/input/e3x0-button.txt E4000 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3913,7 +3931,7 @@ F: drivers/scsi/eata.c EC100 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -3934,9 +3952,8 @@ M: Doug Thompson M: Borislav Petkov M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net -T: git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git#for-next -T: git://git.kernel.org/pub/linux/kernel/git/mchehab/linux-edac.git#linux_next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next S: Supported F: Documentation/edac.txt F: drivers/edac/ @@ -3946,7 +3963,6 @@ EDAC-AMD64 M: Doug Thompson M: Borislav Petkov L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/amd64_edac* @@ -3954,7 +3970,6 @@ EDAC-CALXEDA M: Doug Thompson M: Robert Richter L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/highbank* @@ -3963,7 +3978,6 @@ M: Ralf Baechle M: David Daney L: linux-edac@vger.kernel.org L: linux-mips@linux-mips.org -W: bluesmoke.sourceforge.net S: Supported F: drivers/edac/octeon_edac* @@ -3971,63 +3985,54 @@ EDAC-E752X M: Mark Gross M: Doug Thompson L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e752x_edac.c EDAC-E7XXX M: Doug Thompson L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e7xxx_edac.c EDAC-GHES M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/ghes_edac.c EDAC-I82443BXGX M: Tim Small L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82443bxgx_edac.c EDAC-I3000 M: Jason Uhlenkott L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i3000_edac.c EDAC-I5000 M: Doug Thompson L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5000_edac.c EDAC-I5400 M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5400_edac.c EDAC-I7300 M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i7300_edac.c EDAC-I7CORE M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i7core_edac.c @@ -4035,42 +4040,36 @@ EDAC-I82975X M: Ranganathan Desikan M: "Arvind R." L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82975x_edac.c EDAC-IE31200 M: Jason Baron L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/ie31200_edac.c EDAC-MPC85XX M: Johannes Thumshirn L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/mpc85xx_edac.[ch] EDAC-PASEMI M: Egor Martovetsky L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/pasemi_edac.c EDAC-R82600 M: Tim Small L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/r82600_edac.c EDAC-SBRIDGE M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org -W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/sb_edac.c @@ -4137,7 +4136,7 @@ F: drivers/net/ethernet/ibm/ehea/ EM28XX VIDEO4LINUX DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/usb/em28xx/ @@ -4277,7 +4276,7 @@ F: drivers/media/tuners/fc0011.c FC2580 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -4600,7 +4599,7 @@ M: Heungjun Kim L: linux-media@vger.kernel.org S: Maintained F: drivers/media/i2c/m5mols/ -F: include/media/m5mols.h +F: include/media/i2c/m5mols.h FUJITSU TABLET EXTRAS M: Robert Gerlach @@ -4646,7 +4645,7 @@ GEMTEK FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-gemtek* @@ -4854,7 +4853,7 @@ HDPVR USB VIDEO ENCODER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/usb/hdpvr/ @@ -4873,7 +4872,7 @@ F: drivers/tty/hvc/ HACKRF MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -4916,7 +4915,7 @@ F: sound/parisc/harmony.* HD29L2 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -4985,6 +4984,7 @@ F: arch/*/include/asm/suspend*.h HID CORE LAYER M: Jiri Kosina +R: Benjamin Tissoires L: linux-input@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git S: Maintained @@ -5039,13 +5039,20 @@ F: include/uapi/linux/if_hippi.h F: net/802/hippi.c F: drivers/net/hippi/ +HISILICON SAS Controller +M: John Garry +W: http://www.hisilicon.com +S: Supported +F: drivers/scsi/hisi_sas/ +F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt + HOST AP DRIVER M: Jouni Malinen L: hostap@shmoo.com (subscribers-only) L: linux-wireless@vger.kernel.org W: http://hostap.epitest.fi/ S: Maintained -F: drivers/net/wireless/hostap/ +F: drivers/net/wireless/intersil/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER L: platform-driver-x86@vger.kernel.org @@ -5283,6 +5290,13 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmveth.* +IBM Power SRIOV Virtual NIC Device Driver +M: Thomas Falcon +M: John Allen +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/ibm/ibmvnic.* + IBM Power Virtual SCSI Device Drivers M: Tyrel Datwyler L: linux-scsi@vger.kernel.org @@ -5578,7 +5592,7 @@ R: Jesse Brandeburg R: Shannon Nelson R: Carolyn Wyborny R: Don Skidmore -R: Matthew Vick +R: Bruce Allan R: John Ronciak R: Mitch Williams L: intel-wired-lan@lists.osuosl.org @@ -5613,7 +5627,7 @@ L: linux-wireless@vger.kernel.org S: Maintained F: Documentation/networking/README.ipw2100 F: Documentation/networking/README.ipw2200 -F: drivers/net/wireless/ipw2x00/ +F: drivers/net/wireless/intel/ipw2x00/ INTEL(R) TRACE HUB M: Alexander Shishkin @@ -5622,9 +5636,7 @@ F: Documentation/trace/intel_th.txt F: drivers/hwtracing/intel_th/ INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT) -M: Richard L Maliszewski -M: Gang Wei -M: Shane Wang +M: Ning Sun L: tboot-devel@lists.sourceforge.net W: http://tboot.sourceforge.net T: hg http://tboot.hg.sourceforge.net:8000/hgroot/tboot/tboot @@ -5647,7 +5659,7 @@ INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy) M: Stanislaw Gruszka L: linux-wireless@vger.kernel.org S: Supported -F: drivers/net/wireless/iwlegacy/ +F: drivers/net/wireless/intel/iwlegacy/ INTEL WIRELESS WIFI LINK (iwlwifi) M: Johannes Berg @@ -5657,7 +5669,7 @@ L: linux-wireless@vger.kernel.org W: http://intellinuxwireless.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git S: Supported -F: drivers/net/wireless/iwlwifi/ +F: drivers/net/wireless/intel/iwlwifi/ INTEL MANAGEMENT ENGINE (mei) M: Tomas Winkler @@ -5822,7 +5834,7 @@ ISA RADIO MODULE M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-isa* @@ -5892,7 +5904,7 @@ F: drivers/hwmon/it87.c IT913X MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -5913,7 +5925,7 @@ F: include/uapi/linux/ivtv* IX2505V MEDIA DRIVER M: Malcolm Priestley L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/dvb-frontends/ix2505v* @@ -6002,7 +6014,7 @@ KEENE FM RADIO TRANSMITTER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-keene* @@ -6255,7 +6267,7 @@ F: drivers/usb/misc/legousbtower.c LG2160 MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/tuners.git @@ -6265,7 +6277,7 @@ F: drivers/media/dvb-frontends/lg2160.* LGDT3305 MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/tuners.git @@ -6522,7 +6534,7 @@ F: drivers/hwmon/lm95234.c LME2510 MEDIA DRIVER M: Malcolm Priestley L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/usb/dvb-usb-v2/lmedm04* @@ -6628,7 +6640,7 @@ F: arch/m68k/hp300/ M88DS3103 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -6638,7 +6650,7 @@ F: drivers/media/dvb-frontends/m88ds3103* M88RS2000 MEDIA DRIVER M: Malcolm Priestley L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/dvb-frontends/m88rs2000* @@ -6709,7 +6721,7 @@ F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER L: libertas-dev@lists.infradead.org S: Orphan -F: drivers/net/wireless/libertas/ +F: drivers/net/wireless/marvell/libertas/ MARVELL MV643XX ETHERNET DRIVER M: Sebastian Hesselbarth @@ -6729,13 +6741,13 @@ M: Amitkumar Karwar M: Nishant Sarmukadam L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/mwifiex/ +F: drivers/net/wireless/marvell/mwifiex/ MARVELL MWL8K WIRELESS DRIVER M: Lennert Buytenhek L: linux-wireless@vger.kernel.org S: Odd Fixes -F: drivers/net/wireless/mwl8k.c +F: drivers/net/wireless/marvell/mwl8k.c MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER M: Nicolas Pitre @@ -6817,7 +6829,7 @@ MAXIRADIO FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-maxiradio* @@ -6839,7 +6851,7 @@ F: drivers/media/platform/vsp1/ MEDIA DRIVERS FOR ASCOT2E M: Sergey Kozlov L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org W: http://netup.tv/ T: git git://linuxtv.org/media_tree.git S: Supported @@ -6848,7 +6860,7 @@ F: drivers/media/dvb-frontends/ascot2e* MEDIA DRIVERS FOR CXD2841ER M: Sergey Kozlov L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://netup.tv/ T: git git://linuxtv.org/media_tree.git S: Supported @@ -6857,7 +6869,7 @@ F: drivers/media/dvb-frontends/cxd2841er* MEDIA DRIVERS FOR HORUS3A M: Sergey Kozlov L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://netup.tv/ T: git git://linuxtv.org/media_tree.git S: Supported @@ -6866,7 +6878,7 @@ F: drivers/media/dvb-frontends/horus3a* MEDIA DRIVERS FOR LNBH25 M: Sergey Kozlov L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://netup.tv/ T: git git://linuxtv.org/media_tree.git S: Supported @@ -6875,7 +6887,7 @@ F: drivers/media/dvb-frontends/lnbh25* MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices M: Sergey Kozlov L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://netup.tv/ T: git git://linuxtv.org/media_tree.git S: Supported @@ -6885,7 +6897,7 @@ MEDIA INPUT INFRASTRUCTURE (V4L/DVB) M: Mauro Carvalho Chehab P: LinuxTV.org Project L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org Q: http://patchwork.kernel.org/project/linux-media/list/ T: git git://linuxtv.org/media_tree.git S: Maintained @@ -6894,6 +6906,7 @@ F: Documentation/video4linux/ F: Documentation/DocBook/media/ F: drivers/media/ F: drivers/staging/media/ +F: include/linux/platform_data/media/ F: include/media/ F: include/uapi/linux/dvb/ F: include/uapi/linux/videodev2.h @@ -7045,7 +7058,7 @@ MIROSOUND PCM20 FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/radio/radio-miropcm20* @@ -7081,7 +7094,7 @@ F: drivers/iio/temperature/mlx90614.c MN88472 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -7092,7 +7105,7 @@ F: drivers/media/dvb-frontends/mn88472.h MN88473 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -7147,7 +7160,7 @@ F: drivers/platform/x86/msi-wmi.c MSI001 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -7157,7 +7170,7 @@ F: drivers/media/tuners/msi001* MSI2500 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -7176,7 +7189,7 @@ L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/i2c/mt9m032.c -F: include/media/mt9m032.h +F: include/media/i2c/mt9m032.h MT9P031 APTINA CAMERA SENSOR M: Laurent Pinchart @@ -7184,7 +7197,7 @@ L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/i2c/mt9p031.c -F: include/media/mt9p031.h +F: include/media/i2c/mt9p031.h MT9T001 APTINA CAMERA SENSOR M: Laurent Pinchart @@ -7192,7 +7205,7 @@ L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/i2c/mt9t001.c -F: include/media/mt9t001.h +F: include/media/i2c/mt9t001.h MT9V032 APTINA CAMERA SENSOR M: Laurent Pinchart @@ -7201,7 +7214,7 @@ T: git git://linuxtv.org/media_tree.git S: Maintained F: Documentation/devicetree/bindings/media/i2c/mt9v032.txt F: drivers/media/i2c/mt9v032.c -F: include/media/mt9v032.h +F: include/media/i2c/mt9v032.h MULTIFUNCTION DEVICES (MFD) M: Lee Jones @@ -7245,7 +7258,7 @@ F: drivers/usb/musb/ MXL5007T MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/tuners.git @@ -7370,6 +7383,13 @@ F: include/net/netrom.h F: include/uapi/linux/netrom.h F: net/netrom/ +NETRONOME ETHERNET DRIVERS +M: Jakub Kicinski +M: Rolf Neugebauer +L: oss-drivers@netronome.com +S: Maintained +F: drivers/net/ethernet/netronome/ + NETWORK BLOCK DEVICE (NBD) M: Markus Pargmann S: Maintained @@ -7501,7 +7521,12 @@ F: net/nfc/ F: include/net/nfc/ F: include/uapi/linux/nfc.h F: drivers/nfc/ +F: include/linux/platform_data/microread.h +F: include/linux/platform_data/nfcmrvl.h +F: include/linux/platform_data/nxp-nci.h F: include/linux/platform_data/pn544.h +F: include/linux/platform_data/st21nfca.h +F: include/linux/platform_data/st-nci.h F: Documentation/devicetree/bindings/net/nfc/ NFS, SUNRPC, AND LOCKD CLIENTS @@ -7951,7 +7976,7 @@ L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/en/users/Drivers/orinoco W: http://www.nongnu.org/orinoco/ S: Orphan -F: drivers/net/wireless/orinoco/ +F: drivers/net/wireless/intersil/orinoco/ OSD LIBRARY and FILESYSTEM M: Boaz Harrosh @@ -7977,7 +8002,7 @@ M: Christian Lamparter L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/en/users/Drivers/p54 S: Maintained -F: drivers/net/wireless/p54/ +F: drivers/net/wireless/intersil/p54/ PA SEMI ETHERNET DRIVER M: Olof Johansson @@ -8370,6 +8395,7 @@ F: drivers/pinctrl/intel/ PIN CONTROLLER - RENESAS M: Laurent Pinchart +M: Geert Uytterhoeven L: linux-sh@vger.kernel.org S: Maintained F: drivers/pinctrl/sh-pfc/ @@ -8381,6 +8407,14 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained F: drivers/pinctrl/samsung/ +PIN CONTROLLER - SINGLE +M: Tony Lindgren +M: Haojian Zhuang +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: linux-omap@vger.kernel.org +S: Maintained +F: drivers/pinctrl/pinctrl-single.c + PIN CONTROLLER - ST SPEAR M: Viresh Kumar L: spear-devel@list.st.com @@ -8440,6 +8474,17 @@ F: fs/timerfd.c F: include/linux/timer* F: kernel/time/*timer* +POWER MANAGEMENT CORE +M: "Rafael J. Wysocki" +L: linux-pm@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +S: Supported +F: drivers/base/power/ +F: include/linux/pm.h +F: include/linux/pm_* +F: include/linux/powercap.h +F: drivers/powercap/ + POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS M: Sebastian Reichel M: Dmitry Eremin-Solenikov @@ -8519,7 +8564,7 @@ M: "Luis R. Rodriguez" L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/en/users/Drivers/p54 S: Obsolete -F: drivers/net/wireless/prism54/ +F: drivers/net/wireless/intersil/prism54/ PS3 NETWORK SUPPORT M: Geoff Levand @@ -8632,6 +8677,7 @@ S: Maintained F: arch/arm/mach-pxa/ F: drivers/dma/pxa* F: drivers/pcmcia/pxa2xx* +F: drivers/pinctrl/pxa/ F: drivers/spi/spi-pxa2xx* F: drivers/usb/gadget/udc/pxa2* F: include/sound/pxa2xx-lib.h @@ -8738,7 +8784,7 @@ F: include/uapi/linux/qnxtypes.h QT1010 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -8822,7 +8868,7 @@ M: Stanislaw Gruszka M: Helmut Schaa L: linux-wireless@vger.kernel.org S: Maintained -F: drivers/net/wireless/rt2x00/ +F: drivers/net/wireless/ralink/rt2x00/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe @@ -8947,6 +8993,19 @@ F: drivers/rpmsg/ F: Documentation/rpmsg.txt F: include/linux/rpmsg.h +RENESAS ETHERNET DRIVERS +R: Sergei Shtylyov +L: netdev@vger.kernel.org +L: linux-sh@vger.kernel.org +F: drivers/net/ethernet/renesas/ +F: include/linux/sh_eth.h + +RENESAS USB2 PHY DRIVER +M: Yoshihiro Shimoda +L: linux-sh@vger.kernel.org +S: Maintained +F: drivers/phy/phy-rcar-gen3-usb2.c + RESET CONTROLLER FRAMEWORK M: Philipp Zabel S: Maintained @@ -9024,7 +9083,7 @@ F: net/rose/ RTL2830 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -9034,7 +9093,7 @@ F: drivers/media/dvb-frontends/rtl2830* RTL2832 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -9044,7 +9103,7 @@ F: drivers/media/dvb-frontends/rtl2832* RTL2832_SDR MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -9056,7 +9115,7 @@ L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git S: Orphan -F: drivers/net/wireless/rtl818x/rtl8180/ +F: drivers/net/wireless/realtek/rtl818x/rtl8180/ RTL8187 WIRELESS DRIVER M: Herton Ronaldo Krzesinski @@ -9066,7 +9125,7 @@ L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git S: Maintained -F: drivers/net/wireless/rtl818x/rtl8187/ +F: drivers/net/wireless/realtek/rtl818x/rtl8187/ RTL8192CE WIRELESS DRIVER M: Larry Finger @@ -9075,8 +9134,8 @@ L: linux-wireless@vger.kernel.org W: http://wireless.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git S: Maintained -F: drivers/net/wireless/rtlwifi/ -F: drivers/net/wireless/rtlwifi/rtl8192ce/ +F: drivers/net/wireless/realtek/rtlwifi/ +F: drivers/net/wireless/realtek/rtlwifi/rtl8192ce/ RTL8XXXU WIRELESS DRIVER (rtl8xxxu) M: Jes Sorensen @@ -9120,7 +9179,7 @@ F: drivers/s390/block/dasd* F: block/partitions/ibm.c S390 NETWORK DRIVERS -M: Ursula Braun +M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported @@ -9150,7 +9209,7 @@ S: Supported F: drivers/s390/scsi/zfcp_* S390 IUCV NETWORK LAYER -M: Ursula Braun +M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported @@ -9175,14 +9234,14 @@ SAA6588 RDS RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/i2c/saa6588* SAA7134 VIDEO4LINUX DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Odd fixes F: Documentation/video4linux/*.saa7134 @@ -9227,8 +9286,10 @@ F: drivers/regulator/s5m*.c F: drivers/clk/clk-s2mps11.c F: drivers/rtc/rtc-s5m.c F: include/linux/mfd/samsung/ -F: Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt -F: Documentation/devicetree/bindings/mfd/s2mp*.txt +F: Documentation/devicetree/bindings/mfd/samsung,sec-core.txt +F: Documentation/devicetree/bindings/regulator/samsung,s2m*.txt +F: Documentation/devicetree/bindings/regulator/samsung,s5m*.txt +F: Documentation/devicetree/bindings/clock/samsung,s2mps11.txt SAMSUNG S5P/EXYNOS4 SOC SERIES CAMERA SUBSYSTEM DRIVERS M: Kyungmin Park @@ -9244,7 +9305,7 @@ L: linux-media@vger.kernel.org L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained F: drivers/media/platform/s3c-camif/ -F: include/media/s3c_camif.h +F: include/media/drv-intf/s3c_camif.h SAMSUNG S5C73M3 CAMERA DRIVER M: Kyungmin Park @@ -9352,7 +9413,7 @@ M: Andreas Noever S: Maintained F: drivers/thunderbolt/ -TIMEKEEPING, CLOCKSOURCE CORE, NTP +TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER M: John Stultz M: Thomas Gleixner L: linux-kernel@vger.kernel.org @@ -9365,6 +9426,7 @@ F: include/uapi/linux/time.h F: include/uapi/linux/timex.h F: kernel/time/clocksource.c F: kernel/time/time*.c +F: kernel/time/alarmtimer.c F: kernel/time/ntp.c F: tools/testing/selftests/timers/ @@ -9619,7 +9681,7 @@ F: drivers/misc/sgi-xp/ SI2157 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -9629,7 +9691,7 @@ F: drivers/media/tuners/si2157* SI2168 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -9640,7 +9702,7 @@ SI470X FM RADIO RECEIVER I2C DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/radio/si470x/radio-si470x-i2c.c @@ -9648,7 +9710,7 @@ SI470X FM RADIO RECEIVER USB DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/si470x/radio-si470x-common.c F: drivers/media/radio/si470x/radio-si470x.h @@ -9658,7 +9720,7 @@ SI4713 FM RADIO TRANSMITTER I2C DRIVER M: Eduardo Valentin L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/radio/si4713/si4713.? @@ -9666,7 +9728,7 @@ SI4713 FM RADIO TRANSMITTER PLATFORM DRIVER M: Eduardo Valentin L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/radio/si4713/radio-platform-si4713.c @@ -9674,14 +9736,14 @@ SI4713 FM RADIO TRANSMITTER USB DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/si4713/radio-usb-si4713.c SIANO DVB DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Odd fixes F: drivers/media/common/siano/ @@ -9706,7 +9768,7 @@ SH_VOU V4L2 OUTPUT DRIVER L: linux-media@vger.kernel.org S: Orphan F: drivers/media/platform/sh_vou.c -F: include/media/sh_vou.h +F: include/media/drv-intf/sh_vou.h SIMPLE FIRMWARE INTERFACE (SFI) M: Len Brown @@ -9747,7 +9809,7 @@ F: drivers/i2c/busses/i2c-davinci.c TI DAVINCI SERIES MEDIA DRIVER M: "Lad, Prabhakar" L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git S: Maintained @@ -9757,7 +9819,7 @@ F: include/media/davinci/ TI AM437X VPFE DRIVER M: "Lad, Prabhakar" L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git S: Maintained @@ -9766,12 +9828,12 @@ F: drivers/media/platform/am437x/ OV2659 OMNIVISION SENSOR DRIVER M: "Lad, Prabhakar" L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git S: Maintained F: drivers/media/i2c/ov2659.c -F: include/media/ov2659.h +F: include/media/i2c/ov2659.h SILICON MOTION SM712 FRAME BUFFER DRIVER M: Sudip Mukherjee @@ -9860,7 +9922,7 @@ M: Sakari Ailus L: linux-media@vger.kernel.org S: Maintained F: drivers/media/i2c/smiapp/ -F: include/media/smiapp.h +F: include/media/i2c/smiapp.h F: drivers/media/i2c/smiapp-pll.c F: drivers/media/i2c/smiapp-pll.h F: include/uapi/linux/smiapp.h @@ -10024,7 +10086,7 @@ F: sound/soc/soc-generic-dmaengine-pcm.c SP2 MEDIA DRIVER M: Olli Salonen L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/dvb-frontends/sp2* @@ -10386,7 +10448,7 @@ F: net/ipv4/tcp_lp.c TDA10071 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -10396,7 +10458,7 @@ F: drivers/media/dvb-frontends/tda10071* TDA18212 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -10406,7 +10468,7 @@ F: drivers/media/tuners/tda18212* TDA18218 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -10416,7 +10478,7 @@ F: drivers/media/tuners/tda18218* TDA18271 MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/tuners.git @@ -10426,7 +10488,7 @@ F: drivers/media/tuners/tda18271* TDA827x MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/tuners.git @@ -10436,7 +10498,7 @@ F: drivers/media/tuners/tda8290.* TDA8290 MEDIA DRIVER M: Michael Krufky L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://github.com/mkrufky Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/mkrufky/tuners.git @@ -10447,14 +10509,14 @@ TDA9840 MEDIA DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/i2c/tda9840* TEA5761 TUNER DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Odd fixes F: drivers/media/tuners/tea5761.* @@ -10462,7 +10524,7 @@ F: drivers/media/tuners/tea5761.* TEA5767 TUNER DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/tuners/tea5767.* @@ -10471,7 +10533,7 @@ TEA6415C MEDIA DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/i2c/tea6415c* @@ -10479,7 +10541,7 @@ TEA6420 MEDIA DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/i2c/tea6420* @@ -10577,7 +10639,7 @@ THANKO'S RAREMONO AM/FM/SW RADIO RECEIVER USB DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/radio/radio-raremono.c @@ -10801,7 +10863,7 @@ M: Mats Randgaard L: linux-media@vger.kernel.org S: Maintained F: drivers/media/i2c/tc358743* -F: include/media/tc358743.h +F: include/media/i2c/tc358743.h TMIO MMC DRIVER M: Ian Molton @@ -10829,7 +10891,7 @@ F: mm/shmem.c TM6000 VIDEO4LINUX DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Odd fixes F: drivers/media/usb/tm6000/ @@ -10838,7 +10900,7 @@ TW68 VIDEO4LINUX DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/pci/tw68/ @@ -10899,7 +10961,7 @@ F: include/uapi/linux/tty.h TUA9001 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org -W: http://linuxtv.org/ +W: https://linuxtv.org W: http://palosaari.fi/linux/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ T: git git://linuxtv.org/anttip/media_tree.git @@ -11101,6 +11163,7 @@ F: include/linux/usb/gadget* USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...) M: Jiri Kosina +R: Benjamin Tissoires L: linux-usb@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git S: Maintained @@ -11142,7 +11205,7 @@ F: Documentation/usb/ohci.txt F: drivers/usb/host/ohci* USB OTG FSM (Finite State Machine) -M: Peter Chen +M: Peter Chen T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git L: linux-usb@vger.kernel.org S: Maintained @@ -11178,6 +11241,13 @@ L: linux-usb@vger.kernel.org S: Supported F: drivers/usb/class/usblp.c +USB QMI WWAN NETWORK DRIVER +M: Bjørn Mork +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/ABI/testing/sysfs-class-net-qmi +F: drivers/net/usb/qmi_wwan.c + USB RTL8150 DRIVER M: Petko Manolov L: linux-usb@vger.kernel.org @@ -11246,7 +11316,7 @@ USB VISION DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Odd Fixes F: drivers/media/usb/usbvision/ @@ -11274,7 +11344,7 @@ USB ZD1201 DRIVER L: linux-wireless@vger.kernel.org W: http://linux-lc100020.sourceforge.net S: Orphan -F: drivers/net/wireless/zd1201.* +F: drivers/net/wireless/zydas/zd1201.* USB ZR364XX DRIVER M: Antoine Jacquet @@ -11460,7 +11530,7 @@ VIVID VIRTUAL VIDEO DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org T: git git://linuxtv.org/media_tree.git -W: http://linuxtv.org +W: https://linuxtv.org S: Maintained F: drivers/media/platform/vivid/* @@ -11677,6 +11747,7 @@ F: drivers/input/touchscreen/wm831x-ts.c F: drivers/input/touchscreen/wm97*.c F: drivers/mfd/arizona* F: drivers/mfd/wm*.c +F: drivers/mfd/cs47l24* F: drivers/power/wm83*.c F: drivers/rtc/rtc-wm83*.c F: drivers/regulator/wm8*.c @@ -11690,6 +11761,7 @@ F: include/linux/wm97xx.h F: include/sound/wm????.h F: sound/soc/codecs/arizona.? F: sound/soc/codecs/wm* +F: sound/soc/codecs/cs47l24* WORKQUEUE M: Tejun Heo @@ -11749,7 +11821,7 @@ F: arch/x86/entry/vdso/ XC2028/3028 TUNER DRIVER M: Mauro Carvalho Chehab L: linux-media@vger.kernel.org -W: http://linuxtv.org +W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/tuners/tuner-xc2028.* @@ -11905,7 +11977,7 @@ W: http://zd1211.ath.cx/wiki/DriverRewrite L: linux-wireless@vger.kernel.org L: zd1211-devs@lists.sourceforge.net (subscribers-only) S: Maintained -F: drivers/net/wireless/zd1211rw/ +F: drivers/net/wireless/zydas/zd1211rw/ ZPOOL COMPRESSED PAGE STORAGE API M: Dan Streetman @@ -11918,7 +11990,7 @@ ZR36067 VIDEO FOR LINUX DRIVER L: mjpeg-users@lists.sourceforge.net L: linux-media@vger.kernel.org W: http://mjpeg.sourceforge.net/driver-zoran/ -T: hg http://linuxtv.org/hg/v4l-dvb +T: hg https://linuxtv.org/hg/v4l-dvb S: Odd Fixes F: drivers/media/pci/zoran/ diff --git a/Makefile b/Makefile index bc0165d0f5cf30ea0faa7325fe96fd1b99b428fa..70dea02f1346d4fa8253fb4d22aac60c7718e0c7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = NAME = Blurry Fish Butt # *DOCUMENTATION* diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 9a20821b111c65aecc5b60a7956fb00dde178aae..c5fb9e6bc3a51df64245d31987373aa37930888c 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 2fd00b7077e41f17c654fac08c732f2ed62e1b1b..936bc8f89a679f2f8aeb2f7e3cb41f11190a2d77 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 2c2ac3f3ff80370bd6bad9fffc9dfb8c8f6931e8..6312f607932fd2241782827cce2ec7968ea3c8b6 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -445,6 +445,7 @@ config LINUX_LINK_BASE However some customers have peripherals mapped at this addr, so Linux needs to be scooted a bit. If you don't know what the above means, leave this setting alone. + This needs to match memory start address specified in Device Tree config HIGHMEM bool "High Memory Support" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index cf0cf34eeb24b14ef070735e83d25b0f7f6aecec..aeb19021099e315967ba95524e9528c48da0d159 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -81,7 +81,7 @@ endif LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel -KBUILD_CFLAGS_MODULE += -mlong-calls +KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode # Finally dump eveything into kernel build system KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index f3db32154973718268c1998e9f16bb03d1af7e7f..44a578c10732cd1ab79558415e3c66f6825c98bd 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -46,6 +46,7 @@ ethernet@0x18000 { snps,pbl = < 32 >; clocks = <&apbclk>; clock-names = "stmmaceth"; + max-speed = <100>; }; ehci@0x40000 { diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index b0eb0e7fe21d8a66b1d0ef5267f24dd6a1f8f254..fc81879bc1f5800e5efe8eb43395eb2c5509f481 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -17,7 +17,8 @@ / { memory { device_type = "memory"; - reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */ + /* CONFIG_LINUX_LINK_BASE needs to match low mem start */ + reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MB low mem */ 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ }; diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index abf06e81c9290f6eafc441b563061e3389032922..210ef3e7233224b099bd40e85106d2d864e70987 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -62,9 +62,7 @@ extern int ioc_exists; #define ARC_REG_IC_IVIC 0x10 #define ARC_REG_IC_CTRL 0x11 #define ARC_REG_IC_IVIL 0x19 -#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4) #define ARC_REG_IC_PTAG 0x1E -#endif #define ARC_REG_IC_PTAG_HI 0x1F /* Bit val in IC_CTRL */ diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index 6ff657a904b61e700421b60423991273df93e1e1..c28e6c347b4900217ad48053c69679bb3da8b607 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -23,7 +23,7 @@ * @dt_compat: Array of device tree 'compatible' strings * (XXX: although only 1st entry is looked at) * @init_early: Very early callback [called from setup_arch()] - * @init_cpu_smp: for each CPU as it is coming up (SMP as well as UP) + * @init_per_cpu: for each CPU as it is coming up (SMP as well as UP) * [(M):init_IRQ(), (o):start_kernel_secondary()] * @init_machine: arch initcall level callback (e.g. populate static * platform devices or parse Devicetree) @@ -35,7 +35,7 @@ struct machine_desc { const char **dt_compat; void (*init_early)(void); #ifdef CONFIG_SMP - void (*init_cpu_smp)(unsigned int); + void (*init_per_cpu)(unsigned int); #endif void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 133c867d15af0a627576d9faf89c0569220a6f47..991380438d6bfd6af03e6fff4748b8c5342b6a70 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -48,7 +48,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * @init_early_smp: A SMP specific h/w block can init itself * Could be common across platforms so not covered by * mach_desc->init_early() - * @init_irq_cpu: Called for each core so SMP h/w block driver can do + * @init_per_cpu: Called for each core so SMP h/w block driver can do * any needed setup per cpu (e.g. IPI request) * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu @@ -57,7 +57,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq); struct plat_smp_ops { const char *info; void (*init_early_smp)(void); - void (*init_irq_cpu)(int cpu); + void (*init_per_cpu)(int cpu); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/arch/arc/include/asm/unwind.h b/arch/arc/include/asm/unwind.h index 7ca628b6ee2aa6d1e02f6ad01363990439ec10b7..c11a25bb8158dee7f60a5f00ca8ec0addaf54b56 100644 --- a/arch/arc/include/asm/unwind.h +++ b/arch/arc/include/asm/unwind.h @@ -112,7 +112,6 @@ struct unwind_frame_info { extern int arc_unwind(struct unwind_frame_info *frame); extern void arc_unwind_init(void); -extern void arc_unwind_setup(void); extern void *unwind_add_table(struct module *module, const void *table_start, unsigned long table_size); extern void unwind_remove_table(void *handle, int init_only); @@ -152,9 +151,6 @@ static inline void arc_unwind_init(void) { } -static inline void arc_unwind_setup(void) -{ -} #define unwind_add_table(a, b, c) #define unwind_remove_table(a, b) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 26c15682747960d3b5b1c412f76dff3c2ebc7780..0394f9f61b466dea018af3ec371c65a8dbc17acb 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -106,10 +106,21 @@ static struct irq_chip arcv2_irq_chip = { static int arcv2_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - if (irq == TIMER0_IRQ || irq == IPI_IRQ) + /* + * core intc IRQs [16, 23]: + * Statically assigned always private-per-core (Timers, WDT, IPI, PCT) + */ + if (hw < 24) { + /* + * A subsequent request_percpu_irq() fails if percpu_devid is + * not set. That in turns sets NOAUTOEN, meaning each core needs + * to call enable_percpu_irq() + */ + irq_set_percpu_devid(irq); irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq); - else + } else { irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq); + } return 0; } diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 2ee226546c6a821f739a079326ed92ad52fe16b8..ba17f85285cfe85c53ba6c93db08d620c7109fb9 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -29,11 +29,11 @@ void __init init_IRQ(void) #ifdef CONFIG_SMP /* a SMP H/w block could do IPI IRQ request here */ - if (plat_smp_ops.init_irq_cpu) - plat_smp_ops.init_irq_cpu(smp_processor_id()); + if (plat_smp_ops.init_per_cpu) + plat_smp_ops.init_per_cpu(smp_processor_id()); - if (machine_desc->init_cpu_smp) - machine_desc->init_cpu_smp(smp_processor_id()); + if (machine_desc->init_per_cpu) + machine_desc->init_per_cpu(smp_processor_id()); #endif } @@ -51,6 +51,18 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs) set_irq_regs(old_regs); } +/* + * API called for requesting percpu interrupts - called by each CPU + * - For boot CPU, actually request the IRQ with genirq core + enables + * - For subsequent callers only enable called locally + * + * Relies on being called by boot cpu first (i.e. request called ahead) of + * any enable as expected by genirq. Hence Suitable only for TIMER, IPI + * which are guaranteed to be setup on boot core first. + * Late probed peripherals such as perf can't use this as there no guarantee + * of being called on boot CPU first. + */ + void arc_request_percpu_irq(int irq, int cpu, irqreturn_t (*isr)(int irq, void *dev), const char *irq_nm, @@ -60,14 +72,17 @@ void arc_request_percpu_irq(int irq, int cpu, if (!cpu) { int rc; +#ifdef CONFIG_ISA_ARCOMPACT /* - * These 2 calls are essential to making percpu IRQ APIs work - * Ideally these details could be hidden in irq chip map function - * but the issue is IPIs IRQs being static (non-DT) and platform - * specific, so we can't identify them there. + * A subsequent request_percpu_irq() fails if percpu_devid is + * not set. That in turns sets NOAUTOEN, meaning each core needs + * to call enable_percpu_irq() + * + * For ARCv2, this is done in irq map function since we know + * which irqs are strictly per cpu */ irq_set_percpu_devid(irq); - irq_modify_status(irq, IRQ_NOAUTOEN, 0); /* @irq, @clr, @set */ +#endif rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev); if (rc) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 74a9b074ac3e4e64d97ef8e069f73a9104531682..bd237acdf4f2f9601efbf4c25c45067ddd2b69d9 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -132,7 +132,7 @@ static void mcip_probe_n_setup(void) struct plat_smp_ops plat_smp_ops = { .info = smp_cpuinfo_buf, .init_early_smp = mcip_probe_n_setup, - .init_irq_cpu = mcip_setup_per_cpu, + .init_per_cpu = mcip_setup_per_cpu, .ipi_send = mcip_ipi_send, .ipi_clear = mcip_ipi_clear, }; diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 0c08bb1ce15aab114c344609879eaa86bfa59a9d..8b134cfe5e1f11023b559639497f7e1a35d2ee79 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -428,12 +428,11 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) #endif /* CONFIG_ISA_ARCV2 */ -void arc_cpu_pmu_irq_init(void) +static void arc_cpu_pmu_irq_init(void *data) { - struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + int irq = *(int *)data; - arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr, - "ARC perf counters", pmu_cpu); + enable_percpu_irq(irq, IRQ_TYPE_NONE); /* Clear all pending interrupt flags */ write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); @@ -515,7 +514,6 @@ static int arc_pmu_device_probe(struct platform_device *pdev) if (has_interrupts) { int irq = platform_get_irq(pdev, 0); - unsigned long flags; if (irq < 0) { pr_err("Cannot get IRQ number for the platform\n"); @@ -524,24 +522,12 @@ static int arc_pmu_device_probe(struct platform_device *pdev) arc_pmu->irq = irq; - /* - * arc_cpu_pmu_irq_init() needs to be called on all cores for - * their respective local PMU. - * However we use opencoded on_each_cpu() to ensure it is called - * on core0 first, so that arc_request_percpu_irq() sets up - * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable - * perf IRQ on non master cores. - * see arc_request_percpu_irq() - */ - preempt_disable(); - local_irq_save(flags); - arc_cpu_pmu_irq_init(); - local_irq_restore(flags); - smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1); - preempt_enable(); - - /* Clean all pending interrupt flags */ - write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); + /* intc map function ensures irq_set_percpu_devid() called */ + request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", + this_cpu_ptr(&arc_pmu_cpu)); + + on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); + } else arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index c33e77c0ad3e9eb60367b6c2510c4e35c60d3b52..e1b87444ea9a0740b9651ad6b68fd39f701121a0 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -429,7 +429,6 @@ void __init setup_arch(char **cmdline_p) #endif arc_unwind_init(); - arc_unwind_setup(); } static int __init customize_machine(void) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 580587805fa302d0d28f7adc89434b920b3be651..ef6e9e15b82abf72d946ab45c3e20096854605c5 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -132,11 +132,11 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); /* Some SMP H/w setup - for each cpu */ - if (plat_smp_ops.init_irq_cpu) - plat_smp_ops.init_irq_cpu(cpu); + if (plat_smp_ops.init_per_cpu) + plat_smp_ops.init_per_cpu(cpu); - if (machine_desc->init_cpu_smp) - machine_desc->init_cpu_smp(cpu); + if (machine_desc->init_per_cpu) + machine_desc->init_per_cpu(cpu); arc_local_timer_setup(); diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 7352475451f6c3798885c97b04c11ddd53146990..0587bf121d116052129faff03e83d9b1ae87d423 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -170,6 +170,23 @@ static struct unwind_table *find_table(unsigned long pc) static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType); +static void init_unwind_hdr(struct unwind_table *table, + void *(*alloc) (unsigned long)); + +/* + * wrappers for header alloc (vs. calling one vs. other at call site) + * to elide section mismatches warnings + */ +static void *__init unw_hdr_alloc_early(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, sizeof(unsigned int), + MAX_DMA_ADDRESS); +} + +static void *unw_hdr_alloc(unsigned long sz) +{ + return kmalloc(sz, GFP_KERNEL); +} static void init_unwind_table(struct unwind_table *table, const char *name, const void *core_start, unsigned long core_size, @@ -209,6 +226,8 @@ void __init arc_unwind_init(void) __start_unwind, __end_unwind - __start_unwind, NULL, 0); /*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/ + + init_unwind_hdr(&root_table, unw_hdr_alloc_early); } static const u32 bad_cie, not_fde; @@ -241,8 +260,8 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) e2->fde = v; } -static void __init setup_unwind_table(struct unwind_table *table, - void *(*alloc) (unsigned long)) +static void init_unwind_hdr(struct unwind_table *table, + void *(*alloc) (unsigned long)) { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; @@ -277,10 +296,10 @@ static void __init setup_unwind_table(struct unwind_table *table, if (cie == ¬_fde) continue; if (cie == NULL || cie == &bad_cie) - return; + goto ret_err; ptrType = fde_pointer_type(cie); if (ptrType < 0) - return; + goto ret_err; ptr = (const u8 *)(fde + 2); if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, @@ -296,13 +315,15 @@ static void __init setup_unwind_table(struct unwind_table *table, } if (tableSize || !n) - return; + goto ret_err; hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + 2 * n * sizeof(unsigned long); + header = alloc(hdrSize); if (!header) - return; + goto ret_err; + header->version = 1; header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native; header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4; @@ -340,18 +361,10 @@ static void __init setup_unwind_table(struct unwind_table *table, table->hdrsz = hdrSize; smp_wmb(); table->header = (const void *)header; -} - -static void *__init balloc(unsigned long sz) -{ - return __alloc_bootmem_nopanic(sz, - sizeof(unsigned int), - __pa(MAX_DMA_ADDRESS)); -} + return; -void __init arc_unwind_setup(void) -{ - setup_unwind_table(&root_table, balloc); +ret_err: + panic("Attention !!! Dwarf FDE parsing errors\n");; } #ifdef CONFIG_MODULES @@ -372,11 +385,13 @@ void *unwind_add_table(struct module *module, const void *table_start, return NULL; init_unwind_table(table, module->name, - module->module_core, module->core_size, - module->module_init, module->init_size, + module->core_layout.base, module->core_layout.size, + module->init_layout.base, module->init_layout.size, table_start, table_size, NULL, 0); + init_unwind_hdr(table, unw_hdr_alloc); + #ifdef UNWIND_DEBUG unw_debug("Table added for [%s] %lx %lx\n", module->name, table->core.pc, table->core.range); @@ -439,6 +454,7 @@ void unwind_remove_table(void *handle, int init_only) info.init_only = init_only; unlink_table(&info); /* XXX: SMP */ + kfree(table->header); kfree(table); } @@ -588,9 +604,6 @@ static signed fde_pointer_type(const u32 *cie) const u8 *ptr = (const u8 *)(cie + 2); unsigned version = *ptr; - if (version != 1) - return -1; /* unsupported */ - if (*++ptr) { const char *aug; const u8 *end = (const u8 *)(cie + 1) + *cie; @@ -1002,9 +1015,7 @@ int arc_unwind(struct unwind_frame_info *frame) ptr = (const u8 *)(cie + 2); end = (const u8 *)(cie + 1) + *cie; frame->call_frame = 1; - if ((state.version = *ptr) != 1) - cie = NULL; /* unsupported version */ - else if (*++ptr) { + if (*++ptr) { /* check if augmentation size is first (thus present) */ if (*ptr == 'z') { while (++ptr < end && *ptr) { diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index 065ee6bfa82a2463ee60c790f08c5b60382715f3..92dd92cad7f92db32f9ecc1b73992e1ea0a5f74d 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -111,7 +111,7 @@ void __kunmap_atomic(void *kv) } EXPORT_SYMBOL(__kunmap_atomic); -noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr) +static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) { pgd_t *pgd_k; pud_t *pud_k; @@ -127,7 +127,7 @@ noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr) return pte_k; } -void kmap_init(void) +void __init kmap_init(void) { /* Due to recursive include hell, we can't do this in processor.h */ BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index a9305b5a2cd4ba091f7ae18914f6ef5e64d5236c..7d2c4fbf4f22eb1402bc666c077c652c40f38361 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -51,7 +51,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) int in_use = 0; if (!low_mem_sz) { - BUG_ON(base != low_mem_start); + if (base != low_mem_start) + panic("CONFIG_LINUX_LINK_BASE != DT memory { }"); + low_mem_sz = size; in_use = 1; } else { diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 34e1569a11ee322a0a020bdfe0c9b801b6b58c12..84b1b21b08aee0c756538b311dd3b1f55fc64965 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2,6 +2,7 @@ config ARM bool default y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H @@ -20,6 +21,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW @@ -33,10 +35,11 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 - select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK + select HAVE_ARM_SMCCC if CPU_V7 select HAVE_BPF_JIT select HAVE_CC_STACKPROTECTOR select HAVE_CONTEXT_TRACKING @@ -45,7 +48,7 @@ config ARM select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if MMU - select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 + select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) @@ -611,6 +614,7 @@ config ARCH_PXA select AUTO_ZRELADDR select COMMON_CLK select CLKDEV_LOOKUP + select CLKSRC_PXA select CLKSRC_MMIO select CLKSRC_OF select GENERIC_CLOCKEVENTS @@ -650,6 +654,8 @@ config ARCH_SA1100 select ARCH_SPARSEMEM_ENABLE select CLKDEV_LOOKUP select CLKSRC_MMIO + select CLKSRC_PXA + select CLKSRC_OF if OF select CPU_FREQ select CPU_SA1100 select GENERIC_CLOCKEVENTS @@ -799,6 +805,7 @@ config ARCH_VIRT bool "Dummy Virtual Machine" if ARCH_MULTI_V7 select ARM_AMBA select ARM_GIC + select ARM_GIC_V2M if PCI_MSI select ARM_GIC_V3 select ARM_PSCI select HAVE_ARM_ARCH_TIMER @@ -1422,7 +1429,7 @@ config BIG_LITTLE config BL_SWITCHER bool "big.LITTLE switcher support" - depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU && ARM_GIC select ARM_CPU_SUSPEND select CPU_PM help @@ -1481,7 +1488,7 @@ config HOTPLUG_CPU config ARM_PSCI bool "Support for the ARM Power State Coordination Interface (PSCI)" - depends on CPU_V7 + depends on HAVE_ARM_SMCCC select ARM_PSCI_FW help Say Y here if you want Linux to communicate with system firmware @@ -1604,6 +1611,24 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 config ARM_ASM_UNIFIED bool +config ARM_PATCH_IDIV + bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" + depends on CPU_32v7 && !XIP_KERNEL + default y + help + The ARM compiler inserts calls to __aeabi_idiv() and + __aeabi_uidiv() when it needs to perform division on signed + and unsigned integers. Some v7 CPUs have support for the sdiv + and udiv instructions that can be used to implement those + functions. + + Enabling this option allows the kernel to modify itself to + replace the first two instructions of these library functions + with the sdiv or udiv plus "bx lr" instructions when the CPU + it is running on supports them. Typically this will be faster + and less power intensive than running the original library + code to do integer division. + config AEABI bool "Use the ARM EABI to compile the kernel" help @@ -1800,6 +1825,25 @@ config SWIOTLB config IOMMU_HELPER def_bool SWIOTLB +config PARAVIRT + bool "Enable paravirtualization code" + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + select PARAVIRT + default n + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config XEN_DOM0 def_bool y depends on XEN @@ -1813,6 +1857,7 @@ config XEN select ARCH_DMA_ADDR_T_64BIT select ARM_PSCI select SWIOTLB_XEN + select PARAVIRT help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. @@ -2040,6 +2085,25 @@ config AUTO_ZRELADDR 0xf8000000. This assumes the zImage being placed in the first 128MB from start of memory. +config EFI_STUB + bool + +config EFI + bool "UEFI runtime support" + depends on OF && !CPU_BIG_ENDIAN && MMU && AUTO_ZRELADDR && !XIP_KERNEL + select UCS2_STRING + select EFI_PARAMS_FROM_FDT + select EFI_STUB + select EFI_ARMSTUB + select EFI_RUNTIME_WRAPPERS + ---help--- + This option provides support for runtime services provided + by UEFI firmware (such as non-volatile variables, realtime + clock, and platform reset). A UEFI stub is also provided to + allow the kernel to be booted as an EFI application. This + is only useful for kernels that may run on systems that have + UEFI firmware. + endmenu menu "CPU Power Management" diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 259c0ca9c99a8f510410d3b1e9f2dd40707555ca..e356357d86bb94c3bc0be665c0b8cbd25be5fd1f 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -15,20 +15,6 @@ config ARM_PTDUMP kernel. If in doubt, say "N" -config STRICT_DEVMEM - bool "Filter access to /dev/mem" - depends on MMU - ---help--- - If this option is disabled, you allow userspace (root) access to all - of memory, including kernel and userspace memory. Accidental - access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. - - If this option is switched on, the /dev/mem file only allows - userspace access to memory mapped peripherals. - - If in doubt, say Y. - # RMK wants arm kernels compiled with frame pointers or stack unwinding. # If you know what you are doing and are willing to live without stack # traces, you can get a slightly smaller kernel by setting this option to diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 3f9a9ebc77c389bbf794ee5a6840ceb0294b63f3..4c23a68a0917049e4c7e9e864dcebca38d6b75b2 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -167,9 +167,11 @@ if [ $(words $(ZRELADDR)) -gt 1 -a "$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \ false; \ fi +efi-obj-$(CONFIG_EFI_STUB) := $(objtree)/drivers/firmware/efi/libstub/lib.a + $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \ - $(bswapsdi2) FORCE + $(bswapsdi2) $(efi-obj-y) FORCE @$(check_for_multiple_zreladdr) $(call if_changed,ld) @$(check_for_bad_syms) diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S new file mode 100644 index 0000000000000000000000000000000000000000..9d5dc4fda3c16710c0443b6a5043f212230f057d --- /dev/null +++ b/arch/arm/boot/compressed/efi-header.S @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2013-2015 Linaro Ltd + * Authors: Roy Franz + * Ard Biesheuvel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + .macro __nop +#ifdef CONFIG_EFI_STUB + @ This is almost but not quite a NOP, since it does clobber the + @ condition flags. But it is the best we can do for EFI, since + @ PE/COFF expects the magic string "MZ" at offset 0, while the + @ ARM/Linux boot protocol expects an executable instruction + @ there. + .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000 +#else + mov r0, r0 +#endif + .endm + + .macro __EFI_HEADER +#ifdef CONFIG_EFI_STUB + b __efi_start + + .set start_offset, __efi_start - start + .org start + 0x3c + @ + @ The PE header can be anywhere in the file, but for + @ simplicity we keep it together with the MSDOS header + @ The offset to the PE/COFF header needs to be at offset + @ 0x3C in the MSDOS header. + @ The only 2 fields of the MSDOS header that are used are this + @ PE/COFF offset, and the "MZ" bytes at offset 0x0. + @ + .long pe_header - start @ Offset to the PE header. + +pe_header: + .ascii "PE\0\0" + +coff_header: + .short 0x01c2 @ ARM or Thumb + .short 2 @ nr_sections + .long 0 @ TimeDateStamp + .long 0 @ PointerToSymbolTable + .long 1 @ NumberOfSymbols + .short section_table - optional_header + @ SizeOfOptionalHeader + .short 0x306 @ Characteristics. + @ IMAGE_FILE_32BIT_MACHINE | + @ IMAGE_FILE_DEBUG_STRIPPED | + @ IMAGE_FILE_EXECUTABLE_IMAGE | + @ IMAGE_FILE_LINE_NUMS_STRIPPED + +optional_header: + .short 0x10b @ PE32 format + .byte 0x02 @ MajorLinkerVersion + .byte 0x14 @ MinorLinkerVersion + .long _end - __efi_start @ SizeOfCode + .long 0 @ SizeOfInitializedData + .long 0 @ SizeOfUninitializedData + .long efi_stub_entry - start @ AddressOfEntryPoint + .long start_offset @ BaseOfCode + .long 0 @ data + +extra_header_fields: + .long 0 @ ImageBase + .long 0x200 @ SectionAlignment + .long 0x200 @ FileAlignment + .short 0 @ MajorOperatingSystemVersion + .short 0 @ MinorOperatingSystemVersion + .short 0 @ MajorImageVersion + .short 0 @ MinorImageVersion + .short 0 @ MajorSubsystemVersion + .short 0 @ MinorSubsystemVersion + .long 0 @ Win32VersionValue + + .long _end - start @ SizeOfImage + .long start_offset @ SizeOfHeaders + .long 0 @ CheckSum + .short 0xa @ Subsystem (EFI application) + .short 0 @ DllCharacteristics + .long 0 @ SizeOfStackReserve + .long 0 @ SizeOfStackCommit + .long 0 @ SizeOfHeapReserve + .long 0 @ SizeOfHeapCommit + .long 0 @ LoaderFlags + .long 0x6 @ NumberOfRvaAndSizes + + .quad 0 @ ExportTable + .quad 0 @ ImportTable + .quad 0 @ ResourceTable + .quad 0 @ ExceptionTable + .quad 0 @ CertificationTable + .quad 0 @ BaseRelocationTable + +section_table: + @ + @ The EFI application loader requires a relocation section + @ because EFI applications must be relocatable. This is a + @ dummy section as far as we are concerned. + @ + .ascii ".reloc\0\0" + .long 0 @ VirtualSize + .long 0 @ VirtualAddress + .long 0 @ SizeOfRawData + .long 0 @ PointerToRawData + .long 0 @ PointerToRelocations + .long 0 @ PointerToLineNumbers + .short 0 @ NumberOfRelocations + .short 0 @ NumberOfLineNumbers + .long 0x42100040 @ Characteristics + + .ascii ".text\0\0\0" + .long _end - __efi_start @ VirtualSize + .long __efi_start @ VirtualAddress + .long _edata - __efi_start @ SizeOfRawData + .long __efi_start @ PointerToRawData + .long 0 @ PointerToRelocations + .long 0 @ PointerToLineNumbers + .short 0 @ NumberOfRelocations + .short 0 @ NumberOfLineNumbers + .long 0xe0500020 @ Characteristics + + .align 9 +__efi_start: +#endif + .endm diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 06e983f59980ffc4c0955891d894bbbbaa5cb5e2..af11c2f8f3b7a63c350db162511c779617d70f61 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -12,6 +12,8 @@ #include #include +#include "efi-header.S" + AR_CLASS( .arch armv7-a ) M_CLASS( .arch armv7-m ) @@ -126,7 +128,7 @@ start: .type start,#function .rept 7 - mov r0, r0 + __nop .endr ARM( mov r0, r0 ) ARM( b 1f ) @@ -139,7 +141,8 @@ start: .word 0x04030201 @ endianness flag THUMB( .thumb ) -1: +1: __EFI_HEADER + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 AR_CLASS( mrs r9, cpsr ) #ifdef CONFIG_ARM_VIRT_EXT @@ -1353,6 +1356,53 @@ __enter_kernel: reloc_code_end: +#ifdef CONFIG_EFI_STUB + .align 2 +_start: .long start - . + +ENTRY(efi_stub_entry) + @ allocate space on stack for passing current zImage address + @ and for the EFI stub to return of new entry point of + @ zImage, as EFI stub may copy the kernel. Pointer address + @ is passed in r2. r0 and r1 are passed through from the + @ EFI firmware to efi_entry + adr ip, _start + ldr r3, [ip] + add r3, r3, ip + stmfd sp!, {r3, lr} + mov r2, sp @ pass zImage address in r2 + bl efi_entry + + @ Check for error return from EFI stub. r0 has FDT address + @ or error code. + cmn r0, #1 + beq efi_load_fail + + @ Preserve return value of efi_entry() in r4 + mov r4, r0 + bl cache_clean_flush + bl cache_off + + @ Set parameters for booting zImage according to boot protocol + @ put FDT address in r2, it was returned by efi_entry() + @ r1 is the machine type, and r0 needs to be 0 + mov r0, #0 + mov r1, #0xFFFFFFFF + mov r2, r4 + + @ Branch to (possibly) relocated zImage that is in [sp] + ldr lr, [sp] + ldr ip, =start_offset + add lr, lr, ip + mov pc, lr @ no mode switch + +efi_load_fail: + @ Return EFI_LOAD_ERROR to EFI firmware on error. + ldr r0, =0x80000001 + ldmfd sp!, {ip, pc} +ENDPROC(efi_stub_entry) +#endif + .align .section ".stack", "aw", %nobits .L_user_stack: .space 4096 diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 2b60b843ac5e9517f06e3e20190afa983fa7d21f..81c493156ce87e12a4719268076c861da18e7a53 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -48,6 +48,13 @@ SECTIONS *(.rodata) *(.rodata.*) } + .data : { + /* + * The EFI stub always executes from RAM, and runs strictly before the + * decompressor, so we can make an exception for its r/w data, and keep it + */ + *(.data.efistub) + } .piggydata : { *(.piggydata) } diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 294cfe40388dd582d77d45eac441b15318ac1cde..40beede46e55836898779fc503e5b5380a77551c 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -64,73 +64,73 @@ cpu0_opp_table: opp_table0 { compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <900000>; clock-latency-ns = <200000>; }; - opp01 { + opp@300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <900000>; clock-latency-ns = <200000>; }; - opp02 { + opp@400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <925000>; clock-latency-ns = <200000>; }; - opp03 { + opp@500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <950000>; clock-latency-ns = <200000>; }; - opp04 { + opp@600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <975000>; clock-latency-ns = <200000>; }; - opp05 { + opp@700000000 { opp-hz = /bits/ 64 <700000000>; opp-microvolt = <987500>; clock-latency-ns = <200000>; }; - opp06 { + opp@800000000 { opp-hz = /bits/ 64 <800000000>; opp-microvolt = <1000000>; clock-latency-ns = <200000>; opp-suspend; }; - opp07 { + opp@900000000 { opp-hz = /bits/ 64 <900000000>; opp-microvolt = <1037500>; clock-latency-ns = <200000>; }; - opp08 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <1087500>; clock-latency-ns = <200000>; }; - opp09 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <1137500>; clock-latency-ns = <200000>; }; - opp10 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1187500>; clock-latency-ns = <200000>; }; - opp11 { + opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1250000>; clock-latency-ns = <200000>; }; - opp12 { + opp@1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1287500>; clock-latency-ns = <200000>; }; - opp13 { + opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1350000>; clock-latency-ns = <200000>; diff --git a/arch/arm/boot/dts/imx6q-gw5400-a.dts b/arch/arm/boot/dts/imx6q-gw5400-a.dts index 58adf176425a69fc48d16731973f93827b080bab..a51834e1dd2789ed0bfeed69b2dbddc08aa946fc 100644 --- a/arch/arm/boot/dts/imx6q-gw5400-a.dts +++ b/arch/arm/boot/dts/imx6q-gw5400-a.dts @@ -154,7 +154,7 @@ flash: m25p80@0 { &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi index 7b31fdb79ced5cbac2eecff792fa8d56b1a9a010..dc0cebfe22d7b9a8f0500e1313c67ede2718234d 100644 --- a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi @@ -94,7 +94,7 @@ reg_usb_otg_vbus: regulator@2 { &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index 1b66328a84987a1040b0406e1b4dfe29a87707ed..18cd4114a23e85a82f724c3b8773c04399826129 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -154,7 +154,7 @@ &can1 { &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi index 7c51839ff93467d209e0f3809b3ed9e8bc541c54..eea90f37bbb8c236ebc97e2e1c90b439e4aed2ed 100644 --- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi @@ -155,7 +155,7 @@ &can1 { &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi index 929e0b37bd9e542e252be1159e6e588dd34b94e1..6c11a2ae35ef98b0f2adcb218d20543cead8f399 100644 --- a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi @@ -145,7 +145,7 @@ &can1 { &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 8263fc18a7d95ff88b470ae57fd5e2e89036f99f..d354d406954da437071db41de61e349a48644802 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -113,14 +113,14 @@ backlight { &clks { assigned-clocks = <&clks IMX6QDL_PLL4_BYPASS_SRC>, <&clks IMX6QDL_PLL4_BYPASS>, - <&clks IMX6QDL_CLK_PLL4_POST_DIV>, <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, + <&clks IMX6QDL_CLK_PLL4_POST_DIV>; assigned-clock-parents = <&clks IMX6QDL_CLK_LVDS2_IN>, <&clks IMX6QDL_PLL4_BYPASS_SRC>, <&clks IMX6QDL_CLK_PLL3_USB_OTG>, <&clks IMX6QDL_CLK_PLL3_USB_OTG>; - assigned-clock-rates = <0>, <0>, <24576000>; + assigned-clock-rates = <0>, <0>, <0>, <0>, <24576000>; }; &ecspi1 { diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts index 1a78f013f37ab8e2ee4de5361dc16bdeec2b2516..b75f7b2b7c4ac9e0aca7c4c44e3f1b8e2bd09c28 100644 --- a/arch/arm/boot/dts/omap4-duovero-parlor.dts +++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts @@ -189,3 +189,7 @@ hdmi_out: endpoint { }; }; +&uart3 { + interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH + &omap4_pmx_core OMAP4_UART3_RX>; +}; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index 314f59c121620868fae063a8c9fc13d323b5400b..d0c74385331803383d5296d7157516aa63c63758 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -25,9 +25,9 @@ L2: l2-cache { cache-sets = <512>; cache-line-size = <32>; /* At full speed latency must be >=2 */ - arm,tag-latency = <2>; - arm,data-latency = <2 2>; - arm,dirty-latency = <2>; + arm,tag-latency = <8>; + arm,data-latency = <8 8>; + arm,dirty-latency = <8>; }; mtu0: mtu@101e2000 { diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi index ad21a4293a339c28446056b189e6c3059266c15c..133375bc8aa503d57f85a2b0026ff432b496a2d3 100644 --- a/arch/arm/boot/dts/stihxxx-b2120.dtsi +++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi @@ -6,6 +6,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include +#include +#include / { soc { sbc_serial0: serial@9530000 { @@ -35,12 +38,18 @@ pwm1: pwm@9510000 { status = "okay"; }; - i2c@9842000 { + ssc2: i2c@9842000 { status = "okay"; + clock-frequency = <100000>; + st,i2c-min-scl-pulse-width-us = <0>; + st,i2c-min-sda-pulse-width-us = <5>; }; - i2c@9843000 { + ssc3: i2c@9843000 { status = "okay"; + clock-frequency = <100000>; + st,i2c-min-scl-pulse-width-us = <0>; + st,i2c-min-sda-pulse-width-us = <5>; }; i2c@9844000 { @@ -93,5 +102,38 @@ ethernet0: dwmac@9630000 { phy-mode = "rgmii"; fixed-link = <0 1 1000 0 0>; }; + + demux@08a20000 { + compatible = "st,stih407-c8sectpfe"; + status = "okay"; + reg = <0x08a20000 0x10000>, + <0x08a00000 0x4000>; + reg-names = "c8sectpfe", "c8sectpfe-ram"; + interrupts = , + ; + interrupt-names = "c8sectpfe-error-irq", + "c8sectpfe-idle-irq"; + pinctrl-0 = <&pinctrl_tsin0_serial>; + pinctrl-1 = <&pinctrl_tsin0_parallel>; + pinctrl-2 = <&pinctrl_tsin3_serial>; + pinctrl-3 = <&pinctrl_tsin4_serial_alt3>; + pinctrl-4 = <&pinctrl_tsin5_serial_alt1>; + pinctrl-names = "tsin0-serial", + "tsin0-parallel", + "tsin3-serial", + "tsin4-serial", + "tsin5-serial"; + clocks = <&clk_s_c0_flexgen CLK_PROC_STFE>; + clock-names = "c8sectpfe"; + + /* tsin0 is TSA on NIMA */ + tsin0: port@0 { + tsin-num = <0>; + serial-not-parallel; + i2c-bus = <&ssc2>; + reset-gpios = <&pio15 4 GPIO_ACTIVE_HIGH>; + dvb-card = ; + }; + }; }; }; diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts index 2d4250b1faf80040e3e6c3477fe64b7431141627..68b479b8772c6f3eadc16f2ad6c3d33e35d39c5f 100644 --- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts +++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts @@ -83,6 +83,7 @@ ctp@5d { reg = <0x5d>; interrupt-parent = <&pio>; interrupts = <0 3 IRQ_TYPE_LEVEL_HIGH>; /* PA3 */ + touchscreen-swapped-x-y; }; }; diff --git a/arch/arm/boot/dts/tegra124-nyan.dtsi b/arch/arm/boot/dts/tegra124-nyan.dtsi index 40c23a0b7cfc2adf8517e28ac51b0078423a0c61..ec1aa64ded681a1a4f834343b3fc45d0c4946819 100644 --- a/arch/arm/boot/dts/tegra124-nyan.dtsi +++ b/arch/arm/boot/dts/tegra124-nyan.dtsi @@ -399,7 +399,7 @@ sdhci@0,700b0600 { /* eMMC on this bus */ /* CPU DFLL clock */ clock@0,70110000 { - status = "okay"; + status = "disabled"; vdd-cpu-supply = <&vdd_cpu>; nvidia,i2c-fs-rate = <400000>; }; diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts index 01f40197ea13c4cc03691ef65a18c7ee20eb1990..3279bf1a17a123ac26448eaf4b9055884793eca4 100644 --- a/arch/arm/boot/dts/versatile-ab.dts +++ b/arch/arm/boot/dts/versatile-ab.dts @@ -110,7 +110,11 @@ sic: intc@10003000 { interrupt-parent = <&vic>; interrupts = <31>; /* Cascaded to vic */ clear-mask = <0xffffffff>; - valid-mask = <0xffc203f8>; + /* + * Valid interrupt lines mask according to + * table 4-36 page 4-50 of ARM DUI 0225D + */ + valid-mask = <0x0760031b>; }; dma@10130000 { @@ -266,8 +270,8 @@ aaci@4000 { }; mmc@5000 { compatible = "arm,pl180", "arm,primecell"; - reg = < 0x5000 0x1000>; - interrupts-extended = <&vic 22 &sic 2>; + reg = <0x5000 0x1000>; + interrupts-extended = <&vic 22 &sic 1>; clocks = <&xtal24mhz>, <&pclk>; clock-names = "mclk", "apb_pclk"; }; diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts index b83137f66034016d2580dfaeaf63b162cc0ab5c4..33a8eb28374eaa8d3b8aca95d8801227bedd87ca 100644 --- a/arch/arm/boot/dts/versatile-pb.dts +++ b/arch/arm/boot/dts/versatile-pb.dts @@ -5,6 +5,16 @@ / { compatible = "arm,versatile-pb"; amba { + /* The Versatile PB is using more SIC IRQ lines than the AB */ + sic: intc@10003000 { + clear-mask = <0xffffffff>; + /* + * Valid interrupt lines mask according to + * figure 3-30 page 3-74 of ARM DUI 0224B + */ + valid-mask = <0x7fe003ff>; + }; + gpio2: gpio@101e6000 { compatible = "arm,pl061", "arm,primecell"; reg = <0x101e6000 0x1000>; @@ -67,6 +77,13 @@ pci-controller@10001000 { }; fpga { + mmc@5000 { + /* + * Overrides the interrupt assignment from + * the Versatile AB board file. + */ + interrupts-extended = <&sic 22 &sic 23>; + }; uart@9000 { compatible = "arm,pl011", "arm,primecell"; reg = <0x9000 0x1000>; @@ -86,7 +103,8 @@ sci@a000 { mmc@b000 { compatible = "arm,pl180", "arm,primecell"; reg = <0xb000 0x1000>; - interrupts-extended = <&vic 23 &sic 2>; + interrupt-parent = <&sic>; + interrupts = <1>, <2>; clocks = <&xtal24mhz>, <&pclk>; clock-names = "mclk", "apb_pclk"; }; diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi index b1c59a766a13381a693d897eb3349db4ac9d3c16..e12213d16693b890d0440e2da264a25b591e789a 100644 --- a/arch/arm/boot/dts/wm8650.dtsi +++ b/arch/arm/boot/dts/wm8650.dtsi @@ -187,6 +187,15 @@ uhci@d8007b00 { interrupts = <43>; }; + sdhc@d800a000 { + compatible = "wm,wm8505-sdhc"; + reg = <0xd800a000 0x400>; + interrupts = <20>, <21>; + clocks = <&clksdhc>; + bus-width = <4>; + sdon-inverted; + }; + fb: fb@d8050800 { compatible = "wm,wm8505-fb"; reg = <0xd8050800 0x200>; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 69a22fdb52a5a49ecb26760ed4163989e2f735e4..cd7b198fc79e220a1a082ada1889f919751a123f 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -366,6 +366,7 @@ CONFIG_BATTERY_MAX17042=m CONFIG_CHARGER_MAX14577=m CONFIG_CHARGER_MAX77693=m CONFIG_CHARGER_TPS65090=y +CONFIG_AXP20X_POWER=m CONFIG_POWER_RESET_AS3722=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_GPIO_RESTART=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index 4725fab562cb3de63af3ac24c409a18542283efc..ec5250547d145124e3a388a4f846faabb0475ed7 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -54,6 +54,8 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_DMADEVICES=y +CONFIG_STM32_DMA=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 3c36e16fcacf7d44f7e8ce76f32a2cbc20f2b1d4..b503a89441bf25ce5cda365a17cb7a172c28b4e2 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -84,6 +84,7 @@ CONFIG_SPI_SUN4I=y CONFIG_SPI_SUN6I=y CONFIG_GPIO_SYSFS=y CONFIG_POWER_SUPPLY=y +CONFIG_AXP20X_POWER=y CONFIG_THERMAL=y CONFIG_CPU_THERMAL=y CONFIG_WATCHDOG=y diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index bd425302c97a3ec9966d4c04bdb61c3c40252056..16da6380eb8505a9f9b87db77ac4b56c1febd8d9 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += bitsperlong.h generic-y += cputime.h generic-y += current.h +generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index e7335a92144ef1d20d296ebdf26798148b2fa711..4e6e88a6b2f4ba36bcc3a0167b684a24cc58b7e9 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -5,8 +5,6 @@ #include #include -#ifdef CONFIG_BUG - /* * Use a suitable undefined instruction to use for ARM/Thumb2 bug handling. * We need to be careful not to conflict with those used by other modules and @@ -47,7 +45,7 @@ do { \ unreachable(); \ } while (0) -#else /* not CONFIG_DEBUG_BUGVERBOSE */ +#else #define __BUG(__file, __line, __value) \ do { \ @@ -57,7 +55,6 @@ do { \ #endif /* CONFIG_DEBUG_BUGVERBOSE */ #define HAVE_ARCH_BUG -#endif /* CONFIG_BUG */ #include diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h index 0f842492490280b9d0b50fd1b25935a51b5a4c9d..3848259bebf85786d39d4212d74f2e1646b98a44 100644 --- a/arch/arm/include/asm/cpuidle.h +++ b/arch/arm/include/asm/cpuidle.h @@ -30,7 +30,7 @@ static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct device_node; struct cpuidle_ops { - int (*suspend)(int cpu, unsigned long arg); + int (*suspend)(unsigned long arg); int (*init)(struct device_node *, int cpu); }; diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h new file mode 100644 index 0000000000000000000000000000000000000000..e0eea72deb87eb1ba3967b464a8848256565ee6c --- /dev/null +++ b/arch/arm/include/asm/efi.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARM_EFI_H +#define __ASM_ARM_EFI_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_EFI +void efi_init(void); + +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + +#define efi_call_virt(f, ...) \ +({ \ + efi_##f##_t *__f; \ + efi_status_t __s; \ + \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ + __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ + __s; \ +}) + +#define __efi_call_virt(f, ...) \ +({ \ + efi_##f##_t *__f; \ + \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ + __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ +}) + +static inline void efi_set_pgd(struct mm_struct *mm) +{ + check_and_switch_context(mm, NULL); +} + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + +#else +#define efi_init() +#endif /* CONFIG_EFI */ + +/* arch specific definitions used by the stub code */ + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +/* + * A reasonable upper bound for the uncompressed kernel size is 32 MBytes, + * so we will reserve that amount of memory. We have no easy way to tell what + * the actuall size of code + data the uncompressed kernel will use. + * If this is insufficient, the decompressor will relocate itself out of the + * way before performing the decompression. + */ +#define MAX_UNCOMP_KERNEL_SIZE SZ_32M + +/* + * The kernel zImage should preferably be located between 32 MB and 128 MB + * from the base of DRAM. The min address leaves space for a maximal size + * uncompressed image, and the max address is due to how the zImage decompressor + * picks a destination address. + */ +#define ZIMAGE_OFFSET_LIMIT SZ_128M +#define MIN_ZIMAGE_OFFSET MAX_UNCOMP_KERNEL_SIZE +#define MAX_FDT_OFFSET ZIMAGE_OFFSET_LIMIT + +#endif /* _ASM_ARM_EFI_H */ diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 58cfe9f1a687e6e1c8794f5ff5f37c6b4fcd7cb9..5c17d2dec777f627eaf6b706f92053065d5fa06b 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -19,20 +19,47 @@ enum fixed_addresses { FIX_TEXT_POKE0, FIX_TEXT_POKE1, - __end_of_fixed_addresses + __end_of_fixmap_region, + + /* + * Share the kmap() region with early_ioremap(): this is guaranteed + * not to clash since early_ioremap() is only available before + * paging_init(), and kmap() only after. + */ +#define NR_FIX_BTMAPS 32 +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + __end_of_early_ioremap_region }; +static const enum fixed_addresses __end_of_fixed_addresses = + __end_of_fixmap_region > __end_of_early_ioremap_region ? + __end_of_fixmap_region : __end_of_early_ioremap_region; + #define FIXMAP_PAGE_COMMON (L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY) #define FIXMAP_PAGE_NORMAL (FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK) +#define FIXMAP_PAGE_RO (FIXMAP_PAGE_NORMAL | L_PTE_RDONLY) /* Used by set_fixmap_(io|nocache), both meant for mapping a device */ #define FIXMAP_PAGE_IO (FIXMAP_PAGE_COMMON | L_PTE_MT_DEV_SHARED | L_PTE_SHARED) #define FIXMAP_PAGE_NOCACHE FIXMAP_PAGE_IO +#define __early_set_fixmap __set_fixmap + +#ifdef CONFIG_MMU + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); void __init early_fixmap_init(void); #include +#else + +static inline void early_fixmap_init(void) { } + +#endif #endif diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index fe3ea776dc34267724f377465134e52b39434fed..3d7351c844aac0ae2392d441796ce9904dcaf717 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include #include -#define NR_IPI 8 +#define NR_IPI 7 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index f98c7f32c9c8aefb256c7c7a287c6c2d69602011..9b7c328fb20778f9826ffbd953c10cda4cb37b6d 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -42,6 +42,8 @@ enum { extern void iotable_init(struct map_desc *, int); extern void vm_reserve_area_early(unsigned long addr, unsigned long size, void *caller); +extern void create_mapping_late(struct mm_struct *mm, struct map_desc *md, + bool ng); #ifdef CONFIG_DEBUG_LL extern void debug_ll_addr(unsigned long *paddr, unsigned long *vaddr); diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 9b32f76bb0ddaa6d6e485d79cc9f2bdfeb24d776..432ce8176498e0a014b26689716454150278056c 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -26,7 +26,7 @@ void __check_vmalloc_seq(struct mm_struct *mm); #ifdef CONFIG_CPU_HAS_ASID void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); -#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) #ifdef CONFIG_ARM_ERRATA_798181 void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h new file mode 100644 index 0000000000000000000000000000000000000000..8435ff591386c4ab7c70357929ae98da6b36371c --- /dev/null +++ b/arch/arm/include/asm/paravirt.h @@ -0,0 +1,20 @@ +#ifndef _ASM_ARM_PARAVIRT_H +#define _ASM_ARM_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; +extern struct pv_time_ops pv_time_ops; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_time_ops.steal_clock(cpu); +} +#endif + +#endif diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index 68ee3ce17b820e9c3cc6b77d0645cb68f6276988..b4c6d99364f179abe56849fa3ecc7bd82bb1ffcc 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,7 +16,7 @@ extern struct smp_operations psci_smp_ops; -#ifdef CONFIG_ARM_PSCI +#if defined(CONFIG_SMP) && defined(CONFIG_ARM_PSCI) bool psci_smp_available(void); #else static inline bool psci_smp_available(void) { return false; } diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index e0adb9f1bf94a7f58d98835e8efd332604fd657f..3613d7e9fc40097afa2ab4cb44ae251a8438ccaa 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -25,4 +25,10 @@ extern int arm_add_memory(u64 start, u64 size); extern void early_print(const char *str, ...); extern void dump_machine_table(void); +#ifdef CONFIG_ATAGS_PROC +extern void save_atags(const struct tag *tags); +#else +static inline void save_atags(const struct tag *tags) { } +#endif + #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 8cc85a4ebec20a1223f966bb868e5def5f70928e..35c9db857ebe9c7d53715ec42518a6e9fbe1dc6e 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -510,10 +510,14 @@ __copy_to_user_std(void __user *to, const void *from, unsigned long n); static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { +#ifndef CONFIG_UACCESS_WITH_MEMCPY unsigned int __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; +#else + return arm_copy_to_user(to, from, n); +#endif } extern unsigned long __must_check diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 712b50e0a6dc6901a5507ac89eeeeabdfefe4f19..d769972db8cbe49b232c3ac81ff10c8a958a5eee 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -35,6 +35,7 @@ #include #include +#include long privcmd_call(unsigned call, unsigned long a1, unsigned long a2, unsigned long a3, @@ -49,6 +50,12 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_platform_op_raw(void *arg); +static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) +{ + op->interface_version = XENPF_INTERFACE_VERSION; + return HYPERVISOR_platform_op_raw(op); +} int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr); static inline int diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h index 50066006e6bd3a961a580720911ab9c224f2fa36..75d5968628929b552b88107d279f988b3eda448e 100644 --- a/arch/arm/include/asm/xen/interface.h +++ b/arch/arm/include/asm/xen/interface.h @@ -27,6 +27,8 @@ (hnd).p = val; \ } while (0) +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op + #ifndef __ASSEMBLY__ /* Explicitly size integers that represent pfns in the interface with * Xen so that we can have one ABI that works for 32 and 64 bit guests. @@ -76,6 +78,7 @@ struct pvclock_wall_clock { u32 version; u32 sec; u32 nsec; + u32 sec_hi; } __attribute__((__packed__)); #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index af9e59bf3831b9fd648d095c2070209b1e97677d..2c5f160be65e44cbabc2c5d01b023d56df0a1985 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -73,14 +73,15 @@ obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ perf_event_v7.o -CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_VDSO) += vdso.o +obj-$(CONFIG_EFI) += efi.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o endif +obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o @@ -88,8 +89,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif +obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index f89811fb9a55f3a490c3633ef99ef52745c58129..7e45f69a0ddc9d0ef3b04c145195cf1b4ad7d84b 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -175,3 +176,8 @@ EXPORT_SYMBOL(__gnu_mcount_nc); EXPORT_SYMBOL(__pv_phys_pfn_offset); EXPORT_SYMBOL(__pv_offset); #endif + +#ifdef CONFIG_HAVE_ARM_SMCCC +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); +#endif diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index ec4164da6e3018737c42022bef16b3ed6bcea350..edfa2268c12779dc86ae2513e6ff36a37ad7e0ff 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -1,9 +1,3 @@ -#ifdef CONFIG_ATAGS_PROC -extern void save_atags(struct tag *tags); -#else -static inline void save_atags(struct tag *tags) { } -#endif - void convert_to_tag_list(struct tag *tags); #ifdef CONFIG_ATAGS diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index 318da33465f413f8c207eb2dd9b3766e53ab5ad3..703926e7007b4e0000b689006722780a02588e04 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -56,7 +56,7 @@ int arm_cpuidle_suspend(int index) int cpu = smp_processor_id(); if (cpuidle_ops[cpu].suspend) - ret = cpuidle_ops[cpu].suspend(cpu, index); + ret = cpuidle_ops[cpu].suspend(index); return ret; } diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c new file mode 100644 index 0000000000000000000000000000000000000000..ff8a9d8acfaca727517c76bbefe4bbdd4755f73a --- /dev/null +++ b/arch/arm/kernel/efi.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +{ + struct map_desc desc = { + .virtual = md->virt_addr, + .pfn = __phys_to_pfn(md->phys_addr), + .length = md->num_pages * EFI_PAGE_SIZE, + }; + + /* + * Order is important here: memory regions may have all of the + * bits below set (and usually do), so we check them in order of + * preference. + */ + if (md->attribute & EFI_MEMORY_WB) + desc.type = MT_MEMORY_RWX; + else if (md->attribute & EFI_MEMORY_WT) + desc.type = MT_MEMORY_RWX_NONCACHED; + else if (md->attribute & EFI_MEMORY_WC) + desc.type = MT_DEVICE_WC; + else + desc.type = MT_DEVICE; + + create_mapping_late(mm, &desc, true); + return 0; +} diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index b6c8bb9315e7bb6c15b9eb015ad9627fe545fdcb..907534f97053abbc32b667ff43df88dc93c08fb7 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -88,7 +88,7 @@ __pendsv_entry: @ execute the pending work, including reschedule get_thread_info tsk mov why, #0 - b ret_to_user + b ret_to_user_from_irq ENDPROC(__pendsv_entry) /* diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 097e2e201b9f0e33e0fbf2a6ffe93115876bd76c..0c7efc3446c0022d9e7b0ca45051321aa8e950e2 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -32,7 +32,7 @@ struct plt_entries { static bool in_init(const struct module *mod, u32 addr) { - return addr - (u32)mod->module_init < mod->init_size; + return addr - (u32)mod->init_layout.base < mod->init_layout.size; } u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm/kernel/paravirt.c similarity index 52% rename from arch/arm64/kernel/psci-call.S rename to arch/arm/kernel/paravirt.c index cf83e61cd3b59436eb75d30b76f590d4e4b7f7b8..53f371ed4568c8a08cf07fc27fafccc4dc44770e 100644 --- a/arch/arm64/kernel/psci-call.S +++ b/arch/arm/kernel/paravirt.c @@ -8,21 +8,18 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * Copyright (C) 2015 ARM Limited + * Copyright (C) 2013 Citrix Systems * - * Author: Will Deacon + * Author: Stefano Stabellini */ -#include +#include +#include +#include +#include -/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - hvc #0 - ret -ENDPROC(__invoke_psci_fn_hvc) +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; -/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_smc) - smc #0 - ret -ENDPROC(__invoke_psci_fn_smc) +struct pv_time_ops pv_time_ops; +EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 126dc679b2308a2b846252bbde1e513389d71cdd..4152158f6e6a527eca033e0235ee5a54f0ab2d69 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -35,133 +35,117 @@ * but the encodings are considered to be `reserved' in the case that * they are not available. */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_L1_ICACHE_REFILL = 0x01, - ARMV7_PERFCTR_ITLB_REFILL = 0x02, - ARMV7_PERFCTR_L1_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_L1_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_MEM_READ = 0x06, - ARMV7_PERFCTR_MEM_WRITE = 0x07, - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, +#define ARMV7_PERFCTR_PMNC_SW_INCR 0x00 +#define ARMV7_PERFCTR_L1_ICACHE_REFILL 0x01 +#define ARMV7_PERFCTR_ITLB_REFILL 0x02 +#define ARMV7_PERFCTR_L1_DCACHE_REFILL 0x03 +#define ARMV7_PERFCTR_L1_DCACHE_ACCESS 0x04 +#define ARMV7_PERFCTR_DTLB_REFILL 0x05 +#define ARMV7_PERFCTR_MEM_READ 0x06 +#define ARMV7_PERFCTR_MEM_WRITE 0x07 +#define ARMV7_PERFCTR_INSTR_EXECUTED 0x08 +#define ARMV7_PERFCTR_EXC_TAKEN 0x09 +#define ARMV7_PERFCTR_EXC_EXECUTED 0x0A +#define ARMV7_PERFCTR_CID_WRITE 0x0B - /* - * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all (taken) branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - ARMV7_PERFCTR_PC_BRANCH_PRED = 0x12, - - /* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ - ARMV7_PERFCTR_MEM_ACCESS = 0x13, - ARMV7_PERFCTR_L1_ICACHE_ACCESS = 0x14, - ARMV7_PERFCTR_L1_DCACHE_WB = 0x15, - ARMV7_PERFCTR_L2_CACHE_ACCESS = 0x16, - ARMV7_PERFCTR_L2_CACHE_REFILL = 0x17, - ARMV7_PERFCTR_L2_CACHE_WB = 0x18, - ARMV7_PERFCTR_BUS_ACCESS = 0x19, - ARMV7_PERFCTR_MEM_ERROR = 0x1A, - ARMV7_PERFCTR_INSTR_SPEC = 0x1B, - ARMV7_PERFCTR_TTBR_WRITE = 0x1C, - ARMV7_PERFCTR_BUS_CYCLES = 0x1D, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; +/* + * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all (taken) branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ +#define ARMV7_PERFCTR_PC_WRITE 0x0C +#define ARMV7_PERFCTR_PC_IMM_BRANCH 0x0D +#define ARMV7_PERFCTR_PC_PROC_RETURN 0x0E +#define ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F +#define ARMV7_PERFCTR_PC_BRANCH_MIS_PRED 0x10 +#define ARMV7_PERFCTR_CLOCK_CYCLES 0x11 +#define ARMV7_PERFCTR_PC_BRANCH_PRED 0x12 + +/* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ +#define ARMV7_PERFCTR_MEM_ACCESS 0x13 +#define ARMV7_PERFCTR_L1_ICACHE_ACCESS 0x14 +#define ARMV7_PERFCTR_L1_DCACHE_WB 0x15 +#define ARMV7_PERFCTR_L2_CACHE_ACCESS 0x16 +#define ARMV7_PERFCTR_L2_CACHE_REFILL 0x17 +#define ARMV7_PERFCTR_L2_CACHE_WB 0x18 +#define ARMV7_PERFCTR_BUS_ACCESS 0x19 +#define ARMV7_PERFCTR_MEM_ERROR 0x1A +#define ARMV7_PERFCTR_INSTR_SPEC 0x1B +#define ARMV7_PERFCTR_TTBR_WRITE 0x1C +#define ARMV7_PERFCTR_BUS_CYCLES 0x1D + +#define ARMV7_PERFCTR_CPU_CYCLES 0xFF /* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_A8_PERFCTR_L2_CACHE_ACCESS = 0x43, - ARMV7_A8_PERFCTR_L2_CACHE_REFILL = 0x44, - ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS = 0x50, - ARMV7_A8_PERFCTR_STALL_ISIDE = 0x56, -}; +#define ARMV7_A8_PERFCTR_L2_CACHE_ACCESS 0x43 +#define ARMV7_A8_PERFCTR_L2_CACHE_REFILL 0x44 +#define ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS 0x50 +#define ARMV7_A8_PERFCTR_STALL_ISIDE 0x56 /* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_A9_PERFCTR_INSTR_CORE_RENAME = 0x68, - ARMV7_A9_PERFCTR_STALL_ICACHE = 0x60, - ARMV7_A9_PERFCTR_STALL_DISPATCH = 0x66, -}; +#define ARMV7_A9_PERFCTR_INSTR_CORE_RENAME 0x68 +#define ARMV7_A9_PERFCTR_STALL_ICACHE 0x60 +#define ARMV7_A9_PERFCTR_STALL_DISPATCH 0x66 /* ARMv7 Cortex-A5 specific event types */ -enum armv7_a5_perf_types { - ARMV7_A5_PERFCTR_PREFETCH_LINEFILL = 0xc2, - ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP = 0xc3, -}; +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL 0xc2 +#define ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP 0xc3 /* ARMv7 Cortex-A15 specific event types */ -enum armv7_a15_perf_types { - ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, - ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, - ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ = 0x42, - ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE = 0x43, +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ 0x42 +#define ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE 0x43 - ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ = 0x4C, - ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE = 0x4D, +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ 0x4C +#define ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE 0x4D - ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, - ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, - ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ = 0x52, - ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE = 0x53, +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ 0x52 +#define ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE 0x53 - ARMV7_A15_PERFCTR_PC_WRITE_SPEC = 0x76, -}; +#define ARMV7_A15_PERFCTR_PC_WRITE_SPEC 0x76 /* ARMv7 Cortex-A12 specific event types */ -enum armv7_a12_perf_types { - ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, - ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ 0x40 +#define ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE 0x41 - ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, - ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ 0x50 +#define ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE 0x51 - ARMV7_A12_PERFCTR_PC_WRITE_SPEC = 0x76, +#define ARMV7_A12_PERFCTR_PC_WRITE_SPEC 0x76 - ARMV7_A12_PERFCTR_PF_TLB_REFILL = 0xe7, -}; +#define ARMV7_A12_PERFCTR_PF_TLB_REFILL 0xe7 /* ARMv7 Krait specific event types */ -enum krait_perf_types { - KRAIT_PMRESR0_GROUP0 = 0xcc, - KRAIT_PMRESR1_GROUP0 = 0xd0, - KRAIT_PMRESR2_GROUP0 = 0xd4, - KRAIT_VPMRESR0_GROUP0 = 0xd8, +#define KRAIT_PMRESR0_GROUP0 0xcc +#define KRAIT_PMRESR1_GROUP0 0xd0 +#define KRAIT_PMRESR2_GROUP0 0xd4 +#define KRAIT_VPMRESR0_GROUP0 0xd8 - KRAIT_PERFCTR_L1_ICACHE_ACCESS = 0x10011, - KRAIT_PERFCTR_L1_ICACHE_MISS = 0x10010, +#define KRAIT_PERFCTR_L1_ICACHE_ACCESS 0x10011 +#define KRAIT_PERFCTR_L1_ICACHE_MISS 0x10010 - KRAIT_PERFCTR_L1_ITLB_ACCESS = 0x12222, - KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, -}; +#define KRAIT_PERFCTR_L1_ITLB_ACCESS 0x12222 +#define KRAIT_PERFCTR_L1_DTLB_ACCESS 0x12210 /* ARMv7 Scorpion specific event types */ -enum scorpion_perf_types { - SCORPION_LPM0_GROUP0 = 0x4c, - SCORPION_LPM1_GROUP0 = 0x50, - SCORPION_LPM2_GROUP0 = 0x54, - SCORPION_L2LPM_GROUP0 = 0x58, - SCORPION_VLPM_GROUP0 = 0x5c, +#define SCORPION_LPM0_GROUP0 0x4c +#define SCORPION_LPM1_GROUP0 0x50 +#define SCORPION_LPM2_GROUP0 0x54 +#define SCORPION_L2LPM_GROUP0 0x58 +#define SCORPION_VLPM_GROUP0 0x5c - SCORPION_ICACHE_ACCESS = 0x10053, - SCORPION_ICACHE_MISS = 0x10052, +#define SCORPION_ICACHE_ACCESS 0x10053 +#define SCORPION_ICACHE_MISS 0x10052 - SCORPION_DTLB_ACCESS = 0x12013, - SCORPION_DTLB_MISS = 0x12012, +#define SCORPION_DTLB_ACCESS 0x12013 +#define SCORPION_DTLB_MISS 0x12012 - SCORPION_ITLB_MISS = 0x12021, -}; +#define SCORPION_ITLB_MISS 0x12021 /* * Cortex-A8 HW events mapping @@ -547,6 +531,134 @@ static const unsigned scorpion_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, }; +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *armv7_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group armv7_pmu_format_attr_group = { + .name = "format", + .attrs = armv7_pmu_format_attrs, +}; + +#define ARMV7_EVENT_ATTR_RESOLVE(m) #m +#define ARMV7_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_STRING(name, armv7_event_attr_##name, \ + "event=" ARMV7_EVENT_ATTR_RESOLVE(config)) + +ARMV7_EVENT_ATTR(sw_incr, ARMV7_PERFCTR_PMNC_SW_INCR); +ARMV7_EVENT_ATTR(l1i_cache_refill, ARMV7_PERFCTR_L1_ICACHE_REFILL); +ARMV7_EVENT_ATTR(l1i_tlb_refill, ARMV7_PERFCTR_ITLB_REFILL); +ARMV7_EVENT_ATTR(l1d_cache_refill, ARMV7_PERFCTR_L1_DCACHE_REFILL); +ARMV7_EVENT_ATTR(l1d_cache, ARMV7_PERFCTR_L1_DCACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_tlb_refill, ARMV7_PERFCTR_DTLB_REFILL); +ARMV7_EVENT_ATTR(ld_retired, ARMV7_PERFCTR_MEM_READ); +ARMV7_EVENT_ATTR(st_retired, ARMV7_PERFCTR_MEM_WRITE); +ARMV7_EVENT_ATTR(inst_retired, ARMV7_PERFCTR_INSTR_EXECUTED); +ARMV7_EVENT_ATTR(exc_taken, ARMV7_PERFCTR_EXC_TAKEN); +ARMV7_EVENT_ATTR(exc_return, ARMV7_PERFCTR_EXC_EXECUTED); +ARMV7_EVENT_ATTR(cid_write_retired, ARMV7_PERFCTR_CID_WRITE); +ARMV7_EVENT_ATTR(pc_write_retired, ARMV7_PERFCTR_PC_WRITE); +ARMV7_EVENT_ATTR(br_immed_retired, ARMV7_PERFCTR_PC_IMM_BRANCH); +ARMV7_EVENT_ATTR(br_return_retired, ARMV7_PERFCTR_PC_PROC_RETURN); +ARMV7_EVENT_ATTR(unaligned_ldst_retired, ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS); +ARMV7_EVENT_ATTR(br_mis_pred, ARMV7_PERFCTR_PC_BRANCH_MIS_PRED); +ARMV7_EVENT_ATTR(cpu_cycles, ARMV7_PERFCTR_CLOCK_CYCLES); +ARMV7_EVENT_ATTR(br_pred, ARMV7_PERFCTR_PC_BRANCH_PRED); + +static struct attribute *armv7_pmuv1_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv1_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv1_event_attrs, +}; + +static const struct attribute_group *armv7_pmuv1_attr_groups[] = { + &armv7_pmuv1_events_attr_group, + &armv7_pmu_format_attr_group, + NULL, +}; + +ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS); +ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS); +ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB); +ARMV7_EVENT_ATTR(l2d_cache, ARMV7_PERFCTR_L2_CACHE_ACCESS); +ARMV7_EVENT_ATTR(l2d_cache_refill, ARMV7_PERFCTR_L2_CACHE_REFILL); +ARMV7_EVENT_ATTR(l2d_cache_wb, ARMV7_PERFCTR_L2_CACHE_WB); +ARMV7_EVENT_ATTR(bus_access, ARMV7_PERFCTR_BUS_ACCESS); +ARMV7_EVENT_ATTR(memory_error, ARMV7_PERFCTR_MEM_ERROR); +ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC); +ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE); +ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES); + +static struct attribute *armv7_pmuv2_event_attrs[] = { + &armv7_event_attr_sw_incr.attr.attr, + &armv7_event_attr_l1i_cache_refill.attr.attr, + &armv7_event_attr_l1i_tlb_refill.attr.attr, + &armv7_event_attr_l1d_cache_refill.attr.attr, + &armv7_event_attr_l1d_cache.attr.attr, + &armv7_event_attr_l1d_tlb_refill.attr.attr, + &armv7_event_attr_ld_retired.attr.attr, + &armv7_event_attr_st_retired.attr.attr, + &armv7_event_attr_inst_retired.attr.attr, + &armv7_event_attr_exc_taken.attr.attr, + &armv7_event_attr_exc_return.attr.attr, + &armv7_event_attr_cid_write_retired.attr.attr, + &armv7_event_attr_pc_write_retired.attr.attr, + &armv7_event_attr_br_immed_retired.attr.attr, + &armv7_event_attr_br_return_retired.attr.attr, + &armv7_event_attr_unaligned_ldst_retired.attr.attr, + &armv7_event_attr_br_mis_pred.attr.attr, + &armv7_event_attr_cpu_cycles.attr.attr, + &armv7_event_attr_br_pred.attr.attr, + &armv7_event_attr_mem_access.attr.attr, + &armv7_event_attr_l1i_cache.attr.attr, + &armv7_event_attr_l1d_cache_wb.attr.attr, + &armv7_event_attr_l2d_cache.attr.attr, + &armv7_event_attr_l2d_cache_refill.attr.attr, + &armv7_event_attr_l2d_cache_wb.attr.attr, + &armv7_event_attr_bus_access.attr.attr, + &armv7_event_attr_memory_error.attr.attr, + &armv7_event_attr_inst_spec.attr.attr, + &armv7_event_attr_ttbr_write_retired.attr.attr, + &armv7_event_attr_bus_cycles.attr.attr, + NULL, +}; + +static struct attribute_group armv7_pmuv2_events_attr_group = { + .name = "events", + .attrs = armv7_pmuv2_event_attrs, +}; + +static const struct attribute_group *armv7_pmuv2_attr_groups[] = { + &armv7_pmuv2_events_attr_group, + &armv7_pmu_format_attr_group, + NULL, +}; + /* * Perf Events' indices */ @@ -1085,6 +1197,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a8"; cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1093,6 +1206,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a9"; cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1101,6 +1215,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) armv7pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a5"; cpu_pmu->map_event = armv7_a5_map_event; + cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1110,6 +1225,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1119,6 +1235,7 @@ static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1128,6 +1245,7 @@ static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->name = "armv7_cortex_a12"; cpu_pmu->map_event = armv7_a12_map_event; cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return armv7_probe_num_events(cpu_pmu); } @@ -1135,6 +1253,7 @@ static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) { int ret = armv7_a12_pmu_init(cpu_pmu); cpu_pmu->name = "armv7_cortex_a17"; + cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups; return ret; } diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c index 8153e36b24917e96c8fa69d18bd78e8b0c130c1a..7c9248b74d3f41548e5ce41d614dc8850d08aa94 100644 --- a/arch/arm/kernel/pj4-cp0.c +++ b/arch/arm/kernel/pj4-cp0.c @@ -66,9 +66,13 @@ static void __init pj4_cp_access_write(u32 value) __asm__ __volatile__ ( "mcr p15, 0, %1, c1, c0, 2\n\t" +#ifdef CONFIG_THUMB2_KERNEL + "isb\n\t" +#else "mrc p15, 0, %0, c1, c0, 2\n\t" "mov %0, %0\n\t" "sub pc, pc, #4\n\t" +#endif : "=r" (temp) : "r" (value)); } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 7a7c4cea55231b1c793982ab63d6c5598004b14b..4adfb46e3ee93276ea506b723eb11e33c57459f4 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -95,6 +95,22 @@ void __show_regs(struct pt_regs *regs) { unsigned long flags; char buf[64]; +#ifndef CONFIG_CPU_V7M + unsigned int domain; +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Get the domain register for the parent context. In user + * mode, we don't save the DACR, so lets use what it should + * be. For other modes, we place it after the pt_regs struct. + */ + if (user_mode(regs)) + domain = DACR_UACCESS_ENABLE; + else + domain = *(unsigned int *)(regs + 1); +#else + domain = get_domain(); +#endif +#endif show_regs_print_info(KERN_DEFAULT); @@ -123,21 +139,8 @@ void __show_regs(struct pt_regs *regs) #ifndef CONFIG_CPU_V7M { - unsigned int domain = get_domain(); const char *segment; -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Get the domain register for the parent context. In user - * mode, we don't save the DACR, so lets use what it should - * be. For other modes, we place it after the pt_regs struct. - */ - if (user_mode(regs)) - domain = DACR_UACCESS_ENABLE; - else - domain = *(unsigned int *)(regs + 1); -#endif - if ((domain & domain_mask(DOMAIN_USER)) == domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) segment = "none"; @@ -163,11 +166,11 @@ void __show_regs(struct pt_regs *regs) buf[0] = '\0'; #ifdef CONFIG_CPU_CP15_MMU { - unsigned int transbase, dac = get_domain(); + unsigned int transbase; asm("mrc p15, 0, %0, c2, c0\n\t" : "=r" (transbase)); snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x", - transbase, dac); + transbase, domain); } #endif asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl)); diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S deleted file mode 100644 index a78e9e1e206dee03c7f16e67a30eb9a74159e9ab..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/psci-call.S +++ /dev/null @@ -1,31 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2015 ARM Limited - * - * Author: Mark Rutland - */ - -#include - -#include -#include - -/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - __HVC(0) - bx lr -ENDPROC(__invoke_psci_fn_hvc) - -/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ -ENTRY(__invoke_psci_fn_smc) - __SMC(0) - bx lr -ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 20edd349d379f22c583438db7fbf52f46e1133ef..7d0cba6f1cc5efadff31fd0cde2aca3279e120da 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -37,7 +38,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -375,6 +378,72 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +#ifdef CONFIG_ARM_PATCH_IDIV + +static inline u32 __attribute_const__ sdiv_instruction(void) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* "sdiv r0, r0, r1" */ + u32 insn = __opcode_thumb32_compose(0xfb90, 0xf0f1); + return __opcode_to_mem_thumb32(insn); + } + + /* "sdiv r0, r0, r1" */ + return __opcode_to_mem_arm(0xe710f110); +} + +static inline u32 __attribute_const__ udiv_instruction(void) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* "udiv r0, r0, r1" */ + u32 insn = __opcode_thumb32_compose(0xfbb0, 0xf0f1); + return __opcode_to_mem_thumb32(insn); + } + + /* "udiv r0, r0, r1" */ + return __opcode_to_mem_arm(0xe730f110); +} + +static inline u32 __attribute_const__ bx_lr_instruction(void) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* "bx lr; nop" */ + u32 insn = __opcode_thumb32_compose(0x4770, 0x46c0); + return __opcode_to_mem_thumb32(insn); + } + + /* "bx lr" */ + return __opcode_to_mem_arm(0xe12fff1e); +} + +static void __init patch_aeabi_idiv(void) +{ + extern void __aeabi_uidiv(void); + extern void __aeabi_idiv(void); + uintptr_t fn_addr; + unsigned int mask; + + mask = IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA; + if (!(elf_hwcap & mask)) + return; + + pr_info("CPU: div instructions available: patching division code\n"); + + fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1; + ((u32 *)fn_addr)[0] = udiv_instruction(); + ((u32 *)fn_addr)[1] = bx_lr_instruction(); + flush_icache_range(fn_addr, fn_addr + 8); + + fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1; + ((u32 *)fn_addr)[0] = sdiv_instruction(); + ((u32 *)fn_addr)[1] = bx_lr_instruction(); + flush_icache_range(fn_addr, fn_addr + 8); +} + +#else +static inline void patch_aeabi_idiv(void) { } +#endif + static void __init cpuid_init_hwcaps(void) { int block; @@ -642,6 +711,7 @@ static void __init setup_processor(void) elf_hwcap = list->elf_hwcap; cpuid_init_hwcaps(); + patch_aeabi_idiv(); #ifndef CONFIG_ARM_THUMB elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); @@ -956,8 +1026,8 @@ void __init setup_arch(char **cmdline_p) strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; - if (IS_ENABLED(CONFIG_FIX_EARLYCON_MEM)) - early_fixmap_init(); + early_fixmap_init(); + early_ioremap_init(); parse_early_param(); @@ -965,9 +1035,12 @@ void __init setup_arch(char **cmdline_p) early_paging_init(mdesc); #endif setup_dma_zone(mdesc); + efi_init(); sanity_check_meminfo(); arm_memblock_init(mdesc); + early_ioremap_reset(); + paging_init(mdesc); request_standard_resources(mdesc); diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S new file mode 100644 index 0000000000000000000000000000000000000000..2e48b674aab190563d321902be9919167d8f80cd --- /dev/null +++ b/arch/arm/kernel/smccc-call.S @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include + +#include +#include +#include + + /* + * Wrap c macros in asm macros to delay expansion until after the + * SMCCC asm macro is expanded. + */ + .macro SMCCC_SMC + __SMC(0) + .endm + + .macro SMCCC_HVC + __HVC(0) + .endm + + .macro SMCCC instr +UNWIND( .fnstart) + mov r12, sp + push {r4-r7} +UNWIND( .save {r4-r7}) + ldm r12, {r4-r7} + \instr + pop {r4-r7} + ldr r12, [sp, #(4 * 4)] + stm r12, {r0-r3} + bx lr +UNWIND( .fnend) + .endm + +/* + * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_smc) + SMCCC SMCCC_SMC +ENDPROC(arm_smccc_smc) + +/* + * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_hvc) + SMCCC SMCCC_HVC +ENDPROC(arm_smccc_hvc) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index b26361355daeb39b61c238333c20dd2d68ec4a61..37312f6749f3dd641f48803b67c8f26a4623a8a2 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -69,11 +69,15 @@ enum ipi_msg_type { IPI_TIMER, IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, - IPI_CPU_BACKTRACE = 15, + IPI_CPU_BACKTRACE, + /* + * SGI8-15 can be reserved by secure firmware, and thus may + * not be usable by the kernel. Please keep the above limited + * to at most 8 entries. + */ }; static DECLARE_COMPLETION(cpu_running); @@ -475,7 +479,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), S(IPI_COMPLETION, "completion interrupts"), @@ -525,7 +528,7 @@ void arch_send_wakeup_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } #ifdef CONFIG_IRQ_WORK @@ -620,12 +623,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; - case IPI_CALL_FUNC_SINGLE: - irq_enter(); - generic_smp_call_function_single_interrupt(); - irq_exit(); - break; - case IPI_CPU_STOP: irq_enter(); ipi_cpu_stop(cpu); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 5b26e7efa9ea415967b63ede27ab1edf2bc8e888..c3fe769d75584c7248015b855bca904589f3e29f 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -36,10 +36,10 @@ */ #define __user_swpX_asm(data, addr, res, temp, B) \ __asm__ __volatile__( \ - " mov %2, %1\n" \ - "0: ldrex"B" %1, [%3]\n" \ - "1: strex"B" %0, %2, [%3]\n" \ + "0: ldrex"B" %2, [%3]\n" \ + "1: strex"B" %0, %1, [%3]\n" \ " cmp %0, #0\n" \ + " moveq %1, %2\n" \ " movne %0, %4\n" \ "2:\n" \ " .section .text.fixup,\"ax\"\n" \ diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index b83f3b7737fb9dce2a18c66a086aac2430e98cf6..087acb569b63a4bd90982e0c9b15fc2313636c53 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -193,15 +193,44 @@ struct oabi_flock64 { pid_t l_pid; } __attribute__ ((packed,aligned(4))); -asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, +static long do_locks(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct oabi_flock64 user; struct flock64 kernel; - mm_segment_t fs = USER_DS; /* initialized to kill a warning */ - unsigned long local_arg = arg; - int ret; + struct oabi_flock64 user; + mm_segment_t fs; + long ret; + + if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, + sizeof(user))) + return -EFAULT; + kernel.l_type = user.l_type; + kernel.l_whence = user.l_whence; + kernel.l_start = user.l_start; + kernel.l_len = user.l_len; + kernel.l_pid = user.l_pid; + + fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel); + set_fs(fs); + + if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) { + user.l_type = kernel.l_type; + user.l_whence = kernel.l_whence; + user.l_start = kernel.l_start; + user.l_len = kernel.l_len; + user.l_pid = kernel.l_pid; + if (copy_to_user((struct oabi_flock64 __user *)arg, + &user, sizeof(user))) + ret = -EFAULT; + } + return ret; +} +asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, + unsigned long arg) +{ switch (cmd) { case F_OFD_GETLK: case F_OFD_SETLK: @@ -209,39 +238,11 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, - sizeof(user))) - return -EFAULT; - kernel.l_type = user.l_type; - kernel.l_whence = user.l_whence; - kernel.l_start = user.l_start; - kernel.l_len = user.l_len; - kernel.l_pid = user.l_pid; - local_arg = (unsigned long)&kernel; - fs = get_fs(); - set_fs(KERNEL_DS); - } - - ret = sys_fcntl64(fd, cmd, local_arg); + return do_locks(fd, cmd, arg); - switch (cmd) { - case F_GETLK64: - if (!ret) { - user.l_type = kernel.l_type; - user.l_whence = kernel.l_whence; - user.l_start = kernel.l_start; - user.l_len = kernel.l_len; - user.l_pid = kernel.l_pid; - if (copy_to_user((struct oabi_flock64 __user *)arg, - &user, sizeof(user))) - ret = -EFAULT; - } - case F_SETLK64: - case F_SETLKW64: - set_fs(fs); + default: + return sys_fcntl64(fd, cmd, arg); } - - return ret; } struct oabi_epoll_event { diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 54a5aeab988d3526657b8e3089942ca8cfe4fe5e..994e971a8538a2d316235ba1f61be54317fbba61 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -224,7 +224,7 @@ static int install_vvar(struct mm_struct *mm, unsigned long addr) VM_READ | VM_MAYREAD, &vdso_data_mapping); - return IS_ERR(vma) ? PTR_ERR(vma) : 0; + return PTR_ERR_OR_ZERO(vma); } /* assumes mmap_sem is write-locked */ diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index af2267f6a52941eb91e46f1613891d4155427b5a..9397b2e532afa3d863930b4e29a663c166ae475e 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -205,6 +205,10 @@ Boston, MA 02111-1307, USA. */ .endm +#ifdef CONFIG_ARM_PATCH_IDIV + .align 3 +#endif + ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) UNWIND(.fnstart) @@ -253,6 +257,10 @@ UNWIND(.fnstart) UNWIND(.fnend) ENDPROC(__umodsi3) +#ifdef CONFIG_ARM_PATCH_IDIV + .align 3 +#endif + ENTRY(__divsi3) ENTRY(__aeabi_idiv) UNWIND(.fnstart) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index d72b90905132487257220939a255ac7ed1d3754d..588bbc288396ae52fe0d801bc537906b3e5dbfce 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -88,6 +88,7 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) static unsigned long noinline __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) { + unsigned long ua_flags; int atomic; if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { @@ -118,7 +119,9 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) if (tocopy > n) tocopy = n; + ua_flags = uaccess_save_and_enable(); memcpy((void *)to, from, tocopy); + uaccess_restore(ua_flags); to += tocopy; from += tocopy; n -= tocopy; @@ -145,14 +148,21 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n) * With frame pointer disabled, tail call optimization kicks in * as well making this test almost invisible. */ - if (n < 64) - return __copy_to_user_std(to, from, n); - return __copy_to_user_memcpy(to, from, n); + if (n < 64) { + unsigned long ua_flags = uaccess_save_and_enable(); + n = __copy_to_user_std(to, from, n); + uaccess_restore(ua_flags); + } else { + n = __copy_to_user_memcpy(to, from, n); + } + return n; } static unsigned long noinline __clear_user_memset(void __user *addr, unsigned long n) { + unsigned long ua_flags; + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { memset((void *)addr, 0, n); return 0; @@ -175,7 +185,9 @@ __clear_user_memset(void __user *addr, unsigned long n) if (tocopy > n) tocopy = n; + ua_flags = uaccess_save_and_enable(); memset((void *)addr, 0, tocopy); + uaccess_restore(ua_flags); addr += tocopy; n -= tocopy; @@ -193,9 +205,14 @@ __clear_user_memset(void __user *addr, unsigned long n) unsigned long arm_clear_user(void __user *addr, unsigned long n) { /* See rational for this in __copy_to_user() above. */ - if (n < 64) - return __clear_user_std(addr, n); - return __clear_user_memset(addr, n); + if (n < 64) { + unsigned long ua_flags = uaccess_save_and_enable(); + n = __clear_user_std(addr, n); + uaccess_restore(ua_flags); + } else { + n = __clear_user_memset(addr, n); + } + return n; } #if 0 diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 1ed545cc2b83452954cc4a2217ac1be324237775..9cc7b818fbf639bae6ec76c5cc116095c98d19bf 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -49,8 +49,8 @@ #include #include -#include -#include +#include +#include #define DA850_EVM_PHY_ID "davinci_mdio-0:00" #define DA850_LCD_PWR_PIN GPIO_TO_PIN(2, 8) diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index b46b4d25f93e889b2aa8241be2a7cd303a5459c4..c71dd9982f03a1eadf5e4b75b2d8a284f343bda8 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index a756003595e9685c739d682d83594b302e903273..f073518f621a744dcd00c9befb6abfc354a9c4ce 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -40,8 +40,8 @@ #include #include -#include -#include +#include +#include #include "davinci.h" diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index bbdd2d614b4978022f0b9f51a7abc898dc0b5e0f..7a20507a3eefb3a6197afd1861286922184b67a2 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index 846a84ddc28e5a6d573c9767f6e57869c3e423fe..ee6ab7e8d3b0cdf7b54399e399550609d0e9301e 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c index c4904264256ad8675c04d42f2223dab64c8d1e15..b2db791b3b38b0adb69e64ea0cd596356184b483 100644 --- a/arch/arm/mach-ep93xx/snappercl15.c +++ b/arch/arm/mach-ep93xx/snappercl15.c @@ -49,7 +49,7 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); static u16 nand_state = SNAPPERCL15_NAND_WPN; u16 set; @@ -76,7 +76,7 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, static int snappercl15_nand_dev_ready(struct mtd_info *mtd) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); return !!(__raw_readw(NAND_CTRL_ADDR(chip)) & SNAPPERCL15_NAND_RDY); } diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c index 61f4b5dc4d7dd9ad8d6802606b47d2117be314cc..45b81a2bcd4b39ca121c5eedb04c6600c65ec65d 100644 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ b/arch/arm/mach-ep93xx/ts72xx.c @@ -74,7 +74,7 @@ static void __init ts72xx_map_io(void) static void ts72xx_nand_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); if (ctrl & NAND_CTRL_CHANGE) { void __iomem *addr = chip->IO_ADDR_R; @@ -96,7 +96,7 @@ static void ts72xx_nand_hwcontrol(struct mtd_info *mtd, static int ts72xx_nand_device_ready(struct mtd_info *mtd) { - struct nand_chip *chip = mtd->priv; + struct nand_chip *chip = mtd_to_nand(mtd); void __iomem *addr = chip->IO_ADDR_R; addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE); diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 3a10f1a8317ae7a053ed997da88a06ddd5311b57..ff105399aae48973432e4f688a4efb5a570f0216 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -27,6 +27,7 @@ menuconfig ARCH_EXYNOS select SRAM select THERMAL select MFD_SYSCON + select CLKSRC_EXYNOS_MCT help Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5) diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index 67f7fb13050dbdec3d84cfe381943240a2e4e192..09cebd8cef2b835477e96fef15863d3b9c3ec09d 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -177,7 +177,7 @@ struct platform_device *__init imx_add_imx_uart_1irq( const struct imxuart_platform_data *pdata); #include -#include +#include struct imx_ipu_core_data { resource_size_t iobase; resource_size_t synirq; @@ -192,7 +192,7 @@ struct platform_device *__init imx_add_mx3_sdc_fb( const struct imx_ipu_core_data *data, struct mx3fb_platform_data *pdata); -#include +#include struct imx_mx2_camera_data { const char *devid; resource_size_t iobasecsi; diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c index a213e7b9cb1c90caf89afbb06408c43004ee8af6..5c27646047270fc4fbcab94e689f5e533d3d9d09 100644 --- a/arch/arm/mach-imx/mach-qong.c +++ b/arch/arm/mach-imx/mach-qong.c @@ -131,7 +131,7 @@ static void qong_init_nor_mtd(void) */ static void qong_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *nand_chip = mtd->priv; + struct nand_chip *nand_chip = mtd_to_nand(mtd); if (cmd == NAND_CMD_NONE) return; diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index e7b8befa872950c43d7289e12f49ae7850d6c3c9..508c2d7786e2ec8389f97bb55bf446d3e323d198 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -76,8 +76,8 @@ static struct mtd_partition ixdp425_partitions[] = { static void ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; - int offset = (int)this->priv; + struct nand_chip *this = mtd_to_nand(mtd); + int offset = (int)nand_get_controller_data(this); if (ctrl & NAND_CTRL_CHANGE) { if (ctrl & NAND_NCE) { @@ -88,7 +88,7 @@ ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) offset = (ctrl & NAND_CLE) ? IXDP425_NAND_CMD_BYTE : 0; offset |= (ctrl & NAND_ALE) ? IXDP425_NAND_ADDR_BYTE : 0; - this->priv = (void *)offset; + nand_set_controller_data(this, (void *)offset); } if (cmd != NAND_CMD_NONE) diff --git a/arch/arm/mach-omap1/board-nand.c b/arch/arm/mach-omap1/board-nand.c index 4d0835327d2038580ce707f57d7aac5b2a2269c6..7684f9203474386eafbd3d974dbcfa5a30b24989 100644 --- a/arch/arm/mach-omap1/board-nand.c +++ b/arch/arm/mach-omap1/board-nand.c @@ -22,7 +22,7 @@ void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl) { - struct nand_chip *this = mtd->priv; + struct nand_chip *this = mtd_to_nand(mtd); unsigned long mask; if (cmd == NAND_CMD_NONE) diff --git a/arch/arm/mach-omap1/include/mach/camera.h b/arch/arm/mach-omap1/include/mach/camera.h index 847d00f0bb0a6ae2393e0bf116cc3df76e24b92b..caa6c0d6f0ac40d93018b9561304489ae774c0a3 100644 --- a/arch/arm/mach-omap1/include/mach/camera.h +++ b/arch/arm/mach-omap1/include/mach/camera.h @@ -1,7 +1,7 @@ #ifndef __ASM_ARCH_CAMERA_H_ #define __ASM_ARCH_CAMERA_H_ -#include +#include void omap1_camera_init(void *); diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 4b4371db5799cc8c5d726281e0834277304404f5..0517f0c1581a71091539c5c1abe47473f4b789ef 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -65,6 +65,8 @@ config SOC_AM43XX select MACH_OMAP_GENERIC select MIGHT_HAVE_CACHE_L2X0 select HAVE_ARM_SCU + select GENERIC_CLOCKEVENTS_BROADCAST + select HAVE_ARM_TWD config SOC_DRA7XX bool "TI DRA7XX" diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 04a56cc04dfa48cfc8c9752033cff152e5f15dba..809827265fb39d1b8a7178481bb91e87951c4a14 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -16,6 +16,7 @@ #include #include +#include #include #include "common.h" @@ -76,8 +77,17 @@ static const char *const n900_boards_compat[] __initconst = { NULL, }; +/* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags, + * save them while the data is still not overwritten + */ +static void __init rx51_reserve(void) +{ + save_atags((const struct tag *)(PAGE_OFFSET + 0x100)); + omap_reserve(); +} + DT_MACHINE_START(OMAP3_N900_DT, "Nokia RX-51 board") - .reserve = omap_reserve, + .reserve = rx51_reserve, .map_io = omap3_map_io, .init_early = omap3430_init_early, .init_machine = omap_generic_init, diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index 14edcd7a2a1d14c990848bf41c515009bb74bdb5..0a0567f8e8a030ffc2b45e4d353ebff35f5149d0 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include @@ -48,7 +48,7 @@ #include