Merge tag 'docs-for-linus' of git://git.lwn.net/linux-2.6

Pull documentation updates from Jonathan Corbet: "There's been a fair amount going on in the docs tree this time around, including: - Support for reproducible document builds, from Ben Hutchings and company. - The ability to automatically generate cross-reference links within a single DocBook book and embedded descriptions for large structures. From Danilo Cesar Lemes de Paula. - A new document on how to add a system call from David Drysdale. - Chameleon bus documentation from Johannes Thumshirn. ...plus the usual collection of improvements, typo fixes, and more" * tag 'docs-for-linus' of git://git.lwn.net/linux-2.6: (39 commits) Documentation, add kernel-parameters.txt entry for dis_ucode_ldr Documentation/x86: Rename IRQSTACKSIZE to IRQ_STACK_SIZE Documentation/Intel-IOMMU.txt: Modify definition of DRHD docs: update HOWTO for 3.x -> 4.x versioning kernel-doc: ignore unneeded attribute information scripts/kernel-doc: Adding cross-reference links to html documentation. DocBook: Fix non-determinstic installation of duplicate man pages Documentation: minor typo fix in mailbox.txt Documentation: describe how to add a system call doc: Add more workqueue functions to the documentation ARM: keystone: add documentation for SoCs and EVMs scripts/kernel-doc Allow struct arguments documentation in struct body SubmittingPatches: remove stray quote character Revert "DocBook: Avoid building man pages repeatedly and inconsistently" Documentation: Minor changes to men-chameleon-bus.txt Doc: fix trivial typo in SubmittingPatches MAINTAINERS: Direct Documentation/DocBook/media properly Documentation: installed man pages don't need to be executable fix Evolution submenu name in email-clients.txt Documentation: Add MCB documentation ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-08-31 15:40:05 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-08-31 15:40:05 -0700
commit: e2701603f72cd38e99c6b1da13c8e99bc27b2f34 (patch)
tree: af55a86e0fbc26d4b19d1a2eabb41db26e7687ef
parent: 22629b6d9072c4e86e900306d7020ad722ae6536 (diff)
parent: ce14c5831364118324b10c0355dead062b9ddd40 (diff)
download: linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.gz
linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.bz2
linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.zip
28 files changed, 1195 insertions, 76 deletions
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index b713c35f8543..c06f817b3091 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -929,13 +929,11 @@ The C Programming Language, Second Edition
 by Brian W. Kernighan and Dennis M. Ritchie.
 Prentice Hall, Inc., 1988.
 ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback).
-URL: http://cm.bell-labs.com/cm/cs/cbook/
 
 The Practice of Programming
 by Brian W. Kernighan and Rob Pike.
 Addison-Wesley, Inc., 1999.
 ISBN 0-201-61586-X.
-URL: http://cm.bell-labs.com/cm/cs/tpop/
 
 GNU manuals - where in compliance with K&R and this text - for cpp, gcc,
 gcc internals and indent, all available from http://www.gnu.org/manual/
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 9e086067b4ae..93eff64387cd 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -56,16 +56,19 @@ htmldocs: $(HTML)
 
 MAN := $(patsubst %.xml, %.9, $(BOOKS))
 mandocs: $(MAN)
-	find $(obj)/man -name '*.9' | xargs gzip -f
+	find $(obj)/man -name '*.9' | xargs gzip -nf
 
 installmandocs: mandocs
 	mkdir -p /usr/local/man/man9/
-	install $(obj)/man/*.9.gz /usr/local/man/man9/
+	find $(obj)/man -name '*.9.gz' -printf '%h %f\n' | \
+		sort -k 2 -k 1 | uniq -f 1 | sed -e 's: :/:' | \
+		xargs install -m 644 -t /usr/local/man/man9/
 
 ###
 #External programs used
-KERNELDOC = $(srctree)/scripts/kernel-doc
-DOCPROC   = $(objtree)/scripts/docproc
+KERNELDOCXMLREF = $(srctree)/scripts/kernel-doc-xml-ref
+KERNELDOC       = $(srctree)/scripts/kernel-doc
+DOCPROC         = $(objtree)/scripts/docproc
 
 XMLTOFLAGS = -m $(srctree)/$(src)/stylesheet.xsl
 XMLTOFLAGS += --skip-validation
@@ -89,7 +92,7 @@ define rule_docproc
         ) > $(dir $@).$(notdir $@).cmd
 endef
 
-%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) FORCE
+%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) $(KERNELDOCXMLREF) FORCE
 	$(call if_changed_rule,docproc)
 
 # Tell kbuild to always build the programs
@@ -140,7 +143,20 @@ quiet_cmd_db2html = HTML    $@
 		echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \
 		$(patsubst %.html,%,$(notdir $@))</a><p>' > $@
 
-%.html:	%.xml
+###
+# Rules to create an aux XML and .db, and use them to re-process the DocBook XML
+# to fill internal hyperlinks
+       gen_aux_xml = :
+ quiet_gen_aux_xml = echo '  XMLREF  $@'
+silent_gen_aux_xml = :
+%.aux.xml: %.xml
+	@$($(quiet)gen_aux_xml)
+	@rm -rf $@
+	@(cat $< | egrep "^<refentry id" | egrep -o "\".*\"" | cut -f 2 -d \" > $<.db)
+	@$(KERNELDOCXMLREF) -db $<.db $< > $@
+.PRECIOUS: %.aux.xml
+
+%.html:	%.aux.xml
 	@(which xmlto > /dev/null 2>&1) || \
 	 (echo "*** You need to install xmlto ***"; \
 	  exit 1)
@@ -150,12 +166,12 @@ quiet_cmd_db2html = HTML    $@
             cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi
 
 quiet_cmd_db2man = MAN     $@
-      cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man $< ; fi
+      cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man/$(*F) $< ; fi
 %.9 : %.xml
 	@(which xmlto > /dev/null 2>&1) || \
 	 (echo "*** You need to install xmlto ***"; \
 	  exit 1)
-	$(Q)mkdir -p $(obj)/man
+	$(Q)mkdir -p $(obj)/man/$(*F)
 	$(call cmd,db2man)
 	@touch $@
 
@@ -209,15 +225,18 @@ dochelp:
 ###
 # Temporary files left by various tools
 clean-files := $(DOCBOOKS) \
-	$(patsubst %.xml, %.dvi,  $(DOCBOOKS)) \
-	$(patsubst %.xml, %.aux,  $(DOCBOOKS)) \
-	$(patsubst %.xml, %.tex,  $(DOCBOOKS)) \
-	$(patsubst %.xml, %.log,  $(DOCBOOKS)) \
-	$(patsubst %.xml, %.out,  $(DOCBOOKS)) \
-	$(patsubst %.xml, %.ps,   $(DOCBOOKS)) \
-	$(patsubst %.xml, %.pdf,  $(DOCBOOKS)) \
-	$(patsubst %.xml, %.html, $(DOCBOOKS)) \
-	$(patsubst %.xml, %.9,    $(DOCBOOKS)) \
+	$(patsubst %.xml, %.dvi,     $(DOCBOOKS)) \
+	$(patsubst %.xml, %.aux,     $(DOCBOOKS)) \
+	$(patsubst %.xml, %.tex,     $(DOCBOOKS)) \
+	$(patsubst %.xml, %.log,     $(DOCBOOKS)) \
+	$(patsubst %.xml, %.out,     $(DOCBOOKS)) \
+	$(patsubst %.xml, %.ps,      $(DOCBOOKS)) \
+	$(patsubst %.xml, %.pdf,     $(DOCBOOKS)) \
+	$(patsubst %.xml, %.html,    $(DOCBOOKS)) \
+	$(patsubst %.xml, %.9,       $(DOCBOOKS)) \
+	$(patsubst %.xml, %.aux.xml, $(DOCBOOKS)) \
+	$(patsubst %.xml, %.xml.db,  $(DOCBOOKS)) \
+	$(patsubst %.xml, %.xml,     $(DOCBOOKS)) \
 	$(index)
 
 clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index faf09d4a0ea8..bbc1d7ee9c76 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -66,6 +66,7 @@
 !Ekernel/time/hrtimer.c
      </sect1>
      <sect1><title>Workqueues and Kevents</title>
+!Iinclude/linux/workqueue.h
 !Ekernel/workqueue.c
      </sect1>
      <sect1><title>Internal Functions</title>
diff --git a/Documentation/DocBook/stylesheet.xsl b/Documentation/DocBook/stylesheet.xsl
index 85b25275196f..3bf4ecf3d760 100644
--- a/Documentation/DocBook/stylesheet.xsl
+++ b/Documentation/DocBook/stylesheet.xsl
@@ -5,6 +5,7 @@
 <param name="funcsynopsis.tabular.threshold">80</param>
 <param name="callout.graphics">0</param>
 <!-- <param name="paper.type">A4</param> -->
+<param name="generate.consistent.ids">1</param>
 <param name="generate.section.toc.level">2</param>
 <param name="use.id.as.filename">1</param>
 </stylesheet>
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 93aa8604630e..21152d397b88 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -218,16 +218,16 @@ The development process
 Linux kernel development process currently consists of a few different
 main kernel "branches" and lots of different subsystem-specific kernel
 branches.  These different branches are:
-  - main 3.x kernel tree
-  - 3.x.y -stable kernel tree
-  - 3.x -git kernel patches
+  - main 4.x kernel tree
+  - 4.x.y -stable kernel tree
+  - 4.x -git kernel patches
   - subsystem specific kernel trees and patches
-  - the 3.x -next kernel tree for integration tests
+  - the 4.x -next kernel tree for integration tests
 
-3.x kernel tree
+4.x kernel tree
 -----------------
-3.x kernels are maintained by Linus Torvalds, and can be found on
-kernel.org in the pub/linux/kernel/v3.x/ directory.  Its development
+4.x kernels are maintained by Linus Torvalds, and can be found on
+kernel.org in the pub/linux/kernel/v4.x/ directory.  Its development
 process is as follows:
   - As soon as a new kernel is released a two weeks window is open,
     during this period of time maintainers can submit big diffs to
@@ -262,20 +262,20 @@ mailing list about kernel releases:
 	released according to perceived bug status, not according to a
 	preconceived timeline."
 
-3.x.y -stable kernel tree
+4.x.y -stable kernel tree
 ---------------------------
 Kernels with 3-part versions are -stable kernels. They contain
 relatively small and critical fixes for security problems or significant
-regressions discovered in a given 3.x kernel.
+regressions discovered in a given 4.x kernel.
 
 This is the recommended branch for users who want the most recent stable
 kernel and are not interested in helping test development/experimental
 versions.
 
-If no 3.x.y kernel is available, then the highest numbered 3.x
+If no 4.x.y kernel is available, then the highest numbered 4.x
 kernel is the current stable kernel.
 
-3.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and
+4.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and
 are released as needs dictate.  The normal release period is approximately
 two weeks, but it can be longer if there are no pressing problems.  A
 security-related problem, instead, can cause a release to happen almost
@@ -285,7 +285,7 @@ The file Documentation/stable_kernel_rules.txt in the kernel tree
 documents what kinds of changes are acceptable for the -stable tree, and
 how the release process works.
 
-3.x -git patches
+4.x -git patches
 ------------------
 These are daily snapshots of Linus' kernel tree which are managed in a
 git repository (hence the name.) These patches are usually released
@@ -317,9 +317,9 @@ revisions to it, and maintainers can mark patches as under review,
 accepted, or rejected.  Most of these patchwork sites are listed at
 http://patchwork.kernel.org/.
 
-3.x -next kernel tree for integration tests
+4.x -next kernel tree for integration tests
 ---------------------------------------------
-Before updates from subsystem trees are merged into the mainline 3.x
+Before updates from subsystem trees are merged into the mainline 4.x
 tree, they need to be integration-tested.  For this purpose, a special
 testing repository exists into which virtually all subsystem trees are
 pulled on an almost daily basis:
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
index cf9431db8731..7b57fc087088 100644
--- a/Documentation/Intel-IOMMU.txt
+++ b/Documentation/Intel-IOMMU.txt
@@ -10,7 +10,7 @@ This guide gives a quick cheat sheet for some basic understanding.
 Some Keywords
 
 DMAR - DMA remapping
-DRHD - DMA Engine Reporting Structure
+DRHD - DMA Remapping Hardware Unit Definition
 RMRR - Reserved memory Region Reporting Structure
 ZLR  - Zero length reads from PCI devices
 IOVA - IO Virtual address.
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 27e7e5edeca8..fa596d8a3ab0 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -90,11 +90,11 @@ patch.
 
 Make sure your patch does not include any extra files which do not
 belong in a patch submission.  Make sure to review your patch -after-
-generated it with diff(1), to ensure accuracy.
+generating it with diff(1), to ensure accuracy.
 
 If your changes produce a lot of deltas, you need to split them into
 individual patches which modify things in logical stages; see section
-#3.  This will facilitate easier reviewing by other kernel developers,
+#3.  This will facilitate review by other kernel developers,
 very important if you want your patch accepted.
 
 If you're using git, "git rebase -i" can help you with this process.  If
@@ -267,7 +267,7 @@ You should always copy the appropriate subsystem maintainer(s) on any patch
 to code that they maintain; look through the MAINTAINERS file and the
 source code revision history to see who those maintainers are.  The
 script scripts/get_maintainer.pl can be very useful at this step.  If you
-cannot find a maintainer for the subsystem your are working on, Andrew
+cannot find a maintainer for the subsystem you are working on, Andrew
 Morton (akpm@linux-foundation.org) serves as a maintainer of last resort.
 
 You should also normally choose at least one mailing list to receive a copy
@@ -340,7 +340,7 @@ on the changes you are submitting.  It is important for a kernel
 developer to be able to "quote" your changes, using standard e-mail
 tools, so that they may comment on specific portions of your code.
 
-For this reason, all patches should be submitting e-mail "inline".
+For this reason, all patches should be submitted by e-mail "inline".
 WARNING:  Be wary of your editor's word-wrap corrupting your patch,
 if you choose to cut-n-paste your patch.
 
@@ -739,7 +739,7 @@ interest on a single line; it should look something like:
 
       git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus
 
-  to get these changes:"
+  to get these changes:
 
 A pull request should also include an overall message saying what will be
 included in the request, a "git shortlog" listing of the patches
@@ -796,7 +796,7 @@ NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lkml.org/lkml/2005/7/11/336>
 
 Kernel Documentation/CodingStyle:
-  <http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle>
+  <Documentation/CodingStyle>
 
 Linus Torvalds's mail on the canonical patch format:
   <http://lkml.org/lkml/2005/4/7/183>
diff --git a/Documentation/adding-syscalls.txt b/Documentation/adding-syscalls.txt
new file mode 100644
index 000000000000..cc2d4ac4f404
--- /dev/null
+++ b/Documentation/adding-syscalls.txt
@@ -0,0 +1,527 @@
+Adding a New System Call
+========================
+
+This document describes what's involved in adding a new system call to the
+Linux kernel, over and above the normal submission advice in
+Documentation/SubmittingPatches.
+
+
+System Call Alternatives
+------------------------
+
+The first thing to consider when adding a new system call is whether one of
+the alternatives might be suitable instead.  Although system calls are the
+most traditional and most obvious interaction points between userspace and the
+kernel, there are other possibilities -- choose what fits best for your
+interface.
+
+ - If the operations involved can be made to look like a filesystem-like
+   object, it may make more sense to create a new filesystem or device.  This
+   also makes it easier to encapsulate the new functionality in a kernel module
+   rather than requiring it to be built into the main kernel.
+     - If the new functionality involves operations where the kernel notifies
+       userspace that something has happened, then returning a new file
+       descriptor for the relevant object allows userspace to use
+       poll/select/epoll to receive that notification.
+     - However, operations that don't map to read(2)/write(2)-like operations
+       have to be implemented as ioctl(2) requests, which can lead to a
+       somewhat opaque API.
+ - If you're just exposing runtime system information, a new node in sysfs
+   (see Documentation/filesystems/sysfs.txt) or the /proc filesystem may be
+   more appropriate.  However, access to these mechanisms requires that the
+   relevant filesystem is mounted, which might not always be the case (e.g.
+   in a namespaced/sandboxed/chrooted environment).  Avoid adding any API to
+   debugfs, as this is not considered a 'production' interface to userspace.
+ - If the operation is specific to a particular file or file descriptor, then
+   an additional fcntl(2) command option may be more appropriate.  However,
+   fcntl(2) is a multiplexing system call that hides a lot of complexity, so
+   this option is best for when the new function is closely analogous to
+   existing fcntl(2) functionality, or the new functionality is very simple
+   (for example, getting/setting a simple flag related to a file descriptor).
+ - If the operation is specific to a particular task or process, then an
+   additional prctl(2) command option may be more appropriate.  As with
+   fcntl(2), this system call is a complicated multiplexor so is best reserved
+   for near-analogs of existing prctl() commands or getting/setting a simple
+   flag related to a process.
+
+
+Designing the API: Planning for Extension
+-----------------------------------------
+
+A new system call forms part of the API of the kernel, and has to be supported
+indefinitely.  As such, it's a very good idea to explicitly discuss the
+interface on the kernel mailing list, and it's important to plan for future
+extensions of the interface.
+
+(The syscall table is littered with historical examples where this wasn't done,
+together with the corresponding follow-up system calls -- eventfd/eventfd2,
+dup2/dup3, inotify_init/inotify_init1,  pipe/pipe2, renameat/renameat2 -- so
+learn from the history of the kernel and plan for extensions from the start.)
+
+For simpler system calls that only take a couple of arguments, the preferred
+way to allow for future extensibility is to include a flags argument to the
+system call.  To make sure that userspace programs can safely use flags
+between kernel versions, check whether the flags value holds any unknown
+flags, and reject the system call (with EINVAL) if it does:
+
+    if (flags & ~(THING_FLAG1 | THING_FLAG2 | THING_FLAG3))
+        return -EINVAL;
+
+(If no flags values are used yet, check that the flags argument is zero.)
+
+For more sophisticated system calls that involve a larger number of arguments,
+it's preferred to encapsulate the majority of the arguments into a structure
+that is passed in by pointer.  Such a structure can cope with future extension
+by including a size argument in the structure:
+
+    struct xyzzy_params {
+        u32 size; /* userspace sets p->size = sizeof(struct xyzzy_params) */
+        u32 param_1;
+        u64 param_2;
+        u64 param_3;
+    };
+
+As long as any subsequently added field, say param_4, is designed so that a
+zero value gives the previous behaviour, then this allows both directions of
+version mismatch:
+
+ - To cope with a later userspace program calling an older kernel, the kernel
+   code should check that any memory beyond the size of the structure that it
+   expects is zero (effectively checking that param_4 == 0).
+ - To cope with an older userspace program calling a newer kernel, the kernel
+   code can zero-extend a smaller instance of the structure (effectively
+   setting param_4 = 0).
+
+See perf_event_open(2) and the perf_copy_attr() function (in
+kernel/events/core.c) for an example of this approach.
+
+
+Designing the API: Other Considerations
+---------------------------------------
+
+If your new system call allows userspace to refer to a kernel object, it
+should use a file descriptor as the handle for that object -- don't invent a
+new type of userspace object handle when the kernel already has mechanisms and
+well-defined semantics for using file descriptors.
+
+If your new xyzzy(2) system call does return a new file descriptor, then the
+flags argument should include a value that is equivalent to setting O_CLOEXEC
+on the new FD.  This makes it possible for userspace to close the timing
+window between xyzzy() and calling fcntl(fd, F_SETFD, FD_CLOEXEC), where an
+unexpected fork() and execve() in another thread could leak a descriptor to
+the exec'ed program. (However, resist the temptation to re-use the actual value
+of the O_CLOEXEC constant, as it is architecture-specific and is part of a
+numbering space of O_* flags that is fairly full.)
+
+If your system call returns a new file descriptor, you should also consider
+what it means to use the poll(2) family of system calls on that file
+descriptor. Making a file descriptor ready for reading or writing is the
+normal way for the kernel to indicate to userspace that an event has
+occurred on the corresponding kernel object.
+
+If your new xyzzy(2) system call involves a filename argument:
+
+    int sys_xyzzy(const char __user *path, ..., unsigned int flags);
+
+you should also consider whether an xyzzyat(2) version is more appropriate:
+
+    int sys_xyzzyat(int dfd, const char __user *path, ..., unsigned int flags);
+
+This allows more flexibility for how userspace specifies the file in question;
+in particular it allows userspace to request the functionality for an
+already-opened file descriptor using the AT_EMPTY_PATH flag, effectively giving
+an fxyzzy(3) operation for free:
+
+ - xyzzyat(AT_FDCWD, path, ..., 0) is equivalent to xyzzy(path,...)
+ - xyzzyat(fd, "", ..., AT_EMPTY_PATH) is equivalent to fxyzzy(fd, ...)
+
+(For more details on the rationale of the *at() calls, see the openat(2) man
+page; for an example of AT_EMPTY_PATH, see the statat(2) man page.)
+
+If your new xyzzy(2) system call involves a parameter describing an offset
+within a file, make its type loff_t so that 64-bit offsets can be supported
+even on 32-bit architectures.
+
+If your new xyzzy(2) system call involves privileged functionality, it needs
+to be governed by the appropriate Linux capability bit (checked with a call to
+capable()), as described in the capabilities(7) man page.  Choose an existing
+capability bit that governs related functionality, but try to avoid combining
+lots of only vaguely related functions together under the same bit, as this
+goes against capabilities' purpose of splitting the power of root.  In
+particular, avoid adding new uses of the already overly-general CAP_SYS_ADMIN
+capability.
+
+If your new xyzzy(2) system call manipulates a process other than the calling
+process, it should be restricted (using a call to ptrace_may_access()) so that
+only a calling process with the same permissions as the target process, or
+with the necessary capabilities, can manipulate the target process.
+
+Finally, be aware that some non-x86 architectures have an easier time if
+system call parameters that are explicitly 64-bit fall on odd-numbered
+arguments (i.e. parameter 1, 3, 5), to allow use of contiguous pairs of 32-bit
+registers.  (This concern does not apply if the arguments are part of a
+structure that's passed in by pointer.)
+
+
+Proposing the API
+-----------------
+
+To make new system calls easy to review, it's best to divide up the patchset
+into separate chunks.  These should include at least the following items as
+distinct commits (each of which is described further below):
+
+ - The core implementation of the system call, together with prototypes,
+   generic numbering, Kconfig changes and fallback stub implementation.
+ - Wiring up of the new system call for one particular architecture, usually
+   x86 (including all of x86_64, x86_32 and x32).
+ - A demonstration of the use of the new system call in userspace via a
+   selftest in tools/testing/selftests/.
+ - A draft man-page for the new system call, either as plain text in the
+   cover letter, or as a patch to the (separate) man-pages repository.
+
+New system call proposals, like any change to the kernel's API, should always
+be cc'ed to linux-api@vger.kernel.org.
+
+
+Generic System Call Implementation
+----------------------------------
+
+The main entry point for your new xyzzy(2) system call will be called
+sys_xyzzy(), but you add this entry point with the appropriate
+SYSCALL_DEFINEn() macro rather than explicitly.  The 'n' indicates the number
+of arguments to the system call, and the macro takes the system call name
+followed by the (type, name) pairs for the parameters as arguments.  Using
+this macro allows metadata about the new system call to be made available for
+other tools.
+
+The new entry point also needs a corresponding function prototype, in
+include/linux/syscalls.h, marked as asmlinkage to match the way that system
+calls are invoked:
+
+    asmlinkage long sys_xyzzy(...);
+
+Some architectures (e.g. x86) have their own architecture-specific syscall
+tables, but several other architectures share a generic syscall table. Add your
+new system call to the generic list by adding an entry to the list in
+include/uapi/asm-generic/unistd.h:
+
+    #define __NR_xyzzy 292
+    __SYSCALL(__NR_xyzzy, sys_xyzzy)
+
+Also update the __NR_syscalls count to reflect the additional system call, and
+note that if multiple new system calls are added in the same merge window,
+your new syscall number may get adjusted to resolve conflicts.
+
+The file kernel/sys_ni.c provides a fallback stub implementation of each system
+call, returning -ENOSYS.  Add your new system call here too:
+
+    cond_syscall(sys_xyzzy);
+
+Your new kernel functionality, and the system call that controls it, should
+normally be optional, so add a CONFIG option (typically to init/Kconfig) for
+it. As usual for new CONFIG options:
+
+ - Include a description of the new functionality and system call controlled
+   by the option.
+ - Make the option depend on EXPERT if it should be hidden from normal users.
+ - Make any new source files implementing the function dependent on the CONFIG
+   option in the Makefile (e.g. "obj-$(CONFIG_XYZZY_SYSCALL) += xyzzy.c").
+ - Double check that the kernel still builds with the new CONFIG option turned
+   off.
+
+To summarize, you need a commit that includes:
+
+ - CONFIG option for the new function, normally in init/Kconfig
+ - SYSCALL_DEFINEn(xyzzy, ...) for the entry point
+ - corresponding prototype in include/linux/syscalls.h
+ - generic table entry in include/uapi/asm-generic/unistd.h
+ - fallback stub in kernel/sys_ni.c
+
+
+x86 System Call Implementation
+------------------------------
+
+To wire up your new system call for x86 platforms, you need to update the
+master syscall tables.  Assuming your new system call isn't special in some
+way (see below), this involves a "common" entry (for x86_64 and x32) in
+arch/x86/entry/syscalls/syscall_64.tbl:
+
+    333   common   xyzzy     sys_xyzzy
+
+and an "i386" entry in arch/x86/entry/syscalls/syscall_32.tbl:
+
+    380   i386     xyzzy     sys_xyzzy
+
+Again, these numbers are liable to be changed if there are conflicts in the
+relevant merge window.
+
+
+Compatibility System Calls (Generic)
+------------------------------------
+
+For most system calls the same 64-bit implementation can be invoked even when
+the userspace program is itself 32-bit; even if the system call's parameters
+include an explicit pointer, this is handled transparently.
+
+However, there are a couple of situations where a compatibility layer is
+needed to cope with size differences between 32-bit and 64-bit.
+
+The first is if the 64-bit kernel also supports 32-bit userspace programs, and
+so needs to parse areas of (__user) memory that could hold either 32-bit or
+64-bit values.  In particular, this is needed whenever a system call argument
+is:
+
+ - a pointer to a pointer
+ - a pointer to a struct containing a pointer (e.g. struct iovec __user *)
+ - a pointer to a varying sized integral type (time_t, off_t, long, ...)
+ - a pointer to a struct containing a varying sized integral type.
+
+The second situation that requires a compatibility layer is if one of the
+system call's arguments has a type that is explicitly 64-bit even on a 32-bit
+architecture, for example loff_t or __u64.  In this case, a value that arrives
+at a 64-bit kernel from a 32-bit application will be split into two 32-bit
+values, which then need to be re-assembled in the compatibility layer.
+
+(Note that a system call argument that's a pointer to an explicit 64-bit type
+does *not* need a compatibility layer; for example, splice(2)'s arguments of
+type loff_t __user * do not trigger the need for a compat_ system call.)
+
+The compatibility version of the system call is called compat_sys_xyzzy(), and
+is added with the COMPAT_SYSCALL_DEFINEn() macro, analogously to
+SYSCALL_DEFINEn.  This version of the implementation runs as part of a 64-bit
+kernel, but expects to receive 32-bit parameter values and does whatever is
+needed to deal with them.  (Typically, the compat_sys_ version converts the
+values to 64-bit versions and either calls on to the sys_ version, or both of
+them call a common inner implementation function.)
+
+The compat entry point also needs a corresponding function prototype, in
+include/linux/compat.h, marked as asmlinkage to match the way that system
+calls are invoked:
+
+    asmlinkage long compat_sys_xyzzy(...);
+
+If the system call involves a structure that is laid out differently on 32-bit
+and 64-bit systems, say struct xyzzy_args, then the include/linux/compat.h
+header file should also include a compat version of the structure (struct
+compat_xyzzy_args) where each variable-size field has the appropriate compat_
+type that corresponds to the type in struct xyzzy_args.  The
+compat_sys_xyzzy() routine can then use this compat_ structure to parse the
+arguments from a 32-bit invocation.
+
+For example, if there are fields:
+
+    struct xyzzy_args {
+        const char __user *ptr;
+        __kernel_long_t varying_val;
+        u64 fixed_val;
+        /* ... */
+    };
+
+in struct xyzzy_args, then struct compat_xyzzy_args would have:
+
+    struct compat_xyzzy_args {
+        compat_uptr_t ptr;
+        compat_long_t varying_val;
+        u64 fixed_val;
+        /* ... */
+    };
+
+The generic system call list also needs adjusting to allow for the compat
+version; the entry in include/uapi/asm-generic/unistd.h should use
+__SC_COMP rather than __SYSCALL:
+
+    #define __NR_xyzzy 292
+    __SC_COMP(__NR_xyzzy, sys_xyzzy, compat_sys_xyzzy)
+
+To summarize, you need:
+
+ - a COMPAT_SYSCALL_DEFINEn(xyzzy, ...) for the compat entry point
+ - corresponding prototype in include/linux/compat.h
+ - (if needed) 32-bit mapping struct in include/linux/compat.h
+ - instance of __SC_COMP not __SYSCALL in include/uapi/asm-generic/unistd.h
+
+
+Compatibility System Calls (x86)
+--------------------------------
+
+To wire up the x86 architecture of a system call with a compatibility version,
+the entries in the syscall tables need to be adjusted.
+
+First, the entry in arch/x86/entry/syscalls/syscall_32.tbl gets an extra
+column to indicate that a 32-bit userspace program running on a 64-bit kernel
+should hit th
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-08-31 15:40:05 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-08-31 15:40:05 -0700
commit	e2701603f72cd38e99c6b1da13c8e99bc27b2f34 (patch)
tree	af55a86e0fbc26d4b19d1a2eabb41db26e7687ef
parent	22629b6d9072c4e86e900306d7020ad722ae6536 (diff)
parent	ce14c5831364118324b10c0355dead062b9ddd40 (diff)
download	linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.gz linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.bz2 linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.zip