diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-31 15:40:05 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-08-31 15:40:05 -0700 |
| commit | e2701603f72cd38e99c6b1da13c8e99bc27b2f34 (patch) | |
| tree | af55a86e0fbc26d4b19d1a2eabb41db26e7687ef | |
| parent | 22629b6d9072c4e86e900306d7020ad722ae6536 (diff) | |
| parent | ce14c5831364118324b10c0355dead062b9ddd40 (diff) | |
| download | linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.gz linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.tar.bz2 linux-e2701603f72cd38e99c6b1da13c8e99bc27b2f34.zip | |
Merge tag 'docs-for-linus' of git://git.lwn.net/linux-2.6
Pull documentation updates from Jonathan Corbet:
"There's been a fair amount going on in the docs tree this time around,
including:
- Support for reproducible document builds, from Ben Hutchings and
company.
- The ability to automatically generate cross-reference links within
a single DocBook book and embedded descriptions for large
structures. From Danilo Cesar Lemes de Paula.
- A new document on how to add a system call from David Drysdale.
- Chameleon bus documentation from Johannes Thumshirn.
...plus the usual collection of improvements, typo fixes, and more"
* tag 'docs-for-linus' of git://git.lwn.net/linux-2.6: (39 commits)
Documentation, add kernel-parameters.txt entry for dis_ucode_ldr
Documentation/x86: Rename IRQSTACKSIZE to IRQ_STACK_SIZE
Documentation/Intel-IOMMU.txt: Modify definition of DRHD
docs: update HOWTO for 3.x -> 4.x versioning
kernel-doc: ignore unneeded attribute information
scripts/kernel-doc: Adding cross-reference links to html documentation.
DocBook: Fix non-determinstic installation of duplicate man pages
Documentation: minor typo fix in mailbox.txt
Documentation: describe how to add a system call
doc: Add more workqueue functions to the documentation
ARM: keystone: add documentation for SoCs and EVMs
scripts/kernel-doc Allow struct arguments documentation in struct body
SubmittingPatches: remove stray quote character
Revert "DocBook: Avoid building man pages repeatedly and inconsistently"
Documentation: Minor changes to men-chameleon-bus.txt
Doc: fix trivial typo in SubmittingPatches
MAINTAINERS: Direct Documentation/DocBook/media properly
Documentation: installed man pages don't need to be executable
fix Evolution submenu name in email-clients.txt
Documentation: Add MCB documentation
...
28 files changed, 1195 insertions, 76 deletions
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index b713c35f8543..c06f817b3091 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle @@ -929,13 +929,11 @@ The C Programming Language, Second Edition by Brian W. Kernighan and Dennis M. Ritchie. Prentice Hall, Inc., 1988. ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback). -URL: http://cm.bell-labs.com/cm/cs/cbook/ The Practice of Programming by Brian W. Kernighan and Rob Pike. Addison-Wesley, Inc., 1999. ISBN 0-201-61586-X. -URL: http://cm.bell-labs.com/cm/cs/tpop/ GNU manuals - where in compliance with K&R and this text - for cpp, gcc, gcc internals and indent, all available from http://www.gnu.org/manual/ diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 9e086067b4ae..93eff64387cd 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -56,16 +56,19 @@ htmldocs: $(HTML) MAN := $(patsubst %.xml, %.9, $(BOOKS)) mandocs: $(MAN) - find $(obj)/man -name '*.9' | xargs gzip -f + find $(obj)/man -name '*.9' | xargs gzip -nf installmandocs: mandocs mkdir -p /usr/local/man/man9/ - install $(obj)/man/*.9.gz /usr/local/man/man9/ + find $(obj)/man -name '*.9.gz' -printf '%h %f\n' | \ + sort -k 2 -k 1 | uniq -f 1 | sed -e 's: :/:' | \ + xargs install -m 644 -t /usr/local/man/man9/ ### #External programs used -KERNELDOC = $(srctree)/scripts/kernel-doc -DOCPROC = $(objtree)/scripts/docproc +KERNELDOCXMLREF = $(srctree)/scripts/kernel-doc-xml-ref +KERNELDOC = $(srctree)/scripts/kernel-doc +DOCPROC = $(objtree)/scripts/docproc XMLTOFLAGS = -m $(srctree)/$(src)/stylesheet.xsl XMLTOFLAGS += --skip-validation @@ -89,7 +92,7 @@ define rule_docproc ) > $(dir $@).$(notdir $@).cmd endef -%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) FORCE +%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) $(KERNELDOCXMLREF) FORCE $(call if_changed_rule,docproc) # Tell kbuild to always build the programs @@ -140,7 +143,20 @@ quiet_cmd_db2html = HTML $@ echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \ $(patsubst %.html,%,$(notdir $@))</a><p>' > $@ -%.html: %.xml +### +# Rules to create an aux XML and .db, and use them to re-process the DocBook XML +# to fill internal hyperlinks + gen_aux_xml = : + quiet_gen_aux_xml = echo ' XMLREF $@' +silent_gen_aux_xml = : +%.aux.xml: %.xml + @$($(quiet)gen_aux_xml) + @rm -rf $@ + @(cat $< | egrep "^<refentry id" | egrep -o "\".*\"" | cut -f 2 -d \" > $<.db) + @$(KERNELDOCXMLREF) -db $<.db $< > $@ +.PRECIOUS: %.aux.xml + +%.html: %.aux.xml @(which xmlto > /dev/null 2>&1) || \ (echo "*** You need to install xmlto ***"; \ exit 1) @@ -150,12 +166,12 @@ quiet_cmd_db2html = HTML $@ cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi quiet_cmd_db2man = MAN $@ - cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man $< ; fi + cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man/$(*F) $< ; fi %.9 : %.xml @(which xmlto > /dev/null 2>&1) || \ (echo "*** You need to install xmlto ***"; \ exit 1) - $(Q)mkdir -p $(obj)/man + $(Q)mkdir -p $(obj)/man/$(*F) $(call cmd,db2man) @touch $@ @@ -209,15 +225,18 @@ dochelp: ### # Temporary files left by various tools clean-files := $(DOCBOOKS) \ - $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \ - $(patsubst %.xml, %.aux, $(DOCBOOKS)) \ - $(patsubst %.xml, %.tex, $(DOCBOOKS)) \ - $(patsubst %.xml, %.log, $(DOCBOOKS)) \ - $(patsubst %.xml, %.out, $(DOCBOOKS)) \ - $(patsubst %.xml, %.ps, $(DOCBOOKS)) \ - $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ - $(patsubst %.xml, %.html, $(DOCBOOKS)) \ - $(patsubst %.xml, %.9, $(DOCBOOKS)) \ + $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \ + $(patsubst %.xml, %.aux, $(DOCBOOKS)) \ + $(patsubst %.xml, %.tex, $(DOCBOOKS)) \ + $(patsubst %.xml, %.log, $(DOCBOOKS)) \ + $(patsubst %.xml, %.out, $(DOCBOOKS)) \ + $(patsubst %.xml, %.ps, $(DOCBOOKS)) \ + $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ + $(patsubst %.xml, %.html, $(DOCBOOKS)) \ + $(patsubst %.xml, %.9, $(DOCBOOKS)) \ + $(patsubst %.xml, %.aux.xml, $(DOCBOOKS)) \ + $(patsubst %.xml, %.xml.db, $(DOCBOOKS)) \ + $(patsubst %.xml, %.xml, $(DOCBOOKS)) \ $(index) clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index faf09d4a0ea8..bbc1d7ee9c76 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -66,6 +66,7 @@ !Ekernel/time/hrtimer.c </sect1> <sect1><title>Workqueues and Kevents</title> +!Iinclude/linux/workqueue.h !Ekernel/workqueue.c </sect1> <sect1><title>Internal Functions</title> diff --git a/Documentation/DocBook/stylesheet.xsl b/Documentation/DocBook/stylesheet.xsl index 85b25275196f..3bf4ecf3d760 100644 --- a/Documentation/DocBook/stylesheet.xsl +++ b/Documentation/DocBook/stylesheet.xsl @@ -5,6 +5,7 @@ <param name="funcsynopsis.tabular.threshold">80</param> <param name="callout.graphics">0</param> <!-- <param name="paper.type">A4</param> --> +<param name="generate.consistent.ids">1</param> <param name="generate.section.toc.level">2</param> <param name="use.id.as.filename">1</param> </stylesheet> diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 93aa8604630e..21152d397b88 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -218,16 +218,16 @@ The development process Linux kernel development process currently consists of a few different main kernel "branches" and lots of different subsystem-specific kernel branches. These different branches are: - - main 3.x kernel tree - - 3.x.y -stable kernel tree - - 3.x -git kernel patches + - main 4.x kernel tree + - 4.x.y -stable kernel tree + - 4.x -git kernel patches - subsystem specific kernel trees and patches - - the 3.x -next kernel tree for integration tests + - the 4.x -next kernel tree for integration tests -3.x kernel tree +4.x kernel tree ----------------- -3.x kernels are maintained by Linus Torvalds, and can be found on -kernel.org in the pub/linux/kernel/v3.x/ directory. Its development +4.x kernels are maintained by Linus Torvalds, and can be found on +kernel.org in the pub/linux/kernel/v4.x/ directory. Its development process is as follows: - As soon as a new kernel is released a two weeks window is open, during this period of time maintainers can submit big diffs to @@ -262,20 +262,20 @@ mailing list about kernel releases: released according to perceived bug status, not according to a preconceived timeline." -3.x.y -stable kernel tree +4.x.y -stable kernel tree --------------------------- Kernels with 3-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant -regressions discovered in a given 3.x kernel. +regressions discovered in a given 4.x kernel. This is the recommended branch for users who want the most recent stable kernel and are not interested in helping test development/experimental versions. -If no 3.x.y kernel is available, then the highest numbered 3.x +If no 4.x.y kernel is available, then the highest numbered 4.x kernel is the current stable kernel. -3.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and +4.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and are released as needs dictate. The normal release period is approximately two weeks, but it can be longer if there are no pressing problems. A security-related problem, instead, can cause a release to happen almost @@ -285,7 +285,7 @@ The file Documentation/stable_kernel_rules.txt in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and how the release process works. -3.x -git patches +4.x -git patches ------------------ These are daily snapshots of Linus' kernel tree which are managed in a git repository (hence the name.) These patches are usually released @@ -317,9 +317,9 @@ revisions to it, and maintainers can mark patches as under review, accepted, or rejected. Most of these patchwork sites are listed at http://patchwork.kernel.org/. -3.x -next kernel tree for integration tests +4.x -next kernel tree for integration tests --------------------------------------------- -Before updates from subsystem trees are merged into the mainline 3.x +Before updates from subsystem trees are merged into the mainline 4.x tree, they need to be integration-tested. For this purpose, a special testing repository exists into which virtually all subsystem trees are pulled on an almost daily basis: diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt index cf9431db8731..7b57fc087088 100644 --- a/Documentation/Intel-IOMMU.txt +++ b/Documentation/Intel-IOMMU.txt @@ -10,7 +10,7 @@ This guide gives a quick cheat sheet for some basic understanding. Some Keywords DMAR - DMA remapping -DRHD - DMA Engine Reporting Structure +DRHD - DMA Remapping Hardware Unit Definition RMRR - Reserved memory Region Reporting Structure ZLR - Zero length reads from PCI devices IOVA - IO Virtual address. diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 27e7e5edeca8..fa596d8a3ab0 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -90,11 +90,11 @@ patch. Make sure your patch does not include any extra files which do not belong in a patch submission. Make sure to review your patch -after- -generated it with diff(1), to ensure accuracy. +generating it with diff(1), to ensure accuracy. If your changes produce a lot of deltas, you need to split them into individual patches which modify things in logical stages; see section -#3. This will facilitate easier reviewing by other kernel developers, +#3. This will facilitate review by other kernel developers, very important if you want your patch accepted. If you're using git, "git rebase -i" can help you with this process. If @@ -267,7 +267,7 @@ You should always copy the appropriate subsystem maintainer(s) on any patch to code that they maintain; look through the MAINTAINERS file and the source code revision history to see who those maintainers are. The script scripts/get_maintainer.pl can be very useful at this step. If you -cannot find a maintainer for the subsystem your are working on, Andrew +cannot find a maintainer for the subsystem you are working on, Andrew Morton (akpm@linux-foundation.org) serves as a maintainer of last resort. You should also normally choose at least one mailing list to receive a copy @@ -340,7 +340,7 @@ on the changes you are submitting. It is important for a kernel developer to be able to "quote" your changes, using standard e-mail tools, so that they may comment on specific portions of your code. -For this reason, all patches should be submitting e-mail "inline". +For this reason, all patches should be submitted by e-mail "inline". WARNING: Be wary of your editor's word-wrap corrupting your patch, if you choose to cut-n-paste your patch. @@ -739,7 +739,7 @@ interest on a single line; it should look something like: git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus - to get these changes:" + to get these changes: A pull request should also include an overall message saying what will be included in the request, a "git shortlog" listing of the patches @@ -796,7 +796,7 @@ NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! <https://lkml.org/lkml/2005/7/11/336> Kernel Documentation/CodingStyle: - <http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle> + <Documentation/CodingStyle> Linus Torvalds's mail on the canonical patch format: <http://lkml.org/lkml/2005/4/7/183> diff --git a/Documentation/adding-syscalls.txt b/Documentation/adding-syscalls.txt new file mode 100644 index 000000000000..cc2d4ac4f404 --- /dev/null +++ b/Documentation/adding-syscalls.txt @@ -0,0 +1,527 @@ +Adding a New System Call +======================== + +This document describes what's involved in adding a new system call to the +Linux kernel, over and above the normal submission advice in +Documentation/SubmittingPatches. + + +System Call Alternatives +------------------------ + +The first thing to consider when adding a new system call is whether one of +the alternatives might be suitable instead. Although system calls are the +most traditional and most obvious interaction points between userspace and the +kernel, there are other possibilities -- choose what fits best for your +interface. + + - If the operations involved can be made to look like a filesystem-like + object, it may make more sense to create a new filesystem or device. This + also makes it easier to encapsulate the new functionality in a kernel module + rather than requiring it to be built into the main kernel. + - If the new functionality involves operations where the kernel notifies + userspace that something has happened, then returning a new file + descriptor for the relevant object allows userspace to use + poll/select/epoll to receive that notification. + - However, operations that don't map to read(2)/write(2)-like operations + have to be implemented as ioctl(2) requests, which can lead to a + somewhat opaque API. + - If you're just exposing runtime system information, a new node in sysfs + (see Documentation/filesystems/sysfs.txt) or the /proc filesystem may be + more appropriate. However, access to these mechanisms requires that the + relevant filesystem is mounted, which might not always be the case (e.g. + in a namespaced/sandboxed/chrooted environment). Avoid adding any API to + debugfs, as this is not considered a 'production' interface to userspace. + - If the operation is specific to a particular file or file descriptor, then + an additional fcntl(2) command option may be more appropriate. However, + fcntl(2) is a multiplexing system call that hides a lot of complexity, so + this option is best for when the new function is closely analogous to + existing fcntl(2) functionality, or the new functionality is very simple + (for example, getting/setting a simple flag related to a file descriptor). + - If the operation is specific to a particular task or process, then an + additional prctl(2) command option may be more appropriate. As with + fcntl(2), this system call is a complicated multiplexor so is best reserved + for near-analogs of existing prctl() commands or getting/setting a simple + flag related to a process. + + +Designing the API: Planning for Extension +----------------------------------------- + +A new system call forms part of the API of the kernel, and has to be supported +indefinitely. As such, it's a very good idea to explicitly discuss the +interface on the kernel mailing list, and it's important to plan for future +extensions of the interface. + +(The syscall table is littered with historical examples where this wasn't done, +together with the corresponding follow-up system calls -- eventfd/eventfd2, +dup2/dup3, inotify_init/inotify_init1, pipe/pipe2, renameat/renameat2 -- so +learn from the history of the kernel and plan for extensions from the start.) + +For simpler system calls that only take a couple of arguments, the preferred +way to allow for future extensibility is to include a flags argument to the +system call. To make sure that userspace programs can safely use flags +between kernel versions, check whether the flags value holds any unknown +flags, and reject the system call (with EINVAL) if it does: + + if (flags & ~(THING_FLAG1 | THING_FLAG2 | THING_FLAG3)) + return -EINVAL; + +(If no flags values are used yet, check that the flags argument is zero.) + +For more sophisticated system calls that involve a larger number of arguments, +it's preferred to encapsulate the majority of the arguments into a structure +that is passed in by pointer. Such a structure can cope with future extension +by including a size argument in the structure: + + struct xyzzy_params { + u32 size; /* userspace sets p->size = sizeof(struct xyzzy_params) */ + u32 param_1; + u64 param_2; + u64 param_3; + }; + +As long as any subsequently added field, say param_4, is designed so that a +zero value gives the previous behaviour, then this allows both directions of +version mismatch: + + - To cope with a later userspace program calling an older kernel, the kernel + code should check that any memory beyond the size of the structure that it + expects is zero (effectively checking that param_4 == 0). + - To cope with an older userspace program calling a newer kernel, the kernel + code can zero-extend a smaller instance of the structure (effectively + setting param_4 = 0). + +See perf_event_open(2) and the perf_copy_attr() function (in +kernel/events/core.c) for an example of this approach. + + +Designing the API: Other Considerations +--------------------------------------- + +If your new system call allows userspace to refer to a kernel object, it +should use a file descriptor as the handle for that object -- don't invent a +new type of userspace object handle when the kernel already has mechanisms and +well-defined semantics for using file descriptors. + +If your new xyzzy(2) system call does return a new file descriptor, then the +flags argument should include a value that is equivalent to setting O_CLOEXEC +on the new FD. This makes it possible for userspace to close the timing +window between xyzzy() and calling fcntl(fd, F_SETFD, FD_CLOEXEC), where an +unexpected fork() and execve() in another thread could leak a descriptor to +the exec'ed program. (However, resist the temptation to re-use the actual value +of the O_CLOEXEC constant, as it is architecture-specific and is part of a +numbering space of O_* flags that is fairly full.) + +If your system call returns a new file descriptor, you should also consider +what it means to use the poll(2) family of system calls on that file +descriptor. Making a file descriptor ready for reading or writing is the +normal way for the kernel to indicate to userspace that an event has +occurred on the corresponding kernel object. + +If your new xyzzy(2) system call involves a filename argument: + + int sys_xyzzy(const char __user *path, ..., unsigned int flags); + +you should also consider whether an xyzzyat(2) version is more appropriate: + + int sys_xyzzyat(int dfd, const char __user *path, ..., unsigned int flags); + +This allows more flexibility for how userspace specifies the file in question; +in particular it allows userspace to request the functionality for an +already-opened file descriptor using the AT_EMPTY_PATH flag, effectively giving +an fxyzzy(3) operation for free: + + - xyzzyat(AT_FDCWD, path, ..., 0) is equivalent to xyzzy(path,...) + - xyzzyat(fd, "", ..., AT_EMPTY_PATH) is equivalent to fxyzzy(fd, ...) + +(For more details on the rationale of the *at() calls, see the openat(2) man +page; for an example of AT_EMPTY_PATH, see the statat(2) man page.) + +If your new xyzzy(2) system call involves a parameter describing an offset +within a file, make its type loff_t so that 64-bit offsets can be supported +even on 32-bit architectures. + +If your new xyzzy(2) system call involves privileged functionality, it needs +to be governed by the appropriate Linux capability bit (checked with a call to +capable()), as described in the capabilities(7) man page. Choose an existing +capability bit that governs related functionality, but try to avoid combining +lots of only vaguely related functions together under the same bit, as this +goes against capabilities' purpose of splitting the power of root. In +particular, avoid adding new uses of the already overly-general CAP_SYS_ADMIN +capability. + +If your new xyzzy(2) system call manipulates a process other than the calling +process, it should be restricted (using a call to ptrace_may_access()) so that +only a calling process with the same permissions as the target process, or +with the necessary capabilities, can manipulate the target process. + +Finally, be aware that some non-x86 architectures have an easier time if +system call parameters that are explicitly 64-bit fall on odd-numbered +arguments (i.e. parameter 1, 3, 5), to allow use of contiguous pairs of 32-bit +registers. (This concern does not apply if the arguments are part of a +structure that's passed in by pointer.) + + +Proposing the API +----------------- + +To make new system calls easy to review, it's best to divide up the patchset +into separate chunks. These should include at least the following items as +distinct commits (each of which is described further below): + + - The core implementation of the system call, together with prototypes, + generic numbering, Kconfig changes and fallback stub implementation. + - Wiring up of the new system call for one particular architecture, usually + x86 (including all of x86_64, x86_32 and x32). + - A demonstration of the use of the new system call in userspace via a + selftest in tools/testing/selftests/. + - A draft man-page for the new system call, either as plain text in the + cover letter, or as a patch to the (separate) man-pages repository. + +New system call proposals, like any change to the kernel's API, should always +be cc'ed to linux-api@vger.kernel.org. + + +Generic System Call Implementation +---------------------------------- + +The main entry point for your new xyzzy(2) system call will be called +sys_xyzzy(), but you add this entry point with the appropriate +SYSCALL_DEFINEn() macro rather than explicitly. The 'n' indicates the number +of arguments to the system call, and the macro takes the system call name +followed by the (type, name) pairs for the parameters as arguments. Using +this macro allows metadata about the new system call to be made available for +other tools. + +The new entry point also needs a corresponding function prototype, in +include/linux/syscalls.h, marked as asmlinkage to match the way that system +calls are invoked: + + asmlinkage long sys_xyzzy(...); + +Some architectures (e.g. x86) have their own architecture-specific syscall +tables, but several other architectures share a generic syscall table. Add your +new system call to the generic list by adding an entry to the list in +include/uapi/asm-generic/unistd.h: + + #define __NR_xyzzy 292 + __SYSCALL(__NR_xyzzy, sys_xyzzy) + +Also update the __NR_syscalls count to reflect the additional system call, and +note that if multiple new system calls are added in the same merge window, +your new syscall number may get adjusted to resolve conflicts. + +The file kernel/sys_ni.c provides a fallback stub implementation of each system +call, returning -ENOSYS. Add your new system call here too: + + cond_syscall(sys_xyzzy); + +Your new kernel functionality, and the system call that controls it, should +normally be optional, so add a CONFIG option (typically to init/Kconfig) for +it. As usual for new CONFIG options: + + - Include a description of the new functionality and system call controlled + by the option. + - Make the option depend on EXPERT if it should be hidden from normal users. + - Make any new source files implementing the function dependent on the CONFIG + option in the Makefile (e.g. "obj-$(CONFIG_XYZZY_SYSCALL) += xyzzy.c"). + - Double check that the kernel still builds with the new CONFIG option turned + off. + +To summarize, you need a commit that includes: + + - CONFIG option for the new function, normally in init/Kconfig + - SYSCALL_DEFINEn(xyzzy, ...) for the entry point + - corresponding prototype in include/linux/syscalls.h + - generic table entry in include/uapi/asm-generic/unistd.h + - fallback stub in kernel/sys_ni.c + + +x86 System Call Implementation +------------------------------ + +To wire up your new system call for x86 platforms, you need to update the +master syscall tables. Assuming your new system call isn't special in some +way (see below), this involves a "common" entry (for x86_64 and x32) in +arch/x86/entry/syscalls/syscall_64.tbl: + + 333 common xyzzy sys_xyzzy + +and an "i386" entry in arch/x86/entry/syscalls/syscall_32.tbl: + + 380 i386 xyzzy sys_xyzzy + +Again, these numbers are liable to be changed if there are conflicts in the +relevant merge window. + + +Compatibility System Calls (Generic) +------------------------------------ + +For most system calls the same 64-bit implementation can be invoked even when +the userspace program is itself 32-bit; even if the system call's parameters +include an explicit pointer, this is handled transparently. + +However, there are a couple of situations where a compatibility layer is +needed to cope with size differences between 32-bit and 64-bit. + +The first is if the 64-bit kernel also supports 32-bit userspace programs, and +so needs to parse areas of (__user) memory that could hold either 32-bit or +64-bit values. In particular, this is needed whenever a system call argument +is: + + - a pointer to a pointer + - a pointer to a struct containing a pointer (e.g. struct iovec __user *) + - a pointer to a varying sized integral type (time_t, off_t, long, ...) + - a pointer to a struct containing a varying sized integral type. + +The second situation that requires a compatibility layer is if one of the +system call's arguments has a type that is explicitly 64-bit even on a 32-bit +architecture, for example loff_t or __u64. In this case, a value that arrives +at a 64-bit kernel from a 32-bit application will be split into two 32-bit +values, which then need to be re-assembled in the compatibility layer. + +(Note that a system call argument that's a pointer to an explicit 64-bit type +does *not* need a compatibility layer; for example, splice(2)'s arguments of +type loff_t __user * do not trigger the need for a compat_ system call.) + +The compatibility version of the system call is called compat_sys_xyzzy(), and +is added with the COMPAT_SYSCALL_DEFINEn() macro, analogously to +SYSCALL_DEFINEn. This version of the implementation runs as part of a 64-bit +kernel, but expects to receive 32-bit parameter values and does whatever is +needed to deal with them. (Typically, the compat_sys_ version converts the +values to 64-bit versions and either calls on to the sys_ version, or both of +them call a common inner implementation function.) + +The compat entry point also needs a corresponding function prototype, in +include/linux/compat.h, marked as asmlinkage to match the way that system +calls are invoked: + + asmlinkage long compat_sys_xyzzy(...); + +If the system call involves a structure that is laid out differently on 32-bit +and 64-bit systems, say struct xyzzy_args, then the include/linux/compat.h +header file should also include a compat version of the structure (struct +compat_xyzzy_args) where each variable-size field has the appropriate compat_ +type that corresponds to the type in struct xyzzy_args. The +compat_sys_xyzzy() routine can then use this compat_ structure to parse the +arguments from a 32-bit invocation. + +For example, if there are fields: + + struct xyzzy_args { + const char __user *ptr; + __kernel_long_t varying_val; + u64 fixed_val; + /* ... */ + }; + +in struct xyzzy_args, then struct compat_xyzzy_args would have: + + struct compat_xyzzy_args { + compat_uptr_t ptr; + compat_long_t varying_val; + u64 fixed_val; + /* ... */ + }; + +The generic system call list also needs adjusting to allow for the compat +version; the entry in include/uapi/asm-generic/unistd.h should use +__SC_COMP rather than __SYSCALL: + + #define __NR_xyzzy 292 + __SC_COMP(__NR_xyzzy, sys_xyzzy, compat_sys_xyzzy) + +To summarize, you need: + + - a COMPAT_SYSCALL_DEFINEn(xyzzy, ...) for the compat entry point + - corresponding prototype in include/linux/compat.h + - (if needed) 32-bit mapping struct in include/linux/compat.h + - instance of __SC_COMP not __SYSCALL in include/uapi/asm-generic/unistd.h + + +Compatibility System Calls (x86) +-------------------------------- + +To wire up the x86 architecture of a system call with a compatibility version, +the entries in the syscall tables need to be adjusted. + +First, the entry in arch/x86/entry/syscalls/syscall_32.tbl gets an extra +column to indicate that a 32-bit userspace program running on a 64-bit kernel +should hit th |
