[PATCH 9/9] dosfs: Comments for UTF8 backwards compatibility

Ralf Kirchner ralf.kirchner at embedded-brains.de
Fri May 31 13:42:25 UTC 2013


Comments for the backwards compatibility of the UTF-8 handling within the FAT
file system have been added.

---
 cpukit/libfs/src/dosfs/dosfs.h |   98 +++++++++++++++++++++++++++++++++++++++-
 1 Datei geändert, 96 Zeilen hinzugefügt(+), 2 Zeilen entfernt(-)

diff --git a/cpukit/libfs/src/dosfs/dosfs.h b/cpukit/libfs/src/dosfs/dosfs.h
index b4e66e2..ccb2807 100644
--- a/cpukit/libfs/src/dosfs/dosfs.h
+++ b/cpukit/libfs/src/dosfs/dosfs.h
@@ -147,7 +147,7 @@ typedef int (*rtems_dosfs_utf8_normalize_and_fold)(
 );
 
 /**
- * @brief Destorys a convert control structure.
+ * @brief Destroys a convert control structure.
  *
  * @param[in/out] self The convert control for destruction.
  */
@@ -204,7 +204,71 @@ struct rtems_dosfs_convert_control {
  */
 typedef struct {
   /**
-   * @brief Converter implementation for new filesystem instance.
+   * @brief Converter implementation for new file system instance.
+   *
+   * Before converters were added to the RTEMS implementation of the
+   * FAT file system, file names were stored as follows:
+   * Short names were saved in code page format (as is still the case).
+   * Long names were not saved in UTF-16 format. Instead, only the
+   * lower byte of each character was saved.
+   *
+   * There are a few compatibility issues because the RTEMS FAT file system
+   * implementation did not conform to the standard before UTF-8 support was
+   * added. The following issues affect both the default converter and the
+   * UTF-8 converter:
+   * - Before UTF-8 support was added, it was possible to create files with the
+   *   same short name in single case and mixed case in a directory.
+   *   This bug is now avoided.
+   * - Before UTF-8 support was added, it was possible to create files with a
+   *   name length of slightly more than 255 characters. Now the implementation
+   *   adheres exactly to the 255 character limit.
+   * - Long file names saved before UTF-8 support was added could contain
+   *   non-ASCII characters in the one byte which was saved for a long name
+   *   character. With the default converter this means such files can be
+   *   read only by their short file name. With the UTF-8 converter file names
+   *   will be read correctly as long as the characters written with the
+   *   old implementation were Latin-1 characters.
+   *
+   * The following sample code demonstrates how to mount a file
+   * system with UTF-8 support:
+   * @code
+   * #include <assert.h>
+   * #include <rtems/dosfs.h>
+   * #include <rtems/libio.h>
+   *
+   * static int mount_with_utf8(
+   *   const char* device_file,
+   *   const char* mount_point
+   * )
+   * {
+   *   rtems_dosfs_convert_control *convert_ctrl;
+   *   rtems_dosfs_mount_options    mount_opts;
+   *   int                          rv;
+   *
+   *   memset( &mount_opts, 0, sizeof(mount_opts) );
+   *
+   *   convert_ctrl = rtems_dosfs_create_utf8_converter( "CP850" );
+   *
+   *   if ( convert_ctrl != NULL ) {
+   *     mount_opts.converter = convert_ctrl;
+   *
+   *     rv = mount_and_make_target_path(
+   *       device_file,
+   *       mount_point,
+   *       RTEMS_FILESYSTEM_TYPE_DOSFS,
+   *       RTEMS_FILESYSTEM_READ_WRITE,
+   *       &mount_opts);
+   *   } else {
+   *     rv = ENOMEM;
+   *   }
+   *
+   *   return rv;
+   * }
+   * @endcode
+   *
+   * In case you don't want UTF-8 support, you can simply pass a
+   * NULL pointer to mount_and_make_target_path() or mount(),
+   * respectively, instead of the mount_opts address.
    *
    * @see rtems_dosfs_create_default_converter() and
    * rtems_dosfs_create_utf8_converter().
@@ -215,6 +279,17 @@ typedef struct {
 /**
  * @brief Allocates and initializes a default converter.
  *
+ * This default converter will accept only POSIX file names with
+ * pure ASCII characters. This largely corresponds to the file
+ * name handling before the optional UTF-8 support was added
+ * to the RTEMS implementation of the FAT file system.
+ * This handling is mostly backwards compatible with the previous RTEMS
+ * implementation of the FAT file system.
+ *
+ * For details on backwards compatibility with the previous RTEMS
+ * implementation of the FAT file system, please see also
+ * @ref rtems_dosfs_mount_options and mount().
+ *
  * @retval NULL Something failed.
  * @retval other Pointer to initialized converter.
  *
@@ -225,6 +300,25 @@ rtems_dosfs_convert_control *rtems_dosfs_create_default_converter(void);
 /**
  * @brief Allocates and initializes a UTF-8 converter.
  *
+ * This converter will assume that all file names passed to POSIX file handling
+ * methods are UTF-8 strings and will convert them to the selected code page
+ * for short file names and to UTF-16 for long file names.
+ * This conversion will be done during reading and writing.
+ * These conversions correspond to the specification of the
+ * FAT file system.
+ * This handling is mostly backwards compatible with the previous RTEMS
+ * implementation of the FAT file system.
+ *
+ * For details on backwards compatibility with the previous RTEMS
+ * implementation of the FAT file system, please see also
+ * @ref rtems_dosfs_mount_options and mount().
+ *
+ * One possible issue with this converter is:
+ * When reading file names which have been created with other
+ * implementations of the FAT file system, it can happen that
+ * during the conversion to UTF-8 a long file name becomes longer and exceeds
+ * the 255 byte limit. In such a case only the short file name will be read.
+ *
  * @param[in] codepage The iconv() identification string for the used codepage.
  *
  * @retval NULL Something failed.
-- 
1.7.10.4




More information about the devel mailing list