[PATCH 14/18] third party code - comment filter

Andrew.Butterfield at scss.tcd.ie Andrew.Butterfield at scss.tcd.ie
Thu Dec 22 11:47:38 UTC 2022


forked from https://github.com/quic/comment-filter/commits/master
commit 9cfb52318e5f71af56b5808e280a9b089b9abc32
---
 .../comment-filter/.circleci/config.yml       |   9 +
 .../src/modules/comment-filter/.coveragerc    |   2 +
 .../src/src/modules/comment-filter/.gitignore | 104 +++++
 .../src/src/modules/comment-filter/AUTHORS    |   7 +
 .../modules/comment-filter/CODE-OF-CONDUCT.md |  73 +++
 .../modules/comment-filter/CONTRIBUTING.md    |  87 ++++
 .../src/src/modules/comment-filter/Dockerfile |   9 +
 .../src/src/modules/comment-filter/LICENSE    |  13 +
 .../src/modules/comment-filter/MANIFEST.in    |   7 +
 .../src/src/modules/comment-filter/README.md  | 158 +++++++
 .../src/modules/comment-filter/bin/comments   |  25 ++
 .../comment-filter/bin/testdata/hello.c       |   8 +
 .../comment-filter/comment_filter/__init__.py |   1 +
 .../comment-filter/comment_filter/_version.py |   0
 .../comment-filter/comment_filter/language.py |  60 +++
 .../comment-filter/comment_filter/rfc.py      | 423 ++++++++++++++++++
 .../comment-filter/comment_filter/rfc_test.py | 258 +++++++++++
 .../src/src/modules/comment-filter/setup.cfg  |   5 +
 .../src/src/modules/comment-filter/setup.py   |  12 +
 .../src/src/modules/comment-filter/tox.ini    |  12 +
 20 files changed, 1273 insertions(+)
 create mode 100644 formal/promela/src/src/modules/comment-filter/.circleci/config.yml
 create mode 100644 formal/promela/src/src/modules/comment-filter/.coveragerc
 create mode 100644 formal/promela/src/src/modules/comment-filter/.gitignore
 create mode 100644 formal/promela/src/src/modules/comment-filter/AUTHORS
 create mode 100644 formal/promela/src/src/modules/comment-filter/CODE-OF-CONDUCT.md
 create mode 100644 formal/promela/src/src/modules/comment-filter/CONTRIBUTING.md
 create mode 100644 formal/promela/src/src/modules/comment-filter/Dockerfile
 create mode 100644 formal/promela/src/src/modules/comment-filter/LICENSE
 create mode 100644 formal/promela/src/src/modules/comment-filter/MANIFEST.in
 create mode 100644 formal/promela/src/src/modules/comment-filter/README.md
 create mode 100755 formal/promela/src/src/modules/comment-filter/bin/comments
 create mode 100644 formal/promela/src/src/modules/comment-filter/bin/testdata/hello.c
 create mode 100644 formal/promela/src/src/modules/comment-filter/comment_filter/__init__.py
 create mode 100644 formal/promela/src/src/modules/comment-filter/comment_filter/_version.py
 create mode 100644 formal/promela/src/src/modules/comment-filter/comment_filter/language.py
 create mode 100644 formal/promela/src/src/modules/comment-filter/comment_filter/rfc.py
 create mode 100644 formal/promela/src/src/modules/comment-filter/comment_filter/rfc_test.py
 create mode 100644 formal/promela/src/src/modules/comment-filter/setup.cfg
 create mode 100644 formal/promela/src/src/modules/comment-filter/setup.py
 create mode 100644 formal/promela/src/src/modules/comment-filter/tox.ini

diff --git a/formal/promela/src/src/modules/comment-filter/.circleci/config.yml b/formal/promela/src/src/modules/comment-filter/.circleci/config.yml
new file mode 100644
index 00000000..dc96c5c2
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/.circleci/config.yml
@@ -0,0 +1,9 @@
+version: 2
+jobs:
+  build:
+    docker:
+      - image: themattrix/tox
+    steps:
+      - checkout
+      - run:
+         command: tox
diff --git a/formal/promela/src/src/modules/comment-filter/.coveragerc b/formal/promela/src/src/modules/comment-filter/.coveragerc
new file mode 100644
index 00000000..9b6154a0
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/.coveragerc
@@ -0,0 +1,2 @@
+[run]
+omit = *_test.py
diff --git a/formal/promela/src/src/modules/comment-filter/.gitignore b/formal/promela/src/src/modules/comment-filter/.gitignore
new file mode 100644
index 00000000..af2f5375
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/.gitignore
@@ -0,0 +1,104 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+.static_storage/
+.media/
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/formal/promela/src/src/modules/comment-filter/AUTHORS b/formal/promela/src/src/modules/comment-filter/AUTHORS
new file mode 100644
index 00000000..44c0a5cf
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/AUTHORS
@@ -0,0 +1,7 @@
+comment-filter was originally created in 2014 by Greg Fitzgerald <gregf at codeaurora.org>.
+
+Other authors include:
+    Craig Northway <cnorthway at codeaurora.org>
+    Jesse Porter <jporter at codeaurora.org>
+    Rashmi Chitrakar <rashmic at codeaurora.org>
+    Zac Bristow <zbristow at codeaurora.org>
diff --git a/formal/promela/src/src/modules/comment-filter/CODE-OF-CONDUCT.md b/formal/promela/src/src/modules/comment-filter/CODE-OF-CONDUCT.md
new file mode 100644
index 00000000..7ef343f5
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/CODE-OF-CONDUCT.md
@@ -0,0 +1,73 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of experience,
+nationality, personal appearance, race, religion, or sexual identity and
+orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team. All complaints will be reviewed
+and investigated and will result in a response that is deemed necessary and 
+appropriate to the circumstances. The project team is obligated to maintain
+confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
diff --git a/formal/promela/src/src/modules/comment-filter/CONTRIBUTING.md b/formal/promela/src/src/modules/comment-filter/CONTRIBUTING.md
new file mode 100644
index 00000000..02f6b9ff
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/CONTRIBUTING.md
@@ -0,0 +1,87 @@
+## Contributing to Comment Filter
+
+Hi there!
+We’re thrilled that you’d like to contribute to this project.
+Your help is essential for keeping this project great and for making it better.
+
+## Branching Strategy
+
+In general, contributors should develop on branches based off of `master` and pull requests should be made against `master`.
+
+## Submitting a pull request
+
+1. Please read our [code of conduct](CODE-OF-CONDUCT.md) and [license](LICENSE).
+1. [Fork](https://github.com/codeauroraforum/comment-filter/fork) and clone the repository.
+1. Create a new branch based on `master`: `git checkout -b <my-branch-name> master`.
+1. Make your changes, add tests, and make sure the tests still pass.
+1. Push to your fork and [submit a pull request](https://github.com/codeauroraforum/comment-filter/compare) from your branch to `master`.
+1. Pat yourself on the back and wait for your pull request to be reviewed.
+
+Here are a few things you can do that will increase the likelihood of your pull request being accepted:
+
+- Follow the existing style where possible. We try to adhere to [pep8](https://www.python.org/dev/peps/pep-0008/).
+- Write tests.
+- Keep your change as focused as possible.
+  If you want to make multiple independent changes, please consider submitting them as separate pull requests.
+- Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
+
+
+## Developer Certificate of Origin (DCO)
+Comment Filter requires the Developer Certificate of Origin (DCO) process to be followed.
+
+The DCO is an attestation attached to every contribution made by every developer. In the commit message of the contribution, the developer simply adds a Signed-off-by statement and thereby agrees to the DCO, which you can find below or at http://developercertificate.org/.
+
+Comment Filter does not merge a pull request until every commit in it has been signed off for the DCO.
+
+```
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
+1 Letterman Drive
+Suite D4700
+San Francisco, CA, 94129
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.
+ ```
+
+### DCO commit example
+
+After committing your changes with `git commit -s`, your message should look something like:
+```
+commit 442deae270bf585052be012b064ed92299e221c4
+Author: Random Developer <random at developer.org>
+Date:   Sat Oct 21 08:33:15 2017 -0700
+
+    My example commit message
+
+    Signed-off-by: Random Developer <random at developer.org>
+```
diff --git a/formal/promela/src/src/modules/comment-filter/Dockerfile b/formal/promela/src/src/modules/comment-filter/Dockerfile
new file mode 100644
index 00000000..2fdb19e9
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/Dockerfile
@@ -0,0 +1,9 @@
+FROM ubuntu:14.04
+
+MAINTAINER Craig Northway
+
+RUN apt-get update && apt-get install -y python python3 python-pip git && pip install -U setuptools==25.2.0 tox
+
+COPY . /src
+
+RUN tox -c /src/tox.ini
diff --git a/formal/promela/src/src/modules/comment-filter/LICENSE b/formal/promela/src/src/modules/comment-filter/LICENSE
new file mode 100644
index 00000000..b8bf6d0f
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/LICENSE
@@ -0,0 +1,13 @@
+Copyright (c) 2017, The Linux Foundation. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+SPDX-License-Identifier: BSD-3-Clause
diff --git a/formal/promela/src/src/modules/comment-filter/MANIFEST.in b/formal/promela/src/src/modules/comment-filter/MANIFEST.in
new file mode 100644
index 00000000..f800456f
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/MANIFEST.in
@@ -0,0 +1,7 @@
+include *.md
+include .coveragerc
+include AUTHORS
+include LICENSE
+include tox.ini
+exclude Dockerfile
+recursive-include bin *.c
diff --git a/formal/promela/src/src/modules/comment-filter/README.md b/formal/promela/src/src/modules/comment-filter/README.md
new file mode 100644
index 00000000..0e0f15fb
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/README.md
@@ -0,0 +1,158 @@
+Comment Filter
+==============
+
+[![CircleCI](https://circleci.com/gh/codeauroraforum/comment-filter.svg?style=svg)](https://circleci.com/gh/codeauroraforum/comment-filter)
+
+A Python library and command-line utility that filters comments from a source
+code file, replacing each non-comment character with a space.
+
+When run from the command line, the `comments` utility will generate an
+output file in which any comment text will remain at the same line and column
+as the input file.
+
+The Python library provides one function `parse_file()`, which streams the
+input file and returns the filtered file contents via a generator that yields
+one line at a time.
+
+Both interfaces support an 'only code' option that inverts the functionality
+such that comment text is replaced by spaces and the code text is preserved in
+its original location.
+
+There is also a 'no tokens' option which will preserve the comments, but
+replace the comment tokens with spaces (in addition to the code text).
+
+
+hello.c:
+
+```c
+/* multi-line
+   comment */
+// single-line comment
+
+int main() {
+  return 0;
+}
+```
+
+Example of getting the comments in a C file:
+
+```bash
+$ comments hello.c
+/* multi-line
+   comment */
+// single-line comment
+```
+
+When filtering for comments, any character that is not part of a comment
+is replaced with a space.
+
+To get comments without the comment tokens:
+
+```bash
+$ comments --notokens hello.c
+   multi-line
+   comment
+   single-line comment
+```
+
+Filter out the comments:
+
+```bash
+$ comments --onlycode hello.c
+
+
+
+
+int main() {
+  return 0;
+}
+```
+
+
+Python library
+--------------
+
+Alternatively, one can use the provided Python library directly.  It provides
+one function `parse_file()`, which streams the input file and returns
+the filtered file via a generator.  The generator yields one line at a time.
+
+
+Implementation Notes
+--------------------
+
+A challenging requirement is that the parser is only fed one line at a time.
+This means that we cannot leverage most Python parsing libraries, including
+PyParsing, PyPEG, or even the Haskell Parsec-inspired funcparserlib.  Instead,
+we need stream parsing combinators, like those provided by Haskell's Conduit
+or Iteratee.  But in Python, and for this small parser, implementing that
+infrastructure seemed like overkill.  Unlike Haskell, Python cannot optimize
+out the additional abstraction layer.  So this library implements streaming,
+recursive-descent parsers by hand.  Lots of ugly noise in the code, but lots
+and lots of unit tests to keep complexity under control.
+
+
+Grammar
+-------
+
+```antlr
+file                 : declaration* ;
+
+declaration          : line_comment | multiline_comment | code ;
+
+line_comment         : line_comment_start (~endl)* endl ;
+
+multiline_comment    : multiline_comment_start multiline_contents multiline_comment_end ;
+multiline_contents   : (multiline_char | multiline_comment)* ;
+multiline_char       : ~(multiline_comment_start | multiline_comment_end) ;
+
+code                 : (string_literal | code_char)* ;
+code_char            : ~(string_literal_start | line_comment_start | multiline_comment_start) ;
+
+string_literal       : string_literal_start string_literal_char* string_literal_end ;
+string_literal_char  : escape_char (string_literal_start | string_literal_end)
+                     | ~(escape_char | string_literal_end) ;
+```
+
+The syntax for the following tokens is provided by the `language` module:
+
+  * line_comment_start
+  * multiline_comment_start
+  * multiline_comment_end
+  * string_literal_start
+  * string_literal_end
+  * escape_char
+
+
+Recognized Languages
+--------------------
+
+  * C
+  * C++
+  * Go
+  * Haskell
+  * Java
+  * Lua
+  * Python
+  * Perl
+  * Ruby
+
+
+Developing
+----------
+
+This assumes the following are installed and in your system path:
+
+   * Python 2.7.x OR Python 3.4.x
+   * tox
+
+To build and test, run `tox`.
+
+```bash
+$ tox
+```
+
+To remove all files ignored by git:
+
+```bash
+$ git clean -Xdf
+```
diff --git a/formal/promela/src/src/modules/comment-filter/bin/comments b/formal/promela/src/src/modules/comment-filter/bin/comments
new file mode 100755
index 00000000..0b9c7d5a
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/bin/comments
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import fileinput
+from comment_filter import language
+import comment_filter
+import argparse
+from comment_filter import _version
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--onlycode', help='filter out comments', action='store_true')
+    parser.add_argument('--notokens', help='filter out comment tokens', action='store_true')
+    parser.add_argument('--version', action='version', version=_version.__version__)
+    parser.add_argument('path', help='path to file to parse')
+    args = parser.parse_args()
+
+    _, ext = os.path.splitext(args.path)
+    lang = language.extension_to_lang_map.get(ext, language.c)
+    input_stream = fileinput.input(args.path)
+    keep_tokens = not args.notokens
+    for line in comment_filter.parse_file(lang, input_stream, code_only=args.onlycode, keep_tokens=keep_tokens):
+        sys.stdout.write(line)
diff --git a/formal/promela/src/src/modules/comment-filter/bin/testdata/hello.c b/formal/promela/src/src/modules/comment-filter/bin/testdata/hello.c
new file mode 100644
index 00000000..0baf6932
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/bin/testdata/hello.c
@@ -0,0 +1,8 @@
+/* multi-line
+   comment */
+// single-line comment
+
+int main() {
+    return 0;
+}
+
diff --git a/formal/promela/src/src/modules/comment-filter/comment_filter/__init__.py b/formal/promela/src/src/modules/comment-filter/comment_filter/__init__.py
new file mode 100644
index 00000000..1d3b256e
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/comment_filter/__init__.py
@@ -0,0 +1 @@
+from .rfc import *
diff --git a/formal/promela/src/src/modules/comment-filter/comment_filter/_version.py b/formal/promela/src/src/modules/comment-filter/comment_filter/_version.py
new file mode 100644
index 00000000..e69de29b
diff --git a/formal/promela/src/src/modules/comment-filter/comment_filter/language.py b/formal/promela/src/src/modules/comment-filter/comment_filter/language.py
new file mode 100644
index 00000000..a934810e
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/comment_filter/language.py
@@ -0,0 +1,60 @@
+class Lang:
+    def __init__(self, line_comment, comment_bookends, nested_comments):
+        self.line_comment = line_comment
+        self.comment_bookends = comment_bookends
+        self.nested_comments = nested_comments
+        self.string_literal_start = '"'
+        self.string_literal2_start = "'"
+
+c = Lang(
+    line_comment='//',
+    comment_bookends=[('/*', '*/'), (';;', ';;')],
+    nested_comments=False)
+
+haskell = Lang(
+    line_comment='--',
+    comment_bookends=[('{-', '-}')],
+    nested_comments=True)
+
+python = Lang(
+    line_comment='#',
+    comment_bookends=[('"""', '"""'), ("'''", "'''")],
+    nested_comments=False)
+
+ruby = Lang(
+    line_comment='#',
+    comment_bookends=[("=begin", "=end")],
+    nested_comments=False)
+
+lua = Lang(
+    line_comment='--',
+    comment_bookends=[("--[[", "--]]")],
+    nested_comments=False)
+
+perl = Lang(
+    line_comment='#',
+    comment_bookends=[("=pod", "=cut")],
+    nested_comments=False)
+
+java = Lang(
+    line_comment='//',
+    comment_bookends=[('/*', '*/')],
+    nested_comments=True)
+
+go = c
+
+extension_to_lang_map = {
+    '.c': c,
+    '.cc': c,
+    '.cxx': c,
+    '.cpp': c,
+    '.h': c,
+    '.S': c,
+    '.java': java,
+    '.go': go,
+    '.hs': haskell,
+    '.py': python,
+    '.rb': ruby,
+    '.lua': lua,
+    '.pl': perl,
+}
diff --git a/formal/promela/src/src/modules/comment-filter/comment_filter/rfc.py b/formal/promela/src/src/modules/comment-filter/comment_filter/rfc.py
new file mode 100644
index 00000000..86b7deaa
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/comment_filter/rfc.py
@@ -0,0 +1,423 @@
+import re
+
+class State:
+    """Parser State"""
+    def __init__(self, line='', multi_end_stack=None, in_literal=None):
+        # The remaining input.
+        self.line = line
+
+        # A stack of end tokens for multi-line comments.  The token on the
+        # top of the stack is the expected end token for the most nested
+        # multi-line comment.
+        self.multi_end_stack = multi_end_stack or []
+
+        # If the parser is waiting on the end quote, in_literal will be
+        # the quote string the parser is waiting for.
+        self.in_literal = in_literal
+
+    def __eq__(self, x):
+        # Return True if all members are equal.
+        return self.line == x.line and self.multi_end_stack == x.multi_end_stack \
+            and self.in_literal == x.in_literal
+
+
+def parse_file(lang, file_obj, code_only=False, keep_tokens=True):
+    """
+    Return a generator that yields a filtered line for
+    each line in file_obj.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      file_obj (iterator<string>):
+        An iterator that yields lines.
+      code_only (bool, default: False):
+        If False, each non-comment character is replaced with a space.
+        If True, each comment character is replaced with a space.
+      keep_tokens (bool, default: True):
+        If False, comment tokens are filtered out.
+        If True, comment tokens are preserved.
+
+    Returns:
+      iterator<string>
+    """
+    state = State()
+    for line in file_obj:
+        state.line = line
+        line, state = parse_line(lang, state, code_only, keep_tokens)
+        yield line
+
+
+def parse_line(lang, state, code_only=False, keep_tokens=True):
+    """
+    Return the comments or code of state.line.
+
+    The output string will be the same length as the input string.
+    Filtered out characters are represented as spaces.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state.
+      code_only (bool, default: False):
+        If False, each non-comment character is replaced with a space.
+        If True, each comment character is replaced with a space.
+      keep_tokens (bool, default: True):
+        If False, comment tokens are filtered out.
+        If True, comment tokens are preserved.
+
+    Returns:
+      (string, State)
+    """
+    # If currently within a string literal or multi-line comment, first
+    # complete parsing that declaration.  Store the result in 'rest_of_decl'.
+    rest_of_decl = ''
+    if state.in_literal:
+        # Parsing a string literal.
+        cnts, state = finish_string_literal(state.in_literal, state)
+        if code_only:
+            rest_of_decl = cnts
+        else:
+            rest_of_decl = clear_line(cnts)
+    elif state.multi_end_stack:
+        # If there is state, we assume it is because we have parsed
+        # the start of a multiline comment, but haven't found the end.
+        cmt, state = finish_multiline_comment(lang, state, keep_tokens)
+        if code_only:
+            rest_of_decl = clear_line(cmt)
+        else:
+            rest_of_decl = cmt
+
+    if state.in_literal or state.multi_end_stack:
+        return rest_of_decl, state
+
+    decls, state = parse_declarations(lang, state, code_only, keep_tokens)
+    return rest_of_decl + decls, state
+
+
+def parse_declarations(lang, state, code_only=False, keep_tokens=True):
+    """
+    Return the comments or code of state.line.
+
+    Unlike parse_line, this function assumes the parser is *not*
+    in the context of a multi-line comment.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state.
+      code_only (bool, default: False):
+        If False, each non-comment character is replaced with a space.
+        If True, each comment character is replaced with a space.
+      keep_tokens (bool, default: True):
+        If False, comment tokens are filtered out.
+        If True, comment tokens are preserved.
+
+    Returns:
+      (string, State)
+    """
+    code, state = parse_code(lang, state)
+    comment, state = parse_line_comment(lang, state, keep_tokens)
+    comment2, state = parse_multiline_comment(lang, state, keep_tokens)
+
+    if comment or comment2:
+        line = state.line
+        if not state.multi_end_stack:
+            # Continue looking for declarations.
+            line, state = parse_declarations(lang, state, code_only, keep_tokens)
+        if code_only:
+            line = code + clear_line(comment) + clear_line(comment2) + line
+        else:
+            line = clear_line(code) + comment + comment2 + line
+        return line, state
+    else:
+        state.line = ''
+        if code_only:
+            return code, state
+        else:
+            return clear_line(code), state
+
+
+def parse_code(lang, state):
+    """
+    Returns all characters up to the first comment.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state.
+
+    Returns:
+      (string, State)
+    """
+    code = ''
+    while True:
+        line = state.line
+        multi_start_tokens = [start for start, end in lang.comment_bookends]
+        tokens = multi_start_tokens + [
+            lang.line_comment,
+            lang.string_literal_start,
+            lang.string_literal2_start]
+        i = index_of_first_found(line, tokens)
+        if i != -1:
+            state.line = line[i:]
+            code += line[:i]
+            if line.startswith(lang.line_comment, i) or \
+                    index_of_first_found(line, multi_start_tokens) == i:
+                return code, state
+            elif line.startswith(lang.string_literal_start, i):
+                lit, state = parse_string_literal(
+                    lang.string_literal_start, state)
+                code += lit
+                continue
+            else:
+                lit, state = parse_string_literal(
+                    lang.string_literal2_start, state)
+                code += lit
+                continue
+        else:
+            state.line = ''
+            return code + line, state
+
+
+def parse_string_literal(quote, state):
+    """
+    Returns the string literal at the beginning of state.line,
+    otherwise the empty string.
+
+    Args:
+      quote (string):
+        The syntax for the start and end quote.
+      state (State):
+        Parser state.
+
+    Returns:
+      (string, State)
+    """
+    if state.line.startswith(quote):
+        state.line = state.line[len(quote):]
+        line, state = finish_string_literal(quote, state)
+        return quote + line, state
+    else:
+        return '', state
+
+
+def finish_string_literal(quote, state):
+    cnts, state = parse_string_literal_contents(quote, state)
+    if state.line.startswith(quote):
+        state.line = state.line[len(quote):]
+        state.in_literal = None
+        return cnts + quote, state
+    else:
+        # No end-quote yet.
+        state.in_literal = quote
+        return cnts, state
+
+
+def parse_string_literal_contents(quote, state):
+    """
+    Returns the string literal contents at the beginning of state.line.
+    The end quote is not included.
+
+    Args:
+      quote (string):
+        The syntax for the end quote.
+      state (State):
+        Parser state.
+
+    Returns:
+      (string, State)
+    """
+    contents = ''
+    escaped_quote = '\\' + quote
+    while True:
+        i = index_of_first_found(state.line, [quote, escaped_quote])
+        if i != -1:
+            if state.line.startswith(quote, i):
+                contents += state.line[:i]
+                state.line = state.line[i:]
+                return contents, state
+            else:
+                # Escaped quote.
+                i += len(escaped_quote)
+                contents += state.line[:i]
+                state.line = state.line[i:]
+                continue
+        else:
+            # No end-quote.  Chew up the whole line.
+            contents += state.line
+            state.line = ''
+            return contents, state
+
+
+def parse_line_comment(lang, state, keep_tokens=True):
+    """
+    Returns the single-line comment at the beginning of state.line,
+    otherwise the empty string.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state
+      keep_tokens (bool, default: True):
+        If False, comment tokens are filtered out.
+        If True, comment tokens are preserved.
+
+    Returns:
+      (string, State)
+    """
+    line = state.line
+    line_comment = lang.line_comment
+    if line.startswith(line_comment):
+        state.line = ''
+        i = len(line_comment)
+        if not keep_tokens:
+            line_comment = ' ' * i
+        return line_comment + line[i:], state
+    else:
+        return '', state
+
+
+def parse_multiline_comment(lang, state, keep_tokens=True):
+    """
+    Returns the multi-line comment at the beginning of state.line,
+    otherwise the empty string.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state
+      keep_tokens (bool, default: True):
+        If False, comment tokens are filtered out.
+        If True, comment tokens are preserved.
+
+    Returns:
+      (string, State)
+    """
+    line = state.line
+    for multi_start, multi_end in lang.comment_bookends:
+        if line.startswith(multi_start):
+            state.multi_end_stack.append(multi_end)
+            state.line = line[len(multi_start):]
+            cnts, state = finish_multiline_comment(lang, state, keep_tokens)
+            if not keep_tokens:
+                multi_start = ' ' * len(multi_start)
+            return multi_start + cnts, state
+    return '', state
+
+
+def finish_multiline_comment(lang, state, keep_tokens=True):
+    """
+    Returns the rest of a multi-line comment at the beginning of state.line.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state
+      keep_tokens (bool, default: True):
+        If False, comment tokens are filtered out.
+        If True, comment tokens are preserved.
+
+    Returns:
+      (string, State)
+    """
+    cnts, state = parse_multiline_contents(lang, state)
+    multi_end = state.multi_end_stack[-1]
+
+    # Handle languages that support nested comments.
+    if lang.nested_comments:
+        cmt, state = parse_multiline_comment(lang, state, keep_tokens)
+    else:
+        cmt = ''
+
+    line = state.line
+    if line:
+        if line.startswith(multi_end):
+            i = len(multi_end)
+            state.multi_end_stack.pop()
+            state.line = line[i:]
+            if not keep_tokens:
+                multi_end = ' ' * len(multi_end)
+            return cnts + cmt + multi_end, state
+        else:
+            more_cnts, state = finish_multiline_comment(lang, state, keep_tokens)
+            return cnts + cmt + more_cnts, state
+    else:
+        return cnts + cmt, state
+
+
+def parse_multiline_contents(lang, state):
+    """
+    Returns the multi-line comment contents at the beginning of state.line.
+
+    Args:
+      lang (Language):
+        Syntax description for the language being parsed.
+      state (State):
+        Parser state
+
+    Returns:
+      (string, State)
+    """
+    line = state.line
+    tokens = [start for start, end in lang.comment_bookends]
+    multi_end = state.multi_end_stack[-1]
+    tokens.append(multi_end)
+
+    if lang.nested_comments:
+        i = index_of_first_found(line, tokens)
+    else:
+        try:
+            i = line.index(multi_end)
+        except ValueError:
+            i = -1
+
+    if i != -1:
+        state.line = line[i:]
+        return line[:i], state
+    else:
+        # Reached the end of line before the end of comment.
+        state.line = ''
+        return line, state
+
+
+def index_of_first_found(s, xs):
+    """
+    Return the lowest index in s at which any string from xs occurs, or -1.
+    """
+    regex = '|'.join(map(re.escape, xs))
+    m = re.search(regex, s)
+    if m:
+        return m.start()
+    else:
+        return -1
+
+
+def clear_line(line):
+    """
+    Return a string where every character except the trailing line separator is replaced with a space.
+    """
+    sep = get_linesep(line)
+    if sep:
+        return ' ' * (len(line) - len(sep)) + sep
+    else:
+        return ' ' * len(line)
+
+
+def get_linesep(line):
+    """
+    Returns the line separator if it exists, otherwise the empty string.
+    """
+    n = len(line)
+    if n >= 2 and line[-2:] == '\r\n':
+        return '\r\n'
+    elif n >= 1 and line[-1] == '\n':
+        return '\n'
+    else:
+        return ''
diff --git a/formal/promela/src/src/modules/comment-filter/comment_filter/rfc_test.py b/formal/promela/src/src/modules/comment-filter/comment_filter/rfc_test.py
new file mode 100644
index 00000000..dd626a68
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/comment_filter/rfc_test.py
@@ -0,0 +1,258 @@
+from . import rfc
+from . import language
+from functools import reduce, wraps
+from sys import getrecursionlimit, setrecursionlimit
+
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+
+# Same as rfc.parse_line(), but ensure it always returns a string with the
+# same length as the input string.
+def safe_parse_line(lang, state, **kwargs):
+    old_line = state.line
+    new_line, new_state = rfc.parse_line(lang, state, **kwargs)
+    assert len(new_line) == len(old_line)
+    return new_line, new_state
+
+
+def make_state(line='', multi_end_stack=None, in_literal=None):
+    return rfc.State(line, multi_end_stack, in_literal)
+
+
+def test_state():
+    """
+    Verify constructor doesn't return a global default value.
+    """
+    rfc.State().multi_end_stack.append('doh!')
+    assert(len(rfc.State().multi_end_stack) == 0)
+
+
+def c_line(s):
+    """
+    Given a string, return only the C comments, and in the same column position.
+    """
+    line, state = safe_parse_line(language.c, make_state(s))
+    assert state.multi_end_stack == []
+    return line
+
+
+def c_code(s):
+    """
+    Given a string, return only the C code, and in the same column position.
+    """
+    line, state = safe_parse_line(language.c, make_state(s), code_only=True)
+    assert state.multi_end_stack == []
+    return line
+
+
+def py_line(s):
+    """
+    Given a string, return only the Python comments, and in the same column position.
+    """
+    line, state = safe_parse_line(language.python, make_state(s))
+    assert state.multi_end_stack == []
+    return line
+
+
+def java_line(s):
+    """
+    Given a string, return only the Java comments, and in the same column position.
+    Unlike c_line, java_line supports nested comments.
+    """
+    line, state = safe_parse_line(language.java, make_state(s))
+    assert state.multi_end_stack == []
+    return line
+
+
+def parse_code(lang, line):
+    return rfc.parse_code(lang, make_state(line))
+
+def test_parse_code():
+    c = language.c
+    assert parse_code(c, '') == ('', make_state(''))
+    assert parse_code(c, '\n') == ('\n', make_state(''))
+    assert parse_code(c, 'foo // bar') == ('foo ', make_state('// bar'))
+    assert parse_code(c, 'foo /* bar') == ('foo ', make_state('/* bar'))  # Ensure '*' is escaped.
+    assert parse_code(c, '/**/ foo') == ('', make_state('/**/ foo'))  # Only return code /before/ comments.
+
+    # Multi-line Python strings are treated as comments.
+    assert parse_code(language.python, '""" bar """') == ('', make_state('""" bar """'))
+
+
+def line_comment(lang, line, keep_tokens=True):
+    cmt, state = rfc.parse_line_comment(lang, make_state(line), keep_tokens)
+    return (cmt, state.line)
+
+
+def test_parse_line_comment():
+    c = language.c
+    assert line_comment(c, '') == ('', '')
+    assert line_comment(c, '\n') == ('', '\n')
+    assert line_comment(c, '//\n') == ('//\n', '')
+    assert line_comment(c, '//\n', False) == ('  \n', '')
+
+
+def test_index_of_first_found():
+    assert rfc.index_of_first_found('', []) == 0  # Nothing to find.
+    assert rfc.index_of_first_found('', ['a']) == -1  # Nothing found.
+    assert rfc.index_of_first_found('ab', ['b']) == 1  # 'b' found.
+    assert rfc.index_of_first_found('abc', ['b', 'c']) == 1  # 'b' found first.
+    assert rfc.index_of_first_found('acb', ['b', 'c']) == 1  # 'c' found first.
+
+
+def test_clear_line():
+    assert rfc.clear_line('') == ''
+    assert rfc.clear_line('abc') == '   '
+    assert rfc.clear_line('abc\n') == '   \n'  # Preserve newline.
+    assert rfc.clear_line('abc\r\n') == '   \r\n'  # Preserve two-character (CRLF) newline.
+
+
+def test_get_linesep():
+    assert rfc.get_linesep('') == ''
+    assert rfc.get_linesep('foo\n') == '\n'
+    assert rfc.get_linesep('foo\r\n') == '\r\n'
+    assert rfc.get_linesep('foo\r\nbar\n') == '\n'
+
+
+def multiline_comment(lang, line, keep_tokens=True):
+    return rfc.parse_multiline_comment(lang, make_state(line), keep_tokens)
+
+
+def test_parse_multiline_comment():
+    c = language.c
+    assert multiline_comment(c, '/**/\n') == ('/**/', make_state('\n'))
+    assert multiline_comment(c, '/**/\n', False) == ('    ', make_state('\n'))
+
+
+def declarations(lang, line, keep_tokens=True):
+    return rfc.parse_declarations(lang, make_state(line), keep_tokens=keep_tokens)
+
+
+def test_parse_declarations():
+    c = language.c
+    assert declarations(c, '\n') == ('\n', make_state())
+    assert declarations(c, '/**/\n') == ('/**/\n', make_state())
+
+
+def test_parse_line():
+    assert c_line('') == ''
+    assert c_line('no comments') == '           '
+    assert c_line('/**/') == '/**/'
+    assert c_line('/* a */') == '/* a */'
+    assert c_line('// a') == '// a'
+    assert py_line('""" a """') == '""" a """'
+    assert py_line("''' a '''") == "''' a '''"
+
+    # Preserve newline
+    assert c_line('/* a */\n') == '/* a */\n'
+
+    # Ensure column position is not modified.
+    assert c_line('abc /* a */') == '    /* a */'
+    assert c_line('abc // a') == '    // a'
+
+    # Test comments in comments
+    assert c_line('// /*abc*/') == '// /*abc*/'
+    assert c_line('/* a */ // a') == '/* a */ // a'
+    assert java_line('/* /**/ */') == '/* /**/ */'
+    assert java_line('/*/**/*/') == '/*/**/*/'
+    assert c_line('/*/**/*/') == '/*/**/  '
+    assert c_line('/* // */') == '/* // */'
+    assert py_line('"""# a"""') == '"""# a"""'
+
+    # Test string literals with escaped quotes and with a missing end quote
+    assert c_line('"\\\"foo\\\""') == '         '
+    assert c_line('"foo') == '    '
+
+    # Test string literals with comments
+    assert c_line('"/*"') == '    '
+    assert c_line("'/*'") == '    '
+
+    # Test c_code
+    assert c_code('') == ''
+    assert c_code('no comments') == 'no comments'
+    assert c_code('/**/') == '    '
+    assert c_code('/* a */\n') == '       \n'
+    assert c_code('/* a */ abc') == '        abc'
+    assert c_code('abc /**/') == 'abc     '
+
+
+def string_literal(quote, line):
+    lit, state = rfc.parse_string_literal(quote, make_state(line))
+    return (lit, state.line)
+
+
+def test_parse_string_literal():
+    assert string_literal('"', 'abc') == ('', 'abc')
+    assert string_literal('"', '"a"') == ('"a"', '')
+    assert string_literal("'", "'a'") == ("'a'", '')
+    assert string_literal("'", "'a'b") == ("'a'", 'b')
+
+    # String without an end quote.
+    assert string_literal('"', '"a') == ('"a', '')
+
+
+def parse_line(lang, line, multi_end_stack=None, code_only=False):
+    return safe_parse_line(lang, make_state(line, multi_end_stack), code_only=code_only)
+
+
+def test_incomplete_multiline():
+    c = language.c
+    assert parse_line(c, '/* a\n') == ('/* a\n', make_state('', ['*/']))
+
+
+def test_incomplete_string_literal():
+    c = language.c
+    assert parse_line(c, '" a \\\n', code_only=True) == ('" a \\\n', make_state('', None, '"'))
+
+
+def test_previous_state_maintained():
+    c = language.c
+    assert parse_line(c, '', ['foo']) == ('', make_state('', ['foo']))
+    assert parse_line(c, '/**/', ['foo']) == ('/**/', make_state('', ['foo']))
+    assert parse_line(c, '/*', ['foo']) == ('/*', make_state('', ['foo']))
+    j = language.java
+    assert parse_line(j, '/*', ['foo']) == ('/*', make_state('', ['foo', '*/']))
+
+
+def test_code_of_resumed_multiline_comment():
+    c = language.c
+    assert parse_line(c, 'a', ['*/'], True) == (' ', make_state('', ['*/']))
+    assert parse_line(c, 'a */', ['*/'], True) == ('    ', make_state('', []))
+
+
+def parse_lit_line(lang, line, code_only=False):
+    return safe_parse_line(lang, make_state(line, None, lang.string_literal_start), code_only=code_only)
+
+
+def test_previous_state_maintained_literal():
+    c = language.c
+    assert parse_lit_line(c, '') == ('', make_state('', in_literal='"'))
+    assert parse_lit_line(c, '"') == (' ', make_state(''))
+
+
+def test_code_of_resumed_multiline_literal():
+    c = language.c
+    assert parse_lit_line(c, '', True) == ('', make_state('', in_literal='"'))
+    assert parse_lit_line(c, '"', True) == ('"', make_state(''))
+
+
+def c_comments(s, keep_tokens=True):
+    return list(rfc.parse_file(language.c, StringIO(s), keep_tokens=keep_tokens))
+
+
+def test_parse_file():
+    assert c_comments('/* hello */ world\n') == ['/* hello */      \n']
+    assert c_comments('/* hello */ world\n', False) == ['   hello         \n']
+
+
+def test_parse_comments_via_reduce():
+    def f(st, x):
+        st.line = x
+        _, st = safe_parse_line(language.c, st)
+        return st
+
+    assert reduce(f, ['/*a', 'b*/'], make_state()) == make_state()
+    assert reduce(f, ['/*a', 'b'], make_state()) == make_state('', ['*/'])
diff --git a/formal/promela/src/src/modules/comment-filter/setup.cfg b/formal/promela/src/src/modules/comment-filter/setup.cfg
new file mode 100644
index 00000000..ed8a958e
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/setup.cfg
@@ -0,0 +1,5 @@
+[bdist_wheel]
+universal = 1
+
+[metadata]
+license_file = LICENSE
diff --git a/formal/promela/src/src/modules/comment-filter/setup.py b/formal/promela/src/src/modules/comment-filter/setup.py
new file mode 100644
index 00000000..998f8459
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/setup.py
@@ -0,0 +1,12 @@
+from setuptools import setup, find_packages
+
+
+setup(
+    name="comment_filter",
+    author='Greg Fitzgerald',
+    author_email='gregf at codeaurora.org',
+    url='https://source.codeaurora.org/external/qostg/comment-filter',
+    version='1.0.0',
+    packages=find_packages(),
+    scripts=['bin/comments']
+)
diff --git a/formal/promela/src/src/modules/comment-filter/tox.ini b/formal/promela/src/src/modules/comment-filter/tox.ini
new file mode 100644
index 00000000..e37207b6
--- /dev/null
+++ b/formal/promela/src/src/modules/comment-filter/tox.ini
@@ -0,0 +1,12 @@
+[tox]
+envlist = py27, py36
+
+[testenv]
+deps =
+    pytest
+    pytest-cov
+
+commands = py.test
+
+[pytest]
+addopts = --cov=comment_filter --cov-report=term-missing
-- 
2.37.1 (Apple Git-137.1)




More information about the devel mailing list