Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*__pycache__*
*.py[cod]
*$py.class

Expand Down Expand Up @@ -28,6 +28,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
bin

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down Expand Up @@ -132,3 +133,8 @@ dmypy.json

# VSCode
.vscode

# Eclipse files
*.project
*.pydevproject
*.settings*
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,4 @@ Example
perf(CommentPreprocessor): Remove the exceptional punctuations
reviewed by : [email protected]
tested by : [email protected]
```
```
17 changes: 8 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
# Atarashi Dockerfile
# Copyright (C) 2018-2019 Gaurav Mishra, [email protected]
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This package is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# This package is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Copying and distribution of this file, with or without modification,
# along with this package; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# Description: Docker container image recipe

Expand Down
8 changes: 7 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,18 @@ recursive-exclude __pycache__ *.pyc *.pyo *.orig

exclude *.git*
exclude *.sh
exclude *.eggs
exclude *imtihaan.py
exclude atarashi/evaluator
exclude .travis.yml
exclude Dockerfile

include requirements*.*
include requirements.txt
include pyproject.toml
include atarashi/data/licenses/processedLicenses.csv
include atarashi/data/Ngram_keywords.json
include *.py
include LICENSE

prune .git
prune venv
Expand Down
25 changes: 17 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ https://fossology.github.io/atarashi
- These files will be placed to their appropriate places by the install script.

### Installing just dependencies

- `pip install -r requirements.txt`
- `# pip install -r requirements.txt`

### Build (optional)

Expand Down Expand Up @@ -110,16 +109,26 @@ pass the options and path to the file relative to the mounted path.

## Creating Debian packages

- Install dependencies
- Install build dependencies from `debian/control`
- Install dependencies from requirements.txt
```sh
# python3 -m pip install -r requirements.txt
```
# apt-get install python3-setuptools python3-all debhelper
# pip install stdeb
- Build the orig.tar file manually or get from github (master branch) using uscan.
```sh
$ uscan -dd
```
- Create Debian packages
- Build the packages using your favourite script
```sh
$ debuild
```
$ python3 setup.py --command-packages=stdeb.command bdist_deb

## Installing Debian package
- Install the `.deb` file
- Install the missing dependencies (`Nirjas`, `textdistance`, `pyxDamerauLevenshtein`)
```sh
# python3 -m pip install 'textdistance>=3.0.3' 'pyxDamerauLevenshtein>=1.5' 'Nirjas>=0.0.3'
```
- Locate the files under `deb_dist`

## License

Expand Down
10 changes: 3 additions & 7 deletions atarashi/atarashii.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,16 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="Specify the input file path to scan")
parser.add_argument("-l", "--processedLicenseList", required=False,
help="Specify the location of processed license list file")
help="Specify the location of processed license list file",
default=defaultProcessed)
parser.add_argument("-a", "--agent_name", required=True,
choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'],
help="Name of the agent that needs to be run")
parser.add_argument("-s", "--similarity", required=False, default="CosineSim",
choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"],
help="Specify the similarity algorithm that you want."
" First 2 are for TFIDF and last 3 are for Ngram")
parser.add_argument("-j", "--ngram_json", required=False,
parser.add_argument("-j", "--ngram_json", required=False, default=defaultJSON,
help="Specify the location of Ngram JSON (for Ngram agent only)")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
Expand All @@ -115,11 +116,6 @@ def main():
processedLicense = args.processedLicenseList
ngram_json = args.ngram_json

if processedLicense is None:
processedLicense = defaultProcessed
if ngram_json is None:
ngram_json = defaultJSON

result = atarashii_runner(inputFile, processedLicense, agent_name, similarity, ngram_json, verbose)
if agent_name == "wordFrequencySimilarity":
result = [{
Expand Down
13 changes: 13 additions & 0 deletions debian/atarashi.lintian-overrides
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# The license texts are shipped as a knowledge base for the scanner; they do not apply to the files themselves
atarashi binary: license-problem-bad-php-license atarashi/data/licenses/2020-11-25_3_11.csv
atarashi binary: license-problem-bad-php-license atarashi/data/licenses/licenseList.csv
atarashi binary: license-problem-bad-php-license atarashi/data/licenses/processedLicenses.csv
atarashi binary: license-problem-gfdl-invariants atarashi/data/Ngram_keywords.json
atarashi binary: license-problem-json-evil atarashi/data/licenses/2020-11-25_3_11.csv
atarashi binary: license-problem-json-evil atarashi/data/licenses/licenseList.csv
atarashi binary: license-problem-json-evil atarashi/data/licenses/processedLicenses.csv
atarashi binary: license-problem-json-evil atarashi/data/Ngram_keywords.json
atarashi binary: license-problem-json-evil atarashi/evaluator/TestFiles/JSON.js
atarashi binary: license-problem-php-license atarashi/data/licenses/2020-11-25_3_11.csv
atarashi binary: license-problem-php-license atarashi/data/licenses/licenseList.csv
atarashi binary: license-problem-php-license atarashi/data/licenses/processedLicenses.csv
5 changes: 5 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
atarashi (0.0.11-1) unstable; urgency=low

* Initial Atarashi release

-- Gaurav Mishra <[email protected]> Wed, 14 Oct 2020 17:48:43 +0530
25 changes: 25 additions & 0 deletions debian/control
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Source: atarashi
Maintainer: FOSSology <[email protected]>
Section: python
Priority: optional
Build-Depends: debhelper-compat (= 12),
dh-python,
python3-setuptools,
python3-all,
python3-pip,
python3-tqdm,
python3-numpy,
python3-pandas,
python3-urllib3
Standards-Version: 4.3.0
X-Python3-Version: >= 3.5
Homepage: https://fossology.github.io/atarashi
Vcs-Git: https://github.com/fossology/atarashi.git
Vcs-Browser: https://github.com/fossology/atarashi

Package: atarashi
Architecture: all
Depends: ${misc:Depends}, ${python3:Depends}
Description: intelligent license scanner
Atarashi is a new license scanner which uses statistical text analysis for
license scanning.
62 changes: 62 additions & 0 deletions debian/copyright
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: atarashi
Upstream-Contact: FOSSology <[email protected]>
Source: https://github.com/fossology/atarashi
Disclaimer: This file is offered as-is, without any warranty.
License: GPL-2.0
Comment: Created on 2019-10-24T18:17:32Z with FOSSology


Files: *
Copyright: Copyright 2018 Aman Jain <[email protected]>
Copyright 2018 Gaurav Mishra <[email protected]>
Copyright 2019 Ayush Bhardwaj <[email protected]>
License: GPL-2.0

Files: atarashi/libs/initialmatch.py
atarashi/license/licenseDownloader.py
LICENSE
README.md
Dockerfile
atarashi/libs/utils.py
atarashi/license/licenseLoader.py
atarashi/agents/atarashiAgent.py
setup.py
atarashi/imtihaan.py
atarashi/build_deps.py
atarashi/agents/wordFrequencySimilarity.py
atarashi/evaluator/evaluator.py
atarashi/agents/tfidf.py
atarashi/atarashii.py
atarashi/agents/dameruLevenDist.py
atarashi/libs/license_clustering.py
atarashi/license/licensePreprocessor.py
atarashi/license/license_merger.py
atarashi/libs/ngram.py
atarashi/agents/cosineSimNgram.py
atarashi/libs/commentPreprocessor.py
.travis.yml
Copyright: Copyright 2018 Aman Jain <[email protected]>
Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Copyright 2018 Gaurav Mishra <[email protected]>
Copyright 2019 Ayush Bhardwaj <[email protected]>
Copyright (C) 2018-2019 Gaurav Mishra <[email protected]>
Copyright Siemens AG, 2019
License: GPL-2.0

License: GPL-2.0
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 dated June, 1991.
.
This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
.
You should have received a copy of the GNU General Public License
along with this package; if not, write to the Free Software Foundation,
Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1301, USA.
.
See /usr/share/common-licenses/GPL-2 for more information.
28 changes: 28 additions & 0 deletions debian/rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/make -f

# Defines DEB_SOURCE (the source package name taken from debian/changelog).
# override_dh_auto_install uses it to build its --root path; without this
# include the variable expands to nothing and files are installed directly
# under debian/ instead of debian/<package>/.
include /usr/share/dpkg/pkg-info.mk

# Settings exported into the environment of every recipe so that the
# debhelper/pybuild tools invoked below can read them.
export PYBUILD_NAME=atarashi
export DH_VERBOSE=1
export PYBUILD_SYSTEM=distutils

# Python 3 interpreters reported by `py3versions -r`; the build and install
# overrides loop over this list.  Simply expanded (:=) so the shell command
# runs once at parse time rather than on every reference.
PYTHON3_VERSIONS := $(shell py3versions -r)

# Catch-all target: any target without an explicit override below is handed
# to the dh sequencer, using the pybuild build system and the python3 addon.
%:
	dh $@ --buildsystem=pybuild --with python3 -v

# Clean step: run the stock dh_auto_clean, then remove the build/ directory
# and any compiled bytecode (*.pyc) left anywhere in the source tree.
override_dh_auto_clean:
	dh_auto_clean
	rm -rf build
	find . -name '*.pyc' -exec rm {} \;

# Build step: run the stock dh_auto_build, then invoke `setup.py build` once
# with each interpreter listed in PYTHON3_VERSIONS.  `set -ex` makes the loop
# abort on the first failing interpreter and echoes each command.
override_dh_auto_build:
	dh_auto_build
	set -ex; \
	for py in $(PYTHON3_VERSIONS); do \
		"$$py" setup.py build; \
	done

# Install step: run the stock dh_auto_install, then run `setup.py install`
# once per interpreter in PYTHON3_VERSIONS, rooted at debian/$(DEB_SOURCE)
# with the Debian install layout and without byte-compiling.
# NOTE(review): DEB_SOURCE is not assigned inside this rule; it has to be
# provided by the rest of the makefile or the environment -- confirm it is
# non-empty, otherwise --root resolves to the debian/ directory itself.
override_dh_auto_install:
	dh_auto_install
	set -ex; \
	for py in $(PYTHON3_VERSIONS); do \
		"$$py" setup.py install \
			--root=$(CURDIR)/debian/$(DEB_SOURCE) \
			--no-compile -O0 --install-layout=deb; \
	done
1 change: 1 addition & 0 deletions debian/source/format
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.0 (quilt)
3 changes: 3 additions & 0 deletions debian/watch
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
version=4
# Track upstream releases via the GitHub tags page.  The tag tarball may or
# may not carry a leading "v"; filenamemangle renames the download to
# <package>-<version>.tar.gz.  Every dot in the version pattern is escaped so
# it only matches a literal "." between the three numeric components.
opts="filenamemangle=s/.+\/v?(\d\S+)\.tar\.gz/@PACKAGE@-$1\.tar\.gz/" \
  https://github.com/fossology/@PACKAGE@/tags .*\/v?(\d+\.\d+\.\d+)\.tar\.gz debian uupdate
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ requires = [
"scipy>=0.18.1",
"textdistance>=3.0.3",
"pyxDamerauLevenshtein>=1.5",
"nirjas>=0.0.3",
"Nirjas>=0.0.3",
"urllib3>=1.24.1"
]
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
tqdm>=4.23.4
pandas>=0.23.1
numpy>=1.15.1
pyxDamerauLevenshtein>=1.5
scikit-learn>=0.18.1
scipy>=0.18.1
spacy>=2.0.11
textdistance>=3.0.3
setuptools>=39.2.0
nirjas>=0.0.3
urllib3>=1.24.1
Nirjas>=0.0.3
urllib3>=1.24.1
Loading