@ -0,0 +1,54 @@
|
||||
/lib/libsnowboy-detect.a
|
||||
snowboy-detect-swig.cc
|
||||
snowboydetect.py
|
||||
Snowboy.pm
|
||||
.DS_Store
|
||||
|
||||
*.dylib
|
||||
*.pyc
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
/examples/C/pa_stable_v19_20140130.tgz
|
||||
/examples/C/pa_stable_v190600_20161030.tgz
|
||||
/examples/C/portaudio
|
||||
/examples/C/demo
|
||||
/examples/C++/pa_stable_v19_20140130.tgz
|
||||
/examples/C++/pa_stable_v190600_20161030.tgz
|
||||
/examples/C++/portaudio
|
||||
/examples/C++/demo
|
||||
/examples/C++/demo2
|
||||
/examples/Java/Demo.class
|
||||
/examples/Perl/data/
|
||||
/examples/iOS/Obj-C/Pods/Pods.xcodeproj/xcuserdata/
|
||||
/examples/iOS/Obj-C/SnowboyTest.xcodeproj/project.xcworkspace/xcuserdata/
|
||||
/examples/iOS/Obj-C/SnowboyTest.xcodeproj/xcuserdata/
|
||||
/examples/iOS/Obj-C/SnowboyTest.xcworkspace/xcuserdata/
|
||||
/examples/iOS/Swift3/SnowboyTest.xcodeproj/project.xcworkspace/xcuserdata/
|
||||
/examples/iOS/Swift3/SnowboyTest.xcodeproj/xcuserdata/
|
||||
|
||||
/swig/Android/OpenBLAS-0.2.18.tar.gz
|
||||
/swig/Android/android-ndk-r11c-darwin-x86_64.zip
|
||||
/swig/Android/android-ndk-r14b-darwin-x86_64.zip
|
||||
/swig/Android/android-ndk-r11c-linux-x86_64.zip
|
||||
/swig/Android/OpenBLAS-Android/
|
||||
/swig/Android/OpenBLAS-Android-ARM32/
|
||||
/swig/Android/android-ndk-r11c/
|
||||
/swig/Android/android-ndk-r14b/
|
||||
/swig/Android/ndk_install/
|
||||
/swig/Android/ndk_install_32bit/
|
||||
/swig/Android/java/
|
||||
/swig/Android/jniLibs/
|
||||
/swig/Java/java/
|
||||
/swig/Java/jniLibs/
|
||||
|
||||
/build
|
||||
/node_modules
|
||||
/lib/node/binding
|
||||
/lib/node/index.js
|
||||
|
||||
/dist
|
||||
**/snowboy.egg-info
|
||||
/.idea
|
@ -0,0 +1,22 @@
|
||||
/lib/libsnowboy-detect.a
|
||||
snowboy-detect-swig.cc
|
||||
snowboydetect.py
|
||||
.DS_Store
|
||||
|
||||
*.pyc
|
||||
*.o
|
||||
*.so
|
||||
|
||||
/examples/C++/*
|
||||
/examples/Python/*
|
||||
|
||||
/swig/Android/*
|
||||
/swig/Python/*
|
||||
|
||||
/build
|
||||
/node_modules
|
||||
|
||||
/lib/node/*.ts
|
||||
|
||||
.npmignore
|
||||
.travis.yml
|
@ -0,0 +1,90 @@
|
||||
language: cpp
|
||||
|
||||
# Cache node dependencies
|
||||
cache:
|
||||
directories:
|
||||
- node_modules
|
||||
|
||||
# Ubuntu 14.04 Trusty support
|
||||
sudo: required
|
||||
dist: trusty
|
||||
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
# add PPAs with more up-to-date toolchains
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-precise-3.9
|
||||
packages:
|
||||
# install toolchains
|
||||
- libmagic-dev
|
||||
- libatlas-base-dev
|
||||
- gcc-5
|
||||
- g++-5
|
||||
- clang-3.8
|
||||
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
env:
|
||||
global:
|
||||
- secure: Hpft/SbwPrjQbHq+3DeJ8aMCpg2uW4z9MY4XaPPA5FQ80QkUdFMqALRvdBhXf/hm6bEZVLbIMXxqCImL5C4nx1SMUmsL6w/FbJjnamYEopk2MKCPZHKtZOdxsbdUwpL30WRH85DQ0KbcG9LatEr+qLwf9adRQrozhh5zhoRXzjuH8nxS/GRkYuZgTt4wxNt7xYnCVlARS9/V15OeOGcRWw/Q/r++ipINz8ylGqUnTGImZrDZ2nhlOkBSNzrPA7NhCSw1OiGvZpg4zVj/gDkSkPNFn4oDFr1nNDqg0EPFGVXDDI0KA7dpw2DhrJk1z8HgXw8PorPGP0mLnDl4i811KkCz6g6y+ETC6k1VtdB2jss0MCnD9HtxM0RS62yls6Bm5aMhoFjryOHgLHNrjiHfW2/lki421K6QlGp3a2ONkRk9zHiti3uTdtbxlz0kcu7Z8FT045lHNZX0B6QpPiLi2sy7H/dItqAGdWuY0lrGrddX1PpxCckBAZLO8/VEGGGkLQtzbxEXgF+EW0HJxURvUYUF2VCy+kaq86KrFzvSKS/evW/vj7Sq2rNbOCtnIy/rvIKAXU0bbR/1imuEiiMhKdiZku+jRfZZmpjKHoydba9SsHpuNGnR/sH40AIHv7Lv6q+z3mEI+X1YaOVAAlLYWExuHLLbWYjng2gEBIHwmuU=
|
||||
- secure: RNZDzRXBhS98DMpa0QIKQjL8Nl7Pbo6cYtPyaMjEgF2nv+W+gwhcyDDRUE4psJm26Qkz3AZNfLx/kGKPhhAjBpuGFreCbAFy3uDfbDdcn2K68E+yRSdBAoTIKlxVPpQR11hfPHiAs+3s4BIwLGnuwJSK3JMisboji4ceaxVQpdo0ZcJnNKykN2zabUl+8BW8SYQ8cYp/DLg+wSeqq7eplyYD7zoT/GGnSNylkrRsJxB5zlrRQC/ngUfK7AuxhkfQ14dsdWkkrx0RyVFul5VAc85qAbrtJvLZs2Cu/J3ohNzcRZG7m8+U4diHuIlBFx0ezL3hVBfXkOf74dP8+OnL3rAr/1n+dczl5/5mQqlSsy8UAtUtfdAtd+wRNRy5d+er1YuJBWOGs2SXInjNViEY1Phgs6bY/Lu3wiIxDJH0TORan6ZVSje2/vi7aegRoiqHNrs4m2JuQDCPXu53HKh22+nWgRLLXFT2oBN3FdCz3xj04t+LyT+P5uq9q0jXxKc1nlNpvF3nDzhIuJKcfgBRNm9Wt1vz04xzSRgZEFGMTRWkYTdV+0ZVeqEQjEPo4fRNJ6PT1Tem8VqIoHEKGivGkwiAZ6FhQ/TNkVD7tv5Vhq7eK3ZPXDRakuBsLJ5Nc9QnLCpoEqbuIYqjr8ODKV2HSjS16VaGPbvtYPWzhGKU9C4=
|
||||
matrix:
|
||||
- NODE_VERSION="4.0.0"
|
||||
- NODE_VERSION="5.0.0"
|
||||
- NODE_VERSION="6.0.0"
|
||||
- NODE_VERSION="7.0.0"
|
||||
- NODE_VERSION="8.0.0"
|
||||
- NODE_VERSION="9.0.0"
|
||||
|
||||
before_install:
|
||||
# use the correct version of node
|
||||
- rm -rf ~/.nvm/ && git clone --depth 1 https://github.com/creationix/nvm.git ~/.nvm
|
||||
- source ~/.nvm/nvm.sh
|
||||
- nvm install $NODE_VERSION
|
||||
- nvm use $NODE_VERSION
|
||||
# get commit message
|
||||
- COMMIT_MESSAGE=$(git show -s --format=%B $TRAVIS_COMMIT | tr -d '\n')
|
||||
# put local node-pre-gyp on PATH
|
||||
- export PATH=./node_modules/.bin/:$PATH
|
||||
# put global node-gyp and nan on PATH
|
||||
- npm install node-gyp -g
|
||||
# install aws-sdk so it is available for publishing
|
||||
- npm install aws-sdk nan typescript @types/node
|
||||
# figure out if we should publish or republish
|
||||
- PUBLISH_BINARY=false
|
||||
- REPUBLISH_BINARY=false
|
||||
# if we are building a tag then publish
|
||||
# - if [[ $TRAVIS_BRANCH == `git describe --tags --always HEAD` ]]; then PUBLISH_BINARY=true; fi;
|
||||
# or if we put [publish binary] in the commit message
|
||||
- if test "${COMMIT_MESSAGE#*'[publish binary]'}" != "$COMMIT_MESSAGE"; then PUBLISH_BINARY=true; fi;
|
||||
# alternatively we can [republish binary] which will replace any existing binary
|
||||
- if test "${COMMIT_MESSAGE#*'[republish binary]'}" != "$COMMIT_MESSAGE"; then PUBLISH_BINARY=true && REPUBLISH_BINARY=true; fi;
|
||||
install:
|
||||
# ensure source install works
|
||||
- npm install --build-from-source
|
||||
# test our module
|
||||
- node lib/node/index.js
|
||||
|
||||
before_script:
|
||||
# if publishing, do it
|
||||
- if [[ $REPUBLISH_BINARY == true ]]; then node-pre-gyp package unpublish; fi;
|
||||
- if [[ $PUBLISH_BINARY == true ]]; then node-pre-gyp package publish; fi;
|
||||
# cleanup
|
||||
- node-pre-gyp clean
|
||||
- node-gyp clean
|
||||
|
||||
script:
|
||||
# if publishing, test installing from remote
|
||||
- INSTALL_RESULT=0
|
||||
- if [[ $PUBLISH_BINARY == true ]]; then INSTALL_RESULT=$(npm install --fallback-to-build=false > /dev/null)$? || true; fi;
|
||||
# if install returned non zero (errored) then we first unpublish and then call false so travis will bail at this line
|
||||
- if [[ $INSTALL_RESULT != 0 ]]; then echo "returned $INSTALL_RESULT";node-pre-gyp unpublish;false; fi
|
||||
# If success then we arrive here so lets clean up
|
||||
- node-pre-gyp clean
|
||||
|
||||
after_success:
|
||||
# if success then query and display all published binaries
|
||||
- node-pre-gyp info
|
@ -0,0 +1,206 @@
|
||||
THIS LICENSE GOVERNS THE SOURCE CODE, THE LIBRARIES, THE RESOURCE FILES, AS WELL
|
||||
AS THE HOTWORD MODEL snowboy/resources/snowboy.umdl PROVIDED IN THIS REPOSITORY.
|
||||
ALL OTHER HOTWORD MODELS ARE GOVERNED BY THEIR OWN LICENSES.
|
||||
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -0,0 +1,12 @@
|
||||
recursive-include include *
|
||||
recursive-include lib *
|
||||
recursive-include swig/Python *
|
||||
recursive-include resources *
|
||||
include README.md
|
||||
|
||||
exclude *.txt
|
||||
exclude *.pyc
|
||||
global-exclude .DS_Store _snowboydetect.so
|
||||
prune resources/alexa
|
||||
prune lib/ios
|
||||
prune lib/android
|
@ -0,0 +1,134 @@
|
||||
# Common Questions for a Commercial Application
|
||||
|
||||
You are looking for a way to put Snowboy in a commercial application. We have compiled a large collection of common questions from our customers all over the world in various industries.
|
||||
|
||||
|
||||
## Universal models (paid) vs. personal models (free)
|
||||
|
||||
Personal models:
|
||||
|
||||
* are the models you downloaded from https://snowboy.kitt.ai or using our `/train` SaaS API.
|
||||
* are good for quick demos
|
||||
* are built with only 3 voice samples
|
||||
* are not noise robust and you'll get a lot of false alarms in real environments
|
||||
* only work on your own voice or a very similar voice, and are thus speaker dependent
|
||||
* are free
|
||||
|
||||
Universal models:
|
||||
|
||||
* are built using a lot more voice samples (at least thousands)
|
||||
* take effort to collect those voice samples
|
||||
* take a lot of GPU time to train
|
||||
* are more robust against noise
|
||||
* are mostly speaker independent (with challenges on children's voice and accents)
|
||||
* cannot be built by yourself using the web interface or the SaaS API
|
||||
* cost you money
|
||||
|
||||
### FAQ for universal & personal models
|
||||
|
||||
Q: **If I record multiple times on snowboy.kitt.ai, can I improve the personal models?**
|
||||
A: No. Personal models only take 3 voice samples to build. Each time you record new voices, the previous samples are overwritten and not used in your current model.
|
||||
|
||||
|
||||
Q: **How can I get a universal model for free?**
|
||||
A: The *one and only* way: Ask 500 people to log in to snowboy.kitt.ai, contribute their voice samples to a particular hotword, then ask us to build a universal model for that hotword.
|
||||
|
||||
Q: **Can I use your API to collect voices from 500 people and increment the sample counter from snowboy.kitt.ai?**
|
||||
A: No. The [SaaS](https://github.com/kitt-ai/snowboy#hotword-as-a-service) API is separate from the website.
|
||||
|
||||
Q: **How long does it take to get a universal model?**
|
||||
A: Usually a month.
|
||||
|
||||
## Licensing
|
||||
|
||||
|
||||
### Explain your license again?
|
||||
|
||||
Everything on Snowboy's GitHub repo is Apache licensed, including various sample applications and wrapper codes, though the Snowboy library is binary code compiled against different platforms.
|
||||
|
||||
With that said, if you built an application from https://github.com/kitt-ai/snowboy or personal models downloaded from https://snowboy.kitt.ai, you don't need to pay a penny.
|
||||
|
||||
If you want to use a universal model with your own customized hotword, you'll need an **evaluation license** and a **commercial license**.
|
||||
|
||||
### Evaluation license
|
||||
|
||||
Each hotword is different. When you train a universal model with your own hotword, nobody can guarantee that it works on your system without any flaws. Thus you'll need to get an evaluation license first to test whether your universal model works for you.
|
||||
|
||||
An evaluation license:
|
||||
|
||||
* gives you a 90 day window to evaluate the universal model we build for you
|
||||
* costs you money
|
||||
|
||||
**Warning: an evaluation license will expire after 90 days. Make sure you don't use the model with evaluation license in production systems.** Get a commercial license from us for your production system.
|
||||
|
||||
#### Evaluation license FAQ
|
||||
|
||||
Q: **How much does it cost?**
|
||||
A: A few thousand dollars.
|
||||
|
||||
Q: **Can I get a discount as a {startup, student, NGO}?**
|
||||
A: No. Our pricing is already at least half of what others charge.
|
||||
|
||||
Q: **How can you make sure your universal model works for me?**
|
||||
A: We simply can't. However we have a few sample universal models from our GitHub [repo](https://github.com/Kitt-AI/snowboy/tree/master/resources), including "alexa.umdl", "snowboy.umdl", and "smart_mirror.umdl". The "alexa.umdl" model is enhanced with a lot more data and is not a typical case. So pay attention to test "snowboy.umdl" and "smart_mirror.umdl". They offer similar performance to your model.
|
||||
|
||||
|
||||
### Commercial license
|
||||
|
||||
After evaluation, if you feel you want to go with Snowboy, you'll need a commercial license to deploy it. We usually charge a flat fee per unit of hardware you sell.
|
||||
|
||||
#### Commercial license FAQ
|
||||
|
||||
Q: **Is it a one-time license or subscription-based license?**
|
||||
A: It's a perpetual license for each device. Since the Snowboy library runs *offline* on your device, you can run it forever without worrying about any broken and dependent web services.
|
||||
|
||||
Q: **What's your pricing structure?**
|
||||
A: We have tiered pricing depending on your volume. We charge less if you sell more.
|
||||
|
||||
Q: **Can you give me one example?**
|
||||
A: For instance, if your product is a talking robot with a $300 price tag, and you sell at least 100,000 units per year, we'll probably charge you $1 per unit once you go over 100,000 units. If your product is a smart speaker with a $30 price tag, we won't charge you $1, but you'll have to sell a lot more for it to make business sense to us.
|
||||
|
||||
Q: **I plan to sell 1000 units a year, can I license your software for $1 per unit?**
|
||||
A: No. In that way we only make $1000 a year, which is not worth the amount of time we put on your hotword.
|
||||
|
||||
Q: **I make a cellphone app, not a hardware product, what's the pricing structure?**
|
||||
A: Depends on how you generate revenue. For instance, if your app is priced at $1.99, we'll collect cents per paid user, assuming you have a large user base. If you only have 2000 paid users, we'll make a revenue of less than a hundred dollars and it won't make sense to us.
|
||||
|
||||
|
||||
### What's the process of getting a license?
|
||||
|
||||
1. Make sure Snowboy can run on your system
|
||||
2. Reach out to us with your hotword name, commercial application, and target market
|
||||
3. Discuss with us about **commercial license** fee to make sure our pricing fits your budget
|
||||
4. Sign an evaluation contract, pay 50% of invoice
|
||||
5. We'll train a universal model for you and give you an **evaluation license** of 90 days
|
||||
6. Test the model and discuss how we can improve it
|
||||
7. If you decide to go with it, get a commercial license from us
|
||||
|
||||
## General Questions
|
||||
|
||||
### What language does Snowboy support?
|
||||
|
||||
We support North American English and Chinese the best. We can deal with a bit of Indian accents as well. For other languages, we'll need to first listen to your hotword (please send us a few .wav voice samples) before we can engage.
|
||||
|
||||
### How many voice samples do you need?
|
||||
|
||||
Usually 1500 voice samples from 500 people to get started. The more the better. If your hotword is in English, we can collect the voice samples for you. Otherwise you'll need to collect them yourself and send them to us.
|
||||
|
||||
### What's the format on voice samples?
|
||||
|
||||
16000Hz sample rate, 16 bit integer, mono channel, .wav files.
|
||||
|
||||
### Does Snowboy do: AEC, VAD, Noise Suppression, Beam Forming?
|
||||
|
||||
Snowboy has weak support for VAD and noise suppression, as we found some customers would use Snowboy without a microphone array. Snowboy is not an audio frontend processing toolkit and thus does not support AEC and beam forming.
|
||||
|
||||
If your application wants to support far-field speech, i.e., verbal communication at least 3 feet away, you'll need a microphone array to enhance incoming speech and reduce noise. Please do not rely on Snowboy to do everything.
|
||||
|
||||
### Can you compile Snowboy for my platform?
|
||||
|
||||
If your platform is not listed [here](https://github.com/Kitt-AI/snowboy/tree/master/lib), and you want to get a commercial license from us, please contact us with your toolchain, hardware chip, RAM, OS, GCC/G++ version. Depending on the effort, we might charge an NRE fee for cross compiling.
|
||||
|
||||
### Contact
|
||||
|
||||
If this document doesn't cover what's needed, feel free to reach out to us at snowboy@kitt.ai
|
@ -0,0 +1,85 @@
|
||||
{
|
||||
'targets': [{
|
||||
'target_name': 'snowboy',
|
||||
'sources': [
|
||||
'swig/Node/snowboy.cc'
|
||||
],
|
||||
'conditions': [
|
||||
['OS=="mac"', {
|
||||
'link_settings': {
|
||||
'libraries': [
|
||||
'<(module_root_dir)/lib/osx/libsnowboy-detect.a',
|
||||
]
|
||||
}
|
||||
}],
|
||||
['OS=="linux" and target_arch=="x64"', {
|
||||
'link_settings': {
|
||||
'ldflags': [
|
||||
'-Wl,--no-as-needed',
|
||||
],
|
||||
'libraries': [
|
||||
'<(module_root_dir)/lib/ubuntu64/libsnowboy-detect.a',
|
||||
]
|
||||
}
|
||||
}],
|
||||
['OS=="linux" and target_arch=="arm"', {
|
||||
'link_settings': {
|
||||
'ldflags': [
|
||||
'-Wl,--no-as-needed',
|
||||
],
|
||||
'libraries': [
|
||||
'<(module_root_dir)/lib/rpi/libsnowboy-detect.a',
|
||||
]
|
||||
}
|
||||
}],
|
||||
['OS=="linux" and target_arch=="arm64"', {
|
||||
'link_settings': {
|
||||
'ldflags': [
|
||||
'-Wl,--no-as-needed',
|
||||
],
|
||||
'libraries': [
|
||||
'<(module_root_dir)/lib/aarch64-ubuntu1604/libsnowboy-detect.a',
|
||||
]
|
||||
}
|
||||
}]
|
||||
],
|
||||
'cflags': [
|
||||
'-std=c++11',
|
||||
'-fexceptions',
|
||||
'-Wall',
|
||||
'-D_GLIBCXX_USE_CXX11_ABI=0'
|
||||
],
|
||||
'cflags!': [
|
||||
'-fno-exceptions'
|
||||
],
|
||||
'cflags_cc!': [
|
||||
'-fno-exceptions'
|
||||
],
|
||||
'include_dirs': [
|
||||
"<!(node -e \"require('nan')\")",
|
||||
"<!(pwd)/include"
|
||||
],
|
||||
'libraries': [
|
||||
'-lcblas'
|
||||
],
|
||||
'xcode_settings': {
|
||||
'MACOSX_DEPLOYMENT_TARGET': '10.11',
|
||||
"GCC_ENABLE_CPP_EXCEPTIONS": "YES",
|
||||
'OTHER_CFLAGS': [
|
||||
'-std=c++11',
|
||||
'-stdlib=libc++'
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"target_name": "action_after_build",
|
||||
"type": "none",
|
||||
"dependencies": [ "<(module_name)" ],
|
||||
"copies": [
|
||||
{
|
||||
"files": [ "<(PRODUCT_DIR)/<(module_name).node" ],
|
||||
"destination": "<(module_path)"
|
||||
}
|
||||
]
|
||||
}]
|
||||
}
|
@ -0,0 +1,83 @@
|
||||
# Snowboy Demo on Android
|
||||
|
||||
Note:
|
||||
|
||||
1. supported building platforms are Android Studio running on Mac OS X or Ubuntu. Windows is not supported.
|
||||
2. supported target CPU is ARMv7 (32bit) and ARMv8 (64bit) (most Android phones run on ARM CPUs)
|
||||
3. we have prepared an Android app which can be installed and run out of box: [SnowboyAlexaDemo.apk](https://github.com/Kitt-AI/snowboy/raw/master/resources/alexa/SnowboyAlexaDemo.apk) (please uninstall any previous one first if you installed this app before).
|
||||
|
||||
## General Workflow
|
||||
|
||||
1. Install `swig`. For Mac, do `brew install swig`; for Ubuntu, do `sudo apt-get install swig3.0`. Make sure your `swig` version is at least `3.0.10`. You'll also need `wget` to download files.
|
||||
|
||||
2. Go to `swig/Android` and build swig wrappers for Snowboy:
|
||||
|
||||
cd swig/Android
|
||||
make
|
||||
|
||||
To make for ARMv8 64bit:
|
||||
|
||||
make BIT=64
|
||||
|
||||
This will generate a cross-compiled library for ARM:
|
||||
|
||||
jniLibs/
|
||||
├── arm64-v8a
|
||||
│ └── libsnowboy-detect-android.so
|
||||
└── armeabi-v7a
|
||||
└── libsnowboy-detect-android.so
|
||||
|
||||
and a few Java wrapper files:
|
||||
|
||||
java
|
||||
└── ai
|
||||
└── kitt
|
||||
└── snowboy
|
||||
├── SnowboyDetect.java
|
||||
├── snowboy.java
|
||||
└── snowboyJNI.java
|
||||
|
||||
The generated `.so` and `.java` files are symlinked to the `examples/Android/SnowboyAlexaDemo` folder.
|
||||
|
||||
3. Use Android Studio to open the project in `examples/Android/SnowboyAlexaDemo` and run it.
|
||||
|
||||
Screenshot (say "Alexa" after clicking "Start"):
|
||||
|
||||
<img src="https://s3-us-west-2.amazonaws.com/kittai-cdn/Snowboy/SnowboyAlexaDemo-Andriod.jpeg" alt="Android Alexa Demo" width=300 />
|
||||
|
||||
|
||||
Don't forget to disable the "debug" option when releasing your Android App!
|
||||
|
||||
Note: If you need to copy the Android demo to another folder, please use the `-RL` option of `cp` to replace the relative symbolic links with real files:
|
||||
|
||||
cp -RL SnowboyAlexaDemo Other_Folder
|
||||
|
||||
Note: The sample app will save/overwrite all audio to a file (`recording.pcm`). Make sure you do not leave it on for a long time.
|
||||
|
||||
## Useful Code Snippets
|
||||
|
||||
|
||||
To initialize Snowboy detector in Java:
|
||||
|
||||
// Assume you put the model related files under /sdcard/snowboy/
|
||||
SnowboyDetect snowboyDetector = new SnowboyDetect("/sdcard/snowboy/common.res",
|
||||
"/sdcard/snowboy/snowboy.umdl");
|
||||
snowboyDetector.SetSensitivity("0.45"); // Sensitivity for each hotword
|
||||
snowboyDetector.SetAudioGain(2.0); // Audio gain for detection
|
||||
|
||||
To run hotword detection in Java:
|
||||
|
||||
int result = snowboyDetector.RunDetection(buffer, buffer.length); // buffer is a short array.
|
||||
|
||||
You may want to play with the frequency of the calls to `RunDetection()`, which controls the CPU usage and the detection latency.
|
||||
|
||||
|
||||
## Common Asks
|
||||
|
||||
The following issues have been fixed and pushed to `master`.
|
||||
|
||||
- [x] soft floating-point support with OpenBLAS
|
||||
- [x] upgrade NDK version to newer than r11c
|
||||
- [x] NDK toolchain building: remove `--stl=libc++` option
|
||||
|
||||
|
@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
|
||||
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
|
||||
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.DEPENDENCIES"/>
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="src" path="gen"/>
|
||||
<classpathentry kind="output" path="bin/classes"/>
|
||||
</classpath>
|
@ -0,0 +1,14 @@
|
||||
*.iml
|
||||
.gradle
|
||||
/local.properties
|
||||
/.idea/workspace.xml
|
||||
/.idea/libraries
|
||||
.DS_Store
|
||||
/build
|
||||
/captures
|
||||
.externalNativeBuild
|
||||
*.apk
|
||||
*.ap_
|
||||
.metadata/
|
||||
.idea/workspace.xml
|
||||
.idea/tasks.xml
|
@ -0,0 +1 @@
|
||||
SnowboyAlexaDemo
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="CompilerConfiguration">
|
||||
<resourceExtensions />
|
||||
<wildcardResourcePatterns>
|
||||
<entry name="!?*.java" />
|
||||
<entry name="!?*.form" />
|
||||
<entry name="!?*.class" />
|
||||
<entry name="!?*.groovy" />
|
||||
<entry name="!?*.scala" />
|
||||
<entry name="!?*.flex" />
|
||||
<entry name="!?*.kt" />
|
||||
<entry name="!?*.clj" />
|
||||
<entry name="!?*.aj" />
|
||||
</wildcardResourcePatterns>
|
||||
<annotationProcessing>
|
||||
<profile default="true" name="Default" enabled="false">
|
||||
<processorPath useClasspath="true" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,3 @@
|
||||
<component name="CopyrightManager">
|
||||
<settings default="" />
|
||||
</component>
|
@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding">
|
||||
<file url="file://$PROJECT_DIR$/src/ai/kitt/snowboy/Demo.java" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/src/ai/kitt/snowboy/audio/AudioDataSaver.java" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/src/ai/kitt/snowboy/audio/RecordingThread.java" charset="UTF-8" />
|
||||
<file url="PROJECT" charset="ISO-8859-1" />
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="GradleSettings">
|
||||
<option name="linkedExternalProjectsSettings">
|
||||
<GradleProjectSettings>
|
||||
<option name="distributionType" value="DEFAULT_WRAPPED" />
|
||||
<option name="externalProjectPath" value="$PROJECT_DIR$" />
|
||||
<option name="modules">
|
||||
<set>
|
||||
<option value="$PROJECT_DIR$" />
|
||||
</set>
|
||||
</option>
|
||||
<option name="resolveModulePerSourceSet" value="false" />
|
||||
</GradleProjectSettings>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,46 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="EntryPointsManager">
|
||||
<entry_points version="2.0" />
|
||||
</component>
|
||||
<component name="NullableNotNullManager">
|
||||
<option name="myDefaultNullable" value="android.support.annotation.Nullable" />
|
||||
<option name="myDefaultNotNull" value="android.support.annotation.NonNull" />
|
||||
<option name="myNullables">
|
||||
<value>
|
||||
<list size="4">
|
||||
<item index="0" class="java.lang.String" itemvalue="org.jetbrains.annotations.Nullable" />
|
||||
<item index="1" class="java.lang.String" itemvalue="javax.annotation.Nullable" />
|
||||
<item index="2" class="java.lang.String" itemvalue="edu.umd.cs.findbugs.annotations.Nullable" />
|
||||
<item index="3" class="java.lang.String" itemvalue="android.support.annotation.Nullable" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
<option name="myNotNulls">
|
||||
<value>
|
||||
<list size="4">
|
||||
<item index="0" class="java.lang.String" itemvalue="org.jetbrains.annotations.NotNull" />
|
||||
<item index="1" class="java.lang.String" itemvalue="javax.annotation.Nonnull" />
|
||||
<item index="2" class="java.lang.String" itemvalue="edu.umd.cs.findbugs.annotations.NonNull" />
|
||||
<item index="3" class="java.lang.String" itemvalue="android.support.annotation.NonNull" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
|
||||
<OptionsSetting value="true" id="Add" />
|
||||
<OptionsSetting value="true" id="Remove" />
|
||||
<OptionsSetting value="true" id="Checkout" />
|
||||
<OptionsSetting value="true" id="Update" />
|
||||
<OptionsSetting value="true" id="Status" />
|
||||
<OptionsSetting value="true" id="Edit" />
|
||||
<ConfirmationsSetting value="0" id="Add" />
|
||||
<ConfirmationsSetting value="0" id="Remove" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_7" default="true" assert-keyword="true" jdk-15="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/build/classes" />
|
||||
</component>
|
||||
<component name="ProjectType">
|
||||
<option name="id" value="Android" />
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/SnowboyAlexaDemo.iml" filepath="$PROJECT_DIR$/SnowboyAlexaDemo.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="RunConfigurationProducerService">
|
||||
<option name="ignoredProducers">
|
||||
<set>
|
||||
<option value="org.jetbrains.plugins.gradle.execution.test.runner.AllInPackageGradleConfigurationProducer" />
|
||||
<option value="org.jetbrains.plugins.gradle.execution.test.runner.TestClassGradleConfigurationProducer" />
|
||||
<option value="org.jetbrains.plugins.gradle.execution.test.runner.TestMethodGradleConfigurationProducer" />
|
||||
</set>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
@ -0,0 +1,33 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>Alexa19</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.ApkBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -0,0 +1,20 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
android:versionCode="1"
|
||||
android:versionName="1.0" package="ai.kitt.snowboy.demo" >
|
||||
<application android:label="@string/app_name" android:icon="@mipmap/ic_launcher">
|
||||
<activity android:name="ai.kitt.snowboy.Demo"
|
||||
android:windowSoftInputMode="stateHidden"
|
||||
android:label="@string/app_name">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
<uses-sdk android:minSdkVersion="3" />
|
||||
<uses-permission android:name="android.permission.ACCESS_WIFI_STATE"></uses-permission>
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO"></uses-permission>
|
||||
<uses-permission android:name="android.permission.INTERNET"></uses-permission>
|
||||
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
|
||||
</manifest>
|
@ -0,0 +1 @@
|
||||
../../../../../resources/alexa/alexa-avs-sample-app/alexa.umdl
|
@ -0,0 +1 @@
|
||||
../../../../../resources/common.res
|
@ -0,0 +1 @@
|
||||
../../../../../resources/ding.wav
|
@ -0,0 +1,52 @@
|
||||
buildscript {
|
||||
repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
dependencies {
|
||||
classpath 'com.android.tools.build:gradle:2.3.0'
|
||||
}
|
||||
}
|
||||
apply plugin: 'android'
|
||||
|
||||
dependencies {
|
||||
compile fileTree(include: '*.jar', dir: 'libs')
|
||||
}
|
||||
|
||||
android {
|
||||
signingConfigs {
|
||||
}
|
||||
compileSdkVersion 25
|
||||
buildToolsVersion '25.0.0'
|
||||
compileOptions.encoding = 'ISO-8859-1'
|
||||
sourceSets {
|
||||
main {
|
||||
manifest.srcFile 'AndroidManifest.xml'
|
||||
java.srcDirs = ['src']
|
||||
resources.srcDirs = ['src']
|
||||
aidl.srcDirs = ['src']
|
||||
renderscript.srcDirs = ['src']
|
||||
res.srcDirs = ['res']
|
||||
assets.srcDirs = ['assets']
|
||||
}
|
||||
|
||||
// Move the tests to tests/java, tests/res, etc...
|
||||
instrumentTest.setRoot('tests')
|
||||
|
||||
// Move the build types to build-types/<type>
|
||||
// For instance, build-types/debug/java, build-types/debug/AndroidManifest.xml, ...
|
||||
// This moves them out of their default location under src/<type>/... which would
|
||||
// conflict with src/ being used by the main source set.
|
||||
// Adding new build types or product flavors should be accompanied
|
||||
// by a similar customization.
|
||||
debug.setRoot('build-types/debug')
|
||||
release.setRoot('build-types/release')
|
||||
}
|
||||
buildTypes {
|
||||
release {
|
||||
}
|
||||
}
|
||||
defaultConfig {
|
||||
}
|
||||
productFlavors {
|
||||
}
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
#Tue Mar 07 14:27:10 PST 2017
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-3.3-all.zip
|
@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## Gradle start up script for UN*X
|
||||
##
|
||||
##############################################################################
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS=""
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=`basename "$0"`
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
|
||||
warn ( ) {
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
die ( ) {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
}
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
case "`uname`" in
|
||||
CYGWIN* )
|
||||
cygwin=true
|
||||
;;
|
||||
Darwin* )
|
||||
darwin=true
|
||||
;;
|
||||
MINGW* )
|
||||
msys=true
|
||||
;;
|
||||
esac
|
||||
|
||||
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
||||
if $cygwin ; then
|
||||
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
||||
fi
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
# Resolve links: $0 may be a link
|
||||
PRG="$0"
|
||||
# Need this for relative symlinks.
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "$PRG"`"/$link"
|
||||
fi
|
||||
done
|
||||
SAVED="`pwd`"
|
||||
cd "`dirname \"$PRG\"`/" >&-
|
||||
APP_HOME="`pwd -P`"
|
||||
cd "$SAVED" >&-
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD="java"
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
||||
MAX_FD_LIMIT=`ulimit -H -n`
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
||||
MAX_FD="$MAX_FD_LIMIT"
|
||||
fi
|
||||
ulimit -n $MAX_FD
|
||||
if [ $? -ne 0 ] ; then
|
||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
||||
fi
|
||||
else
|
||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# For Darwin, add options to specify how the application appears in the dock
|
||||
if $darwin; then
|
||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
||||
fi
|
||||
|
||||
# For Cygwin, switch paths to Windows format before running java
|
||||
if $cygwin ; then
|
||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
||||
|
||||
# We build the pattern for arguments to be converted via cygpath
|
||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
||||
SEP=""
|
||||
for dir in $ROOTDIRSRAW ; do
|
||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
||||
SEP="|"
|
||||
done
|
||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
||||
# Add a user-defined pattern to the cygpath arguments
|
||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
||||
fi
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
i=0
|
||||
for arg in "$@" ; do
|
||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
||||
|
||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
||||
else
|
||||
eval `echo args$i`="\"$arg\""
|
||||
fi
|
||||
i=$((i+1))
|
||||
done
|
||||
case $i in
|
||||
(0) set -- ;;
|
||||
(1) set -- "$args0" ;;
|
||||
(2) set -- "$args0" "$args1" ;;
|
||||
(3) set -- "$args0" "$args1" "$args2" ;;
|
||||
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
||||
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
||||
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
||||
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
||||
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
||||
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
|
||||
function splitJvmOpts() {
|
||||
JVM_OPTS=("$@")
|
||||
}
|
||||
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
|
||||
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
|
||||
|
||||
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
@ -0,0 +1,90 @@
|
||||
@if "%DEBUG%" == "" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS=
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%" == "" set DIRNAME=.
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if "%ERRORLEVEL%" == "0" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto init
|
||||
|
||||
echo.
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||
echo.
|
||||
echo Please set the JAVA_HOME variable in your environment to match the
|
||||
echo location of your Java installation.
|
||||
|
||||
goto fail
|
||||
|
||||
:init
|
||||
@rem Get command-line arguments, handling Windowz variants
|
||||
|
||||
if not "%OS%" == "Windows_NT" goto win9xME_args
|
||||
if "%@eval[2+2]" == "4" goto 4NT_args
|
||||
|
||||
:win9xME_args
|
||||
@rem Slurp the command line arguments.
|
||||
set CMD_LINE_ARGS=
|
||||
set _SKIP=2
|
||||
|
||||
:win9xME_args_slurp
|
||||
if "x%~1" == "x" goto execute
|
||||
|
||||
set CMD_LINE_ARGS=%*
|
||||
goto execute
|
||||
|
||||
:4NT_args
|
||||
@rem Get arguments from the 4NT Shell from JP Software
|
||||
set CMD_LINE_ARGS=%$
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if "%ERRORLEVEL%"=="0" goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
||||
exit /b 1
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
After Width: | Height: | Size: 22 KiB |
@ -0,0 +1,17 @@
|
||||
# This file is automatically generated by Android Tools.
|
||||
# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
|
||||
#
|
||||
# This file must be checked in Version Control Systems.
|
||||
#
|
||||
# To customize properties used by the Ant build system edit
|
||||
# "ant.properties", and override values to adapt the script to your
|
||||
# project structure.
|
||||
#
|
||||
# To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
|
||||
#proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
|
||||
|
||||
# Indicates whether an apk should be generated for each density.
|
||||
split.density=false
|
||||
# Project target.
|
||||
target=android-17
|
||||
apk-configurations=
|
After Width: | Height: | Size: 3.1 KiB |
@ -0,0 +1,44 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="fill_parent"
|
||||
android:orientation="vertical" >
|
||||
<LinearLayout android:id="@+id/bottom"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:orientation="horizontal"
|
||||
android:layout_alignParentBottom="true">
|
||||
<Button
|
||||
android:id="@+id/btn_test1"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_width="fill_parent"
|
||||
android:text="@string/btn1_start"
|
||||
android:layout_weight="1">
|
||||
</Button>
|
||||
<Button
|
||||
android:id="@+id/btn_test2"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_width="fill_parent"
|
||||
android:text="@string/btn2_start"
|
||||
android:layout_weight="1">
|
||||
</Button>
|
||||
</LinearLayout>
|
||||
|
||||
<TextView
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:text="Log"
|
||||
android:textColor="#FF0000"
|
||||
android:textSize="14.5sp"></TextView>
|
||||
|
||||
<ScrollView
|
||||
android:id="@+id/logView"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="fill_parent">
|
||||
<TextView
|
||||
android:id="@+id/log"
|
||||
android:layout_width="fill_parent"
|
||||
android:layout_height="fill_parent" />
|
||||
</ScrollView>
|
||||
|
||||
</LinearLayout>
|
After Width: | Height: | Size: 2.0 KiB |
After Width: | Height: | Size: 1.4 KiB |
After Width: | Height: | Size: 2.6 KiB |
After Width: | Height: | Size: 4.2 KiB |
After Width: | Height: | Size: 5.9 KiB |
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
||||
<resources>
|
||||
<string name="app_name">SnowboyAlexaDemo</string>
|
||||
<string name="btn1_start">Start</string>
|
||||
<string name="btn1_stop">Stop</string>
|
||||
<string name="btn2_start">Play</string>
|
||||
<string name="btn2_stop">Stop</string>
|
||||
</resources>
|
@ -0,0 +1,64 @@
|
||||
package ai.kitt.snowboy;
|
||||
|
||||
import android.content.Context;
|
||||
import android.util.Log;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
public class AppResCopy {
|
||||
private final static String TAG = AppResCopy.class.getSimpleName();
|
||||
private static String envWorkSpace = Constants.DEFAULT_WORK_SPACE;
|
||||
|
||||
private static void copyFilesFromAssets(Context context, String assetsSrcDir, String sdcardDstDir, boolean override) {
|
||||
try {
|
||||
String fileNames[] = context.getAssets().list(assetsSrcDir);
|
||||
if (fileNames.length > 0) {
|
||||
Log.i(TAG, assetsSrcDir +" directory has "+fileNames.length+" files.\n");
|
||||
File dir = new File(sdcardDstDir);
|
||||
if (!dir.exists()) {
|
||||
if (!dir.mkdirs()) {
|
||||
Log.e(TAG, "mkdir failed: "+sdcardDstDir);
|
||||
return;
|
||||
} else {
|
||||
Log.i(TAG, "mkdir ok: "+sdcardDstDir);
|
||||
}
|
||||
} else {
|
||||
Log.w(TAG, sdcardDstDir+" already exists! ");
|
||||
}
|
||||
for (String fileName : fileNames) {
|
||||
copyFilesFromAssets(context,assetsSrcDir + "/" + fileName,sdcardDstDir+"/"+fileName, override);
|
||||
}
|
||||
} else {
|
||||
Log.i(TAG, assetsSrcDir +" is file\n");
|
||||
File outFile = new File(sdcardDstDir);
|
||||
if (outFile.exists()) {
|
||||
if (override) {
|
||||
outFile.delete();
|
||||
Log.e(TAG, "overriding file "+ sdcardDstDir +"\n");
|
||||
} else {
|
||||
Log.e(TAG, "file "+ sdcardDstDir +" already exists. No override.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
InputStream is = context.getAssets().open(assetsSrcDir);
|
||||
FileOutputStream fos = new FileOutputStream(outFile);
|
||||
byte[] buffer = new byte[1024];
|
||||
int byteCount=0;
|
||||
while ((byteCount=is.read(buffer)) != -1) {
|
||||
fos.write(buffer, 0, byteCount);
|
||||
}
|
||||
fos.flush();
|
||||
is.close();
|
||||
fos.close();
|
||||
Log.i(TAG, "copy to "+sdcardDstDir+" ok!");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public static void copyResFromAssetsToSD(Context context) {
|
||||
copyFilesFromAssets(context, Constants.ASSETS_RES_DIR, envWorkSpace+"/", true);
|
||||
}
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
package ai.kitt.snowboy;
|
||||
import java.io.File;
|
||||
import android.os.Environment;
|
||||
|
||||
public class Constants {
|
||||
public static final String ASSETS_RES_DIR = "snowboy";
|
||||
public static final String DEFAULT_WORK_SPACE = Environment.getExternalStorageDirectory().getAbsolutePath() + "/snowboy/";
|
||||
public static final String ACTIVE_UMDL = "alexa.umdl";
|
||||
public static final String ACTIVE_RES = "common.res";
|
||||
public static final String SAVE_AUDIO = Constants.DEFAULT_WORK_SPACE + File.separatorChar + "recording.pcm";
|
||||
public static final int SAMPLE_RATE = 16000;
|
||||
}
|
@ -0,0 +1,252 @@
|
||||
package ai.kitt.snowboy;
|
||||
|
||||
import ai.kitt.snowboy.audio.RecordingThread;
|
||||
import ai.kitt.snowboy.audio.PlaybackThread;
|
||||
|
||||
import android.app.Activity;
|
||||
import android.media.AudioManager;
|
||||
import android.os.Bundle;
|
||||
import android.os.Handler;
|
||||
import android.os.Message;
|
||||
import android.text.Html;
|
||||
import android.view.View;
|
||||
import android.view.View.OnClickListener;
|
||||
import android.widget.Button;
|
||||
import android.widget.ScrollView;
|
||||
import android.widget.TextView;
|
||||
import android.widget.Toast;
|
||||
import android.content.Context;
|
||||
|
||||
import ai.kitt.snowboy.audio.AudioDataSaver;
|
||||
import ai.kitt.snowboy.demo.R;
|
||||
|
||||
|
||||
public class Demo extends Activity {
|
||||
|
||||
private Button record_button;
|
||||
private Button play_button;
|
||||
private TextView log;
|
||||
private ScrollView logView;
|
||||
static String strLog = null;
|
||||
|
||||
private int preVolume = -1;
|
||||
private static long activeTimes = 0;
|
||||
|
||||
private RecordingThread recordingThread;
|
||||
private PlaybackThread playbackThread;
|
||||
|
||||
@Override
|
||||
public void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
|
||||
setContentView(R.layout.main);
|
||||
setUI();
|
||||
|
||||
setProperVolume();
|
||||
|
||||
AppResCopy.copyResFromAssetsToSD(this);
|
||||
|
||||
activeTimes = 0;
|
||||
recordingThread = new RecordingThread(handle, new AudioDataSaver());
|
||||
playbackThread = new PlaybackThread();
|
||||
}
|
||||
|
||||
void showToast(CharSequence msg) {
|
||||
Toast.makeText(this, msg, Toast.LENGTH_SHORT).show();
|
||||
}
|
||||
|
||||
private void setUI() {
|
||||
record_button = (Button) findViewById(R.id.btn_test1);
|
||||
record_button.setOnClickListener(record_button_handle);
|
||||
record_button.setEnabled(true);
|
||||
|
||||
play_button = (Button) findViewById(R.id.btn_test2);
|
||||
play_button.setOnClickListener(play_button_handle);
|
||||
play_button.setEnabled(true);
|
||||
|
||||
log = (TextView)findViewById(R.id.log);
|
||||
logView = (ScrollView)findViewById(R.id.logView);
|
||||
}
|
||||
|
||||
private void setMaxVolume() {
|
||||
AudioManager audioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE);
|
||||
preVolume = audioManager.getStreamVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> preVolume = "+preVolume, "green");
|
||||
int maxVolume = audioManager.getStreamMaxVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> maxVolume = "+maxVolume, "green");
|
||||
audioManager.setStreamVolume(AudioManager.STREAM_MUSIC, maxVolume, 0);
|
||||
int currentVolume = audioManager.getStreamVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> currentVolume = "+currentVolume, "green");
|
||||
}
|
||||
|
||||
private void setProperVolume() {
|
||||
AudioManager audioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE);
|
||||
preVolume = audioManager.getStreamVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> preVolume = "+preVolume, "green");
|
||||
int maxVolume = audioManager.getStreamMaxVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> maxVolume = "+maxVolume, "green");
|
||||
int properVolume = (int) ((float) maxVolume * 0.2);
|
||||
audioManager.setStreamVolume(AudioManager.STREAM_MUSIC, properVolume, 0);
|
||||
int currentVolume = audioManager.getStreamVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> currentVolume = "+currentVolume, "green");
|
||||
}
|
||||
|
||||
private void restoreVolume() {
|
||||
if(preVolume>=0) {
|
||||
AudioManager audioManager = (AudioManager) getSystemService(Context.AUDIO_SERVICE);
|
||||
audioManager.setStreamVolume(AudioManager.STREAM_MUSIC, preVolume, 0);
|
||||
updateLog(" ----> set preVolume = "+preVolume, "green");
|
||||
int currentVolume = audioManager.getStreamVolume(AudioManager.STREAM_MUSIC);
|
||||
updateLog(" ----> currentVolume = "+currentVolume, "green");
|
||||
}
|
||||
}
|
||||
|
||||
private void startRecording() {
|
||||
recordingThread.startRecording();
|
||||
updateLog(" ----> recording started ...", "green");
|
||||
record_button.setText(R.string.btn1_stop);
|
||||
}
|
||||
|
||||
private void stopRecording() {
|
||||
recordingThread.stopRecording();
|
||||
updateLog(" ----> recording stopped ", "green");
|
||||
record_button.setText(R.string.btn1_start);
|
||||
}
|
||||
|
||||
private void startPlayback() {
|
||||
updateLog(" ----> playback started ...", "green");
|
||||
play_button.setText(R.string.btn2_stop);
|
||||
// (new PcmPlayer()).playPCM();
|
||||
playbackThread.startPlayback();
|
||||
}
|
||||
|
||||
private void stopPlayback() {
|
||||
updateLog(" ----> playback stopped ", "green");
|
||||
play_button.setText(R.string.btn2_start);
|
||||
playbackThread.stopPlayback();
|
||||
}
|
||||
|
||||
private void sleep() {
|
||||
try { Thread.sleep(500);
|
||||
} catch (Exception e) {}
|
||||
}
|
||||
|
||||
private OnClickListener record_button_handle = new OnClickListener() {
|
||||
// @Override
|
||||
public void onClick(View arg0) {
|
||||
if(record_button.getText().equals(getResources().getString(R.string.btn1_start))) {
|
||||
stopPlayback();
|
||||
sleep();
|
||||
startRecording();
|
||||
} else {
|
||||
stopRecording();
|
||||
sleep();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
private OnClickListener play_button_handle = new OnClickListener() {
|
||||
// @Override
|
||||
public void onClick(View arg0) {
|
||||
if (play_button.getText().equals(getResources().getString(R.string.btn2_start))) {
|
||||
stopRecording();
|
||||
sleep();
|
||||
startPlayback();
|
||||
} else {
|
||||
stopPlayback();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public Handler handle = new Handler() {
|
||||
@Override
|
||||
public void handleMessage(Message msg) {
|
||||
MsgEnum message = MsgEnum.getMsgEnum(msg.what);
|
||||
switch(message) {
|
||||
case MSG_ACTIVE:
|
||||
activeTimes++;
|
||||
updateLog(" ----> Detected " + activeTimes + " times", "green");
|
||||
// Toast.makeText(Demo.this, "Active "+activeTimes, Toast.LENGTH_SHORT).show();
|
||||
showToast("Active "+activeTimes);
|
||||
break;
|
||||
case MSG_INFO:
|
||||
updateLog(" ----> "+message);
|
||||
break;
|
||||
case MSG_VAD_SPEECH:
|
||||
updateLog(" ----> normal voice", "blue");
|
||||
break;
|
||||
case MSG_VAD_NOSPEECH:
|
||||
updateLog(" ----> no speech", "blue");
|
||||
break;
|
||||
case MSG_ERROR:
|
||||
updateLog(" ----> " + msg.toString(), "red");
|
||||
break;
|
||||
default:
|
||||
super.handleMessage(msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public void updateLog(final String text) {
|
||||
|
||||
log.post(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
if (currLogLineNum >= MAX_LOG_LINE_NUM) {
|
||||
int st = strLog.indexOf("<br>");
|
||||
strLog = strLog.substring(st+4);
|
||||
} else {
|
||||
currLogLineNum++;
|
||||
}
|
||||
String str = "<font color='white'>"+text+"</font>"+"<br>";
|
||||
strLog = (strLog == null || strLog.length() == 0) ? str : strLog + str;
|
||||
log.setText(Html.fromHtml(strLog));
|
||||
}
|
||||
});
|
||||
logView.post(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
logView.fullScroll(ScrollView.FOCUS_DOWN);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static int MAX_LOG_LINE_NUM = 200;
|
||||
static int currLogLineNum = 0;
|
||||
|
||||
public void updateLog(final String text, final String color) {
|
||||
log.post(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
if(currLogLineNum>=MAX_LOG_LINE_NUM) {
|
||||
int st = strLog.indexOf("<br>");
|
||||
strLog = strLog.substring(st+4);
|
||||
} else {
|
||||
currLogLineNum++;
|
||||
}
|
||||
String str = "<font color='"+color+"'>"+text+"</font>"+"<br>";
|
||||
strLog = (strLog == null || strLog.length() == 0) ? str : strLog + str;
|
||||
log.setText(Html.fromHtml(strLog));
|
||||
}
|
||||
});
|
||||
logView.post(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
logView.fullScroll(ScrollView.FOCUS_DOWN);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void emptyLog() {
|
||||
strLog = null;
|
||||
log.setText("");
|
||||
}
|
||||
|
||||
/**
 * Restores the volume and stops the recording thread before delegating to
 * the superclass.
 */
@Override
public void onDestroy() {
    restoreVolume();
    recordingThread.stopRecording();
    super.onDestroy();
}
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package ai.kitt.snowboy;
|
||||
|
||||
/**
 * Message codes exchanged through Handler messages. The ordinal of each
 * constant is used as the integer message code.
 */
public enum MsgEnum {
    MSG_VAD_END,
    MSG_VAD_NOSPEECH,
    MSG_VAD_SPEECH,
    MSG_VOLUME_NOTIFY,
    MSG_WAV_DATAINFO,
    MSG_RECORD_START,
    MSG_RECORD_STOP,
    MSG_ACTIVE,
    MSG_ERROR,
    MSG_INFO;

    // Constants in declaration order, looked up by ordinal.
    private static final MsgEnum[] BY_ORDINAL = values();

    /**
     * Returns the constant whose ordinal is {@code i}.
     *
     * @param i ordinal of the wanted constant
     * @return the matching constant
     * @throws ArrayIndexOutOfBoundsException if {@code i} is not a valid ordinal
     */
    public static MsgEnum getMsgEnum(int i) {
        return BY_ORDINAL[i];
    }
}
|
@ -0,0 +1 @@
|
||||
../../../../../../../swig/Android/java/ai/kitt/snowboy/SnowboyDetect.java
|
@ -0,0 +1 @@
|
||||
../../../../../../../swig/Android/java/ai/kitt/snowboy/SnowboyVad.java
|
@ -0,0 +1,7 @@
|
||||
package ai.kitt.snowboy.audio;
|
||||
|
||||
/**
 * Callback interface for consumers of captured audio.
 */
public interface AudioDataReceivedListener {
    /** Called before audio data starts being delivered. */
    void start();

    /**
     * Called with a buffer of captured audio.
     *
     * @param data   buffer holding the audio bytes
     * @param length number of bytes reported for {@code data}
     */
    void onAudioDataReceived(byte[] data, int length);

    /** Called after audio delivery has ended. */
    void stop();
}
|
@ -0,0 +1,89 @@
|
||||
package ai.kitt.snowboy.audio;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import android.util.Log;
|
||||
|
||||
import ai.kitt.snowboy.Constants;
|
||||
|
||||
public class AudioDataSaver implements AudioDataReceivedListener {
|
||||
|
||||
private static final String TAG = AudioDataSaver.class.getSimpleName();
|
||||
|
||||
// file size of when to delete and create a new recording file
|
||||
private final float MAX_RECORDING_FILE_SIZE_IN_MB = 50f;
|
||||
|
||||
// initial file size of recording file
|
||||
private final float INITIAL_FILE_SIZE_IN_MB = 1.3f;
|
||||
|
||||
// converted max file size
|
||||
private final float MAX_RECORDING_FILE_SIZE_IN_BYTES
|
||||
= (MAX_RECORDING_FILE_SIZE_IN_MB - INITIAL_FILE_SIZE_IN_MB) * 1024 * 1024;
|
||||
|
||||
// keeps track of recording file size
|
||||
private int recordingFileSizeCounterInBytes = 0;
|
||||
|
||||
private File saveFile = null;
|
||||
private DataOutputStream dataOutputStreamInstance = null;
|
||||
|
||||
public AudioDataSaver() {
|
||||
saveFile = new File(Constants.SAVE_AUDIO);
|
||||
Log.e(TAG, Constants.SAVE_AUDIO);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() {
|
||||
if (null != saveFile) {
|
||||
if (saveFile.exists()) {
|
||||
saveFile.delete();
|
||||
}
|
||||
try {
|
||||
saveFile.createNewFile();
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO Exception on creating audio file " + saveFile.toString(), e);
|
||||
}
|
||||
|
||||
try {
|
||||
BufferedOutputStream bufferedStreamInstance = new BufferedOutputStream(
|
||||
new FileOutputStream(this.saveFile));
|
||||
dataOutputStreamInstance = new DataOutputStream(bufferedStreamInstance);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new IllegalStateException("Cannot Open File", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onAudioDataReceived(byte[] data, int length) {
|
||||
try {
|
||||
if (null != dataOutputStreamInstance) {
|
||||
if (recordingFileSizeCounterInBytes >= MAX_RECORDING_FILE_SIZE_IN_BYTES) {
|
||||
stop();
|
||||
start();
|
||||
recordingFileSizeCounterInBytes = 0;
|
||||
}
|
||||
dataOutputStreamInstance.write(data, 0, length);
|
||||
recordingFileSizeCounterInBytes += length;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO Exception on saving audio file " + saveFile.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
if (null != dataOutputStreamInstance) {
|
||||
try {
|
||||
dataOutputStreamInstance.close();
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO Exception on finishing saving audio file " + saveFile.toString(), e);
|
||||
}
|
||||
Log.e(TAG, "Recording saved to " + saveFile.toString());
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,115 @@
|
||||
package ai.kitt.snowboy.audio;
|
||||
|
||||
import android.media.AudioFormat;
|
||||
import android.media.AudioManager;
|
||||
import android.media.AudioTrack;
|
||||
import android.util.Log;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.ShortBuffer;
|
||||
|
||||
import ai.kitt.snowboy.Constants;
|
||||
|
||||
public class PlaybackThread {
|
||||
private static final String TAG = PlaybackThread.class.getSimpleName();
|
||||
|
||||
public PlaybackThread() {
|
||||
}
|
||||
|
||||
private Thread thread;
|
||||
private boolean shouldContinue;
|
||||
protected AudioTrack audioTrack;
|
||||
|
||||
public boolean playing() {
|
||||
return thread != null;
|
||||
}
|
||||
|
||||
public void startPlayback() {
|
||||
if (thread != null)
|
||||
return;
|
||||
|
||||
// Start streaming in a thread
|
||||
shouldContinue = true;
|
||||
thread = new Thread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
play();
|
||||
}
|
||||
});
|
||||
thread.start();
|
||||
}
|
||||
|
||||
public void stopPlayback() {
|
||||
if (thread == null)
|
||||
return;
|
||||
|
||||
shouldContinue = false;
|
||||
relaseAudioTrack();
|
||||
thread = null;
|
||||
}
|
||||
|
||||
protected void relaseAudioTrack() {
|
||||
if (audioTrack != null) {
|
||||
try {
|
||||
audioTrack.release();
|
||||
} catch (Exception e) {}
|
||||
}
|
||||
}
|
||||
|
||||
public short[] readPCM() {
|
||||
try {
|
||||
File recordFile = new File(Constants.SAVE_AUDIO);
|
||||
InputStream inputStream = new FileInputStream(recordFile);
|
||||
BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
|
||||
DataInputStream dataInputStream = new DataInputStream(bufferedInputStream);
|
||||
|
||||
byte[] audioData = new byte[(int)recordFile.length()];
|
||||
|
||||
dataInputStream.read(audioData);
|
||||
dataInputStream.close();
|
||||
Log.v(TAG, "audioData size: " + audioData.length);
|
||||
|
||||
ShortBuffer sb = ByteBuffer.wrap(audioData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer();
|
||||
short[] samples = new short[sb.limit() - sb.position()];
|
||||
sb.get(samples);
|
||||
return samples;
|
||||
} catch (FileNotFoundException e) {
|
||||
Log.e(TAG, "Cannot find saved audio file", e);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO Exception on saved audio file", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void play() {
|
||||
short[] samples = this.readPCM();
|
||||
int shortSizeInBytes = Short.SIZE / Byte.SIZE;
|
||||
int bufferSizeInBytes = samples.length * shortSizeInBytes;
|
||||
Log.v(TAG, "shortSizeInBytes: " + shortSizeInBytes + " bufferSizeInBytes: " + bufferSizeInBytes);
|
||||
|
||||
audioTrack = new AudioTrack(
|
||||
AudioManager.STREAM_MUSIC,
|
||||
Constants.SAMPLE_RATE,
|
||||
AudioFormat.CHANNEL_OUT_MONO,
|
||||
AudioFormat.ENCODING_PCM_16BIT,
|
||||
bufferSizeInBytes,
|
||||
AudioTrack.MODE_STREAM);
|
||||
|
||||
if (audioTrack.getState() == AudioTrack.STATE_INITIALIZED) {
|
||||
audioTrack.play();
|
||||
audioTrack.write(samples, 0, samples.length);
|
||||
Log.v(TAG, "Audio playback started");
|
||||
}
|
||||
|
||||
if (!shouldContinue) {
|
||||
relaseAudioTrack();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,152 @@
|
||||
package ai.kitt.snowboy.audio;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import ai.kitt.snowboy.Constants;
|
||||
import ai.kitt.snowboy.MsgEnum;
|
||||
import android.media.AudioFormat;
|
||||
import android.media.AudioRecord;
|
||||
import android.media.MediaRecorder;
|
||||
import android.media.MediaPlayer;
|
||||
import android.os.Handler;
|
||||
import android.os.Message;
|
||||
import android.util.Log;
|
||||
|
||||
import ai.kitt.snowboy.SnowboyDetect;
|
||||
|
||||
public class RecordingThread {
|
||||
static { System.loadLibrary("snowboy-detect-android"); }
|
||||
|
||||
private static final String TAG = RecordingThread.class.getSimpleName();
|
||||
|
||||
private static final String ACTIVE_RES = Constants.ACTIVE_RES;
|
||||
private static final String ACTIVE_UMDL = Constants.ACTIVE_UMDL;
|
||||
|
||||
private boolean shouldContinue;
|
||||
private AudioDataReceivedListener listener = null;
|
||||
private Handler handler = null;
|
||||
private Thread thread;
|
||||
|
||||
private static String strEnvWorkSpace = Constants.DEFAULT_WORK_SPACE;
|
||||
private String activeModel = strEnvWorkSpace+ACTIVE_UMDL;
|
||||
private String commonRes = strEnvWorkSpace+ACTIVE_RES;
|
||||
|
||||
private SnowboyDetect detector = new SnowboyDetect(commonRes, activeModel);
|
||||
private MediaPlayer player = new MediaPlayer();
|
||||
|
||||
public RecordingThread(Handler handler, AudioDataReceivedListener listener) {
|
||||
this.handler = handler;
|
||||
this.listener = listener;
|
||||
|
||||
detector.SetSensitivity("0.6");
|
||||
detector.SetAudioGain(1);
|
||||
detector.ApplyFrontend(true);
|
||||
try {
|
||||
player.setDataSource(strEnvWorkSpace+"ding.wav");
|
||||
player.prepare();
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Playing ding sound error", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void sendMessage(MsgEnum what, Object obj){
|
||||
if (null != handler) {
|
||||
Message msg = handler.obtainMessage(what.ordinal(), obj);
|
||||
handler.sendMessage(msg);
|
||||
}
|
||||
}
|
||||
|
||||
public void startRecording() {
|
||||
if (thread != null)
|
||||
return;
|
||||
|
||||
shouldContinue = true;
|
||||
thread = new Thread(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
record();
|
||||
}
|
||||
});
|
||||
thread.start();
|
||||
}
|
||||
|
||||
public void stopRecording() {
|
||||
if (thread == null)
|
||||
return;
|
||||
|
||||
shouldContinue = false;
|
||||
thread = null;
|
||||
}
|
||||
|
||||
private void record() {
|
||||
Log.v(TAG, "Start");
|
||||
android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);
|
||||
|
||||
// Buffer size in bytes: for 0.1 second of audio
|
||||
int bufferSize = (int)(Constants.SAMPLE_RATE * 0.1 * 2);
|
||||
if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) {
|
||||
bufferSize = Constants.SAMPLE_RATE * 2;
|
||||
}
|
||||
|
||||
byte[] audioBuffer = new byte[bufferSize];
|
||||
AudioRecord record = new AudioRecord(
|
||||
MediaRecorder.AudioSource.DEFAULT,
|
||||
Constants.SAMPLE_RATE,
|
||||
AudioFormat.CHANNEL_IN_MONO,
|
||||
AudioFormat.ENCODING_PCM_16BIT,
|
||||
bufferSize);
|
||||
|
||||
if (record.getState() != AudioRecord.STATE_INITIALIZED) {
|
||||
Log.e(TAG, "Audio Record can't initialize!");
|
||||
return;
|
||||
}
|
||||
record.startRecording();
|
||||
if (null != listener) {
|
||||
listener.start();
|
||||
}
|
||||
Log.v(TAG, "Start recording");
|
||||
|
||||
long shortsRead = 0;
|
||||
detector.Reset();
|
||||
while (shouldContinue) {
|
||||
record.read(audioBuffer, 0, audioBuffer.length);
|
||||
|
||||
if (null != listener) {
|
||||
listener.onAudioDataReceived(audioBuffer, audioBuffer.length);
|
||||
}
|
||||
|
||||
// Converts to short array.
|
||||
short[] audioData = new short[audioBuffer.length / 2];
|
||||
ByteBuffer.wrap(audioBuffer).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(audioData);
|
||||
|
||||
shortsRead += audioData.length;
|
||||
|
||||
// Snowboy hotword detection.
|
||||
int result = detector.RunDetection(audioData, audioData.length);
|
||||
|
||||
if (result == -2) {
|
||||
// post a higher CPU usage:
|
||||
// sendMessage(MsgEnum.MSG_VAD_NOSPEECH, null);
|
||||
} else if (result == -1) {
|
||||
sendMessage(MsgEnum.MSG_ERROR, "Unknown Detection Error");
|
||||
} else if (result == 0) {
|
||||
// post a higher CPU usage:
|
||||
// sendMessage(MsgEnum.MSG_VAD_SPEECH, null);
|
||||
} else if (result > 0) {
|
||||
sendMessage(MsgEnum.MSG_ACTIVE, null);
|
||||
Log.i("Snowboy: ", "Hotword " + Integer.toString(result) + " detected!");
|
||||
player.start();
|
||||
}
|
||||
}
|
||||
|
||||
record.stop();
|
||||
record.release();
|
||||
|
||||
if (null != listener) {
|
||||
listener.stop();
|
||||
}
|
||||
Log.v(TAG, String.format("Recording stopped. Samples read: %d", shortsRead));
|
||||
}
|
||||
}
|
@ -0,0 +1 @@
|
||||
../../../../../../../swig/Android/java/ai/kitt/snowboy/snowboyJNI.java
|
@ -0,0 +1 @@
|
||||
../../../../../swig/Android/jniLibs
|
@ -0,0 +1,22 @@
|
||||
# Builds the C++ demos. Compiler, flags and library paths come from demo.mk.
include demo.mk

BINFILES = demo demo2

# These targets do not produce files of the same name.
.PHONY: all clean depend

all: $(BINFILES)

# Build a missing static library by running make in its directory.
%.a:
	$(MAKE) -C ${@D} ${@F}

# No recipe here: the binaries are built by make's implicit rule, with the
# libraries passed along as prerequisites.
$(BINFILES): $(PORTAUDIOLIBS) $(SNOWBOYDETECTLIBFILE)

$(PORTAUDIOLIBS):
	@-./install_portaudio.sh

clean:
	-rm -f *.o *.a $(BINFILES)

depend:
	-$(CXX) -M $(CXXFLAGS) *.cc > .depend.mk

# Putting "-" so no error messages.
-include .depend.mk
|
@ -0,0 +1,238 @@
|
||||
// example/C++/demo.cc
|
||||
|
||||
// Copyright 2016 KITT.AI (author: Guoguo Chen)
|
||||
|
||||
#include <cassert>
|
||||
#include <csignal>
|
||||
#include <iostream>
|
||||
#include <pa_ringbuffer.h>
|
||||
#include <pa_util.h>
|
||||
#include <portaudio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "include/snowboy-detect.h"
|
||||
|
||||
int PortAudioCallback(const void* input,
|
||||
void* output,
|
||||
unsigned long frame_count,
|
||||
const PaStreamCallbackTimeInfo* time_info,
|
||||
PaStreamCallbackFlags status_flags,
|
||||
void* user_data);
|
||||
|
||||
class PortAudioWrapper {
|
||||
public:
|
||||
// Constructor.
|
||||
PortAudioWrapper(int sample_rate, int num_channels, int bits_per_sample) {
|
||||
num_lost_samples_ = 0;
|
||||
min_read_samples_ = sample_rate * 0.1;
|
||||
Init(sample_rate, num_channels, bits_per_sample);
|
||||
}
|
||||
|
||||
// Reads data from ring buffer.
|
||||
template<typename T>
|
||||
void Read(std::vector<T>* data) {
|
||||
assert(data != NULL);
|
||||
|
||||
// Checks ring buffer overflow.
|
||||
if (num_lost_samples_ > 0) {
|
||||
std::cerr << "Lost " << num_lost_samples_ << " samples due to ring"
|
||||
<< " buffer overflow." << std::endl;
|
||||
num_lost_samples_ = 0;
|
||||
}
|
||||
|
||||
ring_buffer_size_t num_available_samples = 0;
|
||||
while (true) {
|
||||
num_available_samples =
|
||||
PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_);
|
||||
if (num_available_samples >= min_read_samples_) {
|
||||
break;
|
||||
}
|
||||
Pa_Sleep(5);
|
||||
}
|
||||
|
||||
// Reads data.
|
||||
num_available_samples = PaUtil_GetRingBufferReadAvailable(&pa_ringbuffer_);
|
||||
data->resize(num_available_samples);
|
||||
ring_buffer_size_t num_read_samples = PaUtil_ReadRingBuffer(
|
||||
&pa_ringbuffer_, data->data(), num_available_samples);
|
||||
if (num_read_samples != num_available_samples) {
|
||||
std::cerr << num_available_samples << " samples were available, but "
|
||||
<< "only " << num_read_samples << " samples were read." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
int Callback(const void* input, void* output,
|
||||
unsigned long frame_count,
|
||||
const PaStreamCallbackTimeInfo* time_info,
|
||||
PaStreamCallbackFlags status_flags) {
|
||||
// Input audio.
|
||||
ring_buffer_size_t num_written_samples =
|
||||
PaUtil_WriteRingBuffer(&pa_ringbuffer_, input, frame_count);
|
||||
num_lost_samples_ += frame_count - num_written_samples;
|
||||
return paContinue;
|
||||
}
|
||||
|
||||
~PortAudioWrapper() {
|
||||
Pa_StopStream(pa_stream_);
|
||||
Pa_CloseStream(pa_stream_);
|
||||
Pa_Terminate();
|
||||
PaUtil_FreeMemory(ringbuffer_);
|
||||
}
|
||||
|
||||
private:
|
||||
// Initialization.
|
||||
bool Init(int sample_rate, int num_channels, int bits_per_sample) {
|
||||
// Allocates ring buffer memory.
|
||||
int ringbuffer_size = 16384;
|
||||
ringbuffer_ = static_cast<char*>(
|
||||
PaUtil_AllocateMemory(bits_per_sample / 8 * ringbuffer_size));
|
||||
if (ringbuffer_ == NULL) {
|
||||
std::cerr << "Fail to allocate memory for ring buffer." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initializes PortAudio ring buffer.
|
||||
ring_buffer_size_t rb_init_ans =
|
||||
PaUtil_InitializeRingBuffer(&pa_ringbuffer_, bits_per_sample / 8,
|
||||
ringbuffer_size, ringbuffer_);
|
||||
if (rb_init_ans == -1) {
|
||||
std::cerr << "Ring buffer size is not power of 2." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initializes PortAudio.
|
||||
PaError pa_init_ans = Pa_Initialize();
|
||||
if (pa_init_ans != paNoError) {
|
||||
std::cerr << "Fail to initialize PortAudio, error message is \""
|
||||
<< Pa_GetErrorText(pa_init_ans) << "\"" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
PaError pa_open_ans;
|
||||
if (bits_per_sample == 8) {
|
||||
pa_open_ans = Pa_OpenDefaultStream(
|
||||
&pa_stream_, num_channels, 0, paUInt8, sample_rate,
|
||||
paFramesPerBufferUnspecified, PortAudioCallback, this);
|
||||
} else if (bits_per_sample == 16) {
|
||||
pa_open_ans = Pa_OpenDefaultStream(
|
||||
&pa_stream_, num_channels, 0, paInt16, sample_rate,
|
||||
paFramesPerBufferUnspecified, PortAudioCallback, this);
|
||||
} else if (bits_per_sample == 32) {
|
||||
pa_open_ans = Pa_OpenDefaultStream(
|
||||
&pa_stream_, num_channels, 0, paInt32, sample_rate,
|
||||
paFramesPerBufferUnspecified, PortAudioCallback, this);
|
||||
} else {
|
||||
std::cerr << "Unsupported BitsPerSample: " << bits_per_sample
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
if (pa_open_ans != paNoError) {
|
||||
std::cerr << "Fail to open PortAudio stream, error message is \""
|
||||
<< Pa_GetErrorText(pa_open_ans) << "\"" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
PaError pa_stream_start_ans = Pa_StartStream(pa_stream_);
|
||||
if (pa_stream_start_ans != paNoError) {
|
||||
std::cerr << "Fail to start PortAudio stream, error message is \""
|
||||
<< Pa_GetErrorText(pa_stream_start_ans) << "\"" << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
// Pointer to the ring buffer memory.
|
||||
char* ringbuffer_;
|
||||
|
||||
// Ring buffer wrapper used in PortAudio.
|
||||
PaUtilRingBuffer pa_ringbuffer_;
|
||||
|
||||
// Pointer to PortAudio stream.
|
||||
PaStream* pa_stream_;
|
||||
|
||||
// Number of lost samples at each Read() due to ring buffer overflow.
|
||||
int num_lost_samples_;
|
||||
|
||||
// Wait for this number of samples in each Read() call.
|
||||
int min_read_samples_;
|
||||
};
|
||||
|
||||
int PortAudioCallback(const void* input,
|
||||
void* output,
|
||||
unsigned long frame_count,
|
||||
const PaStreamCallbackTimeInfo* time_info,
|
||||
PaStreamCallbackFlags status_flags,
|
||||
void* user_data) {
|
||||
PortAudioWrapper* pa_wrapper = reinterpret_cast<PortAudioWrapper*>(user_data);
|
||||
pa_wrapper->Callback(input, output, frame_count, time_info, status_flags);
|
||||
return paContinue;
|
||||
}
|
||||
|
||||
// Signal handler: reports the received signal number on std::cerr and ends
// the process with exit status 0.
// NOTE(review): stream output and exit() are not async-signal-safe; this is
// presumably acceptable for a demo, but confirm before reusing elsewhere.
void SignalHandler(int signal){
  std::cerr << "Caught signal " << signal << ", terminating..." << std::endl;
  exit(0);
}
|
||||
|
||||
// Demo entry point: captures audio with PortAudio and runs Snowboy hotword
// detection on it until the process is interrupted. Takes no arguments; when
// any are given, the usage text is printed and the program exits with 1.
int main(int argc, char* argv[]) {
  std::string usage =
      "Example that shows how to use Snowboy in C++. Parameters are\n"
      "hard-coded in the parameter section. Please check the source code for\n"
      "more details. Audio is captured by PortAudio.\n"
      "\n"
      "To run the example:\n"
      "  ./demo\n";

  // Checks the command.
  if (argc > 1) {
    std::cerr << usage;
    exit(1);
  }

  // Configures signal handling (SIGINT is routed to SignalHandler).
  struct sigaction sig_int_handler;
  sig_int_handler.sa_handler = SignalHandler;
  sigemptyset(&sig_int_handler.sa_mask);
  sig_int_handler.sa_flags = 0;
  sigaction(SIGINT, &sig_int_handler, NULL);

  // Parameter section.
  // If you have multiple hotword models (e.g., 2), you should set
  // <model_filename> and <sensitivity_str> as follows:
  //   model_filename =
  //     "resources/models/snowboy.umdl,resources/models/smart_mirror.umdl";
  //   sensitivity_str = "0.5,0.5";
  std::string resource_filename = "resources/common.res";
  std::string model_filename = "resources/models/snowboy.umdl";
  std::string sensitivity_str = "0.5";
  float audio_gain = 1;
  bool apply_frontend = false;

  // Initializes Snowboy detector.
  snowboy::SnowboyDetect detector(resource_filename, model_filename);
  detector.SetSensitivity(sensitivity_str);
  detector.SetAudioGain(audio_gain);
  detector.ApplyFrontend(apply_frontend);

  // Initializes PortAudio. You may use other tools to capture the audio.
  PortAudioWrapper pa_wrapper(detector.SampleRate(),
                              detector.NumChannels(), detector.BitsPerSample());

  // Runs the detection.
  // Note: I hard-coded <int16_t> as data type because detector.BitsPerSample()
  //       returns 16.
  std::cout << "Listening... Press Ctrl+C to exit" << std::endl;
  std::vector<int16_t> data;
  while (true) {
    // Blocks until PortAudioWrapper::Read has enough samples to hand over.
    pa_wrapper.Read(&data);
    if (data.size() != 0) {
      int result = detector.RunDetection(data.data(), data.size());
      // Only positive results are reported as hotword detections.
      if (result > 0) {
        std::cout << "Hotword " << result << " detected!" << std::endl;
      }
    }
  }

  // Not reached: the loop above only ends through SignalHandler's exit().
  return 0;
}
|
@ -0,0 +1,50 @@
|
||||
# Shared build settings for the demos: selects the compiler, flags, link
# libraries and the prebuilt Snowboy library according to the host platform.
TOPDIR := ../../
DYNAMIC := True
# Recursively expanded, so CC follows whatever CXX is set to further down.
CC = $(CXX)
CXX :=
LDFLAGS :=
LDLIBS :=
# Location of the locally built PortAudio headers and static library.
PORTAUDIOINC := portaudio/install/include
PORTAUDIOLIBS := portaudio/install/lib/libportaudio.a

CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0

ifeq ($(DYNAMIC), True)
  CXXFLAGS += -fPIC
endif

# Enable SSE flags when the machine name starts with "x86".
ifeq ($(shell uname -m | cut -c 1-3), x86)
  CXXFLAGS += -msse -msse2
endif

ifeq ($(shell uname), Darwin)
  # By default Mac uses clang++ as g++, but people may have changed their
  # default configuration.
  CXX := clang++
  CXXFLAGS += -I$(TOPDIR) -Wall -Wno-sign-compare -Winit-self \
      -DHAVE_POSIX_MEMALIGN -DHAVE_CLAPACK -I$(PORTAUDIOINC)
  LDLIBS += -ldl -lm -framework Accelerate -framework CoreAudio \
      -framework AudioToolbox -framework AudioUnit -framework CoreServices \
      $(PORTAUDIOLIBS)
  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/osx/libsnowboy-detect.a
else ifeq ($(shell uname), Linux)
  CXX := g++
  CXXFLAGS += -I$(TOPDIR) -std=c++0x -Wall -Wno-sign-compare \
      -Wno-unused-local-typedefs -Winit-self -rdynamic \
      -DHAVE_POSIX_MEMALIGN -I$(PORTAUDIOINC)
  LDLIBS += -ldl -lm -Wl,-Bstatic -Wl,-Bdynamic -lrt -lpthread $(PORTAUDIOLIBS)\
      -L/usr/lib/atlas-base -lf77blas -lcblas -llapack_atlas -latlas -lasound
  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/ubuntu64/libsnowboy-detect.a
  # Machine names containing "arm" use the Raspberry Pi library instead.
  ifneq (,$(findstring arm,$(shell uname -m)))
    SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/rpi/libsnowboy-detect.a
  endif
endif

# Suppress clang warnings...
COMPILER = $(shell $(CXX) -v 2>&1 )
ifeq ($(findstring clang,$(COMPILER)), clang)
  CXXFLAGS += -Wno-mismatched-tags -Wno-c++11-extensions
endif

# Set optimization level.
CXXFLAGS += -O3
|
@ -0,0 +1,150 @@
|
||||
#include <iostream>
|
||||
#include "include/snowboy-detect.h"
|
||||
#include "portaudio.h"
|
||||
|
||||
#define resource_filename "resources/common.res"
|
||||
#define model_filename "resources/models/snowboy.umdl"
|
||||
#define sensitivity_str "0.5"
|
||||
#define audio_gain 1.0
|
||||
#define apply_frontend false
|
||||
|
||||
// In-memory image of a canonical 44-byte PCM WAV header. readWavHeader()
// reads the first 34 bytes of the file straight into this struct, so the
// field order and sizes must match the on-disk layout.
struct wavHeader { //44 byte HEADER only
  char  RIFF[4];       // "RIFF"
  int   RIFFsize;      // size field of the RIFF chunk
  char  fmt[8];        // "WAVEfmt "
  int   fmtSize;       // size of the fmt chunk (14, 16 or 18 are handled)
  short fmtTag;        // format tag; only 1 (PCM) is accepted
  short nchan;         // number of channels
  int   fs;            // sample rate
  int   avgBps;        // average bytes per second
  short nBlockAlign;   // block alignment
  short bps;           // bits per sample
  char  data[4];       // "data"
  int   datasize;      // size of the audio data in bytes
};

// Reports a fatal header problem: closes the file, prints <message> and
// exits with status 1. The file must not be touched after it is closed.
static void wavHeaderFail(FILE *fi, const char *message) {
  fclose(fi);
  std::cerr << message << std::endl;
  exit(1);
}

void readWavHeader(wavHeader *wavhdr, FILE *fi) {
  //=====================================================
  // Reads the WAV file header considering the following restrictions:
  // - format tag needs to be 1=PCM (no encoding)
  // - <data chunk> should be immediately before the data bytes
  //   (it should not contain chunks after 'data')
  // On return <wavhdr> is filled in and the file position is at the
  // beginning of the audio data. On a malformed or truncated header the
  // file is closed, a message is printed and the program exits with 1.

  char *tag = (char *)wavhdr;
  if (fread(wavhdr, 34, 1, fi) != 1) {
    wavHeaderFail(fi, "Bad WAV header !");
  }
  // Starting tag should be "RIFF".
  if (tag[0] != 'R' || tag[1] != 'I' || tag[2] != 'F' || tag[3] != 'F') {
    wavHeaderFail(fi, "NO 'RIFF'.");
  }
  if (wavhdr->fmtTag != 1) {
    wavHeaderFail(fi, "WAV file has encoded data or it is WAVEFORMATEXTENSIBLE.");
  }
  // A 14-byte fmt chunk has no bits-per-sample field; assume 16.
  if (wavhdr->fmtSize == 14) {
    wavhdr->bps = 16;
  }
  if (wavhdr->fmtSize >= 16) {
    if (fread(&wavhdr->bps, 2, 1, fi) != 1) {
      wavHeaderFail(fi, "Bad WAV header !");
    }
  }
  // An 18-byte fmt chunk carries two extra bytes that are read and discarded.
  if (wavhdr->fmtSize == 18) {
    short discarded;
    if (fread(&discarded, 2, 1, fi) != 1) {
      wavHeaderFail(fi, "Bad WAV header !");
    }
  }

  tag += 36;  // points to wavhdr->data
  // The data chunk tag should be here.
  if (fread(tag, 4, 1, fi) != 1) {
    wavHeaderFail(fi, "Bad WAV header !");
  }
  // Otherwise keep scanning, four bytes at a time, until "data" is found.
  while (tag[0] != 'd' || tag[1] != 'a' || tag[2] != 't' || tag[3] != 'a') {
    if (fread(tag, 4, 1, fi) != 1 || ftell(fi) >= long(wavhdr->RIFFsize)) {
      wavHeaderFail(fi, "Bad WAV header !");
    }
  }

  // Data size.
  if (fread(&wavhdr->datasize, 4, 1, fi) != 1) {
    wavHeaderFail(fi, "Bad WAV header !");
  }
  // Assuming that header ends here.
  // From here until the end it is audio data
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
std::string usage =
|
||||
"C++ demo that shows how to use snowboy. In this examle user can read\n"
|
||||
"the audio data from a file.\n"
|
||||
"\n"
|
||||
"Atention reading from a file: this software is for simulation/test\n"
|
||||
"only. You need to take precautions when loading a file into the\n"
|
||||
"memory.\n"
|
||||
"\n"
|
||||
"To run the example:\n"
|
||||
" ./demo2 [filename.raw || filename.wav ]\n"
|
||||
"\n"
|
||||
"IMPORTANT NOTE: Raw file must be 16kHz sample, mono and 16bit\n";
|
||||
|
||||
// default
|
||||
char * filename;
|
||||
int fsize;
|
||||
short * data_buffer = NULL;
|
||||
bool isRaw = true;
|
||||
FILE *f = NULL;
|
||||
|
||||
if (argc > 2 or argc < 2) {
|
||||
std::cout << usage << std::endl;
|
||||
exit(1);
|
||||
} else {
|
||||
filename = argv[1];
|
||||
}
|
||||
|
||||
std::string str = filename;
|
||||
std::string type = ".wav";
|
||||
|
||||
if (str.find(type) != std::string::npos) {
|
||||
isRaw = false;
|
||||
}
|
||||
|
||||
|
||||
if (filename != NULL) {
|
||||
f = fopen(filename,"rb");
|
||||
}
|
||||
|
||||
if (f == NULL) {
|
||||
perror ("Error opening file");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
if (!isRaw) {
|
||||
wavHeader *wavhdr = new wavHeader();
|
||||
readWavHeader(wavhdr, f);
|
||||
|
||||
data_buffer = (short *)malloc(wavhdr->datasize);
|
||||
// Consume all the audio to the buffer
|
||||
fread(data_buffer, wavhdr->datasize, 1, f);
|
||||
fclose(f);
|
||||
fsize = wavhdr->datasize;
|
||||
} else {
|
||||
fseek(f,0,SEEK_END);
|
||||
fsize = ftell(f);
|
||||
rewind(f);
|
||||
|
||||
// Consume all the audio to the buffer
|
||||
data_buffer = (short *)malloc(fsize);
|
||||
int aa = fread(&data_buffer[0], 1 ,fsize, f);
|
||||
std::cout << "Read bytes: " << aa << std::endl;
|
||||
fclose(f);
|
||||
|
||||
}
|
||||
|
||||
// Initializes Snowboy detector.
|
||||
snowboy::SnowboyDetect detector(resource_filename, model_filename);
|
||||
detector.SetSensitivity(sensitivity_str);
|
||||
detector.SetAudioGain(audio_gain);
|
||||
detector.ApplyFrontend(apply_frontend);
|
||||
|
||||
int result = detector.RunDetection(&data_buffer[0], fsize/sizeof(short));
|
||||
std::cout << ">>>>> Result: " << result << " <<<<<" << std::endl;
|
||||
std::cout << "Legend: -2: noise | -1: error | 0: silence | 1: hotword"
|
||||
<< std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
#!/bin/bash

# This script attempts to install PortAudio, which can grab a live audio stream
# from the soundcard.
#
# On linux systems, we only build with ALSA, so make sure you install it using
# e.g.:
#   sudo apt-get -y install libasound2-dev
#
# Every step aborts the script with status 1 on failure, so later steps never
# run in the wrong directory or on a half-built tree.

echo "Installing portaudio"

if [ ! -e pa_stable_v190600_20161030.tgz ]; then
  wget -T 10 -t 3 \
    http://www.portaudio.com/archives/pa_stable_v190600_20161030.tgz || exit 1;
fi

tar -xovzf pa_stable_v190600_20161030.tgz || exit 1

cd portaudio || exit 1
patch < ../patches/portaudio.patch || exit 1

MACOS=$(uname 2>/dev/null | grep Darwin)
if [ -z "$MACOS" ]; then
  ./configure --without-jack --without-oss \
    --with-alsa --prefix="$(pwd)/install" --with-pic || exit 1;
  # Move pa_ringbuffer.o to the end of line 40 of the generated Makefile.
  sed -i '40s:src/common/pa_ringbuffer.o::g' Makefile
  sed -i '40s:$: src/common/pa_ringbuffer.o:' Makefile
else
  # People may have changed OSX's default configuration -- we use clang++.
  CC=clang CXX=clang++ ./configure --prefix="$(pwd)/install" --with-pic \
    || exit 1
fi

make || exit 1
make install || exit 1

cd ..
|
@ -0,0 +1,11 @@
|
||||
--- Makefile.in 2017-05-31 16:42:16.000000000 -0700
|
||||
+++ Makefile_new.in 2017-05-31 16:44:02.000000000 -0700
|
||||
@@ -193,6 +193,8 @@
|
||||
for include in $(INCLUDES); do \
|
||||
$(INSTALL_DATA) -m 644 $(top_srcdir)/include/$$include $(DESTDIR)$(includedir)/$$include; \
|
||||
done
|
||||
+ $(INSTALL_DATA) -m 644 $(top_srcdir)/src/common/pa_ringbuffer.h $(DESTDIR)$(includedir)/$$include
|
||||
+ $(INSTALL_DATA) -m 644 $(top_srcdir)/src/common/pa_util.h $(DESTDIR)$(includedir)/$$include
|
||||
$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
|
||||
$(INSTALL) -m 644 portaudio-2.0.pc $(DESTDIR)$(libdir)/pkgconfig/portaudio-2.0.pc
|
||||
@echo ""
|
@ -0,0 +1 @@
|
||||
../../resources/
|
@ -0,0 +1,20 @@
|
||||
# Builds the C demo. Compiler, flags and library paths come from demo.mk.
include demo.mk

BINFILE = demo

OBJFILES = demo.o snowboy-detect-c-wrapper.o

# These targets do not produce files of the same name.
.PHONY: all clean

all: $(BINFILE)

# Build a missing static library by running make in its directory.
%.a:
	$(MAKE) -C ${@D} ${@F}

# We have to use the C++ compiler to link.
$(BINFILE): $(PORTAUDIOLIBS) $(SNOWBOYDETECTLIBFILE) $(OBJFILES)
	$(CXX) $(OBJFILES) $(SNOWBOYDETECTLIBFILE) $(PORTAUDIOLIBS) $(LDLIBS) -o $(BINFILE)

$(PORTAUDIOLIBS):
	@-./install_portaudio.sh

clean:
	-rm -f *.o *.a $(BINFILE) $(OBJFILES)
|
@ -0,0 +1,223 @@
|
||||
// examples/C/demo.c
|
||||
|
||||
// Copyright 2017 KITT.AI (author: Guoguo Chen)
|
||||
|
||||
#include <assert.h>
|
||||
#include <pa_ringbuffer.h>
|
||||
#include <pa_util.h>
|
||||
#include <portaudio.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "snowboy-detect-c-wrapper.h"
|
||||
|
||||
// Pointer to the ring buffer memory.
|
||||
char* g_ringbuffer;
|
||||
// Ring buffer wrapper used in PortAudio.
|
||||
PaUtilRingBuffer g_pa_ringbuffer;
|
||||
// Pointer to PortAudio stream.
|
||||
PaStream* g_pa_stream;
|
||||
// Number of lost samples at each LoadAudioData() due to ring buffer overflow.
|
||||
int g_num_lost_samples;
|
||||
// Wait for this number of samples in each LoadAudioData() call.
|
||||
int g_min_read_samples;
|
||||
// Pointer to the audio data.
|
||||
int16_t* g_data;
|
||||
|
||||
int PortAudioCallback(const void* input,
|
||||
void* output,
|
||||
unsigned long frame_count,
|
||||
const PaStreamCallbackTimeInfo* time_info,
|
||||
PaStreamCallbackFlags status_flags,
|
||||
void* user_data) {
|
||||
ring_buffer_size_t num_written_samples =
|
||||
PaUtil_WriteRingBuffer(&g_pa_ringbuffer, input, frame_count);
|
||||
g_num_lost_samples += frame_count - num_written_samples;
|
||||
return paContinue;
|
||||
}
|
||||
|
||||
void StartAudioCapturing(int sample_rate,
|
||||
int num_channels, int bits_per_sample) {
|
||||
g_data = NULL;
|
||||
g_num_lost_samples = 0;
|
||||
g_min_read_samples = sample_rate * 0.1;
|
||||
|
||||
// Allocates ring buffer memory.
|
||||
int ringbuffer_size = 16384;
|
||||
g_ringbuffer = (char*)(
|
||||
PaUtil_AllocateMemory(bits_per_sample / 8 * ringbuffer_size));
|
||||
if (g_ringbuffer == NULL) {
|
||||
fprintf(stderr, "Fail to allocate memory for ring buffer.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Initializes PortAudio ring buffer.
|
||||
ring_buffer_size_t rb_init_ans =
|
||||
PaUtil_InitializeRingBuffer(&g_pa_ringbuffer, bits_per_sample / 8,
|
||||
ringbuffer_size, g_ringbuffer);
|
||||
if (rb_init_ans == -1) {
|
||||
fprintf(stderr, "Ring buffer size is not power of 2.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Initializes PortAudio.
|
||||
PaError pa_init_ans = Pa_Initialize();
|
||||
if (pa_init_ans != paNoError) {
|
||||
fprintf(stderr, "Fail to initialize PortAudio, error message is %s.\n",
|
||||
Pa_GetErrorText(pa_init_ans));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
PaError pa_open_ans;
|
||||
if (bits_per_sample == 8) {
|
||||
pa_open_ans = Pa_OpenDefaultStream(
|
||||
&g_pa_stream, num_channels, 0, paUInt8, sample_rate,
|
||||
paFramesPerBufferUnspecified, PortAudioCallback, NULL);
|
||||
} else if (bits_per_sample == 16) {
|
||||
pa_open_ans = Pa_OpenDefaultStream(
|
||||
&g_pa_stream, num_channels, 0, paInt16, sample_rate,
|
||||
paFramesPerBufferUnspecified, PortAudioCallback, NULL);
|
||||
} else if (bits_per_sample == 32) {
|
||||
pa_open_ans = Pa_OpenDefaultStream(
|
||||
&g_pa_stream, num_channels, 0, paInt32, sample_rate,
|
||||
paFramesPerBufferUnspecified, PortAudioCallback, NULL);
|
||||
} else {
|
||||
fprintf(stderr, "Unsupported BitsPerSample: %d.\n", bits_per_sample);
|
||||
exit(1);
|
||||
}
|
||||
if (pa_open_ans != paNoError) {
|
||||
fprintf(stderr, "Fail to open PortAudio stream, error message is %s.\n",
|
||||
Pa_GetErrorText(pa_open_ans));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
PaError pa_stream_start_ans = Pa_StartStream(g_pa_stream);
|
||||
if (pa_stream_start_ans != paNoError) {
|
||||
fprintf(stderr, "Fail to start PortAudio stream, error message is %s.\n",
|
||||
Pa_GetErrorText(pa_stream_start_ans));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void StopAudioCapturing() {
|
||||
if (g_data != NULL) {
|
||||
free(g_data);
|
||||
g_data = NULL;
|
||||
}
|
||||
Pa_StopStream(g_pa_stream);
|
||||
Pa_CloseStream(g_pa_stream);
|
||||
Pa_Terminate();
|
||||
PaUtil_FreeMemory(g_ringbuffer);
|
||||
}
|
||||
|
||||
int LoadAudioData() {
|
||||
if (g_data != NULL) {
|
||||
free(g_data);
|
||||
g_data = NULL;
|
||||
}
|
||||
|
||||
// Checks ring buffer overflow.
|
||||
if (g_num_lost_samples > 0) {
|
||||
fprintf(stderr, "Lost %d samples due to ring buffer overflow.\n",
|
||||
g_num_lost_samples);
|
||||
g_num_lost_samples = 0;
|
||||
}
|
||||
|
||||
ring_buffer_size_t num_available_samples = 0;
|
||||
while (true) {
|
||||
num_available_samples =
|
||||
PaUtil_GetRingBufferReadAvailable(&g_pa_ringbuffer);
|
||||
if (num_available_samples >= g_min_read_samples) {
|
||||
break;
|
||||
}
|
||||
Pa_Sleep(5);
|
||||
}
|
||||
|
||||
// Reads data.
|
||||
num_available_samples = PaUtil_GetRingBufferReadAvailable(&g_pa_ringbuffer);
|
||||
g_data = malloc(num_available_samples * sizeof(int16_t));
|
||||
ring_buffer_size_t num_read_samples = PaUtil_ReadRingBuffer(
|
||||
&g_pa_ringbuffer, g_data, num_available_samples);
|
||||
if (num_read_samples != num_available_samples) {
|
||||
fprintf(stderr, "%d samples were available, but only %d samples were read"
|
||||
".\n", num_available_samples, num_read_samples);
|
||||
}
|
||||
return num_read_samples;
|
||||
}
|
||||
|
||||
// Handler installed for SIGINT in main(): reports the signal number on stderr
// and terminates the process with exit status 0.
void SignalHandler(int signum) {
  fprintf(stderr, "Caught signal %d, terminating...\n", signum);
  exit(0);
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const char usage[] =
|
||||
"Example that shows how to use Snowboy in pure C. Snowboy was written\n"
|
||||
"in C++, so we have to write a wrapper in order to use Snowboy in pure\n"
|
||||
"C. See snowboy-detect-c-wrapper.h and snowboy-detect-c-wrapper.cc for\n"
|
||||
"more details.\n"
|
||||
"\n"
|
||||
"Parameters are hard-coded in the parameter section for this example.\n"
|
||||
"Please check the source code for more details.\n"
|
||||
"\n"
|
||||
"Audio is captured by PortAudio, feel free to replace PortAudio with\n"
|
||||
"your own audio capturing tool.\n"
|
||||
"\n"
|
||||
"To run the example:\n"
|
||||
" ./demo\n";
|
||||
|
||||
// Checks the command.
|
||||
if (argc > 1) {
|
||||
printf("%s", usage);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Configures signal handling.
|
||||
struct sigaction sig_int_handler;
|
||||
sig_int_handler.sa_handler = SignalHandler;
|
||||
sigemptyset(&sig_int_handler.sa_mask);
|
||||
sig_int_handler.sa_flags = 0;
|
||||
sigaction(SIGINT, &sig_int_handler, NULL);
|
||||
|
||||
// Parameter section.
|
||||
// If you have multiple hotword models (e.g., 2), you should set
|
||||
// <model_filename> and <sensitivity_str> as follows:
|
||||
// model_filename =
|
||||
// "resources/models/snowboy.umdl,resources/models/smart_mirror.umdl";
|
||||
// sensitivity_str = "0.5,0.5";
|
||||
const char resource_filename[] = "resources/common.res";
|
||||
const char model_filename[] = "resources/models/snowboy.umdl";
|
||||
const char sensitivity_str[] = "0.5";
|
||||
float audio_gain = 1;
|
||||
bool apply_frontend = false;
|
||||
|
||||
// Initializes Snowboy detector.
|
||||
SnowboyDetect* detector = SnowboyDetectConstructor(resource_filename,
|
||||
model_filename);
|
||||
SnowboyDetectSetSensitivity(detector, sensitivity_str);
|
||||
SnowboyDetectSetAudioGain(detector, audio_gain);
|
||||
SnowboyDetectApplyFrontend(detector, apply_frontend);
|
||||
|
||||
// Initializes PortAudio. You may use other tools to capture the audio.
|
||||
StartAudioCapturing(SnowboyDetectSampleRate(detector),
|
||||
SnowboyDetectNumChannels(detector),
|
||||
SnowboyDetectBitsPerSample(detector));
|
||||
|
||||
// Runs the detection.
|
||||
printf("Listening... Press Ctrl+C to exit\n");
|
||||
while (true) {
|
||||
int array_length = LoadAudioData();
|
||||
if (array_length != 0) {
|
||||
int result = SnowboyDetectRunDetection(detector,
|
||||
g_data, array_length, false);
|
||||
if (result > 0) {
|
||||
printf("Hotword %d detected!\n", result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StopAudioCapturing();
|
||||
SnowboyDetectDestructor(detector);
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
# Shared build settings for the C demo; pulled in by the demo Makefile through
# "include demo.mk". Defines the compilers, compile/link flags, the PortAudio
# locations and the Snowboy static library to link, chosen per platform.

# Repository root relative to this directory.
TOPDIR := ../../
# When set to True, -fPIC is added to CFLAGS and CXXFLAGS below.
DYNAMIC := True
# Compilers and link settings start empty and are filled in per platform.
CC :=
CXX :=
LDFLAGS :=
LDLIBS :=
# Header and library locations inside the local PortAudio install prefix.
PORTAUDIOINC := portaudio/install/include
PORTAUDIOLIBS := portaudio/install/lib/libportaudio.a

CFLAGS :=
CXXFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0

ifeq ($(DYNAMIC), True)
  CFLAGS += -fPIC
  CXXFLAGS += -fPIC
endif

# Enables SSE/SSE2 when the machine name reported by uname starts with "x86".
ifeq ($(shell uname -m | cut -c 1-3), x86)
  CFLAGS += -msse -msse2
  CXXFLAGS += -msse -msse2
endif

ifeq ($(shell uname), Darwin)
  # By default Mac uses clang++ as g++, but people may have changed their
  # default configuration.
  CC := clang
  CXX := clang++
  CFLAGS += -I$(TOPDIR) -Wall -I$(PORTAUDIOINC)
  CXXFLAGS += -I$(TOPDIR) -Wall -Wno-sign-compare -Winit-self \
      -DHAVE_POSIX_MEMALIGN -DHAVE_CLAPACK -I$(PORTAUDIOINC)
  LDLIBS += -ldl -lm -framework Accelerate -framework CoreAudio \
      -framework AudioToolbox -framework AudioUnit -framework CoreServices \
      $(PORTAUDIOLIBS)
  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/osx/libsnowboy-detect.a
else ifeq ($(shell uname), Linux)
  CC := gcc
  CXX := g++
  CFLAGS += -I$(TOPDIR) -Wall -I$(PORTAUDIOINC)
  CXXFLAGS += -I$(TOPDIR) -std=c++0x -Wall -Wno-sign-compare \
      -Wno-unused-local-typedefs -Winit-self -rdynamic \
      -DHAVE_POSIX_MEMALIGN -I$(PORTAUDIOINC)
  LDLIBS += -ldl -lm -Wl,-Bstatic -Wl,-Bdynamic -lrt -lpthread $(PORTAUDIOLIBS)\
      -L/usr/lib/atlas-base -lf77blas -lcblas -llapack_atlas -latlas -lasound
  SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/ubuntu64/libsnowboy-detect.a
  # Machines whose uname -m contains "arm" link the library under lib/rpi.
  ifneq (,$(findstring arm,$(shell uname -m)))
    SNOWBOYDETECTLIBFILE := $(TOPDIR)/lib/rpi/libsnowboy-detect.a
  endif
endif

# Suppress clang warnings...
# COMPILER is recursively expanded, so "$(CXX) -v" runs with the CXX chosen
# above whenever COMPILER is referenced.
COMPILER = $(shell $(CXX) -v 2>&1 )
ifeq ($(findstring clang,$(COMPILER)), clang)
  CXXFLAGS += -Wno-mismatched-tags -Wno-c++11-extensions
endif

# Set optimization level.
CFLAGS += -O3
CXXFLAGS += -O3
|
@ -0,0 +1,36 @@
|
||||
#!/bin/bash

# This script attempts to install PortAudio, which can grab a live audio stream
# from the soundcard.
#
# On linux systems, we only build with ALSA, so make sure you install it using
# e.g.:
#   sudo apt-get -y install libasound2-dev
#
# Every step aborts the script with a non-zero status on failure, so that a
# broken download, patch or build is not followed by commands running in the
# wrong directory or against a half-built tree.

echo "Installing portaudio"

# Downloads the release tarball unless it is already present.
if [ ! -e pa_stable_v190600_20161030.tgz ]; then
  wget -T 10 -t 3 \
    http://www.portaudio.com/archives/pa_stable_v190600_20161030.tgz || exit 1;
fi

tar -xovzf pa_stable_v190600_20161030.tgz || exit 1

cd portaudio || exit 1
# Makes "make install" also install pa_ringbuffer.h and pa_util.h.
patch < ../patches/portaudio.patch || exit 1

MACOS=`uname 2>/dev/null | grep Darwin`
if [ -z "$MACOS" ]; then
  ./configure --without-jack --without-oss \
    --with-alsa --prefix=`pwd`/install --with-pic || exit 1;
  # Makes sure src/common/pa_ringbuffer.o appears exactly once on line 40 of
  # the generated Makefile: drop any existing mention, then append it.
  # NOTE(review): line 40 is presumably the object list -- confirm when
  # changing the PortAudio version.
  sed -i '40s:src/common/pa_ringbuffer.o::g' Makefile || exit 1
  sed -i '40s:$: src/common/pa_ringbuffer.o:' Makefile || exit 1
else
  # People may have changed OSX's default configuration -- we use clang++.
  CC=clang CXX=clang++ ./configure --prefix=`pwd`/install --with-pic || exit 1
fi

make || exit 1
make install || exit 1

cd ..
|
@ -0,0 +1,11 @@
|
||||
--- Makefile.in 2017-05-31 16:42:16.000000000 -0700
|
||||
+++ Makefile_new.in 2017-05-31 16:44:02.000000000 -0700
|
||||
@@ -193,6 +193,8 @@
|
||||
for include in $(INCLUDES); do \
|
||||
$(INSTALL_DATA) -m 644 $(top_srcdir)/include/$$include $(DESTDIR)$(includedir)/$$include; \
|
||||
done
|
||||
+ $(INSTALL_DATA) -m 644 $(top_srcdir)/src/common/pa_ringbuffer.h $(DESTDIR)$(includedir)/$$include
|
||||
+ $(INSTALL_DATA) -m 644 $(top_srcdir)/src/common/pa_util.h $(DESTDIR)$(includedir)/$$include
|
||||
$(INSTALL) -d $(DESTDIR)$(libdir)/pkgconfig
|
||||
$(INSTALL) -m 644 portaudio-2.0.pc $(DESTDIR)$(libdir)/pkgconfig/portaudio-2.0.pc
|
||||
@echo ""
|
@ -0,0 +1 @@
|
||||
../../resources
|
@ -0,0 +1,82 @@
|
||||
// snowboy-detect-c-wrapper.cc
|
||||
|
||||
// Copyright 2017 KITT.AI (author: Guoguo Chen)
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "snowboy-detect-c-wrapper.h"
|
||||
#include "include/snowboy-detect.h"
|
||||
|
||||
extern "C" {
|
||||
SnowboyDetect* SnowboyDetectConstructor(const char* const resource_filename,
|
||||
const char* const model_str) {
|
||||
return reinterpret_cast<SnowboyDetect*>(
|
||||
new snowboy::SnowboyDetect(resource_filename, model_str));
|
||||
}
|
||||
|
||||
bool SnowboyDetectReset(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
return reinterpret_cast<snowboy::SnowboyDetect*>(detector)->Reset();
|
||||
}
|
||||
|
||||
int SnowboyDetectRunDetection(SnowboyDetect* detector,
|
||||
const int16_t* const data,
|
||||
const int array_length, bool is_end) {
|
||||
assert(detector != NULL);
|
||||
assert(data != NULL);
|
||||
return reinterpret_cast<snowboy::SnowboyDetect*>(
|
||||
detector)->RunDetection(data, array_length, is_end);
|
||||
}
|
||||
|
||||
void SnowboyDetectSetSensitivity(SnowboyDetect* detector,
|
||||
const char* const sensitivity_str) {
|
||||
assert(detector != NULL);
|
||||
reinterpret_cast<snowboy::SnowboyDetect*>(
|
||||
detector)->SetSensitivity(sensitivity_str);
|
||||
}
|
||||
|
||||
void SnowboyDetectSetAudioGain(SnowboyDetect* detector,
|
||||
const float audio_gain) {
|
||||
assert(detector != NULL);
|
||||
reinterpret_cast<snowboy::SnowboyDetect*>(
|
||||
detector)->SetAudioGain(audio_gain);
|
||||
}
|
||||
|
||||
void SnowboyDetectUpdateModel(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
reinterpret_cast<snowboy::SnowboyDetect*>(detector)->UpdateModel();
|
||||
}
|
||||
|
||||
void SnowboyDetectApplyFrontend(SnowboyDetect* detector,
|
||||
const bool apply_frontend) {
|
||||
assert(detector != NULL);
|
||||
reinterpret_cast<snowboy::SnowboyDetect*>(
|
||||
detector)->ApplyFrontend(apply_frontend);
|
||||
}
|
||||
|
||||
int SnowboyDetectNumHotwords(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
return reinterpret_cast<snowboy::SnowboyDetect*>(detector)->NumHotwords();
|
||||
}
|
||||
|
||||
int SnowboyDetectSampleRate(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
return reinterpret_cast<snowboy::SnowboyDetect*>(detector)->SampleRate();
|
||||
}
|
||||
|
||||
int SnowboyDetectNumChannels(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
return reinterpret_cast<snowboy::SnowboyDetect*>(detector)->NumChannels();
|
||||
}
|
||||
|
||||
int SnowboyDetectBitsPerSample(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
return reinterpret_cast<snowboy::SnowboyDetect*>(detector)->BitsPerSample();
|
||||
}
|
||||
|
||||
void SnowboyDetectDestructor(SnowboyDetect* detector) {
|
||||
assert(detector != NULL);
|
||||
delete reinterpret_cast<snowboy::SnowboyDetect*>(detector);
|
||||
detector = NULL;
|
||||
}
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
// snowboy-detect-c-wrapper.h
|
||||
|
||||
// Copyright 2017 KITT.AI (author: Guoguo Chen)
|
||||
|
||||
#ifndef SNOWBOY_DETECT_C_WRAPPER_H_
#define SNOWBOY_DETECT_C_WRAPPER_H_

// C interface to the C++ class snowboy::SnowboyDetect. Every function except
// the constructor takes the handle returned by SnowboyDetectConstructor() and
// forwards to the C++ method of the same name (see
// snowboy-detect-c-wrapper.cc). The implementation asserts that the handle is
// not NULL.

#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
  // Opaque handle; the struct is never defined on the C side.
  typedef struct SnowboyDetect SnowboyDetect;

  // Creates a detector from a resource file and a model string. The demo
  // passes several models as one comma-separated string. The returned handle
  // must be released with SnowboyDetectDestructor().
  SnowboyDetect* SnowboyDetectConstructor(const char* const resource_filename,
                                          const char* const model_str);

  // Forwards to SnowboyDetect::Reset() and returns its result.
  bool SnowboyDetectReset(SnowboyDetect* detector);

  // Runs detection on <array_length> 16-bit samples starting at <data>, which
  // must not be NULL. The examples in this repository interpret the result as:
  // > 0 index of the detected hotword, 0 nothing detected, -1 error,
  // -2 silence.
  int SnowboyDetectRunDetection(SnowboyDetect* detector,
                                const int16_t* const data,
                                const int array_length, bool is_end);

  // Sets the sensitivity from a string such as "0.5"; the demo uses one
  // comma-separated value per model (e.g. "0.5,0.5").
  void SnowboyDetectSetSensitivity(SnowboyDetect* detector,
                                   const char* const sensitivity_str);

  // Forwards to SnowboyDetect::SetAudioGain().
  void SnowboyDetectSetAudioGain(SnowboyDetect* detector,
                                 const float audio_gain);

  // Forwards to SnowboyDetect::UpdateModel().
  void SnowboyDetectUpdateModel(SnowboyDetect* detector);

  // Forwards to SnowboyDetect::ApplyFrontend().
  void SnowboyDetectApplyFrontend(SnowboyDetect* detector,
                                  const bool apply_frontend);

  // Forwards to SnowboyDetect::NumHotwords().
  int SnowboyDetectNumHotwords(SnowboyDetect* detector);

  // Audio format the detector expects; the demo uses these three values to
  // configure audio capturing.
  int SnowboyDetectSampleRate(SnowboyDetect* detector);

  int SnowboyDetectNumChannels(SnowboyDetect* detector);

  int SnowboyDetectBitsPerSample(SnowboyDetect* detector);

  // Deletes the detector behind the handle; the handle must not be used
  // afterwards.
  void SnowboyDetectDestructor(SnowboyDetect* detector);

#ifdef __cplusplus
}
#endif

#endif  // SNOWBOY_DETECT_C_WRAPPER_H_
|
@ -0,0 +1,40 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"unsafe"
|
||||
"os"
|
||||
|
||||
"github.com/Kitt-AI/snowboy/swig/Go"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 3 {
|
||||
fmt.Printf("usage: %s <keyword.umdl> <wav file>\n", os.Args[0])
|
||||
return
|
||||
}
|
||||
fmt.Printf("Snowboy detecting keyword in %s\n", os.Args[2])
|
||||
detector := snowboydetect.NewSnowboyDetect("../../../resources/common.res", os.Args[1])
|
||||
detector.SetSensitivity("0.5")
|
||||
detector.SetAudioGain(1)
|
||||
detector.ApplyFrontend(false)
|
||||
defer snowboydetect.DeleteSnowboyDetect(detector)
|
||||
|
||||
dat, err := ioutil.ReadFile(os.Args[2])
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
ptr := snowboydetect.SwigcptrInt16_t(unsafe.Pointer(&dat[0]))
|
||||
res := detector.RunDetection(ptr, len(dat) / 2 /* len of int16 */)
|
||||
if res == -2 {
|
||||
fmt.Println("Snowboy detected silence")
|
||||
} else if res == -1 {
|
||||
fmt.Println("Snowboy detection returned error")
|
||||
} else if res == 0 {
|
||||
fmt.Println("Snowboy detected nothing")
|
||||
} else {
|
||||
fmt.Println("Snowboy detected keyword ", res)
|
||||
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
## Dependencies
|
||||
|
||||
### Swig
|
||||
http://www.swig.org/
|
||||
|
||||
### Go Package
|
||||
```
|
||||
go get github.com/Kitt-AI/snowboy/swig/Go
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
```
|
||||
go build -o snowboy main.go
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
```
|
||||
./snowboy <keyword.umdl> <wav file>
|
||||
```
|
||||
|
||||
### Examples
|
||||
Cmd:
|
||||
`./snowboy ../../../resources/models/snowboy.umdl ../../../resources/snowboy.wav`
|
||||
|
||||
Output:
|
||||
```
|
||||
Snowboy detecting keyword in ../../../resources/snowboy.wav
|
||||
Snowboy detected keyword 1
|
||||
```
|
||||
|
||||
Cmd:
|
||||
`./snowboy ../../../resources/alexa.umdl ../../../resources/snowboy.wav`
|
||||
|
||||
Output:
|
||||
```
|
||||
Snowboy detecting keyword in ../../../resources/snowboy.wav
|
||||
Snowboy detected nothing
|
||||
```
|
@ -0,0 +1,36 @@
|
||||
## Dependencies
|
||||
|
||||
### Swig
|
||||
http://www.swig.org/
|
||||
|
||||
### Go Packages: the idiomatic `go-snowboy` wrapper and the PortAudio bindings
|
||||
```
|
||||
github.com/brentnd/go-snowboy
|
||||
github.com/gordonklaus/portaudio
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
```
|
||||
go build -o listen main.go
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
```
|
||||
./listen [path to snowboy resource file] [path to snowboy hotword file]
|
||||
```
|
||||
|
||||
### Examples
|
||||
Cmd:
|
||||
`./listen ../../../resources/common.res ../../../resources/models/snowboy.umdl`
|
||||
|
||||
Output:
|
||||
```
|
||||
sample rate=16000, num channels=1, bit depth=16
|
||||
Silence detected.
|
||||
Silence detected.
|
||||
Silence detected.
|
||||
You said the hotword!
|
||||
Silence detected.
|
||||
```
|
@ -0,0 +1,101 @@
|
||||
// This example streams the microphone through Snowboy to listen for the hotword,
|
||||
// by using the PortAudio interface.
|
||||
//
|
||||
// HOW TO USE:
|
||||
// go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
|
||||
//
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/brentnd/go-snowboy"
|
||||
"github.com/gordonklaus/portaudio"
|
||||
)
|
||||
|
||||
// Sound represents a sound stream implementing the io.Reader interface
// that provides the microphone data.
type Sound struct {
	// stream is the PortAudio input stream opened and started by Init; it
	// stays nil when Init fails.
	stream *portaudio.Stream
	// data is the sample buffer registered with PortAudio when the stream is
	// opened (Init allocates 1024 samples); Read serializes its content.
	data []int16
}
|
||||
|
||||
// Init initializes the Sound's PortAudio stream.
|
||||
func (s *Sound) Init() {
|
||||
inputChannels := 1
|
||||
outputChannels := 0
|
||||
sampleRate := 16000
|
||||
s.data = make([]int16, 1024)
|
||||
|
||||
// initialize the audio recording interface
|
||||
err := portaudio.Initialize()
|
||||
if err != nil {
|
||||
fmt.Errorf("Error initialize audio interface: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// open the sound input stream for the microphone
|
||||
stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
|
||||
if err != nil {
|
||||
fmt.Errorf("Error open default audio stream: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = stream.Start()
|
||||
if err != nil {
|
||||
fmt.Errorf("Error on stream start: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
s.stream = stream
|
||||
}
|
||||
|
||||
// Close closes down the Sound's PortAudio connection.
|
||||
func (s *Sound) Close() {
|
||||
s.stream.Close()
|
||||
portaudio.Terminate()
|
||||
}
|
||||
|
||||
// Read is the Sound's implementation of the io.Reader interface.
|
||||
func (s *Sound) Read(p []byte) (int, error) {
|
||||
s.stream.Read()
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
for _, v := range s.data {
|
||||
binary.Write(buf, binary.LittleEndian, v)
|
||||
}
|
||||
|
||||
copy(p, buf.Bytes())
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
// open the mic
|
||||
mic := &Sound{}
|
||||
mic.Init()
|
||||
defer mic.Close()
|
||||
|
||||
// open the snowboy detector
|
||||
d := snowboy.NewDetector(os.Args[1])
|
||||
defer d.Close()
|
||||
|
||||
// set the handlers
|
||||
d.HandleFunc(snowboy.NewHotword(os.Args[2], 0.5), func(string) {
|
||||
fmt.Println("You said the hotword!")
|
||||
})
|
||||
|
||||
d.HandleSilenceFunc(1*time.Second, func(string) {
|
||||
fmt.Println("Silence detected.")
|
||||
})
|
||||
|
||||
// display the detector's expected audio format
|
||||
sr, nc, bd := d.AudioFormat()
|
||||
fmt.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", sr, nc, bd)
|
||||
|
||||
// start detecting using the microphone
|
||||
d.ReadAndDetect(mic)
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
import ai.kitt.snowboy.*;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import javax.sound.sampled.AudioFormat;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
import javax.sound.sampled.DataLine;
|
||||
import javax.sound.sampled.SourceDataLine;
|
||||
import javax.sound.sampled.TargetDataLine;
|
||||
|
||||
public class Demo {
|
||||
static {
|
||||
System.loadLibrary("snowboy-detect-java");
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
// Sets up audio.
|
||||
AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
|
||||
DataLine.Info targetInfo = new DataLine.Info(TargetDataLine.class, format);
|
||||
|
||||
// Sets up Snowboy.
|
||||
SnowboyDetect detector = new SnowboyDetect("resources/common.res",
|
||||
"resources/models/snowboy.umdl");
|
||||
detector.SetSensitivity("0.5");
|
||||
detector.SetAudioGain(1);
|
||||
detector.ApplyFrontend(false);
|
||||
|
||||
try {
|
||||
TargetDataLine targetLine =
|
||||
(TargetDataLine) AudioSystem.getLine(targetInfo);
|
||||
targetLine.open(format);
|
||||
targetLine.start();
|
||||
|
||||
// Reads 0.1 second of audio in each call.
|
||||
byte[] targetData = new byte[3200];
|
||||
short[] snowboyData = new short[1600];
|
||||
int numBytesRead;
|
||||
|
||||
while (true) {
|
||||
// Reads the audio data in the blocking mode. If you are on a very slow
|
||||
// machine such that the hotword detector could not process the audio
|
||||
// data in real time, this will cause problem...
|
||||
numBytesRead = targetLine.read(targetData, 0, targetData.length);
|
||||
|
||||
if (numBytesRead == -1) {
|
||||
System.out.print("Fails to read audio data.");
|
||||
break;
|
||||
}
|
||||
|
||||
// Converts bytes into int16 that Snowboy will read.
|
||||
ByteBuffer.wrap(targetData).order(
|
||||
ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(snowboyData);
|
||||
|
||||
// Detection.
|
||||
int result = detector.RunDetection(snowboyData, snowboyData.length);
|
||||
if (result > 0) {
|
||||
System.out.print("Hotword " + result + " detected!\n");
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println(e);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,10 @@
|
||||
# Builds and runs the Java demo. "java" and "jniLibs" are expected to provide
# the generated Java classes and the native library respectively.

# These targets do not produce files of the same name; declaring them phony
# keeps a stray file called "all", "run" or "clean" from disabling the rule.
.PHONY: all run clean

all: Demo.class

Demo.class: Demo.java
	javac -classpath java Demo.java

run: Demo.class
	java -classpath .:java -Djava.library.path=jniLibs Demo

clean:
	-rm -f Demo.class
|
@ -0,0 +1 @@
|
||||
../../swig/Java/java/
|
@ -0,0 +1 @@
|
||||
../../swig/Java/jniLibs/
|
@ -0,0 +1 @@
|
||||
../../resources/
|
@ -0,0 +1,46 @@
|
||||
// Runs Snowboy over a WAV file: the file is decoded by a wav.Reader and piped
// into the detector, which logs every event it emits.
const fs = require('fs');
const wav = require('wav');
const { Detector, Models } = require('../../');

// Hotword models to listen for.
const models = new Models();
models.add({
  file: 'resources/models/snowboy.umdl',
  sensitivity: '0.5',
  hotwords: 'snowboy'
});

const detector = new Detector({
  resource: "resources/common.res",
  models,
  audioGain: 1.0,
  applyFrontend: false
});

detector.on('silence', () => console.log('silence'));

// The "sound" listener receives the last chunk of audio that triggered the
// event; it could be written to a wav stream.
detector.on('sound', () => console.log('sound'));

detector.on('error', () => console.log('error'));

// The "hotword" listener also receives the last chunk of audio that triggered
// the event as a third argument. Combine it with the chunks from the "sound"
// event if you want to get audio data after the hotword.
detector.on('hotword', (index, hotword) => {
  console.log('hotword', index, hotword);
});

const reader = new wav.Reader();
fs.createReadStream('resources/snowboy.wav').pipe(reader).pipe(detector);
|
@ -0,0 +1,48 @@
|
||||
// Runs Snowboy on live microphone input: node-record-lpcm16 records the audio
// and pipes it into the detector, which logs every event it emits.
const record = require('node-record-lpcm16');
const { Detector, Models } = require('../../');

// Hotword models to listen for.
const models = new Models();
models.add({
  file: 'resources/models/snowboy.umdl',
  sensitivity: '0.5',
  hotwords: 'snowboy'
});

const detector = new Detector({
  resource: "resources/common.res",
  models,
  audioGain: 2.0,
  applyFrontend: true
});

detector.on('silence', () => console.log('silence'));

// The "sound" listener receives the last chunk of audio that triggered the
// event; it could be written to a wav stream.
detector.on('sound', () => console.log('sound'));

detector.on('error', () => console.log('error'));

// <buffer> contains the last chunk of the audio that triggers the "hotword"
// event. It could be written to a wav stream. Combine it with the chunks from
// the "sound" event if you want to get audio data after the hotword.
detector.on('hotword', (index, hotword, buffer) => {
  console.log(buffer);
  console.log('hotword', index, hotword);
});

const mic = record.start({ threshold: 0, verbose: true });
mic.pipe(detector);
|
@ -0,0 +1 @@
|
||||
../../resources/
|
@ -0,0 +1,5 @@
|
||||
requires 'Audio::PortAudio', '>= 0.03';
|
||||
requires 'JSON', '>= 2.00';
|
||||
requires 'Statistics::Basic', '>= 1.6611';
|
||||
requires 'LWP::UserAgent', '>= 6.0.0';
|
||||
requires 'Mozilla::CA', '>= 20160104';
|
@ -0,0 +1 @@
|
||||
../../resources/
|
@ -0,0 +1,251 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
# This script uses PortAudio to record 3 audio samples on your computer, and
|
||||
# sends them to the KITT.AI RESTful API to train the personal hotword model.
|
||||
|
||||
use Audio::PortAudio;
|
||||
use File::Path qw(make_path);
|
||||
use IO::Handle;
|
||||
use JSON;
|
||||
use LWP::UserAgent;
|
||||
use MIME::Base64;
|
||||
use Statistics::Basic qw(:all);
|
||||
|
||||
my $Usage = <<EOU;
|
||||
|
||||
This script uses PortAudio to record 3 audio samples on your computer, and sends
|
||||
them to the KITT.AI RESTful API to train the personal hotword model.
|
||||
|
||||
Usage: ./snowboy_RESTful_train.pl <API_TOKEN> <Hotword> <Language>
|
||||
e.g.: ./snowboy_RESTful_train.pl \
|
||||
abcdefghijklmnopqrstuvwxyzABCD0123456789 snowboy en
|
||||
|
||||
EOU
|
||||
|
||||
if (@ARGV != 3) {
|
||||
die $Usage;
|
||||
}
|
||||
|
||||
# Gets parameters.
|
||||
my $api_token = shift @ARGV;
|
||||
my $hotword = shift @ARGV;
|
||||
my $language = shift @ARGV;
|
||||
|
||||
# Turns on OUTPUT_AUTOFLUSH.
|
||||
$|++;
|
||||
|
||||
# Audio format
|
||||
use constant RATE => 16000;
|
||||
use constant NUMCHANNELS => 1;
|
||||
use constant BITSPERSAMPLE => 16;
|
||||
|
||||
# Calculates number of samples per chunk based on a given chunk size in
|
||||
# milliseconds.
|
||||
use constant CHUNK_SIZE_MS => 20;
|
||||
use constant SAMPLES => RATE * CHUNK_SIZE_MS / 1000;
|
||||
|
||||
# Minimum number of non-silent chunks to count as utterance. Anything less is
|
||||
# noise.
|
||||
use constant MIN_SPEECH => 5;
|
||||
|
||||
# Detects 500 ms of silence (25 blocks * 20 ms = 500 ms) before terminating
|
||||
# recording.
|
||||
use constant TRAILING_SILENCE_BLOCKS => 25;
|
||||
|
||||
# Depth of FIFO buffer in blocks
|
||||
use constant FIFO_DEPTH => 25;
|
||||
|
||||
# REST endpoint for model training
|
||||
use constant URL => 'https://snowboy.kitt.ai/api/v1/train/';
|
||||
|
||||
$trailing_silence_blocks = 0;
|
||||
$speech_blocks = 0;
|
||||
$buffer = '';
|
||||
|
||||
# Audio capturing.
|
||||
my $api = Audio::PortAudio::default_host_api();
|
||||
my $device = $api->default_input_device;
|
||||
my $stream = $device->open_read_stream(
|
||||
{channel_count => NUMCHANNELS, sample_format => 'int16'},
|
||||
RATE,
|
||||
SAMPLES);
|
||||
|
||||
# Collects 1000 msec worth of voice data and calculates silence threshold and DC
|
||||
# offset.
|
||||
print "\n";
|
||||
print "Calculating statistics on silence, please be quite...\n";
|
||||
for ($i = 0; $i < (1000 / CHUNK_SIZE_MS); $i++) {
|
||||
# SLN format = 2 bytes per sample.
|
||||
$stream->read($buffer, SAMPLES);
|
||||
|
||||
# Discards first (usually noisy) block.
|
||||
next if not $i;
|
||||
|
||||
# Unpacks into an array of 16-bit linear samples.
|
||||
my $vec = vector(unpack('s*', $buffer));
|
||||
|
||||
my $stddev = round(stddev($vec));
|
||||
my $mean = round(mean($vec));
|
||||
|
||||
push @alldevs, $stddev;
|
||||
push @allmeans, $mean;
|
||||
|
||||
# printf "%.2f secs: mean: %d, stdddev: %d\r",
|
||||
# $i * SAMPLES / RATE, $mean, $stddev;
|
||||
|
||||
# Tracks the maximum standard deviation across all data chunks.
|
||||
$maxdev = $stddev if $stddev > $maxdev;
|
||||
}
|
||||
|
||||
my $vec = vector(@alldevs);
|
||||
$stddev = round(stddev($vec));
|
||||
$mean = round(mean($vec));
|
||||
|
||||
$maxdev = $mean + $stddev;
|
||||
|
||||
# Too quiet (good silence suppression, like SIP phones)
|
||||
$maxdev = 100 if $maxdev < 100;
|
||||
|
||||
# Add margin to silence detection to be safe.
|
||||
$maxdev *= 2;
|
||||
|
||||
$dcoffset = round(mean(@allmeans));
|
||||
|
||||
print "Done (Silence Threshold: $maxdev, DC Offset: $dcoffset)\n";
|
||||
|
||||
@spin = (qw[/ - \ |]);
|
||||
|
||||
# Collects 3 voice samples to send to KITT.AI for personal model training.
|
||||
for ($samples = 0; $samples < 3; $samples++) {
|
||||
$speech_blocks = 0;
|
||||
$trailing_silence_blocks = 0;
|
||||
@utterance_blocks = ();
|
||||
$buffer = '';
|
||||
$i = 0;
|
||||
|
||||
print "\n";
|
||||
printf "Now speak your sample %d:\n", $samples + 1;
|
||||
while ($trailing_silence_blocks < TRAILING_SILENCE_BLOCKS) {
|
||||
$stream->read($buffer, SAMPLES);
|
||||
push @utterance_blocks, $buffer;
|
||||
|
||||
if (isSilence($buffer)) {
|
||||
if ($speech_blocks > MIN_SPEECH) {
|
||||
print '.';
|
||||
$trailing_silence_blocks++;
|
||||
} else {
|
||||
# No good speech collected; restart.
|
||||
print $spin[$i++], "\r";
|
||||
$i = 0 if $i == scalar @spin;
|
||||
$speech_blocks = 0;
|
||||
# FIFO - remove first block, shift array up.
|
||||
shift @utterance_blocks if scalar @utterance_blocks > FIFO_DEPTH;
|
||||
}
|
||||
} else {
|
||||
print '*' if $speech_blocks > MIN_SPEECH;
|
||||
$speech_blocks++;
|
||||
$trailing_silence_blocks = 0;
|
||||
}
|
||||
}
|
||||
|
||||
printf "\n";
|
||||
printf "Utterance is %.2f seconds long (%d blocks)\n",
|
||||
(20 * (scalar @utterance_blocks) / 1000), scalar @utterance_blocks;
|
||||
|
||||
$utterance[$samples] = join '', @utterance_blocks;
|
||||
}
|
||||
print "\n";
|
||||
|
||||
# Send API request to KITT.AI
|
||||
$APIreq = encode_json({
|
||||
# gender => 'male',
|
||||
# age_group => '40-49',
|
||||
name => $hotword,
|
||||
language => $language,
|
||||
token => $api_token,
|
||||
microphone => 'mobile',
|
||||
voice_samples => [
|
||||
{wave => encode_base64(addWavHeader($utterance[0]))},
|
||||
{wave => encode_base64(addWavHeader($utterance[1]))},
|
||||
{wave => encode_base64(addWavHeader($utterance[2]))}
|
||||
]
|
||||
});
|
||||
|
||||
$ua = LWP::UserAgent->new(debug => 1);
|
||||
my $response = $ua->post(URL,
|
||||
Content_Type => "application/json",
|
||||
Content => $APIreq);
|
||||
|
||||
$model_dir = "data";
|
||||
$time_str = time;
|
||||
$hotword_name = $hotword;
|
||||
$hotword_name =~ s/\s+/_/g;
|
||||
if ($response->is_success) {
|
||||
# Saves the generated models in the current working directory.
|
||||
make_path($model_dir);
|
||||
|
||||
# Saves samples.
|
||||
for (0..2) {
|
||||
$id = $_ + 1;
|
||||
my $fh = IO::File->new(
|
||||
">$model_dir/${hotword_name}_${time_str}_sample${id}.wav");
|
||||
if (defined $fh) {
|
||||
print $fh addWavHeader($utterance[$_]);
|
||||
$fh->close;
|
||||
}
|
||||
}
|
||||
|
||||
# Saves the generated personal model.
|
||||
my $fh = IO::File->new(">$model_dir/${hotword_name}_${time_str}.pmdl");
|
||||
if (defined $fh) {
|
||||
print $fh $response->content;
|
||||
$fh->close;
|
||||
}
|
||||
|
||||
print "Model $model_dir/${hotword_name}_${time_str}.pmdl created.\n";
|
||||
} else {
|
||||
print "Failed to create model:\n";
|
||||
die $response->status_line;
|
||||
}
|
||||
|
||||
sub isSilence {
    # Returns true when the given raw audio block is quieter than the
    # calibrated silence threshold held in $maxdev.
    my ($block) = @_;

    # Decode the block as native signed 16-bit samples and measure how far
    # they spread around their mean.
    my @pcm = unpack('s*', $block);
    my $spread = round(stddev(vector(@pcm)));

    return $spread < $maxdev;
}
|
||||
|
||||
# WAV format reference: http://soundfile.sapp.org/doc/WaveFormat/
|
||||
sub addWavHeader {
    # Prepends a 44-byte canonical PCM WAV header to raw sample data.
    #
    # Argument: $raw - raw PCM bytes in the format described by the RATE,
    #                  NUMCHANNELS and BITSPERSAMPLE constants.
    # Returns:  the header followed by $raw.
    my $raw = shift;
    my $data_len = length $raw;

    my $byterate = RATE * NUMCHANNELS * BITSPERSAMPLE / 8;
    my $blockalign = NUMCHANNELS * BITSPERSAMPLE / 8;

    # 'A4' pads with spaces, so 'fmt' is emitted as the required "fmt " tag.
    my $header = pack('A4VA4A4VvvVVvvA4V',
        'RIFF',
        36 + $data_len,   # RIFF chunk size: rest of header + data
        'WAVE',
        'fmt',
        16,               # size of the fmt sub-chunk for PCM
        1,                # audio format: PCM
        NUMCHANNELS,      # same constant used for $byterate/$blockalign
        RATE,
        $byterate,
        $blockalign,
        BITSPERSAMPLE,
        'data',
        $data_len
    );

    return $header . $raw;
}
|
||||
|
||||
sub round {
    # Rounds a number to the nearest integer, halves away from zero.
    #
    # int() truncates toward zero, so the "+ .5" trick alone rounds negative
    # values the wrong way (-1.6 would become -1). Negative inputs are
    # therefore rounded on their magnitude and the sign is restored.
    my $number = 0 + shift;    # numify first (also works for overloaded objects)

    return $number < 0 ? -int(-$number + .5) : int($number + .5);
}
|
@ -0,0 +1,235 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
# This script first uses Snowboy to wake up, then collects audio and sends to
|
||||
# Google Speech API for further recognition. It works with both personal and
|
||||
# universal models. By default, it uses the Snowboy universal model at
|
||||
# resources/models/snowboy.umdl, you can change it to other universal models, or
|
||||
# your own personal models. You also have to provide your Google API key in
|
||||
# order to use it.
|
||||
|
||||
use Snowboy;
|
||||
|
||||
use Audio::PortAudio;
|
||||
use Data::Dumper;
|
||||
use Getopt::Long;
|
||||
use IO::Handle;
|
||||
use JSON;
|
||||
use LWP::UserAgent;
|
||||
use Statistics::Basic qw(:all);
|
||||
use Time::HiRes qw(gettimeofday tv_interval);
|
||||
|
||||
my $Usage = <<EOU;
|
||||
|
||||
This script first uses Snowboy to wake up, then collects audio and sends to
|
||||
Google Speech API for further recognition. It works with both personal and
|
||||
universal models. By default, it uses the Snowboy universal model at
|
||||
resources/models/snowboy.umdl, you can change it to other universal models, or
|
||||
your own personal models. You also have to provide your Google API key in order
|
||||
to use it.
|
||||
|
||||
Note: Google is now moving to Google Cloud Speech API, so we will have to update
|
||||
the API query later.
|
||||
|
||||
Usage: ./snowboy_googlevoice.pl <Google_API_Key> [Hotword_Model]
|
||||
e.g.: ./snowboy_googlevoice.pl \
|
||||
abcdefghijklmnopqrstuvwxyzABC0123456789 resources/models/snowboy.umdl
|
||||
|
||||
Allowed options:
|
||||
--language : Language for speech recognizer. (string, default="en")
|
||||
|
||||
EOU
|
||||
|
||||
# Recognizer language; --language on the command line overrides the default.
my $language = "en";
GetOptions('language=s' => \$language);

# One or two positional arguments must remain after option parsing.
my $argc = scalar @ARGV;
die $Usage unless $argc >= 1 && $argc <= 2;

# Gets parameters.
my $api_key = shift @ARGV;
my $model = shift @ARGV || 'resources/models/snowboy.umdl';

# Label used in the "Start by saying ..." prompt.
$hotword = ($model eq 'resources/models/snowboy.umdl')
    ? "Snowboy"
    : "your hotword";
|
||||
|
||||
# Output setting.
|
||||
STDOUT->autoflush(1);
|
||||
binmode STDOUT, ':utf8';
|
||||
|
||||
# Audio format.
|
||||
use constant RATE => 16000;
|
||||
use constant NUMCHANNELS => 1;
|
||||
use constant BITSPERSAMPLE => 16;
|
||||
|
||||
# Samples per data chunk count
|
||||
use constant SAMPLES => 640;
|
||||
|
||||
# Detects 500ms silence (12 blocks * 40 ms = after 500ms of speech)
|
||||
use constant TRAILING_SILENCE_BLOCKS => 12;
|
||||
|
||||
# Google Speech API endpoint (language-dependent).
# HTTPS is used so that the API key carried in the query string is not sent
# over the network in clear text.
$url = "https://www.google.com/speech-api/v2/recognize?lang="
    . $language
    . "&key="
    . $api_key
    . "&output=json&maxresults=1&grammar=builtin:search";
|
||||
|
||||
# Audio capturing.
|
||||
my $api = Audio::PortAudio::default_host_api();
|
||||
my $device = $api->default_input_device;
|
||||
my $stream = $device->open_read_stream(
|
||||
{channel_count => NUMCHANNELS, sample_format => 'int16'},
|
||||
RATE,
|
||||
SAMPLES);
|
||||
|
||||
# Collects 1000 msec worth of voice data and calculates the silence threshold
# and the DC offset.
print "\n";
print "Calculating statistics on silence, please be quite...\n";
$i = 0;
while ($i < (1 / (SAMPLES / RATE))) {
    # SLN format = 2 bytes per sample.
    $stream->read($buffer, SAMPLES);

    # Discards first (usually noisy) block.
    next if $i == 0;

    # Unpacks into an array of 16-bit linear samples.
    my $block = vector(unpack('s*', $buffer));

    my $block_dev = round(stddev($block));
    my $block_mean = round(mean($block));

    push @alldevs, $block_dev;
    push @allmeans, $block_mean;

    # Tracks the largest per-block deviation seen so far.
    if ($block_dev > $maxdev) {
        $maxdev = $block_dev;
    }
} continue {
    # Runs after every pass, including the ones ended early by "next".
    $i++;
}

# Threshold base: mean of the per-block deviations plus their own deviation.
my $vec = vector(@alldevs);
$stddev = round(stddev($vec));
$mean = round(mean($vec));

$maxdev = $mean + $stddev;

# Too quiet (good silence suppression, like SIP phones)
if ($maxdev < 100) {
    $maxdev = 100;
}

# Add margin to silence detection to be safe.
$maxdev = $maxdev * 2;

$dcoffset = round(mean(@allmeans));

print "Done (Silence Threshold: $maxdev, DC Offset: $dcoffset)\n";
|
||||
|
||||
# Snowboy decoder.
|
||||
$sb = new Snowboy::SnowboyDetect('resources/common.res', $model);
|
||||
$sb->SetSensitivity('0.5');
|
||||
$sb->SetAudioGain(1.0);
|
||||
$sb->ApplyFrontend(0);
|
||||
|
||||
# Running the detection forever.
|
||||
print "\n";
|
||||
print "Start by saying " . $hotword . "...\n";
|
||||
while (1) {
    # Read one block, remove the DC offset and feed it to the detector.
    $stream->read($buffer, SAMPLES);
    $processed = DSP($buffer);

    # Running the Snowboy detection.
    $result = $sb->RunDetection($processed);

    # Per-utterance state, reset on every pass.
    $silence_blocks = 0;
    $speech_blocks = 0;
    $prespeech = '';
    $speechbuffer = '';

    if ($result == 1) {
        print 'Speak> ';
        $sb->Reset();

        # Record until TRAILING_SILENCE_BLOCKS consecutive silent blocks
        # have been counted.
        while ($silence_blocks < TRAILING_SILENCE_BLOCKS) {
            $stream->read($buffer, SAMPLES);

            # Buffer up (trim the leading silence).
            $speechbuffer .= $buffer unless $speech_blocks < 5;

            if (isSilence($buffer)) {
                # Silent blocks only count once at least 10 speech blocks
                # have been heard.
                $silence_blocks++ unless $speech_blocks < 10;
            } else {
                $silence_blocks = 0;
                $speech_blocks++;
                # The first few speech blocks are kept separately, because
                # $speechbuffer only starts filling from the fifth one.
                $prespeech .= $buffer unless $speech_blocks >= 5;
                print '.';
            }
        }

        print "\n";

        $ua = LWP::UserAgent->new(debug => 1);
        $t1 = [gettimeofday];
        my $response = $ua->post(
            $url,
            Content_Type => "audio/l16; rate=" . RATE,
            Content => amp($prespeech . $speechbuffer));
        $t2 = [gettimeofday];

        if ($response->is_success) {
            # The second line of the response body is the one that is parsed.
            my $resp = (split /\n/, $response->content)[1];
            next if not $resp;
            $res = decode_json($resp);

            # Index with the decoded hash ($res); the original bareword
            # "res" never read result_index from the response.
            $result = $res->{result}[$res->{result_index}]
                ->{alternative}[0]->{transcript};
        } else {
            # Drop the uploaded audio from the stored request before dying.
            delete $response->{'_request'}->{'_content'};
            print "Failed to do speech recognition from Google Speech API:\n";
            die $response->status_line;
        }

        print "$result (", tv_interval ($t1, $t2), " sec)\n";
        print "\n";
        print "Start by saying " . $hotword . "...\n";
    }
}
|
||||
|
||||
sub DSP {
    # Removes the DC offset from one block of audio.
    #
    # Argument: raw block of native signed 16-bit samples.
    # Returns:  the block re-packed with $dcoffset subtracted from every
    #           sample. $dcoffset is the offset measured during the silence
    #           calibration, not a per-block mean.
    my $mysamples = shift;

    # Lexical on purpose: the original "my @processed, @samples;" only
    # declared @processed and silently used a global @samples.
    my @samples = unpack('s*', $mysamples);

    return pack('s*', map { $_ - $dcoffset } @samples);
}
|
||||
|
||||
sub isSilence {
    # True when the standard deviation of the block's samples is below the
    # silence threshold $maxdev.
    my ($block) = @_;

    # Unpacks into an array of 16-bit linear samples.
    my @pcm = unpack('s*', $block);
    my $deviation = round(stddev(vector(@pcm)));

    return $deviation < $maxdev;
}
|
||||
|
||||
sub amp {
    # Amplifies a block of native signed 16-bit samples by a factor of 8.
    #
    # Results are clamped to the 16-bit range, so loud samples saturate
    # instead of wrapping around. A plain multiply is used because Perl's
    # shift operators treat negative values as unsigned integers.
    my $samples = shift;

    return pack 's*', map {
        my $scaled = $_ * 8;
        $scaled >  32767 ?  32767
      : $scaled < -32768 ? -32768
      :                     $scaled;
    } unpack('s*', $samples);
}
|
||||
|
||||
sub round {
    # Rounds a number to the nearest integer, halves away from zero.
    #
    # int() truncates toward zero, so "int($n + .5)" alone gives the wrong
    # answer for negative input (for example a negative DC offset).
    my $number = 0 + shift;    # numify first (also works for overloaded objects)

    return $number < 0 ? -int(-$number + .5) : int($number + .5);
}
|
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use Snowboy;
|
||||
use Fcntl;
|
||||
|
||||
# Positive test: the bundled recording must trigger the universal model.
my $wav_file = 'resources/snowboy.wav';

# Three-argument open with an explicit failure message; binmode because the
# file is binary audio data.
open(my $wav, '<', $wav_file) or die "Cannot open $wav_file: $!\n";
binmode $wav;

# With $INPUT_RECORD_SEPARATOR undefined, a single read returns the full file.
my $data = do { local $/ = undef; <$wav> };
close $wav;

my $sb = Snowboy::SnowboyDetect->new('resources/common.res',
                                     'resources/models/snowboy.umdl');

$sb->SetSensitivity("0.5");
$sb->SetAudioGain(1);
$sb->ApplyFrontend(0);

print "==== SnowBoy object properties ====\n";
print "Sample Rate : ", $sb->SampleRate(), "\n";
print "Number of Channels : ", $sb->NumChannels(), "\n";
print "Bits per Sample : ", $sb->BitsPerSample(), "\n";
print "Number of hotwords : ", $sb->NumHotwords(), "\n\n";

# A positive return value is treated as a detection.
if ($sb->RunDetection($data) > 0) {
    print "Unit test passed!\n"
} else {
    print "Unit test failed!\n"
}
|
@ -0,0 +1,35 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import signal
|
||||
|
||||
interrupted = False
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
    """Signal handler: set the module-level ``interrupted`` flag to True."""
    global interrupted
    interrupted = True
|
||||
|
||||
|
||||
def interrupt_callback():
    """Return the current value of the module-level ``interrupted`` flag."""
    # Reading a module global needs no ``global`` declaration.
    return interrupted
|
||||
|
||||
if len(sys.argv) == 1:
    for usage_line in ("Error: need to specify model name",
                       "Usage: python demo.py your.model"):
        print(usage_line)
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
print('Listening... Press Ctrl+C to exit')

# main loop: runs until interrupt_callback() reports an interrupt
start_options = dict(
    detected_callback=snowboydecoder.play_audio_file,
    interrupt_check=interrupt_callback,
    sleep_time=0.03,
)
detector.start(**start_options)

detector.terminate()
|
@ -0,0 +1,41 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import signal
|
||||
|
||||
# Demo code for listening to two hotwords at the same time
|
||||
|
||||
interrupted = False
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
    """Signal handler: set the module-level ``interrupted`` flag to True."""
    global interrupted
    interrupted = True
|
||||
|
||||
|
||||
def interrupt_callback():
    """Return the current value of the module-level ``interrupted`` flag."""
    # Reading a module global needs no ``global`` declaration.
    return interrupted
|
||||
|
||||
if len(sys.argv) != 3:
    for usage_line in ("Error: need to specify 2 model names",
                       "Usage: python demo.py 1st.model 2nd.model"):
        print(usage_line)
    sys.exit(-1)

models = sys.argv[1:]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

# One sensitivity value per model.
sensitivity = [0.5 for _ in models]
detector = snowboydecoder.HotwordDetector(models, sensitivity=sensitivity)


def _play_ding():
    snowboydecoder.play_audio_file(snowboydecoder.DETECT_DING)


def _play_dong():
    snowboydecoder.play_audio_file(snowboydecoder.DETECT_DONG)


# First model answers with a ding, second with a dong.
callbacks = [_play_ding, _play_dong]
print('Listening... Press Ctrl+C to exit')

# main loop
# make sure you have the same numbers of callbacks and models
detector.start(detected_callback=callbacks,
               interrupt_check=interrupt_callback,
               sleep_time=0.03)

detector.terminate()
|
@ -0,0 +1,40 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import wave
|
||||
|
||||
# Demo code for detecting hotword in a .wav file
|
||||
# Example Usage:
|
||||
# $ python demo3.py resources/snowboy.wav resources/models/snowboy.umdl
|
||||
# Should print:
|
||||
# Hotword Detected!
|
||||
#
|
||||
# $ python demo3.py resources/ding.wav resources/models/snowboy.umdl
|
||||
# Should print:
|
||||
# Hotword Not Detected!
|
||||
|
||||
|
||||
if len(sys.argv) != 3:
    print("Error: need to specify wave file name and model name")
    print("Usage: python demo3.py wave_file model_file")
    sys.exit(-1)

wave_file = sys.argv[1]
model_file = sys.argv[2]

f = wave.open(wave_file)
try:
    # The input must be mono, 16 kHz, 16-bit audio.
    assert f.getnchannels() == 1, "Error: Snowboy only supports 1 channel of audio (mono, not stereo)"
    assert f.getframerate() == 16000, "Error: Snowboy only supports 16K sampling rate"
    assert f.getsampwidth() == 2, "Error: Snowboy only supports 16bit per sample"
    data = f.readframes(f.getnframes())
finally:
    # Release the file handle even when one of the checks above fails.
    f.close()

sensitivity = 0.5
detection = snowboydecoder.HotwordDetector(model_file, sensitivity=sensitivity)

ans = detection.detector.RunDetection(data)

# Any positive value is a detection (the decoder module and the Perl unit
# test use the same "> 0" check), not only the value 1.
if ans > 0:
    print('Hotword Detected!')
else:
    print('Hotword Not Detected!')
|
||||
|
@ -0,0 +1,76 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import signal
|
||||
import speech_recognition as sr
|
||||
import os
|
||||
|
||||
"""
|
||||
This demo file shows you how to use the new_message_callback to interact with
|
||||
the recorded audio after a keyword is spoken. It uses the speech recognition
|
||||
library in order to convert the recorded audio into text.
|
||||
|
||||
Information on installing the speech recognition library can be found at:
|
||||
https://pypi.python.org/pypi/SpeechRecognition/
|
||||
"""
|
||||
|
||||
|
||||
interrupted = False
|
||||
|
||||
|
||||
def audioRecorderCallback(fname):
    """Transcribe the recorded phrase in ``fname``, print it, delete the file.

    :param fname: path of the audio file to transcribe.
    :return: None
    """
    # print() with a single argument works on both Python 2 and Python 3.
    print("converting audio to text")
    r = sr.Recognizer()
    try:
        with sr.AudioFile(fname) as source:
            audio = r.record(source)  # read the entire audio file
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            # instead of `r.recognize_google(audio)`
            print(r.recognize_google(audio))
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
    finally:
        # Remove the recording even when reading or recognition fails
        # unexpectedly.
        os.remove(fname)
|
||||
|
||||
|
||||
|
||||
def detectedCallback():
    """Write the "recording audio..." notice to stdout and flush it."""
    out = sys.stdout
    out.write("recording audio...")
    out.flush()
|
||||
|
||||
def signal_handler(signal, frame):
    """Signal handler: set the module-level ``interrupted`` flag to True."""
    global interrupted
    interrupted = True
|
||||
|
||||
|
||||
def interrupt_callback():
    """Return the current value of the module-level ``interrupted`` flag."""
    # Reading a module global needs no ``global`` declaration.
    return interrupted
|
||||
|
||||
# print() is used throughout so the script runs on Python 2 and Python 3.
if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo.py your.model")
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

detector = snowboydecoder.HotwordDetector(model, sensitivity=0.38)
print("Listening... Press Ctrl+C to exit")

# main loop: detectedCallback fires on the keyword, audioRecorderCallback
# receives the file holding the phrase recorded after it.
detector.start(detected_callback=detectedCallback,
               audio_recorder_callback=audioRecorderCallback,
               interrupt_check=interrupt_callback,
               sleep_time=0.01)

detector.terminate()
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,35 @@
|
||||
import snowboydecoder_arecord
|
||||
import sys
|
||||
import signal
|
||||
|
||||
interrupted = False
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
    """Signal handler: set the module-level ``interrupted`` flag to True."""
    global interrupted
    interrupted = True
|
||||
|
||||
|
||||
def interrupt_callback():
    """Return the current value of the module-level ``interrupted`` flag."""
    # Reading a module global needs no ``global`` declaration.
    return interrupted
|
||||
|
||||
if len(sys.argv) == 1:
    for usage_line in ("Error: need to specify model name",
                       "Usage: python demo.py your.model"):
        print(usage_line)
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

detector = snowboydecoder_arecord.HotwordDetector(model, sensitivity=0.5)
print('Listening... Press Ctrl+C to exit')

# main loop: runs until interrupt_callback() reports an interrupt
start_options = dict(
    detected_callback=snowboydecoder_arecord.play_audio_file,
    interrupt_check=interrupt_callback,
    sleep_time=0.03,
)
detector.start(**start_options)

detector.terminate()
|
@ -0,0 +1,47 @@
|
||||
import snowboythreaded
|
||||
import sys
|
||||
import signal
|
||||
import time
|
||||
|
||||
stop_program = False
|
||||
|
||||
# This is a demo that shows running Snowboy in another thread
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
    """Signal handler: set the module-level ``stop_program`` flag to True."""
    global stop_program
    stop_program = True
|
||||
|
||||
|
||||
if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo4.py your.model")
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

# Initialize ThreadedDetector object and start the detection thread
threaded_detector = snowboythreaded.ThreadedDetector(model, sensitivity=0.5)
threaded_detector.start()

print('Listening... Press Ctrl+C to exit')

# main loop
threaded_detector.start_recog(sleep_time=0.03)

# Let audio initialization happen before requesting input
time.sleep(1)

# raw_input only exists on Python 2; on Python 3 the same function is input.
try:
    read_line = raw_input
except NameError:
    read_line = input

# Do a simple task separate from the detection - addition of numbers
while not stop_program:
    try:
        num1 = int(read_line("Enter the first number to add: "))
        num2 = int(read_line("Enter the second number to add: "))
        print("Sum of number: {}".format(num1 + num2))
    except ValueError:
        print("You did not enter a number.")

threaded_detector.terminate()
|
@ -0,0 +1 @@
|
||||
PyAudio==0.2.9
|
@ -0,0 +1 @@
|
||||
../../resources/
|
@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import collections
|
||||
import pyaudio
|
||||
import snowboydetect
|
||||
import time
|
||||
import wave
|
||||
import os
|
||||
import logging
|
||||
from ctypes import *
|
||||
from contextlib import contextmanager
|
||||
|
||||
logging.basicConfig()
|
||||
logger = logging.getLogger("snowboy")
|
||||
logger.setLevel(logging.INFO)
|
||||
TOP_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
|
||||
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
|
||||
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
|
||||
|
||||
def py_error_handler(filename, line, function, err, fmt):
    """Ignore an error report: every argument is discarded."""
    return None
|
||||
|
||||
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
|
||||
|
||||
c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
|
||||
|
||||
@contextmanager
def no_alsa_error():
    """Run the ``with`` body with ALSA error reporting routed to a no-op.

    If ``libasound.so`` cannot be loaded or the handler cannot be installed,
    the body simply runs unchanged. The generator yields exactly once, so an
    exception raised inside the body propagates to the caller as-is, and the
    handler is reset in all cases.
    """
    try:
        asound = cdll.LoadLibrary('libasound.so')
        asound.snd_lib_error_set_handler(c_error_handler)
    except Exception:
        # Best effort: without ALSA there is nothing to silence.
        asound = None
    try:
        yield
    finally:
        if asound is not None:
            asound.snd_lib_error_set_handler(None)
|
||||
|
||||
class RingBuffer(object):
    """Bounded buffer for audio captured from PortAudio.

    Backed by a ``collections.deque`` created with ``maxlen=size``: once
    ``size`` items are stored, each newly added item pushes out the oldest.
    """

    def __init__(self, size = 4096):
        capacity = size
        self._buf = collections.deque(maxlen=capacity)

    def extend(self, data):
        """Append every item of ``data`` to the tail of the buffer."""
        self._buf.extend(data)

    def get(self):
        """Return all buffered data as ``bytes`` and leave the buffer empty."""
        pending = bytearray(self._buf)
        self._buf.clear()
        return bytes(pending)
|
||||
|
||||
|
||||
def play_audio_file(fname=DETECT_DING):
    """Simple callback function to play a wave file. By default it plays
    a Ding sound.

    The wave file is closed as soon as its frames and format have been read,
    and the output stream and PyAudio instance are released even if playback
    fails.

    :param str fname: wave file name
    :return: None
    """
    ding_wav = wave.open(fname, 'rb')
    try:
        ding_data = ding_wav.readframes(ding_wav.getnframes())
        sample_width = ding_wav.getsampwidth()
        channels = ding_wav.getnchannels()
        rate = ding_wav.getframerate()
    finally:
        ding_wav.close()

    with no_alsa_error():
        audio = pyaudio.PyAudio()
    try:
        stream_out = audio.open(
            format=audio.get_format_from_width(sample_width),
            channels=channels,
            rate=rate, input=False, output=True)
        try:
            stream_out.start_stream()
            stream_out.write(ding_data)
            # Short pause between writing the data and stopping the stream.
            time.sleep(0.2)
            stream_out.stop_stream()
        finally:
            stream_out.close()
    finally:
        audio.terminate()
|
||||
|
||||
|
||||
class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.

    :param decoder_model: decoder model file path, a string or a list of strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float or a list of floats.
                              The bigger the value, the more sensitive the
                              decoder. If an empty list is provided, then the
                              default sensitivity in the model will be used.
    :param audio_gain: multiply input volume by this factor.
    :param apply_frontend: applies the frontend processing algorithm if True.
    """
    def __init__(self, decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False):

        def audio_callback(in_data, frame_count, time_info, status):
            # Called by PyAudio for every captured buffer: queue the audio
            # for start() and hand back an equally long run of zero bytes.
            self.ring_buffer.extend(in_data)
            play_data = b"\x00" * len(in_data)
            return play_data, pyaudio.paContinue

        # Accept a single value or a list for both arguments. The default
        # list is only ever rebound below, never mutated.
        if not isinstance(decoder_model, list):
            decoder_model = [decoder_model]
        if not isinstance(sensitivity, list):
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # A single sensitivity given for several models applies to every
        # hotword.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            sensitivity_str = ",".join([str(t) for t in sensitivity])
            self.detector.SetSensitivity(sensitivity_str.encode())

        # Buffer capacity: channels * sample rate * 5 items.
        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)
        with no_alsa_error():
            self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True, output=False,
            # Floor division keeps the sample width an int on Python 3.
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() // 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)

    def start(self, detected_callback=play_audio_file,
              interrupt_check=lambda: False,
              sleep_time=0.03,
              audio_recorder_callback=None,
              silent_count_threshold=15,
              recording_timeout=100):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :param audio_recorder_callback: if specified, this will be called after
                                        a keyword has been spoken and after the
                                        phrase immediately after the keyword has
                                        been recorded. The function will be
                                        passed the name of the file where the
                                        phrase was recorded.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param recording_timeout: limits the maximum length of a recording.
        :return: None
        """
        if interrupt_check():
            logger.debug("detect voice return")
            return

        if not isinstance(detected_callback, list):
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            # Build a new list; an in-place "*=" would grow the caller's list.
            detected_callback = detected_callback * self.num_hotwords

        assert self.num_hotwords == len(detected_callback), \
            "Error: hotwords in your models (%d) do not match the number of " \
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))

        logger.debug("detecting...")

        # PASSIVE: waiting for a keyword. ACTIVE: recording the phrase that
        # follows it (only entered when audio_recorder_callback is given).
        state = "PASSIVE"
        silentCount = 0
        recordingCount = 0
        while True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning("Error initializing streams or reading audio data")

            # small state machine to handle recording of phrase after keyword
            if state == "PASSIVE":
                if status > 0:  # key word found
                    self.recordedData = []
                    self.recordedData.append(data)
                    silentCount = 0
                    recordingCount = 0
                    message = "Keyword " + str(status) + " detected at time: "
                    message += time.strftime("%Y-%m-%d %H:%M:%S",
                                             time.localtime(time.time()))
                    logger.info(message)
                    # status is the 1-based index of the detected hotword.
                    callback = detected_callback[status-1]
                    if callback is not None:
                        callback()

                    if audio_recorder_callback is not None:
                        state = "ACTIVE"
                    continue

            elif state == "ACTIVE":
                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2:  # silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0:  # voice found
                    silentCount = 0

                if stopRecording:
                    fname = self.saveMessage()
                    audio_recorder_callback(fname)
                    state = "PASSIVE"
                    continue

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.

        :return: name of the wave file that was written.
        """
        filename = 'output' + str(int(time.time())) + '.wav'
        data = b''.join(self.recordedData)

        # use wave to save data; the file is closed even if a write fails
        wf = wave.open(filename, 'wb')
        try:
            # Same channel count the input stream was opened with.
            wf.setnchannels(self.detector.NumChannels())
            wf.setsampwidth(self.audio.get_sample_size(
                self.audio.get_format_from_width(
                    self.detector.BitsPerSample() // 8)))
            wf.setframerate(self.detector.SampleRate())
            wf.writeframes(data)
        finally:
            wf.close()
        logger.debug("finished saving: " + filename)
        return filename

    def terminate(self):
        """
        Terminate audio stream. Users cannot call start() again to detect.
        :return: None
        """
        self.stream_in.stop_stream()
        self.stream_in.close()
        self.audio.terminate()
|
@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import collections
|
||||
import snowboydetect
|
||||
import time
|
||||
import wave
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
import threading
|
||||
|
||||
logging.basicConfig()
|
||||
logger = logging.getLogger("snowboy")
|
||||
logger.setLevel(logging.INFO)
|
||||
TOP_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
|
||||
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
|
||||
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
|
||||
|
||||
|
||||
class RingBuffer(object):
    """Bounded buffer for audio coming from the audio capturing tool.

    Backed by a ``collections.deque`` created with ``maxlen=size``: once
    ``size`` items are stored, each newly added item pushes out the oldest.
    """

    def __init__(self, size = 4096):
        capacity = size
        self._buf = collections.deque(maxlen=capacity)

    def extend(self, data):
        """Append every item of ``data`` to the tail of the buffer."""
        self._buf.extend(data)

    def get(self):
        """Return all buffered data as ``bytes`` and leave the buffer empty."""
        pending = bytearray(self._buf)
        self._buf.clear()
        return bytes(pending)
|
||||
|
||||
|
||||
def play_audio_file(fname=DETECT_DING):
    """Simple callback function to play a wave file. By default it plays
    a Ding sound.

    The file is played with the external ``aplay`` program and its output is
    discarded. The command is passed as an argument list rather than a shell
    string, so file names containing spaces or shell metacharacters are
    handled safely.

    :param str fname: wave file name
    :return: None
    """
    try:
        with open(os.devnull, "wb") as devnull:
            subprocess.call(["aplay", fname], stdout=devnull, stderr=devnull)
    except OSError as err:
        # Playback is best effort (e.g. aplay is not installed): report the
        # problem instead of aborting hotword detection.
        logger.warning("could not play %s: %s", fname, err)
|
||||
|
||||
|
||||
class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.

    :param decoder_model: decoder model file path, a string or a list of strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float or a list of floats.
                        The bigger the value, the more sensitive the
                        decoder. If an empty list is provided, then the
                        default sensitivity in the model will be used.
    :param audio_gain: multiply input volume by this factor.
    """

    def __init__(self, decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1):
        # NOTE: the list default is only ever rebound below, never mutated,
        # so sharing it between calls is harmless.
        if not isinstance(decoder_model, list):
            decoder_model = [decoder_model]
        if not isinstance(sensitivity, list):
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.detector.SetAudioGain(audio_gain)
        self.num_hotwords = self.detector.NumHotwords()

        # A single sensitivity given for several models applies to all hotwords.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            sensitivity_str = ",".join([str(t) for t in sensitivity])
            self.detector.SetSensitivity(sensitivity_str.encode())

        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)

        # Recording state; the capture thread is created by init_recording().
        self.recording = False
        self.record_thread = None

    def record_proc(self):
        """
        Body of the recording thread: spawn `arecord` and copy the audio it
        writes to stdout into the ring buffer until `self.recording` is
        cleared or `arecord` stops producing data.
        """
        CHUNK = 2048
        RECORD_RATE = 16000
        cmd = ['arecord', '-q', '-r', str(RECORD_RATE), '-f', 'S16_LE']
        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        try:
            wav = wave.open(process.stdout, 'rb')
            while self.recording:
                data = wav.readframes(CHUNK)
                if not data:
                    # End of stream: arecord has exited. Stop instead of
                    # spinning on empty reads.
                    logger.warning("arecord stopped producing audio")
                    break
                self.ring_buffer.extend(data)
        finally:
            # Always stop and reap the child and close its pipes so no
            # zombie process or open file descriptor is left behind.
            process.terminate()
            process.stdout.close()
            process.stderr.close()
            process.wait()

    def init_recording(self):
        """
        Start a thread for spawning arecord process and reading its stdout

        If a recording thread from a previous start() call is still alive it
        is reused, so that restarting detection does not spawn a second
        `arecord` process.
        """
        if self.record_thread is not None and self.record_thread.is_alive():
            return
        self.recording = True
        self.record_thread = threading.Thread(target=self.record_proc)
        self.record_thread.start()

    def start(self, detected_callback=play_audio_file,
              interrupt_check=lambda: False,
              sleep_time=0.03):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :return: None
        """
        self.init_recording()

        if interrupt_check():
            logger.debug("detect voice return")
            return

        if not isinstance(detected_callback, list):
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            # Build a new list: "*=" would modify the caller's list in place.
            detected_callback = detected_callback * self.num_hotwords

        assert self.num_hotwords == len(detected_callback), \
            "Error: hotwords in your models (%d) do not match the number of " \
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))

        logger.debug("detecting...")

        while True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            ans = self.detector.RunDetection(data)
            if ans == -1:
                logger.warning("Error initializing streams or reading audio data")
            elif ans > 0:
                message = "Keyword " + str(ans) + " detected at time: "
                message += time.strftime("%Y-%m-%d %H:%M:%S",
                                         time.localtime(time.time()))
                logger.info(message)
                # Hotword indices reported by the detector are 1-based.
                callback = detected_callback[ans-1]
                if callback is not None:
                    callback()

        logger.debug("finished.")

    def terminate(self):
        """
        Terminate audio stream. Users cannot call start() again to detect.

        Safe to call even when start() was never called.

        :return: None
        """
        self.recording = False
        if self.record_thread is not None:
            self.record_thread.join()
            self.record_thread = None
|
||||
|
@ -0,0 +1,96 @@
|
||||
import snowboydecoder
|
||||
import threading
|
||||
import Queue
|
||||
|
||||
|
||||
class ThreadedDetector(threading.Thread):
    """
    Wrapper class around detectors to run them in a separate thread
    and provide methods to pause, resume, and modify detection
    """

    def __init__(self, models, **kwargs):
        """
        Initialize Detectors object. **kwargs is for any __init__ keyword
        arguments to be passed into HotwordDetector __init__() method.
        """
        threading.Thread.__init__(self)
        self.models = models
        self.init_kwargs = kwargs
        self.interrupted = True
        self.commands = Queue.Queue()
        # True whenever models/sensitivity changed since the detector was built.
        self.vars_are_changed = True
        self.detectors = None  # Initialize when thread is run in self.run()
        self.run_kwargs = None  # Initialize when detectors start in self.start_recog()

    def initialize_detectors(self):
        """
        Creates the Snowboy HotwordDetector object from the current models
        and init keyword arguments and stores it in self.detectors
        """
        self.detectors = snowboydecoder.HotwordDetector(self.models, **self.init_kwargs)

    def run(self):
        """
        Runs in separate thread - waits on command to either run detectors
        or terminate thread from commands queue
        """
        try:
            while True:
                command = self.commands.get(True)
                if command == "Start":
                    self.interrupted = False
                    if self.vars_are_changed:
                        # If there is an existing detector object, terminate it
                        if self.detectors is not None:
                            self.detectors.terminate()
                        self.initialize_detectors()
                        self.vars_are_changed = False
                    # Start detectors - blocks until interrupted by self.interrupted variable
                    self.detectors.start(interrupt_check=lambda: self.interrupted, **self.run_kwargs)
                elif command == "Terminate":
                    # Program ending - terminate thread
                    break
        finally:
            if self.detectors is not None:
                self.detectors.terminate()

    def start_recog(self, **kwargs):
        """
        Starts recognition in thread. Accepts kwargs to pass into the
        HotwordDetector.start() method, but does not accept interrupt_check,
        as that is already set up.
        """
        assert "interrupt_check" not in kwargs, \
            "Cannot set interrupt_check argument. To interrupt detectors, use Detectors.pause_recog() instead"
        self.run_kwargs = kwargs
        self.commands.put("Start")

    def pause_recog(self):
        """
        Halts recognition in thread.
        """
        self.interrupted = True

    def terminate(self):
        """
        Terminates recognition thread - called when program terminates
        """
        self.pause_recog()
        self.commands.put("Terminate")

    def is_running(self):
        """Return True while recognition has not been interrupted."""
        return not self.interrupted

    def change_models(self, models):
        """
        Replace the models; takes effect the next time detection is started.
        """
        if self.is_running():
            print("Models will be changed after restarting detectors.")
        if self.models != models:
            self.models = models
            self.vars_are_changed = True

    def change_sensitivity(self, sensitivity):
        """
        Replace the sensitivity; takes effect the next time detection is
        started. Works whether or not a sensitivity was given at construction.
        """
        if self.is_running():
            print("Sensitivity will be changed after restarting detectors.")
        # The key is absent when the constructor got no sensitivity; indexing
        # it directly would raise KeyError.
        if ('sensitivity' not in self.init_kwargs
                or self.init_kwargs['sensitivity'] != sensitivity):
            self.init_kwargs['sensitivity'] = sensitivity
            self.vars_are_changed = True
|
@ -0,0 +1,35 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import signal
|
||||
|
||||
# Stop flag: raised by the signal handler, polled by the detection loop.
interrupted = False


def signal_handler(signal, frame):
    """Signal handler: request that the detection loop stops."""
    global interrupted
    interrupted = True


def interrupt_callback():
    """Return the current value of the module-level stop flag."""
    return interrupted
|
||||
|
||||
if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo.py your.model")
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
print('Listening... Press Ctrl+C to exit')

# main loop; always release the detector's audio resources, even when the
# loop exits with an exception
try:
    detector.start(detected_callback=snowboydecoder.play_audio_file,
                   interrupt_check=interrupt_callback,
                   sleep_time=0.03)
finally:
    detector.terminate()
|
@ -0,0 +1,41 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import signal
|
||||
|
||||
# Demo code for listening to two hotwords at the same time
|
||||
|
||||
# Stop flag: raised by the signal handler, polled by the detection loop.
interrupted = False


def signal_handler(signal, frame):
    """Signal handler: request that the detection loop stops."""
    global interrupted
    interrupted = True


def interrupt_callback():
    """Return the current value of the module-level stop flag."""
    return interrupted
|
||||
|
||||
if len(sys.argv) != 3:
    print("Error: need to specify 2 model names")
    print("Usage: python demo.py 1st.model 2nd.model")
    sys.exit(-1)

models = sys.argv[1:]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

sensitivity = [0.5]*len(models)
detector = snowboydecoder.HotwordDetector(models, sensitivity=sensitivity)
callbacks = [lambda: snowboydecoder.play_audio_file(snowboydecoder.DETECT_DING),
             lambda: snowboydecoder.play_audio_file(snowboydecoder.DETECT_DONG)]
print('Listening... Press Ctrl+C to exit')

# main loop
# make sure you have the same numbers of callbacks and models
# always release the detector's audio resources, even when the loop exits
# with an exception
try:
    detector.start(detected_callback=callbacks,
                   interrupt_check=interrupt_callback,
                   sleep_time=0.03)
finally:
    detector.terminate()
|
@ -0,0 +1,40 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import wave
|
||||
|
||||
# Demo code for detecting hotword in a .wav file
|
||||
# Example Usage:
|
||||
# $ python demo3.py resources/snowboy.wav resources/models/snowboy.umdl
|
||||
# Should print:
|
||||
# Hotword Detected!
|
||||
#
|
||||
# $ python demo3.py resources/ding.wav resources/models/snowboy.umdl
|
||||
# Should print:
|
||||
# Hotword Not Detected!
|
||||
|
||||
|
||||
if len(sys.argv) != 3:
    print("Error: need to specify wave file name and model name")
    print("Usage: python demo3.py wave_file model_file")
    sys.exit(-1)

wave_file = sys.argv[1]
model_file = sys.argv[2]

f = wave.open(wave_file)
try:
    assert f.getnchannels() == 1, "Error: Snowboy only supports 1 channel of audio (mono, not stereo)"
    assert f.getframerate() == 16000, "Error: Snowboy only supports 16K sampling rate"
    assert f.getsampwidth() == 2, "Error: Snowboy only supports 16bit per sample"
    data = f.readframes(f.getnframes())
finally:
    # Close the wave file even when one of the format checks fails.
    f.close()

sensitivity = 0.5
detection = snowboydecoder.HotwordDetector(model_file, sensitivity=sensitivity)

# Run the detector once over the whole file's audio data.
ans = detection.detector.RunDetection(data)

if ans == 1:
    print('Hotword Detected!')
else:
    print('Hotword Not Detected!')
|
||||
|
@ -0,0 +1,75 @@
|
||||
import snowboydecoder
|
||||
import sys
|
||||
import signal
|
||||
import speech_recognition as sr
|
||||
import os
|
||||
|
||||
"""
|
||||
This demo file shows you how to use the audio_recorder_callback to interact with
|
||||
the recorded audio after a keyword is spoken. It uses the speech recognition
|
||||
library in order to convert the recorded audio into text.
|
||||
|
||||
Information on installing the speech recognition library can be found at:
|
||||
https://pypi.python.org/pypi/SpeechRecognition/
|
||||
"""
|
||||
|
||||
|
||||
# Stop flag: set to True by signal_handler and returned by interrupt_callback,
# which is passed to the detector as its interrupt_check.
interrupted = False
|
||||
|
||||
|
||||
def audioRecorderCallback(fname):
    """
    Convert the recorded audio file to text with Google Speech Recognition,
    print the result, and delete the file.

    The file is removed even when reading or recognizing the audio fails.

    :param fname: path of the recorded audio file.
    :return: None
    """
    print("converting audio to text")
    r = sr.Recognizer()
    try:
        with sr.AudioFile(fname) as source:
            audio = r.record(source)  # read the entire audio file
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            # instead of `r.recognize_google(audio)`
            print(r.recognize_google(audio))
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
    finally:
        # Clean up the recording regardless of how recognition went.
        os.remove(fname)
|
||||
|
||||
|
||||
|
||||
def detectedCallback():
    """Write a progress notice to stdout without a trailing newline and flush it."""
    sys.stdout.write('recording audio...')
    sys.stdout.flush()
|
||||
|
||||
def signal_handler(signal, frame):
    """Signal handler: request that the detection loop stops."""
    global interrupted
    interrupted = True


def interrupt_callback():
    """Return the current value of the module-level stop flag."""
    return interrupted
|
||||
|
||||
if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo.py your.model")
    sys.exit(-1)

model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

detector = snowboydecoder.HotwordDetector(model, sensitivity=0.38)
print('Listening... Press Ctrl+C to exit')

# main loop; always release the detector's audio resources, even when the
# loop exits with an exception
try:
    detector.start(detected_callback=detectedCallback,
                   audio_recorder_callback=audioRecorderCallback,
                   interrupt_check=interrupt_callback,
                   sleep_time=0.01)
finally:
    detector.terminate()
|
||||
|
||||
|
||||
|
||||
|