Skip to content

Add no_std + alloc support #55

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Apr 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ sudo: false
script:
- cargo build --verbose
- cargo test --verbose
- cargo test --verbose --no-default-features
- cargo package
- cd target/package/unicode-normalization-*
- cargo test --verbose
- cargo test --verbose --no-default-features
notifications:
email:
on_success: never
11 changes: 9 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "unicode-normalization"
version = "0.1.12"
version = "0.1.13"
authors = ["kwantam <[email protected]>"]

homepage = "https://github.com/unicode-rs/unicode-normalization"
Expand All @@ -18,8 +18,15 @@ Decomposition and Recomposition, as described in
Unicode Standard Annex #15.
"""

edition = "2018"

exclude = [ "target/*", "Cargo.lock", "scripts/tmp", "*.txt", "tests/*" ]

[dependencies.tinyvec]
version = "0.3.2"
version = "0.3.3"
features = ["alloc"]


[features]
default = ["std"]
std = []
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,9 @@ to your `Cargo.toml`:

```toml
[dependencies]
unicode-normalization = "0.1.8"
unicode-normalization = "0.1.13"
```

## `no_std` + `alloc` support

This crate is completely `no_std` + `alloc` compatible. This can be enabled by disabling the `std` feature, i.e. specifying `default-features = false` for this crate in your `Cargo.toml`.
4 changes: 2 additions & 2 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![feature(test)]
#![feature(iterator_step_by)]
extern crate unicode_normalization;

extern crate test;
extern crate unicode_normalization;

use std::fs;
use test::Bencher;
Expand Down
4 changes: 2 additions & 2 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,8 @@ def minimal_perfect_hash(d):
data = UnicodeData()
with open("tables.rs", "w", newline = "\n") as out:
out.write(PREAMBLE)
out.write("use quick_check::IsNormalized;\n")
out.write("use quick_check::IsNormalized::*;\n")
out.write("use crate::quick_check::IsNormalized;\n")
out.write("use crate::quick_check::IsNormalized::*;\n")
out.write("\n")

version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
Expand Down
7 changes: 6 additions & 1 deletion src/__test_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
//
// If you're caught using this outside this crate's tests/, you get to clean up the mess.

#[cfg(not(feature = "std"))]
use crate::no_std_prelude::*;

use crate::stream_safe::StreamSafe;

/// Feeds the characters of `s` through `StreamSafe` and collects the
/// resulting character stream into an owned `String`.
pub fn stream_safe(s: &str) -> String {
StreamSafe::new(s.chars()).collect()
StreamSafe::new(s.chars()).collect()
}

/// Glob re-export of `crate::quick_check`, so its items are reachable
/// through this module's path.
pub mod quick_check {
pub use crate::quick_check::*;
}
14 changes: 7 additions & 7 deletions src/decompose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use core::fmt::{self, Write};
use core::iter::Fuse;
use core::ops::Range;
use tinyvec::TinyVec;
use std::fmt::{self, Write};
use std::iter::Fuse;
use std::ops::Range;

#[derive(Clone)]
enum DecompositionType {
Expand All @@ -37,7 +37,7 @@ pub struct Decompositions<I> {
}

#[inline]
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
pub fn new_canonical<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
Decompositions {
kind: self::DecompositionType::Canonical,
iter: iter.fuse(),
Expand All @@ -47,7 +47,7 @@ pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
}

#[inline]
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
pub fn new_compatible<I: Iterator<Item = char>>(iter: I) -> Decompositions<I> {
Decompositions {
kind: self::DecompositionType::Compatible,
iter: iter.fuse(),
Expand Down Expand Up @@ -99,7 +99,7 @@ impl<I> Decompositions<I> {
}
}

impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
type Item = char;

#[inline]
Expand Down Expand Up @@ -149,7 +149,7 @@ impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
}
}

impl<I: Iterator<Item=char> + Clone> fmt::Display for Decompositions<I> {
impl<I: Iterator<Item = char> + Clone> fmt::Display for Decompositions<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for c in self.clone() {
f.write_char(c)?;
Expand Down
63 changes: 30 additions & 33 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,81 +38,78 @@
//! ```

#![deny(missing_docs, unsafe_code)]
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
#![doc(
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
)]
#![cfg_attr(not(feature = "std"), no_std)]

#[cfg(not(feature = "std"))]
extern crate alloc;

#[cfg(feature = "std")]
extern crate core;

extern crate tinyvec;

pub use tables::UNICODE_VERSION;
pub use decompose::Decompositions;
pub use quick_check::{
pub use crate::decompose::Decompositions;
pub use crate::quick_check::{
is_nfc, is_nfc_quick, is_nfc_stream_safe, is_nfc_stream_safe_quick, is_nfd, is_nfd_quick,
is_nfd_stream_safe, is_nfd_stream_safe_quick, is_nfkc, is_nfkc_quick, is_nfkd, is_nfkd_quick,
IsNormalized,
is_nfc,
is_nfc_quick,
is_nfkc,
is_nfkc_quick,
is_nfc_stream_safe,
is_nfc_stream_safe_quick,
is_nfd,
is_nfd_quick,
is_nfkd,
is_nfkd_quick,
is_nfd_stream_safe,
is_nfd_stream_safe_quick,
};
pub use recompose::Recompositions;
pub use stream_safe::StreamSafe;
use std::str::Chars;
pub use crate::recompose::Recompositions;
pub use crate::stream_safe::StreamSafe;
pub use crate::tables::UNICODE_VERSION;
use core::str::Chars;

mod no_std_prelude;

mod decompose;
mod lookups;
mod normalize;
mod perfect_hash;
mod recompose;
mod quick_check;
mod recompose;
mod stream_safe;

#[rustfmt::skip]
mod tables;

#[cfg(test)]
mod test;
#[doc(hidden)]
pub mod __test_api;
#[cfg(test)]
mod test;

/// Methods for composing and decomposing characters.
pub mod char {
pub use normalize::{decompose_canonical, decompose_compatible, compose};
pub use crate::normalize::{compose, decompose_canonical, decompose_compatible};

pub use lookups::{canonical_combining_class, is_combining_mark};
pub use crate::lookups::{canonical_combining_class, is_combining_mark};
}


/// Methods for iterating over strings while applying Unicode normalizations
/// as described in
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
pub trait UnicodeNormalization<I: Iterator<Item=char>> {
pub trait UnicodeNormalization<I: Iterator<Item = char>> {
/// Returns an iterator over the string in Unicode Normalization Form D
/// (canonical decomposition).
#[inline]
fn nfd(self) -> Decompositions<I>;

/// Returns an iterator over the string in Unicode Normalization Form KD
/// (compatibility decomposition).
#[inline]
fn nfkd(self) -> Decompositions<I>;

/// An Iterator over the string in Unicode Normalization Form C
/// (canonical decomposition followed by canonical composition).
#[inline]
fn nfc(self) -> Recompositions<I>;

/// An Iterator over the string in Unicode Normalization Form KC
/// (compatibility decomposition followed by canonical composition).
#[inline]
fn nfkc(self) -> Recompositions<I>;

/// An Iterator over the string with Conjoining Grapheme Joiner characters
/// inserted according to the Stream-Safe Text Process (UAX15-D4)
#[inline]
fn stream_safe(self) -> StreamSafe<I>;
}

Expand Down Expand Up @@ -143,7 +140,7 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
}
}

impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I {
#[inline]
fn nfd(self) -> Decompositions<I> {
decompose::new_canonical(self)
Expand Down
67 changes: 51 additions & 16 deletions src/lookups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,81 @@

//! Lookups of unicode properties using minimal perfect hashing.

use perfect_hash::mph_lookup;
use tables::*;
use crate::perfect_hash::mph_lookup;
use crate::tables::*;

/// Look up the canonical combining class for a codepoint.
///
///
/// The value returned is as defined in the Unicode Character Database.
pub fn canonical_combining_class(c: char) -> u8 {
mph_lookup(c.into(), CANONICAL_COMBINING_CLASS_SALT, CANONICAL_COMBINING_CLASS_KV,
u8_lookup_fk, u8_lookup_fv, 0)
mph_lookup(
c.into(),
CANONICAL_COMBINING_CLASS_SALT,
CANONICAL_COMBINING_CLASS_KV,
u8_lookup_fk,
u8_lookup_fv,
0,
)
}

/// Returns the composition-table entry for the character pair (`c1`, `c2`),
/// if there is one.
///
/// When both characters are below U+10000, the pair is packed into a single
/// `u32` key (`c1` in the high 16 bits, `c2` in the low 16 bits) and looked up
/// in `COMPOSITION_TABLE_KV` through `mph_lookup`. Otherwise the lookup is
/// delegated to `composition_table_astral`.
// NOTE(review): the trailing `None` argument is presumably the value returned
// when the key is absent from the table — confirm against `mph_lookup`.
pub(crate) fn composition_table(c1: char, c2: char) -> Option<char> {
if c1 < '\u{10000}' && c2 < '\u{10000}' {
mph_lookup((c1 as u32) << 16 | (c2 as u32),
COMPOSITION_TABLE_SALT, COMPOSITION_TABLE_KV,
pair_lookup_fk, pair_lookup_fv_opt, None)
mph_lookup(
(c1 as u32) << 16 | (c2 as u32),
COMPOSITION_TABLE_SALT,
COMPOSITION_TABLE_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
} else {
composition_table_astral(c1, c2)
}
}

/// Looks up `c` in `CANONICAL_DECOMPOSED_KV` through `mph_lookup`, yielding a
/// static slice of characters when the lookup produces an entry.
// NOTE(review): presumably the slice is the full canonical decomposition of
// `c`, and `None` means `c` has no such decomposition — confirm against the
// generated tables.
pub(crate) fn canonical_fully_decomposed(c: char) -> Option<&'static [char]> {
mph_lookup(c.into(), CANONICAL_DECOMPOSED_SALT, CANONICAL_DECOMPOSED_KV,
pair_lookup_fk, pair_lookup_fv_opt, None)
mph_lookup(
c.into(),
CANONICAL_DECOMPOSED_SALT,
CANONICAL_DECOMPOSED_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
}

/// Looks up `c` in `COMPATIBILITY_DECOMPOSED_KV` through `mph_lookup`,
/// yielding a static slice of characters when the lookup produces an entry.
// NOTE(review): presumably the slice is the full compatibility decomposition
// of `c`, and `None` means `c` has no such decomposition — confirm against
// the generated tables.
pub(crate) fn compatibility_fully_decomposed(c: char) -> Option<&'static [char]> {
mph_lookup(c.into(), COMPATIBILITY_DECOMPOSED_SALT, COMPATIBILITY_DECOMPOSED_KV,
pair_lookup_fk, pair_lookup_fv_opt, None)
mph_lookup(
c.into(),
COMPATIBILITY_DECOMPOSED_SALT,
COMPATIBILITY_DECOMPOSED_KV,
pair_lookup_fk,
pair_lookup_fv_opt,
None,
)
}

/// Return whether the given character is a combining mark (`General_Category=Mark`)
pub fn is_combining_mark(c: char) -> bool {
mph_lookup(c.into(), COMBINING_MARK_SALT, COMBINING_MARK_KV,
bool_lookup_fk, bool_lookup_fv, false)
mph_lookup(
c.into(),
COMBINING_MARK_SALT,
COMBINING_MARK_KV,
bool_lookup_fk,
bool_lookup_fv,
false,
)
}

/// Looks up `c` in `TRAILING_NONSTARTERS_KV` through `mph_lookup` and casts
/// the result to `usize`.
// NOTE(review): presumably this is the number of trailing non-starters used
// by the Stream-Safe Text Process, with `0` returned for characters that are
// not in the table — confirm against `mph_lookup` and the table generator.
pub fn stream_safe_trailing_nonstarters(c: char) -> usize {
mph_lookup(c.into(), TRAILING_NONSTARTERS_SALT, TRAILING_NONSTARTERS_KV,
u8_lookup_fk, u8_lookup_fv, 0) as usize
mph_lookup(
c.into(),
TRAILING_NONSTARTERS_SALT,
TRAILING_NONSTARTERS_KV,
u8_lookup_fk,
u8_lookup_fv,
0,
) as usize
}

/// Extract the key in a 24 bit key and 8 bit value packed in a u32.
Expand Down
6 changes: 6 additions & 0 deletions src/no_std_prelude.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#[cfg(not(feature = "std"))]
pub use alloc::{
str::Chars,
string::{String, ToString},
vec::Vec,
};
Loading