GeniHRN.pm.in 4.85 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
#!/usr/bin/perl -wT
#
# GENIPUBLIC-COPYRIGHT
# Copyright (c) 2009 University of Utah and the Flux Group.
# All rights reserved.
#
package GeniHRN;

use strict;
use Exporter;
use vars qw(@ISA @EXPORT);

@ISA    = "Exporter";
@EXPORT = qw ( );

# References:
#
#   GMOC Proposal: "Use URN as GENI identifiers" version 0.2, Viecco, 2009
#   RFC 2141, "URN Syntax", Moats, 1997
#   RFC 3151, "A URN Namespace for Public Identifiers", Walsh, 2001
#   RFC 3986, "URI Generic Syntax", Berners-Lee, 2005
#   RFC 3987, "Internationalised Resource Identifiers", Duerst, 2005
#   RFC 4343, "DNS Case Insensitivity Clarification", Eastlake, 2006

sub Unescape($)
{
    my ($uri) = @_;

    my $norm = "";
    while( $uri =~ /^([^%]*)%([0-9A-Fa-f]{2})(.*)$/ ) {
	$norm .= $1;
	my $val = hex( $2 );
	# Transform %-encoded sequences back to unreserved characters
	# where possible (see RFC 3986, section 2.3).
	if( $val == 0x2D || $val == 0x2E ||
	    ( $val >= 0x30 && $val <= 0x39 ) ||
	    ( $val >= 0x41 && $val <= 0x5A ) ||
	    $val == 0x5F ||
	    ( $val >= 0x61 && $val <= 0x7A ) ||
	    $val == 0x7E ) {
	    $norm .= chr( $val );
	} else {
	    $norm .= "%" . $2;
	}
	$uri = $3;
    }
    $norm .= $uri;

    return $norm;
}

sub IsValid($)
{
    my ($hrn) = @_;

    # Reject %00 sequences (see RFC 3986, section 7.3).
    return undef if $hrn =~ /%00/;

    # We accept ANY other %-encoded octet (following RFC 3987, section 5.3.2.3
    # in favour of RFC 2141, section 5, which specifies the opposite).
    $hrn = Unescape( $hrn );

    # The "urn" prefix is case-insensitive (see RFC 2141, section 2).
    # The "publicid" NID is case-insensitive (see RFC 2141, section 3).
    # The "IDN" specified by Viecco is believed to be case-sensitive (no
    #   authoritative reference known).
    # We regard Viecco's optional resource-type specifier as being
    #   mandatory: partly to avoid ambiguity between resource type
    #   namespaces, and partly to avoid ambiguity between a resource-type
    #   and a resource-name containing (escaped) whitespace.
    return $hrn =~ m'^[uU][rR][nN]:[pP][uU][bB][lL][iI][cC][iI][dD]:IDN\+[A-Za-z0-9.-]+(?::[A-Za-z0-9.-]+)*\+\w+\+(?:[-!$()*,.0-9=@A-Z_a-z]|(?:%[0-9A-Fa-f][0-9A-Fa-f]))+$';
}

# Break a URN into (sub-)authority, type, and ID components.  There
# might be further structure in the authority part, but we'll ignore
# that for now.
sub Parse($)
{
    my ($hrn) = @_;

    return undef if !IsValid( $hrn );

    $hrn = Unescape( $hrn );

    $hrn =~ /^[^+]*\+([^+]+)\+([^+]+)\+(.+)$/;

    return ($1, $2, $3);
}

# Generate a ProtoGENI URN.  Note that this is more restrictive than
# the general GENI naming scheme requires: we rely on the fact that
# Emulab identifiers are derived from very limited character sets to
# obtain the guarantee that escaping is never required.  (See the
# tbdb database regex table.)  This approach should suffice even
# if and when other GENI implementations decide to distribute URNs
# containing exotic characters, as long as we are careful to treat
# foreign URNs as opaque.
sub Generate($$$)
{
    my ($authority, $type, $id) = @_;

    # Assume that any sub-authorities are already encoded (see
    # RFC 3151, section 2).  We don't currently handle sub-authorities,
    # so this is irrelevant for now.
    $authority =~ tr/A-Z/a-z/;
    return undef if $authority !~ /^[-.0-9A-Za-z:]+$/;
    return undef if $type !~ /^[-.0-9A-Z_a-z~]+$/;
    return undef if $id !~ /^[-.0-9A-Z_a-z~]+$/;

    return "urn:publicid:IDN+" . $authority . "+" . $type . "+" . $id;
}

# Apply scheme-based (and other) normalisations to a URN (see RFC 3987,
# section 5.3).  This is conformant to RFC 2141, section 5 (we recognise
# all of those lexical equivalences, and introduce additional ones as
# is permitted).  We do not perform deep interpretation of the URN, so
# this procedure can and should be applied to foreign (non-ProtoGENI) URNs.
sub Normalise($)
{
    my ($hrn) = @_;

    return undef if !IsValid( $hrn );

    my ($authority, $type, $id) = Parse( $hrn );

    # Apply case normalisation to the authority; see RFC 3987, section
    # 5.3.2.1.  According to section 5.3.3, we are supposed to go
    # further and perform RFC 3490 ToASCII UseSTD3ASCIIRules and
    # AllowUnassigned and RFC 3491 Nameprep validation to interpret IRIs,
    # but quite frankly I think I've done more than enough RFC chasing already.
    $authority =~ tr/A-Z/a-z/;

    return "urn:publicid:IDN+" . $authority . "+" . $type . "+" . $id;
}

sub Equal($$)
{
    my ($hrn0, $hrn1) = @_;

    return undef if !IsValid( $hrn0 ) || !IsValid( $hrn1 );

    my $norm0 = Normalise( $hrn0 );
    my $norm1 = Normalise( $hrn1 );

    return $norm0 eq $norm1;
}

sub Authoritative($$)
{
    my ($hrn, $authority) = @_;

    $authority =~ tr/A-Z/a-z/;
    my @hrn = Parse( $hrn );
    $hrn[ 0 ] =~ tr/A-Z/a-z/;

    return $hrn[ 0 ] eq $authority;
}

# _Always_ make sure that this 1 is at the end of the file...
1;