resourceCommonInfo [ComponentId=‘clarin.eu:cr1:c_1396012485126’]:
identificationInfo [ComponentId=‘clarin.eu:cr1:c_1396012485125’]:
resourceName: NoWaC v 1.0 (Norwegian Web as Corpus)
description [xml:lang=‘en’]:
Web-based corpus of Bokmål Norwegian containing about 700 million
tokens. The corpus has been built by crawling, downloading and
processing web documents in the .no top-level internet domain between
November 2009 and January 2010. NoWaC has been built with permission
from the Norwegian Ministry of Culture (Kulturdepartementet).
There is no information about author, publisher, genre, etc. in the corpus.
NoWaC can be downloaded (scrambled version) or accessed through a search interface (Glossa).
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/prosjekter/nowac/index.html
PID: http://hdl.handle.net/11538/0000-0005-E7C0-D
distributionInfo [ComponentId=‘clarin.eu:cr1:c_1396012485124’]:
licenceInfo [ComponentId=‘clarin.eu:cr1:c_1396012485158’]:
distributionAccessMedium: downloadable
downloadLocation: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/prosjekter/nowac/index.html
licence [ComponentId=‘clarin.eu:cr1:c_1447674760330’]:
licenceFamily: Creative Commons (CC)
licenceName: Creative_Commons-BY-NC-SA (CC-BY-NC-SA)
licenceURL: http://creativecommons.org/licenses/by-nc-sa/2.0/
licensor:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName [xml:lang=‘en’]: University of Oslo
organizationName [xml:lang=‘no’]: Universitetet i Oslo
organizationShortName [xml:lang=‘no’]: UiO
organizationShortName [xml:lang=‘en’]: UoO
departmentName [xml:lang=‘en’]: Department of Linguistics and Scandinavian Studies
departmentName [xml:lang=‘no’]: Institutt for lingvistiske og nordiske studier (ILN)
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
distributionRightsHolder:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName [xml:lang=‘en’]: University of Oslo
organizationName [xml:lang=‘no’]: Universitetet i Oslo
organizationShortName [xml:lang=‘no’]: UiO
organizationShortName [xml:lang=‘en’]: UoO
departmentName [xml:lang=‘en’]: Department of Linguistics and Scandinavian Studies
departmentName [xml:lang=‘no’]: Institutt for lingvistiske og nordiske studier (ILN)
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
licenceInfo [ComponentId=‘clarin.eu:cr1:c_1396012485158’]:
distributionAccessMedium: accessibleThroughInterface
executionLocation: https://tekstlab.uio.no/glossa3/nowac_1_1
licence [ComponentId=‘clarin.eu:cr1:c_1447674760330’]:
licenceName: CLARIN_ACA-NC-LOC-ND
licenceURL:
https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaAca?ID=1&AFFIL=EDU&BY=1&NC=1&LOC=1&NORED=1&ND=1
nonStandardConditionsOfUse: The unscrambled corpus is accessible only through Glossa, a search and post-processing tool developed by the Text Laboratory.
licensor:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName [xml:lang=‘en’]: University of Oslo
organizationName [xml:lang=‘no’]: Universitetet i Oslo
organizationShortName [xml:lang=‘no’]: UiO
organizationShortName [xml:lang=‘en’]: UoO
departmentName [xml:lang=‘en’]: Department of Linguistics and Scandinavian Studies
departmentName [xml:lang=‘no’]: Institutt for lingvistiske og nordiske studier (ILN)
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
distributionRightsHolder:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName [xml:lang=‘en’]: University of Oslo
organizationName [xml:lang=‘no’]: Universitetet i Oslo
organizationShortName [xml:lang=‘no’]: UiO
organizationShortName [xml:lang=‘en’]: UoO
departmentName [xml:lang=‘en’]: Department of Linguistics and Scandinavian Studies
departmentName [xml:lang=‘no’]: Institutt for lingvistiske og nordiske studier (ILN)
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
iprHolder:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
personInfo [ComponentId=‘clarin.eu:cr1:c_1396012485192’]:
affiliation:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
departmentName: Department of Linguistics and Scandinavian Studies
contact:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
metadataInfo [ComponentId=‘clarin.eu:cr1:c_1407745711922’]:
metadataCreationDate: 2014-11-28
metadataLastDateUpdated: 2024-01-03
metadataCreator:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
personInfo [ComponentId=‘clarin.eu:cr1:c_1396012485192’]:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: kristin.hagen@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
versionInfo [ComponentId=‘clarin.eu:cr1:c_1430905751648’]:
resourceDocumentationInfo [ComponentId=‘clarin.eu:cr1:c_1355150532301’]:
documentationUnstructured [ComponentId=‘clarin.eu:cr1:c_1355150532302’]:
documentUnstructured: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/prosjekter/nowac/index.html
resourceCreationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711921’]:
creationStartDate: 2009-08-01
creationEndDate: 2010-12-31
resourceCreator:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
personInfo [ComponentId=‘clarin.eu:cr1:c_1396012485192’]:
affiliation:
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: emiguevara@gmail.com
fundingProject:
projectInfo [ComponentId=‘clarin.eu:cr1:c_1430905751647’]:
projectName: Emiliano Guevara's PhD project
fundingType: nationalFunds
relationInfo [ComponentId=‘clarin.eu:cr1:c_1396012485134’]:
resourceRelation [ComponentId=‘clarin.eu:cr1:c_1396012485137’]:
relatedResource [ComponentId=‘clarin.eu:cr1:c_1430905751623’] [ref=‘nowac-freq’] [role=‘source’] [relationRole=‘derivate-from’]:
referenceScope [ref=‘nowac-freq’]: externalResource
resourceReference [ref=‘nowac-freq’]: https://www.hf.uio.no/iln/english/about/organization/text-laboratory/projects/nowac/nowac-frequency.html
relatedResource [ComponentId=‘clarin.eu:cr1:c_1430905751623’] [ref=‘nowac-freq’] [relationRole=‘source’] [role=‘isPartOf’]:
referenceScope [ref=‘nowac-corpus’]: externalResource
relationType [ComponentId=‘clarin.eu:cr1:c_1396012485127’] [ref=‘nowac-freq’]:
relationName [ref=‘nowac-freq’]: derivate
corpusInfo [ComponentId=‘clarin.eu:cr1:c_1407745711878’]:
corpusType: Written Corpus
corpusPartInfo [ComponentId=‘clarin.eu:cr1:c_1407745711885’]:
corpusTextInfo [ComponentId=‘clarin.eu:cr1:c_1396012485188’]:
textFormatInfo [ComponentId=‘clarin.eu:cr1:c_1427452477072’]:
characterEncodingInfo [ComponentId=‘clarin.eu:cr1:c_1447674760355’]:
corpusPartGeneralInfo [ComponentId=‘clarin.eu:cr1:c_1407745711882’]:
lingualityInfo [ComponentId=‘clarin.eu:cr1:c_1355150532313’]:
lingualityType: monolingual
languageInfo [ComponentId=‘clarin.eu:cr1:c_1428388179423’]:
languageName: Norwegian Bokmål
sizeInfo [ComponentId=‘clarin.eu:cr1:c_1353678848785’]:
annotationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711924’]:
annotationType: morphosyntacticAnnotation-posTagging
annotationType: lemmatization
tagset: The Oslo Bergen-tagger tagset: http://tekstlab.uio.no/obt-ny/english/index.html
tagsetLanguageName: Norwegian Bokmål
annotationMode: automatic
annotationManualUnstructured [ComponentId=‘clarin.eu:cr1:c_1355150532325’]:
documentUnstructured: http://www.tekstlab.uio.no/obt-ny/english/index.html
annotationTool [ComponentId=‘clarin.eu:cr1:c_1355150532326’]:
targetResourceNameURI: The Oslo-Bergen Tagger: http://tekstlab.uio.no/obt-ny/english/index.html
classificationInfo [ComponentId=‘clarin.eu:cr1:c_1403588862809’]:
conformanceToClassificationScheme: other
genreInfo [ComponentId=‘clarin.eu:cr1:c_1407745711877’]:
unstandardisedGenre: scrambled web corpus/searchable web corpus
timeCoverageInfo [ComponentId=‘clarin.eu:cr1:c_1447674760358’]:
timeCoverage: November 2009 - January 2010