CMDI 1.1. Metadata
Header
MdCreator: Kristin Hagen
MdCreationDate:
MdSelfLink:
MdProfile: clarin.eu:cr1:p_1407745711925
MdCollectionDisplayName: Clarino - Textlab
Resources
ResourceProxyList:
JournalFileProxyList:
ResourceRelationList:
IsPartOfList:
Components
corpusProfile:
resourceCommonInfo [ComponentId=‘clarin.eu:cr1:c_1396012485126’]:
resourceType: corpus
identificationInfo [ComponentId=‘clarin.eu:cr1:c_1396012485125’]:
resourceName [xml:lang=‘nb’]: Norsk talespråkskorpus - Oslodelen
description [xml:lang=‘en’]: NoTa-Oslo is a speech corpus with interviews and conversations from 166 informants born and raised in Oslo and the Oslo area. The informants are carefully selected w.r.t. sociolinguistic variables and therefore representative in terms of age, gender, place of residence and education. NoTa-Oslo consists of approx. 900 000 words that are orthographically transcribed and morphologically tagged. The corpus is searchable in a specially designed search interface, and the transcriptions are linked to audio and video files.
resourceShortName [xml:lang=‘en’]: NoTa-Oslo
resourceShortName [xml:lang=‘nb’]: NoTa-Oslo
url: http://www.tekstlab.uio.no/nota/oslo/english.html
url: http://www.tekstlab.uio.no/nota/oslo/index.html
PID: http://hdl.handle.net/11538/0000-0005-E7CA-3
distributionInfo [ComponentId=‘clarin.eu:cr1:c_1396012485124’]:
licenceInfo [ComponentId=‘clarin.eu:cr1:c_1396012485158’]:
userCategory: Academic
distributionAccessMedium: accessibleThroughInterface
executionLocation: http://www.tekstlab.uio.no/nota/oslo/index.html
executionLocation: http://www.tekstlab.uio.no/nota/oslo/english.html
licence [ComponentId=‘clarin.eu:cr1:c_1447674760330’]:
licenceFamily: CLARIN
licenceName: CLARIN_ACA-NC-LOC-PRIV-ND-*
licenceURL: https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEulaAca?ID=1&AFFIL=EDU&BY=1&NC=1&LOC=1&PRIV=1&NORED=1&ND=1
conditionsOfUse: *
conditionsOfUse: BY
conditionsOfUse: ID
conditionsOfUse: LOC
conditionsOfUse: NC
conditionsOfUse: ND
conditionsOfUse: NORED
conditionsOfUse: PRIV
nonStandardConditionsOfUse: The corpus has audio and video recordings classified as personal data. In agreement with NSD, the Data Protection Official in Norway, the corpus is accessible only through Glossa, a search and post-processing tool developed by the Text Laboratory.
The video and audio excerpts given by the search interface cannot be shown in public unless you have an agreement with the Text Laboratory.
Please note that every individual researcher is responsible for treating the participants in the corpus with respect and sincerity. Furthermore, the participants must be kept anonymous in every published paper or other output.
licensor:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: organization
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName [xml:lang=‘en’]: University of Oslo
organizationName [xml:lang=‘no’]: Universitetet i Oslo
organizationShortName [xml:lang=‘no’]: UiO
organizationShortName [xml:lang=‘en’]: UoO
departmentName [xml:lang=‘en’]: Department of Linguistics and Scandinavian Studies
departmentName [xml:lang=‘no’]: Institutt for lingvistiske og nordiske studier (ILN)
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
distributionRightsHolder:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: organization
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName [xml:lang=‘en’]: University of Oslo
organizationName [xml:lang=‘no’]: Universitetet i Oslo
organizationShortName [xml:lang=‘no’]: UiO
organizationShortName [xml:lang=‘en’]: UoO
departmentName [xml:lang=‘en’]: Department of Linguistics and Scandinavian Studies
departmentName [xml:lang=‘no’]: Institutt for lingvistiske og nordiske studier (ILN)
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
contact:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: organization
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
metadataInfo [ComponentId=‘clarin.eu:cr1:c_1407745711922’]:
metadataCreationDate: 2014-11-26
metadataLastDateUpdated: 2017-06-08
metadataCreator:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: person
personInfo [ComponentId=‘clarin.eu:cr1:c_1396012485192’]:
surname: Hagen
givenName: Kristin
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: kristin.hagen@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
versionInfo [ComponentId=‘clarin.eu:cr1:c_1430905751648’]:
version: First version
validationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711923’]:
validated: true
validationType: content
validationMode: manual
validationModeDetails: The transcriptions are proofread against the audio files.
validationExtent: partial
validator:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: organization
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
resourceDocumentationInfo [ComponentId=‘clarin.eu:cr1:c_1355150532301’]:
documentationStructured [ComponentId=‘clarin.eu:cr1:c_1361876010648’]:
role: documentation
documentInfo [ComponentId=‘clarin.eu:cr1:c_1353678848788’]:
documentType: other
title: Norsk talespråkskorpus - Oslodelen
url: http://www.tekstlab.uio.no/nota/oslo/index.html
documentationStructured [ComponentId=‘clarin.eu:cr1:c_1361876010648’]:
role: documentation
documentInfo [ComponentId=‘clarin.eu:cr1:c_1353678848788’]:
documentType: inBook
title: Språk i Oslo. Ny forskning omkring talespråk.
editor: Janne Bondi Johannessen and Kristin Hagen
year: 2008
publisher: Novus forlag
bookTitle: Språk i Oslo. Ny forskning omkring talespråk.
ISBN: 978-82-7099-471-7
resourceCreationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711921’]:
creationStartDate: 2004-01-01
creationEndDate: 2006-12-31
resourceCreator:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: organization
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
fundingProject:
projectInfo [ComponentId=‘clarin.eu:cr1:c_1430905751647’]:
projectName [xml:lang=‘nb’]: Norsk talespråkskorpus - Oslodelen
projectShortName: NoTa-Oslo
url: http://www.tekstlab.uio.no/nota/oslo/index.html
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
fundingType: nationalFunds
funder: The Research Council of Norway
projectStartDate: 2004-01-01
projectEndDate: 2006-12-31
corpusInfo [ComponentId=‘clarin.eu:cr1:c_1407745711878’]:
corpusType: Multimodal Corpus
corpusPartInfo [ComponentId=‘clarin.eu:cr1:c_1407745711885’]:
mediaType: text
corpusTextInfo [ComponentId=‘clarin.eu:cr1:c_1396012485188’]:
textFormatInfo [ComponentId=‘clarin.eu:cr1:c_1427452477072’]:
mimeType: txt
sizePerTextFormat [ComponentId=‘clarin.eu:cr1:c_1447674760342’]:
sizeInfo [ComponentId=‘clarin.eu:cr1:c_1353678848785’]:
size: 908 057
sizeUnit: words
characterEncodingInfo [ComponentId=‘clarin.eu:cr1:c_1447674760355’]:
characterEncoding: latin1
corpusPartInfo [ComponentId=‘clarin.eu:cr1:c_1407745711885’]:
mediaType: video
corpusVideoInfo [ComponentId=‘clarin.eu:cr1:c_1407745711880’]:
videoContentInfo [ComponentId=‘clarin.eu:cr1:c_1360931019779’]:
typeOfVideoContent: Interviews and conversations from 166 informants born and raised in Oslo and the Oslo area.
textIncludedInVideo: none
dynamicElementInfo [ComponentId=‘clarin.eu:cr1:c_1360931019781’]:
bodyParts: arms
bodyParts: face
settingInfo [ComponentId=‘clarin.eu:cr1:c_1360230992162’]:
naturality: spontaneous
conversationalType: dialogue
audience: few
interactivity: overlapping
interaction: Two scenarios: one semiformal interview: research assistant and informant. One free conversation between two informants. Research assistants were often passively present in the room during the conversations to prevent conversations about sensitive matters
videoFormatInfo [ComponentId=‘clarin.eu:cr1:c_1427452477073’]:
mimeType: videos in mpeg4 streaming format available through Glossa
frameRate: 25
resolutionInfo [ComponentId=‘clarin.eu:cr1:c_1360931019784’]:
sizeWidth: 400
sizeHeight: 300
resolutionStandard: HD.720
compressionInfo [ComponentId=‘clarin.eu:cr1:c_1360230992165’]:
compression: true
compressionName: mpg
corpusPartInfo [ComponentId=‘clarin.eu:cr1:c_1407745711885’]:
mediaType: audio
corpusAudioInfo [ComponentId=‘clarin.eu:cr1:c_1404130561236’]:
audioSizeInfo [ComponentId=‘clarin.eu:cr1:c_1360230992160’]:
sizeInfo [ComponentId=‘clarin.eu:cr1:c_1353678848785’]:
size: Approx 40 GB
sizeUnit: gb
settingInfo [ComponentId=‘clarin.eu:cr1:c_1360230992162’]:
naturality: spontaneous
conversationalType: dialogue
audience: few
interactivity: overlapping
interaction: Two scenarios: one semiformal interview: research assistant and informant. One free conversation between two informants. Research assistants were often passively present in the room during the conversations to prevent conversations about sensitive matters
audioFormatInfo [ComponentId=‘clarin.eu:cr1:c_1427452477070’]:
mimeType: wav and mpeg4
signalEncoding: linearPCM
samplingRate: 32
quantization: 64
numberOfTracks: 1
recordingQuality: medium
compressionInfo [ComponentId=‘clarin.eu:cr1:c_1360230992165’]:
compression: true
compressionName: mpg
corpusPartGeneralInfo [ComponentId=‘clarin.eu:cr1:c_1407745711882’]:
personSourceSetInfo [ComponentId=‘clarin.eu:cr1:c_1360931019775’]:
numberOfPersons: 166
ageOfPersons: teenager
ageOfPersons: adult
ageOfPersons: elderly
ageRangeStart: 16
ageRangeEnd: 90
sexOfPersons: mixed
originOfPersons: native
dialectAccentOfPersons: Oslo dialect: half of the informants come from East Oslo, the other half from West Oslo
geographicDistributionOfPersons: Oslo and close Oslo area
lingualityInfo [ComponentId=‘clarin.eu:cr1:c_1355150532313’]:
lingualityType: monolingual
languageInfo [ComponentId=‘clarin.eu:cr1:c_1428388179423’]:
languageId: No
languageName: Norwegian
languageInfo [ComponentId=‘clarin.eu:cr1:c_1428388179423’]:
languageId: Nb
languageName: Norwegian Bokmål
modalityInfo [ComponentId=‘clarin.eu:cr1:c_1447674760356’]:
modalityType: spokenLanguage
modalityTypeDetails: Orthographic transcription
sizeInfo [ComponentId=‘clarin.eu:cr1:c_1353678848785’]:
size: 908 057
sizeUnit: words
annotationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711924’]:
annotationType: morphosyntacticAnnotation-posTagging
annotatedElements: other
segmentationLevel: word
tagset: POS tagset created for the statistical NoTa-tagger - based on the tagset of the Oslo Bergen Tagger.
tagsetLanguageId: nb
tagsetLanguageName: Norwegian Bokmål
theoreticModel: TreeTagger
annotationMode: automatic
annotationManualStructured [ComponentId=‘clarin.eu:cr1:c_1361876010647’]:
role: annotationManual
documentInfo [ComponentId=‘clarin.eu:cr1:c_1353678848788’]:
documentType: manual
title [xml:lang=‘nb’]: NoTa-taggeren: TAGGEVEILEDNING
author: Åshild Søfteland
year: 2007
url: http://www.tekstlab.uio.no/nota/oslo/Taggeveiledning2.pdf
documentLanguageName: Norwegian bokmål
documentLanguageId: nb
annotationManualStructured [ComponentId=‘clarin.eu:cr1:c_1361876010647’]:
role: annotationManual
documentInfo [ComponentId=‘clarin.eu:cr1:c_1353678848788’]:
documentType: article
title [xml:lang=‘en’]: Tagging a Norwegian Speech Corpus
author: Anders Nøklestad and Åshild Søfteland
editor: Joakim Nivre, Heiki-Jaan Kaalep, Kadri Muischnek, Mare Koit
year: 2007
bookTitle: Proceedings of the 16th Nordic Conference of Computational Linguistics NODALIDA-2007
pages: 245–248
conference: Nodalida 2007
documentLanguageName: English
documentLanguageId: en
annotationManualStructured [ComponentId=‘clarin.eu:cr1:c_1361876010647’]:
role: annotationManual
documentInfo [ComponentId=‘clarin.eu:cr1:c_1353678848788’]:
documentType: article
title [xml:lang=‘nb’]: Manuell morfologisk tagging av NoTa-materialet med støtte fra en statistisk tagger.
author: Åshild Søfteland og Anders Nøklestad
editor: Janne Bondi Johannessen og Kristin Hagen
year: 2008
publisher: Novus forlag
bookTitle: Språk i Oslo. Ny forskning omkring talespråk
pages: 226–234.
ISBN: 978-82-7099-471-7
documentLanguageName: Norwegian
documentLanguageId: nb
annotationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711924’]:
annotationType: speechAnnotation-orthographicTranscription
annotationManualUnstructured [ComponentId=‘clarin.eu:cr1:c_1355150532325’]:
role: annotationManual
documentUnstructured: Orthographic transcription, cf. Bokmålsordboka (Wangensteen 2004)
annotationManualStructured [ComponentId=‘clarin.eu:cr1:c_1361876010647’]:
role: annotationManual
documentInfo [ComponentId=‘clarin.eu:cr1:c_1353678848788’]:
documentType: manual
title [xml:lang=‘nb’]: Transkripsjonsveiledning for NoTa-Oslo
author: Kristin Hagen
year: 2008
url: http://www.tekstlab.uio.no/nota/oslo/transkripsjon/NoTa-transkripsjonsveil22.pdf
annotationTool [ComponentId=‘clarin.eu:cr1:c_1355150532326’]:
targetResourceNameURI: Transcriber (http://trans.sourceforge.net/en/presentation.php)
classificationInfo [ComponentId=‘clarin.eu:cr1:c_1403588862809’]:
genreInfo [ComponentId=‘clarin.eu:cr1:c_1407745711877’]:
genreType: speechGenre
genre: informal
unstandardisedGenre: conversations
classificationInfo [ComponentId=‘clarin.eu:cr1:c_1403588862809’]:
genreInfo [ComponentId=‘clarin.eu:cr1:c_1407745711877’]:
genreType: speechGenre
genre: semi formal
unstandardisedGenre: interviews
timeCoverageInfo [ComponentId=‘clarin.eu:cr1:c_1447674760358’]:
timeCoverage: 2004 - 2006
geographicCoverageInfo [ComponentId=‘clarin.eu:cr1:c_1447674760357’]:
geographicCoverage: Oslo and the Oslo area
recordingInfo [ComponentId=‘clarin.eu:cr1:c_1426673949970’]:
recordingDeviceType: tapeVHS
recordingEnvironment: office
recordingEnvironment: closedPublicPlace
recordingEnvironment: conferenceRoom
recordingEnvironment: lectureRoom
recordingEnvironment: other
recorderActor:
actorInfo [ComponentId=‘clarin.eu:cr1:c_1396012485194’]:
actorType: organization
organizationInfo [ComponentId=‘clarin.eu:cr1:c_1407745711883’]:
organizationName: The Text Laboratory
organizationShortName: Textlab
departmentName: Department of Linguistics and Scandinavian Studies, University of Oslo
communicationInfo [ComponentId=‘clarin.eu:cr1:c_1352813745460’]:
email: tekstlab-post@iln.uio.no
url: http://www.hf.uio.no/iln/om/organisasjon/tekstlab/
address: Box 1102 Blindern
zipCode: 0317
city: OSLO
country: Norway
captureInfo [ComponentId=‘clarin.eu:cr1:c_1407745712025’]:
capturingDeviceType: microphone
capturingDeviceType: camera